first commit

This commit is contained in:
H
2026-04-03 13:01:19 +08:00
commit 538eced414
2575 changed files with 645911 additions and 0 deletions

View File

@@ -0,0 +1,306 @@
import { logEvent } from '../services/analytics/index.js'
import { logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import { errorMessage } from '../utils/errors.js'
import { jsonParse } from '../utils/slowOperations.js'
/** Format a millisecond duration as a human-readable string (e.g. "5m 30s"). */
/** 将毫秒持续时间格式化为人类可读的字符串(例如 "5m 30s")。 */
function formatDuration(ms: number): string {
if (ms < 60_000) return `${Math.round(ms / 1000)}s`
const m = Math.floor(ms / 60_000)
const s = Math.round((ms % 60_000) / 1000)
return s > 0 ? `${m}m ${s}s` : `${m}m`
}
/**
* Decode a JWT's payload segment without verifying the signature.
* Strips the `sk-ant-si-` session-ingress prefix if present.
* Returns the parsed JSON payload as `unknown`, or `null` if the
* token is malformed or the payload is not valid JSON.
*/
/**
* 在不验证签名的情况下解码 JWT 的 payload 段。
* 如果存在,剥离 `sk-ant-si-` session-ingress 前缀。
* 返回解析的 JSON payload 为 `unknown`,如果
* token 格式错误或 payload 不是有效 JSON 则返回 `null`。
*/
export function decodeJwtPayload(token: string): unknown | null {
const jwt = token.startsWith('sk-ant-si-')
? token.slice('sk-ant-si-'.length)
: token
const parts = jwt.split('.')
if (parts.length !== 3 || !parts[1]) return null
try {
return jsonParse(Buffer.from(parts[1], 'base64url').toString('utf8'))
} catch {
return null
}
}
/**
* Decode the `exp` (expiry) claim from a JWT without verifying the signature.
* @returns The `exp` value in Unix seconds, or `null` if unparseable
*/
/**
* 在不验证签名的情况下从 JWT 解码 `exp`过期claim。
* @returns Unix 秒中的 `exp` 值,如果无法解析则返回 `null`
*/
export function decodeJwtExpiry(token: string): number | null {
const payload = decodeJwtPayload(token)
if (
payload !== null &&
typeof payload === 'object' &&
'exp' in payload &&
typeof payload.exp === 'number'
) {
return payload.exp
}
return null
}
/** Refresh buffer: request a new token before expiry. */
/** 刷新缓冲区:在过期前请求新 token。 */
const TOKEN_REFRESH_BUFFER_MS = 5 * 60 * 1000
/** Fallback refresh interval when the new token's expiry is unknown. */
/** 当新 token 的过期时间未知时的回退刷新间隔。 */
const FALLBACK_REFRESH_INTERVAL_MS = 30 * 60 * 1000 // 30 minutes
/** Max consecutive failures before giving up on the refresh chain. */
/** 在放弃刷新链之前最大连续失败次数。 */
const MAX_REFRESH_FAILURES = 3
/** Retry delay when getAccessToken returns undefined. */
/** 当 getAccessToken 返回 undefined 时的重试延迟。 */
const REFRESH_RETRY_DELAY_MS = 60_000
/**
* Creates a token refresh scheduler that proactively refreshes session tokens
* before they expire. Used by both the standalone bridge and the REPL bridge.
*
* When a token is about to expire, the scheduler calls `onRefresh` with the
* session ID and the bridge's OAuth access token. The caller is responsible
* for delivering the token to the appropriate transport (child process stdin
* for standalone bridge, WebSocket reconnect for REPL bridge).
*/
/**
* 创建一个 token 刷新调度器,在 session tokens 过期前主动刷新。
* 独立 bridge 和 REPL bridge 都使用。
*
* 当 token 即将过期时,调度器使用 session ID 和 bridge 的 OAuth access token
* 调用 `onRefresh`。调用者负责将 token 传递到适当的传输
*(独立 bridge 的子进程 stdinREPL bridge 的 WebSocket 重连)。
*/
export function createTokenRefreshScheduler({
getAccessToken,
onRefresh,
label,
refreshBufferMs = TOKEN_REFRESH_BUFFER_MS,
}: {
getAccessToken: () => string | undefined | Promise<string | undefined>
onRefresh: (sessionId: string, oauthToken: string) => void
label: string
/** How long before expiry to fire refresh. Defaults to 5 min. */
/** 过期前多久触发刷新。默认为 5 分钟。 */
refreshBufferMs?: number
}): {
schedule: (sessionId: string, token: string) => void
scheduleFromExpiresIn: (sessionId: string, expiresInSeconds: number) => void
cancel: (sessionId: string) => void
cancelAll: () => void
} {
const timers = new Map<string, ReturnType<typeof setTimeout>>()
const failureCounts = new Map<string, number>()
// Generation counter per session — incremented by schedule() and cancel()
// so that in-flight async doRefresh() calls can detect when they've been
// superseded and should skip setting follow-up timers.
// 每个会话的 generation 计数器 — 由 schedule() 和 cancel() 递增,
// 以便 in-flight async doRefresh() 调用可以检测何时被取代,
// 并应跳过设置后续计时器。
const generations = new Map<string, number>()
function nextGeneration(sessionId: string): number {
const gen = (generations.get(sessionId) ?? 0) + 1
generations.set(sessionId, gen)
return gen
}
function schedule(sessionId: string, token: string): void {
const expiry = decodeJwtExpiry(token)
if (!expiry) {
// Token is not a decodable JWT (e.g. an OAuth token passed from the
// REPL bridge WebSocket open handler). Preserve any existing timer
// (such as the follow-up refresh set by doRefresh) so the refresh
// chain is not broken.
// Token 不是可解码的 JWT例如从 REPL bridge WebSocket 打开处理程序
// 传递的 OAuth token。保留任何现有计时器
//(如 doRefresh 设置的后续刷新),以便刷新链不断裂。
logForDebugging(
`[${label}:token] Could not decode JWT expiry for sessionId=${sessionId}, token prefix=${token.slice(0, 15)}…, keeping existing timer`,
)
return
}
// Clear any existing refresh timer — we have a concrete expiry to replace it.
// 清除任何现有刷新计时器 — 我们有具体过期时间来替换它。
const existing = timers.get(sessionId)
if (existing) {
clearTimeout(existing)
}
// Bump generation to invalidate any in-flight async doRefresh.
// 增加 generation 以使任何 in-flight async doRefresh 无效。
const gen = nextGeneration(sessionId)
const expiryDate = new Date(expiry * 1000).toISOString()
const delayMs = expiry * 1000 - Date.now() - refreshBufferMs
if (delayMs <= 0) {
logForDebugging(
`[${label}:token] Token for sessionId=${sessionId} expires=${expiryDate} (past or within buffer), refreshing immediately`,
)
void doRefresh(sessionId, gen)
return
}
logForDebugging(
`[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires=${expiryDate}, buffer=${refreshBufferMs / 1000}s)`,
)
const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
timers.set(sessionId, timer)
}
/**
* Schedule refresh using an explicit TTL (seconds until expiry) rather
* than decoding a JWT's exp claim. Used by callers whose JWT is opaque
* (e.g. POST /v1/code/sessions/{id}/bridge returns expires_in directly).
*/
/**
* 使用明确的 TTL到过期的秒数调度刷新
* 而不是解码 JWT 的 exp claim。用于 JWT 不透明的调用者
*(例如 POST /v1/code/sessions/{id}/bridge 直接返回 expires_in
*/
function scheduleFromExpiresIn(
sessionId: string,
expiresInSeconds: number,
): void {
const existing = timers.get(sessionId)
if (existing) clearTimeout(existing)
const gen = nextGeneration(sessionId)
// Clamp to 30s floor — if refreshBufferMs exceeds the server's expires_in
// (e.g. very large buffer for frequent-refresh testing, or server shortens
// expires_in unexpectedly), unclamped delayMs ≤ 0 would tight-loop.
// 钳制到 30s 下限 — 如果 refreshBufferMs 超过服务器的 expires_in
//(例如用于频繁刷新测试的非常大缓冲区,或服务器意外缩短
// expires_in未钳制的 delayMs ≤ 0 会 tight-loop。
const delayMs = Math.max(expiresInSeconds * 1000 - refreshBufferMs, 30_000)
logForDebugging(
`[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires_in=${expiresInSeconds}s, buffer=${refreshBufferMs / 1000}s)`,
)
const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
timers.set(sessionId, timer)
}
async function doRefresh(sessionId: string, gen: number): Promise<void> {
let oauthToken: string | undefined
try {
oauthToken = await getAccessToken()
} catch (err) {
logForDebugging(
`[${label}:token] getAccessToken threw for sessionId=${sessionId}: ${errorMessage(err)}`,
{ level: 'error' },
)
}
// If the session was cancelled or rescheduled while we were awaiting,
// the generation will have changed — bail out to avoid orphaned timers.
// 如果会话在我们等待时被取消或重新调度generation 将已更改 —
// 跳出以避免孤立的计时器。
if (generations.get(sessionId) !== gen) {
logForDebugging(
`[${label}:token] doRefresh for sessionId=${sessionId} stale (gen ${gen} vs ${generations.get(sessionId)}), skipping`,
)
return
}
if (!oauthToken) {
const failures = (failureCounts.get(sessionId) ?? 0) + 1
failureCounts.set(sessionId, failures)
logForDebugging(
`[${label}:token] No OAuth token available for refresh, sessionId=${sessionId} (failure ${failures}/${MAX_REFRESH_FAILURES})`,
{ level: 'error' },
)
logForDiagnosticsNoPII('error', 'bridge_token_refresh_no_oauth')
// Schedule a retry so the refresh chain can recover if the token
// becomes available again (e.g. transient cache clear during refresh).
// Cap retries to avoid spamming on genuine failures.
// 调度重试,以便如果 token 再次可用(例如刷新期间瞬态缓存清除),
// 刷新链可以恢复。限制重试次数以避免在真正失败时轰炸。
if (failures < MAX_REFRESH_FAILURES) {
const retryTimer = setTimeout(
doRefresh,
REFRESH_RETRY_DELAY_MS,
sessionId,
gen,
)
timers.set(sessionId, retryTimer)
}
return
}
// Reset failure counter on successful token retrieval
// 成功获取 token 时重置失败计数器
failureCounts.delete(sessionId)
logForDebugging(
`[${label}:token] Refreshing token for sessionId=${sessionId}: new token prefix=${oauthToken.slice(0, 15)}`,
)
logEvent('tengu_bridge_token_refreshed', {})
onRefresh(sessionId, oauthToken)
// Schedule a follow-up refresh so long-running sessions stay authenticated.
// Without this, the initial one-shot timer leaves the session vulnerable
// to token expiry if it runs past the first refresh window.
// 调度后续刷新,以便长运行会话保持认证。
// 没有这个,初始一次性计时器使会话容易在运行超过第一个刷新窗口时
// token 过期。
const timer = setTimeout(
doRefresh,
FALLBACK_REFRESH_INTERVAL_MS,
sessionId,
gen,
)
timers.set(sessionId, timer)
logForDebugging(
`[${label}:token] Scheduled follow-up refresh for sessionId=${sessionId} in ${formatDuration(FALLBACK_REFRESH_INTERVAL_MS)}`,
)
}
function cancel(sessionId: string): void {
// Bump generation to invalidate any in-flight async doRefresh.
// 增加 generation 以使任何 in-flight async doRefresh 无效。
nextGeneration(sessionId)
const timer = timers.get(sessionId)
if (timer) {
clearTimeout(timer)
timers.delete(sessionId)
}
failureCounts.delete(sessionId)
}
function cancelAll(): void {
// Bump all generations so in-flight doRefresh calls are invalidated.
// 增加所有 generations以便 in-flight doRefresh 调用无效。
for (const sessionId of generations.keys()) {
nextGeneration(sessionId)
}
for (const timer of timers.values()) {
clearTimeout(timer)
}
timers.clear()
failureCounts.clear()
}
return { schedule, scheduleFromExpiresIn, cancel, cancelAll }
}