import { logEvent } from '../services/analytics/index.js'
import { logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import { errorMessage } from '../utils/errors.js'
import { jsonParse } from '../utils/slowOperations.js'

/**
 * Format a millisecond duration as a human-readable string (e.g. "5m 30s").
 *
 * The duration is rounded to whole seconds *before* it is split into minutes
 * and seconds, so values just below a minute boundary carry over ("1m")
 * instead of rendering as "60s" or "1m 60s".
 */
function formatDuration(ms: number): string {
  const totalSeconds = Math.round(ms / 1000)
  if (totalSeconds < 60) return `${totalSeconds}s`
  const m = Math.floor(totalSeconds / 60)
  const s = totalSeconds % 60
  return s > 0 ? `${m}m ${s}s` : `${m}m`
}

/**
 * Decode a JWT's payload segment without verifying the signature.
 * Strips the `sk-ant-si-` session-ingress prefix if present.
 * Returns the parsed JSON payload as `unknown`, or `null` if the
 * token is malformed or the payload is not valid JSON.
 */
export function decodeJwtPayload(token: string): unknown | null {
  const jwt = token.startsWith('sk-ant-si-')
    ? token.slice('sk-ant-si-'.length)
    : token
  const parts = jwt.split('.')
  // A JWT has exactly three dot-separated segments; the payload is the second.
  if (parts.length !== 3 || !parts[1]) return null
  try {
    return jsonParse(Buffer.from(parts[1], 'base64url').toString('utf8'))
  } catch {
    return null
  }
}

/**
 * Decode the `exp` (expiry) claim from a JWT without verifying the signature.
 * @returns The `exp` value in Unix seconds, or `null` if unparseable
 */
export function decodeJwtExpiry(token: string): number | null {
  const payload = decodeJwtPayload(token)
  if (
    payload !== null &&
    typeof payload === 'object' &&
    'exp' in payload &&
    typeof payload.exp === 'number'
  ) {
    return payload.exp
  }
  return null
}

/** Refresh buffer: request a new token before expiry. */
const TOKEN_REFRESH_BUFFER_MS = 5 * 60 * 1000

/** Fallback refresh interval when the new token's expiry is unknown. */
const FALLBACK_REFRESH_INTERVAL_MS = 30 * 60 * 1000 // 30 minutes

/** Max consecutive failures before giving up on the refresh chain. */
const MAX_REFRESH_FAILURES = 3

/** Retry delay when getAccessToken returns undefined. */
const REFRESH_RETRY_DELAY_MS = 60_000

/**
 * Creates a token refresh scheduler that proactively refreshes session tokens
 * before they expire. Used by both the standalone bridge and the REPL bridge.
 *
 * When a token is about to expire, the scheduler calls `onRefresh` with the
 * session ID and the bridge's OAuth access token. The caller is responsible
 * for delivering the token to the appropriate transport (child process stdin
 * for standalone bridge, WebSocket reconnect for REPL bridge).
 *
 * @returns An object with `schedule` (derive the refresh time from a JWT's
 *   `exp` claim), `scheduleFromExpiresIn` (derive it from an explicit TTL in
 *   seconds), `cancel` (stop refreshing one session) and `cancelAll`.
 */
export function createTokenRefreshScheduler({
  getAccessToken,
  onRefresh,
  label,
  refreshBufferMs = TOKEN_REFRESH_BUFFER_MS,
}: {
  getAccessToken: () => string | undefined | Promise<string | undefined>
  onRefresh: (sessionId: string, oauthToken: string) => void
  label: string
  /** How long before expiry to fire refresh. Defaults to 5 min. */
  refreshBufferMs?: number
}): {
  schedule: (sessionId: string, token: string) => void
  scheduleFromExpiresIn: (sessionId: string, expiresInSeconds: number) => void
  cancel: (sessionId: string) => void
  cancelAll: () => void
} {
  // Pending refresh/retry timer per session.
  const timers = new Map<string, ReturnType<typeof setTimeout>>()
  // Consecutive "no OAuth token" failures per session.
  const failureCounts = new Map<string, number>()
  // Generation counter per session — incremented by schedule() and cancel()
  // so that in-flight async doRefresh() calls can detect when they've been
  // superseded and should skip setting follow-up timers.
  const generations = new Map<string, number>()

  /** Bump and return the generation counter for `sessionId`. */
  function nextGeneration(sessionId: string): number {
    const gen = (generations.get(sessionId) ?? 0) + 1
    generations.set(sessionId, gen)
    return gen
  }

  /**
   * Schedule a refresh `refreshBufferMs` before the expiry encoded in the
   * token's `exp` claim. Refreshes immediately when that moment has already
   * passed; leaves any existing timer untouched when the expiry cannot be
   * decoded.
   */
  function schedule(sessionId: string, token: string): void {
    const expiry = decodeJwtExpiry(token)
    if (!expiry) {
      // Token is not a decodable JWT (e.g. an OAuth token passed from the
      // REPL bridge WebSocket open handler). Preserve any existing timer
      // (such as the follow-up refresh set by doRefresh) so the refresh
      // chain is not broken.
      logForDebugging(
        `[${label}:token] Could not decode JWT expiry for sessionId=${sessionId}, token prefix=${token.slice(0, 15)}…, keeping existing timer`,
      )
      return
    }

    // Clear any existing refresh timer — we have a concrete expiry to replace it.
    const existing = timers.get(sessionId)
    if (existing) {
      clearTimeout(existing)
    }

    // Bump generation to invalidate any in-flight async doRefresh.
    const gen = nextGeneration(sessionId)

    const expiryDate = new Date(expiry * 1000).toISOString()
    const delayMs = expiry * 1000 - Date.now() - refreshBufferMs
    if (delayMs <= 0) {
      logForDebugging(
        `[${label}:token] Token for sessionId=${sessionId} expires=${expiryDate} (past or within buffer), refreshing immediately`,
      )
      void doRefresh(sessionId, gen)
      return
    }

    logForDebugging(
      `[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires=${expiryDate}, buffer=${refreshBufferMs / 1000}s)`,
    )

    const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
    timers.set(sessionId, timer)
  }

  /**
   * Schedule refresh using an explicit TTL (seconds until expiry) rather
   * than decoding a JWT's exp claim. Used by callers whose JWT is opaque
   * (e.g. POST /v1/code/sessions/{id}/bridge returns expires_in directly).
   */
  function scheduleFromExpiresIn(
    sessionId: string,
    expiresInSeconds: number,
  ): void {
    const existing = timers.get(sessionId)
    if (existing) clearTimeout(existing)
    const gen = nextGeneration(sessionId)
    // Clamp to 30s floor — if refreshBufferMs exceeds the server's expires_in
    // (e.g. very large buffer for frequent-refresh testing, or server shortens
    // expires_in unexpectedly), unclamped delayMs ≤ 0 would tight-loop.
    const delayMs = Math.max(expiresInSeconds * 1000 - refreshBufferMs, 30_000)
    logForDebugging(
      `[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires_in=${expiresInSeconds}s, buffer=${refreshBufferMs / 1000}s)`,
    )
    const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
    timers.set(sessionId, timer)
  }

  /**
   * Fetch the current OAuth token and hand it to `onRefresh`, then arm the
   * next timer: a retry (bounded by MAX_REFRESH_FAILURES) when no token was
   * available, or a follow-up refresh after a successful delivery.
   * `gen` is the generation this call was scheduled under; if it no longer
   * matches after the await, the call is stale and does nothing.
   */
  async function doRefresh(sessionId: string, gen: number): Promise<void> {
    let oauthToken: string | undefined
    try {
      oauthToken = await getAccessToken()
    } catch (err) {
      // A throw is handled below exactly like "no token available".
      logForDebugging(
        `[${label}:token] getAccessToken threw for sessionId=${sessionId}: ${errorMessage(err)}`,
        { level: 'error' },
      )
    }

    // If the session was cancelled or rescheduled while we were awaiting,
    // the generation will have changed — bail out to avoid orphaned timers.
    if (generations.get(sessionId) !== gen) {
      logForDebugging(
        `[${label}:token] doRefresh for sessionId=${sessionId} stale (gen ${gen} vs ${generations.get(sessionId)}), skipping`,
      )
      return
    }

    if (!oauthToken) {
      const failures = (failureCounts.get(sessionId) ?? 0) + 1
      failureCounts.set(sessionId, failures)
      logForDebugging(
        `[${label}:token] No OAuth token available for refresh, sessionId=${sessionId} (failure ${failures}/${MAX_REFRESH_FAILURES})`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'bridge_token_refresh_no_oauth')
      // Schedule a retry so the refresh chain can recover if the token
      // becomes available again (e.g. transient cache clear during refresh).
      // Cap retries to avoid spamming on genuine failures.
      if (failures < MAX_REFRESH_FAILURES) {
        const retryTimer = setTimeout(
          doRefresh,
          REFRESH_RETRY_DELAY_MS,
          sessionId,
          gen,
        )
        timers.set(sessionId, retryTimer)
      }
      return
    }

    // Reset failure counter on successful token retrieval
    failureCounts.delete(sessionId)

    logForDebugging(
      `[${label}:token] Refreshing token for sessionId=${sessionId}: new token prefix=${oauthToken.slice(0, 15)}…`,
    )
    logEvent('tengu_bridge_token_refreshed', {})
    onRefresh(sessionId, oauthToken)

    // Schedule a follow-up refresh so long-running sessions stay authenticated.
    // Without this, the initial one-shot timer leaves the session vulnerable
    // to token expiry if it runs past the first refresh window.
    const timer = setTimeout(
      doRefresh,
      FALLBACK_REFRESH_INTERVAL_MS,
      sessionId,
      gen,
    )
    timers.set(sessionId, timer)
    logForDebugging(
      `[${label}:token] Scheduled follow-up refresh for sessionId=${sessionId} in ${formatDuration(FALLBACK_REFRESH_INTERVAL_MS)}`,
    )
  }

  /** Stop refreshing `sessionId`: clears its timer and failure count. */
  function cancel(sessionId: string): void {
    // Bump generation to invalidate any in-flight async doRefresh.
    nextGeneration(sessionId)
    const timer = timers.get(sessionId)
    if (timer) {
      clearTimeout(timer)
      timers.delete(sessionId)
    }
    failureCounts.delete(sessionId)
  }

  /** Stop refreshing every session known to this scheduler. */
  function cancelAll(): void {
    // Bump all generations so in-flight doRefresh calls are invalidated.
    for (const sessionId of generations.keys()) {
      nextGeneration(sessionId)
    }
    for (const timer of timers.values()) {
      clearTimeout(timer)
    }
    timers.clear()
    failureCounts.clear()
  }

  return { schedule, scheduleFromExpiresIn, cancel, cancelAll }
}