From bebe8c1bb5d445e663fe5f009835170834e7e675 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 1 May 2026 14:52:53 +0800 Subject: [PATCH] docs: add LLM Gateway design document Design a unified LLM Gateway with: - Multi-format API support (OpenAI, Anthropic, Responses API) - 5 provider adapters (OpenAI, Anthropic, Azure, Gemini, Bedrock) - Model aliasing, routing, and load balancing - RPM/TPM rate limiting and budget control (key/project level) - Fallback/retry with circuit breaker - Request logging and usage statistics - Admin API for provider/key/model management Tech stack: Python (FastAPI) + SQLite Co-Authored-By: Claude Opus 4.7 --- docs/plans/2026-05-01-llm-gateway-design.md | 1016 +++++++++++++++++++ memory/2026-05-01.md | 23 + 2 files changed, 1039 insertions(+) create mode 100644 docs/plans/2026-05-01-llm-gateway-design.md diff --git a/docs/plans/2026-05-01-llm-gateway-design.md b/docs/plans/2026-05-01-llm-gateway-design.md new file mode 100644 index 0000000..541e1f1 --- /dev/null +++ b/docs/plans/2026-05-01-llm-gateway-design.md @@ -0,0 +1,1016 @@ +# LLM Gateway 设计文档 + +## 背景与目标 + +### 背景 +企业使用多个 LLM Provider 时面临: +- API 格式不统一(OpenAI、Anthropic、Google 各有差异) +- 多账号/多 API Key 管理复杂 +- 缺乏统一的成本控制和用量追踪 +- Provider 故障时缺乏自动降级机制 + +### 目标 +构建统一的 LLM Gateway,提供: +1. **统一 API 入口**:支持 OpenAI-compatible、OpenAI Responses API、Anthropic Messages API 三种请求格式 +2. **多 Provider 适配**:一期支持 OpenAI、Anthropic、Azure OpenAI、Google Gemini、AWS Bedrock +3. **灵活路由**:模型别名、负载均衡、fallback/retry +4. **成本管控**:Key/Project 两级预算控制、RPM/TPM 限流 +5. 
**可观测性**:请求日志、usage/cost 统计、审计日志 + +### 非目标(二期) +- Structured output 校验 +- 插件系统 +- 账单结算 +- 组织级 RBAC +- 内容审计 +- Webhook + +--- + +## 架构设计 + +### 整体架构 + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Client Layer │ +│ SDK (OpenAI/Anthropic) │ HTTP Client │ Management API │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ API Gateway │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ FastAPI Application (Python 3.11+) │ │ +│ │ ├── POST /v1/chat/completions (OpenAI-compatible) │ │ +│ │ ├── POST /v1/responses (OpenAI Responses API) │ │ +│ │ ├── POST /v1/messages (Anthropic Messages API) │ │ +│ │ └── Admin API (Provider/Key/Model/Usage) │ │ +│ └─────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Core Services │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │ +│ │ Transformer │ │ Router │ │ Load Balancer │ │ +│ │ (格式转换) │ │ (模型路由) │ │ (加权轮询+健康检查) │ │ +│ └──────────────┘ └──────────────┘ └──────────────────────┘ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │ +│ │Rate Limiter │ │ Budget │ │ Fallback/Retry │ │ +│ │(RPM/TPM) │ │ Controller │ │ (指数退避+circuit) │ │ +│ └──────────────┘ └──────────────┘ └──────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Provider Adapters │ +│ ┌────────┐ ┌───────────┐ ┌────────────┐ ┌────────┐ ┌────────┐ │ +│ │ OpenAI │ │ Anthropic │ │Azure OpenAI│ │ Gemini │ │Bedrock │ │ +│ └────────┘ └───────────┘ └────────────┘ └────────┘ └────────┘ │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Data 
Layer │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ SQLite (配置、日志、统计、审计) │ │ +│ └─────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### 请求处理流程 + +``` +Request → Auth → Transform → Rate Limit → Budget Check → Route → +Load Balance → Provider Call → Log → Response + ↑ ↓ + └──────── Fallback/Retry ←──────────────┘ +``` + +--- + +## 模块设计 + +### 1. API Layer + +#### 1.1 统一请求端点 + +| 端点 | 请求格式 | 说明 | +|------|----------|------| +| `POST /v1/chat/completions` | OpenAI Chat Completions | 主入口,兼容所有 OpenAI SDK | +| `POST /v1/responses` | OpenAI Responses API | 新版 OpenAI API 格式 | +| `POST /v1/messages` | Anthropic Messages | 兼容 Anthropic SDK | + +#### 1.2 认证方式 + +```http +Authorization: Bearer {virtual_key} +# 或 +X-API-Key: {virtual_key} +``` + +Virtual Key 格式:`sk-{prefix}_{random}`(如 `sk-proj_abc123`) + +#### 1.3 Admin API + +``` +# Provider 管理 +GET /admin/providers +POST /admin/providers +PUT /admin/providers/{id} +DELETE /admin/providers/{id} + +# Model Alias 管理 +GET /admin/models +POST /admin/models/aliases +PUT /admin/models/aliases/{id} +DELETE /admin/models/aliases/{id} + +# API Key 管理 +GET /admin/keys +POST /admin/keys +PUT /admin/keys/{id} +DELETE /admin/keys/{id} + +# Project 管理 +GET /admin/projects +POST /admin/projects +PUT /admin/projects/{id} + +# Usage Dashboard +GET /admin/usage/stats +GET /admin/usage/logs +GET /admin/usage/costs + +# Health Check +GET /health +GET /admin/providers/{id}/health +``` + +--- + +### 2. 
Request Transformer + +负责不同请求格式之间的转换。 + +#### 2.1 OpenAI → Anthropic 转换 + +```python +# OpenAI 请求 +{ + "model": "claude-3-sonnet", + "messages": [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hello"} + ], + "max_tokens": 1024, + "temperature": 0.7 +} + +# 转换为 Anthropic 请求 +{ + "model": "claude-3-sonnet-20240229", + "system": "You are helpful.", + "messages": [ + {"role": "user", "content": "Hello"} + ], + "max_tokens": 1024, + "temperature": 0.7 +} +``` + +#### 2.2 Anthropic → OpenAI 转换 + +```python +# Anthropic 请求 +{ + "model": "claude-3-sonnet-20240229", + "system": "You are helpful.", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 1024 +} + +# 转换为 OpenAI 格式发送给 OpenAI Provider +{ + "model": "gpt-4", + "messages": [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hello"} + ], + "max_tokens": 1024 +} +``` + +#### 2.3 转换矩阵 + +| 入口格式 → 目标 Provider | OpenAI | Anthropic | Azure | Gemini | Bedrock | +|--------------------------|--------|-----------|-------|--------|---------| +| OpenAI Chat | 直接 | 转换 | 直接 | 转换 | 转换 | +| OpenAI Responses | 转换 | 转换 | 转换 | 转换 | 转换 | +| Anthropic Messages | 转换 | 直接 | 转换 | 转换 | 转换 | + +--- + +### 3. Router(模型别名与路由) + +#### 3.1 模型别名配置 + +```yaml +# 示例配置 +model_aliases: + # 简单别名 + "gpt-4": "openai/gpt-4-turbo" + "claude": "anthropic/claude-3-sonnet-20240229" + + # 路由组(负载均衡) + "smart-model": + routing: + - provider: openai + model: gpt-4-turbo + weight: 50 + - provider: anthropic + model: claude-3-sonnet-20240229 + weight: 50 + + # Fallback 链 + "reliable-model": + routing: + primary: + provider: openai + model: gpt-4-turbo + fallback: + - provider: anthropic + model: claude-3-sonnet-20240229 + - provider: azure + model: gpt-4-deployment +``` + +#### 3.2 路由逻辑 + +``` +1. 解析请求中的 model 字段 +2. 查找模型别名配置 +3. 如果是路由组: + a. 按 weight 加权选择 Provider + b. 检查 Provider 健康状态 + c. 返回目标 Provider + Model +4. 如果是 Fallback 链: + a. 优先选择 primary + b. 
失败时按顺序尝试 fallback +``` + +--- + +### 4. Load Balancer + +#### 4.1 策略 + +| 策略 | 说明 | 适用场景 | +|------|------|----------| +| 加权轮询 | 按 weight 比例分配 | 默认策略 | +| 最少连接 | 选当前请求数最少的 | 长连接场景 | +| 延迟优先 | 选平均延迟最低的 | 对延迟敏感 | + +#### 4.2 健康检查 + +```python +class HealthChecker: + """ + 定期检查 Provider 可用性 + - 每 30s 检查一次 + - 连续 3 次失败标记为 unhealthy + - 连续 2 次成功恢复为 healthy + """ + check_interval: int = 30 # seconds + failure_threshold: int = 3 + recovery_threshold: int = 2 + + async def check_provider(provider: Provider) -> HealthStatus: + # 发送轻量级请求(如 models 列表) + # 返回 healthy / unhealthy / degraded +``` + +#### 4.3 Circuit Breaker + +```python +class CircuitBreaker: + """ + 熔断器,防止级联故障 + - CLOSED: 正常状态 + - OPEN: 熔断状态,直接拒绝请求 + - HALF_OPEN: 半开状态,允许少量请求探测 + """ + state: CircuitState + failure_count: int + failure_threshold: int = 5 + recovery_timeout: int = 30 # seconds +``` + +--- + +### 5. Rate Limiter + +#### 5.1 限流维度 + +| 维度 | 说明 | 实现 | +|------|------|------| +| RPM (Requests Per Minute) | 请求数限制 | Token Bucket | +| TPM (Tokens Per Minute) | Token 数限制 | Token Bucket | +| 并发数 | 同时进行的请求数 | Semaphore | + +#### 5.2 限流层级 + +``` +1. Global (全局) - 整个 Gateway 的总限制 +2. Provider 级 - 每个 Provider 的限制 +3. Key 级 - 每个 Virtual Key 的限制 +4. Project 级 - 每个 Project 的限制 +``` + +#### 5.3 响应头 + +```http +X-RateLimit-Limit-Requests: 60 +X-RateLimit-Limit-Tokens: 150000 +X-RateLimit-Remaining-Requests: 59 +X-RateLimit-Remaining-Tokens: 149984 +X-RateLimit-Reset-Requests: 1s +X-RateLimit-Reset-Tokens: 6m0s +``` + +#### 5.4 SQLite 实现 + +```sql +-- 使用 SQLite 的时间戳窗口实现 +CREATE TABLE rate_limit_counters ( + key TEXT PRIMARY KEY, + request_count INTEGER, + token_count INTEGER, + window_start TIMESTAMP, + window_duration INTEGER -- seconds +); +``` + +--- + +### 6. 
Budget Controller + +#### 6.1 预算层级 + +``` +Organization (二期) + └── Project + └── API Key +``` + +#### 6.2 预算配置 + +```python +class Budget: + key_id: str | None # Key 级预算 + project_id: str | None # Project 级预算 + + # 预算类型 + hard_limit: Decimal # 硬限制,超过直接拒绝 + soft_limit: Decimal # 软限制,触发告警 + + # 周期 + period: BudgetPeriod # daily / weekly / monthly + + # 当前用量 + current_usage: Decimal +``` + +#### 6.3 检查流程 + +``` +1. 解析 Virtual Key +2. 查询 Key 级预算 +3. 查询 Project 级预算 +4. 检查是否超限 +5. 超过 hard_limit 返回 402 Payment Required +6. 超过 soft_limit 记录告警日志 +``` + +--- + +### 7. Fallback & Retry + +#### 7.1 Retry 策略 + +```python +class RetryPolicy: + max_retries: int = 3 + initial_delay: float = 1.0 # seconds + max_delay: float = 30.0 + exponential_base: float = 2.0 + retryable_errors: list[str] = [ + "rate_limit_exceeded", + "timeout", + "service_unavailable", + "internal_error" + ] +``` + +#### 7.2 Fallback 配置 + +```yaml +fallback: + enabled: true + # 按错误类型配置 fallback + on_error: + rate_limit: + - provider: anthropic + model: claude-3-sonnet + timeout: + - provider: azure + model: gpt-4 + service_unavailable: + - provider: google + model: gemini-pro +``` + +--- + +### 8. 
Provider Adapters + +#### 8.1 Adapter 接口 + +```python +from abc import ABC, abstractmethod +from typing import AsyncIterator + +class ProviderAdapter(ABC): + @abstractmethod + async def chat_completions( + self, + request: ChatCompletionRequest + ) -> ChatCompletionResponse: + """非流式请求""" + pass + + @abstractmethod + async def stream_chat_completions( + self, + request: ChatCompletionRequest + ) -> AsyncIterator[ChatCompletionChunk]: + """流式请求""" + pass + + @abstractmethod + async def count_tokens( + self, + request: ChatCompletionRequest + ) -> int: + """计算 token 数""" + pass + + @abstractmethod + async def check_health(self) -> HealthStatus: + """健康检查""" + pass +``` + +#### 8.2 一期支持的 Provider + +| Provider | API 格式 | 特殊处理 | +|----------|----------|----------| +| OpenAI | Native | 无 | +| Anthropic | Messages API | system 字段提取、content 格式转换 | +| Azure OpenAI | OpenAI-compatible | deployment_name、api_base 配置 | +| Google Gemini | Gemini API | 内容格式转换、safety settings | +| AWS Bedrock | Bedrock Runtime | model_id 格式、AWS 认证 | + +--- + +### 9. 
日志与统计 + +#### 9.1 请求日志 + +```python +class RequestLog: + id: UUID + timestamp: datetime + + # 请求信息 + virtual_key_id: UUID + project_id: UUID + provider: str + model: str + model_alias: str | None + + # 请求内容 + request_type: str # chat / completion / embedding + input_tokens: int + output_tokens: int + total_tokens: int + + # 响应信息 + status_code: int + latency_ms: int + finish_reason: str + + # 成本 + cost_usd: Decimal + + # 元数据 + metadata: dict # 可选的业务标签 +``` + +#### 9.2 使用统计 + +```sql +-- 按时间聚合统计 +CREATE TABLE usage_stats ( + id INTEGER PRIMARY KEY, + timestamp TIMESTAMP, + granularity TEXT, -- hour / day / month + virtual_key_id UUID, + project_id UUID, + provider TEXT, + model TEXT, + + request_count INTEGER, + input_tokens BIGINT, + output_tokens BIGINT, + total_tokens BIGINT, + cost_usd DECIMAL(10, 6), + + avg_latency_ms INTEGER, + error_count INTEGER +); +``` + +#### 9.3 审计日志 + +```python +class AuditLog: + id: UUID + timestamp: datetime + actor: str # who + action: str # what + resource: str # which + resource_id: str + changes: dict # before/after + ip_address: str + user_agent: str +``` + +--- + +### 10. 
管理后台 + +一期仅实现 Admin API,提供以下功能: + +#### 10.1 Provider 管理 + +```python +class ProviderConfig: + id: UUID + name: str # openai, anthropic, azure, google, bedrock + enabled: bool + + # API 配置 + api_base: str + api_key: str # 加密存储 + api_version: str | None + + # Provider 特定配置 + config: dict # 如 Azure 的 deployment_name + + # 限流配置 + rpm_limit: int | None + tpm_limit: int | None + + # 健康状态 + health_status: str # healthy / unhealthy / degraded + last_check: datetime +``` + +#### 10.2 Model Alias 管理 + +```python +class ModelAlias: + id: UUID + alias: str # 用户调用时的模型名 + provider: str + model: str # Provider 的实际模型名 + enabled: bool + + # 路由配置 + routing_type: str # simple / load_balance / fallback + routing_config: dict + + # 成本配置 + input_price_per_1k: Decimal + output_price_per_1k: Decimal +``` + +#### 10.3 API Key 管理 + +```python +class APIKey: + id: UUID + key: str # sk-{prefix}_{random},哈希存储 + prefix: str # sk-proj_xxx 中的 proj + name: str + project_id: UUID + + enabled: bool + expires_at: datetime | None + + # 限流 + rpm_limit: int | None + tpm_limit: int | None + + # 预算 + budget_limit: Decimal | None + budget_period: str | None # daily / monthly + + # 权限 + allowed_models: list[str] | None # None 表示不限制 + + # 统计 + current_usage: Decimal + total_requests: int +``` + +#### 10.4 Usage Dashboard API + +``` +GET /admin/usage/stats?period=day&group_by=model +Response: +{ + "period": "2024-01-15", + "total_requests": 15234, + "total_tokens": 1234567, + "total_cost_usd": 123.45, + "by_model": [ + {"model": "gpt-4", "requests": 5000, "tokens": 500000, "cost": 50.00}, + {"model": "claude-3-sonnet", "requests": 10234, "tokens": 734567, "cost": 73.45} + ], + "by_provider": [...], + "by_project": [...] 
+} +``` + +--- + +## 数据模型 + +### SQLite Schema + +```sql +-- Provider 配置 +CREATE TABLE providers ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + enabled BOOLEAN DEFAULT TRUE, + api_base TEXT NOT NULL, + api_key_encrypted TEXT NOT NULL, + api_version TEXT, + config TEXT, -- JSON + rpm_limit INTEGER, + tpm_limit INTEGER, + health_status TEXT DEFAULT 'healthy', + last_health_check TIMESTAMP, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- 模型别名 +CREATE TABLE model_aliases ( + id TEXT PRIMARY KEY, + alias TEXT NOT NULL UNIQUE, + provider TEXT NOT NULL, + model TEXT NOT NULL, + enabled BOOLEAN DEFAULT TRUE, + routing_type TEXT DEFAULT 'simple', + routing_config TEXT, -- JSON + input_price_per_1k DECIMAL(10, 6), + output_price_per_1k DECIMAL(10, 6), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- 项目 +CREATE TABLE projects ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + description TEXT, + budget_limit DECIMAL(10, 2), + budget_period TEXT, + enabled BOOLEAN DEFAULT TRUE, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- API Key +CREATE TABLE api_keys ( + id TEXT PRIMARY KEY, + key_hash TEXT NOT NULL UNIQUE, + key_prefix TEXT NOT NULL, + name TEXT NOT NULL, + project_id TEXT REFERENCES projects(id), + enabled BOOLEAN DEFAULT TRUE, + expires_at TIMESTAMP, + rpm_limit INTEGER, + tpm_limit INTEGER, + budget_limit DECIMAL(10, 2), + budget_period TEXT, + allowed_models TEXT, -- JSON array + current_usage DECIMAL(10, 2) DEFAULT 0, + total_requests INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- 请求日志 +CREATE TABLE request_logs ( + id TEXT PRIMARY KEY, + timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + virtual_key_id TEXT REFERENCES api_keys(id), + project_id TEXT REFERENCES projects(id), + provider TEXT NOT NULL, + 
model TEXT NOT NULL, + model_alias TEXT, + request_type TEXT, + input_tokens INTEGER, + output_tokens INTEGER, + total_tokens INTEGER, + status_code INTEGER, + latency_ms INTEGER, + finish_reason TEXT, + cost_usd DECIMAL(10, 6), + metadata TEXT -- JSON +); + +-- 使用统计(按小时聚合) +CREATE TABLE usage_stats_hourly ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + timestamp TIMESTAMP NOT NULL, + virtual_key_id TEXT, + project_id TEXT, + provider TEXT, + model TEXT, + request_count INTEGER DEFAULT 0, + input_tokens BIGINT DEFAULT 0, + output_tokens BIGINT DEFAULT 0, + total_tokens BIGINT DEFAULT 0, + cost_usd DECIMAL(10, 6) DEFAULT 0, + avg_latency_ms INTEGER, + error_count INTEGER DEFAULT 0, + UNIQUE(timestamp, virtual_key_id, provider, model) +); + +-- 审计日志 +CREATE TABLE audit_logs ( + id TEXT PRIMARY KEY, + timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + actor TEXT NOT NULL, + action TEXT NOT NULL, + resource TEXT NOT NULL, + resource_id TEXT, + changes TEXT, -- JSON + ip_address TEXT, + user_agent TEXT +); + +-- 限流计数器 +CREATE TABLE rate_limit_counters ( + key TEXT PRIMARY KEY, + request_count INTEGER DEFAULT 0, + token_count INTEGER DEFAULT 0, + window_start TIMESTAMP NOT NULL, + window_duration INTEGER NOT NULL +); + +-- 索引 +CREATE INDEX idx_request_logs_timestamp ON request_logs(timestamp); +CREATE INDEX idx_request_logs_key_id ON request_logs(virtual_key_id); +CREATE INDEX idx_request_logs_project_id ON request_logs(project_id); +CREATE INDEX idx_usage_stats_timestamp ON usage_stats_hourly(timestamp); +CREATE INDEX idx_audit_logs_timestamp ON audit_logs(timestamp); +``` + +--- + +## 错误处理 + +### 错误码规范 + +| HTTP 状态码 | 错误类型 | 说明 | +|-------------|----------|------| +| 400 | invalid_request_error | 请求参数错误 | +| 401 | authentication_error | 认证失败 | +| 403 | permission_error | 权限不足 | +| 402 | budget_exceeded_error | 预算超限 | +| 429 | rate_limit_error | 触发限流 | +| 502 | provider_error | Provider 返回错误 | +| 503 | service_unavailable | 服务不可用 | +| 504 | timeout_error | 请求超时 | + +### 
错误响应格式 + +```json +{ + "error": { + "type": "rate_limit_error", + "message": "Rate limit exceeded: 60 requests per minute", + "code": "rate_limit_exceeded", + "details": { + "limit": 60, + "remaining": 0, + "reset_at": "2024-01-15T10:30:00Z" + } + } +} +``` + +--- + +## 测试策略 + +### 单元测试 + +- Transformer 各格式转换 +- Router 路由逻辑 +- Rate Limiter 计数逻辑 +- Budget Controller 检查逻辑 +- Circuit Breaker 状态转换 + +### 集成测试 + +- 端到端请求流程 +- Provider Adapter 与真实 API 交互(Mock 或 Sandbox) +- 限流和预算的端到端验证 + +### 性能测试 + +- 并发请求处理能力 +- 延迟分布(P50/P95/P99) +- SQLite 高并发写入性能 + +--- + +## 技术选型 + +| 组件 | 技术选型 | 理由 | +|------|----------|------| +| 语言 | Python 3.11+ | 开发效率高,生态丰富 | +| Web 框架 | FastAPI | 高性能,原生异步,自动文档 | +| HTTP 客户端 | httpx | 异步支持好 | +| 数据库 | SQLite | 轻量级,零配置,一期首选 | +| ORM | SQLAlchemy 2.0 | 异步支持,成熟稳定 | +| 数据验证 | Pydantic v2 | FastAPI 原生集成 | +| 配置管理 | Pydantic Settings | 类型安全 | +| 日志 | structlog | 结构化日志 | +| 测试 | pytest + pytest-asyncio | 异步测试支持 | + +--- + +## 部署方案 + +### Docker 部署 + +```dockerfile +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8000 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] +``` + +### Docker Compose + +```yaml +version: '3.8' + +services: + gateway: + build: . 
+ ports: + - "8000:8000" + volumes: + - ./data:/app/data + environment: + - DATABASE_URL=sqlite:///data/gateway.db + - MASTER_KEY=${MASTER_KEY} + restart: unless-stopped +``` + +--- + +## 安全考虑 + +### API Key 存储 + +- 使用 bcrypt 哈希存储(注意:bcrypt 带随机盐,无法按 `key_hash` 直接等值查询,认证时需先按 `key_prefix` 缩小候选范围再逐个校验;若需按哈希直接查询,可对高熵随机 Key 改用 SHA-256/HMAC-SHA256) +- 明文 Key 仅在创建时显示一次 + +### Provider API Key + +- 使用 AES-256 加密存储 +- 加密密钥通过环境变量注入 + +### 输入验证 + +- 所有输入使用 Pydantic 验证 +- 防止 SQL 注入(ORM 参数化查询) +- 防止请求走私(严格解析) + +### 日志脱敏 + +- 不记录完整请求/响应内容 +- 不记录 API Key 明文 + +--- + +## 性能目标 + +| 指标 | 目标值 | +|------|--------| +| 网关延迟开销 | < 20ms (P95) | +| 吞吐量 | 100+ RPS (单实例) | +| 可用性 | 99.9% (多 Provider 冗余) | + +--- + +## 项目结构 + +``` +llm-gateway/ +├── app/ +│ ├── __init__.py +│ ├── main.py # FastAPI 应用入口 +│ ├── config.py # 配置管理 +│ ├── api/ +│ │ ├── __init__.py +│ │ ├── v1/ +│ │ │ ├── __init__.py +│ │ │ ├── chat.py # /v1/chat/completions +│ │ │ ├── responses.py # /v1/responses +│ │ │ └── messages.py # /v1/messages +│ │ └── admin/ +│ │ ├── __init__.py +│ │ ├── providers.py +│ │ ├── models.py +│ │ ├── keys.py +│ │ ├── projects.py +│ │ └── usage.py +│ ├── core/ +│ │ ├── __init__.py +│ │ ├── transformer.py # 请求格式转换 +│ │ ├── router.py # 模型路由 +│ │ ├── load_balancer.py # 负载均衡 +│ │ ├── rate_limiter.py # 限流 +│ │ ├── budget.py # 预算控制 +│ │ ├── fallback.py # Fallback/Retry +│ │ └── circuit_breaker.py +│ ├── adapters/ +│ │ ├── __init__.py +│ │ ├── base.py # Adapter 基类 +│ │ ├── openai.py +│ │ ├── anthropic.py +│ │ ├── azure.py +│ │ ├── gemini.py +│ │ └── bedrock.py +│ ├── models/ +│ │ ├── __init__.py +│ │ ├── provider.py +│ │ ├── api_key.py +│ │ ├── project.py +│ │ ├── model_alias.py +│ │ └── usage.py +│ ├── db/ +│ │ ├── __init__.py +│ │ ├── database.py # 数据库连接 +│ │ └── migrations/ # 数据库迁移 +│ └── utils/ +│ ├── __init__.py +│ ├── crypto.py # 加密工具 +│ └── logging.py # 日志配置 +├── tests/ +│ ├── __init__.py +│ ├── conftest.py +│ ├── unit/ +│ └── integration/ +├── requirements.txt +├── Dockerfile +├── docker-compose.yml +└── README.md +``` + +--- + +## 实施计划 + +详见 `docs/plans/2026-05-01-llm-gateway-plan.md` diff --git 
a/memory/2026-05-01.md b/memory/2026-05-01.md index 1724328..0effd40 100644 --- a/memory/2026-05-01.md +++ b/memory/2026-05-01.md @@ -3,3 +3,26 @@ ## 纪要 - 工作区已初始化,可在此持续记录当天上下文。 + +## 决策 + +### LLM Gateway 技术选型 + +- **结论**:采用 Python (FastAPI) + SQLite 技术栈实现 LLM Gateway +- **理由**: + 1. Python 开发效率高,一期功能多,需快速落地 + 2. SQLite 零配置,轻量级,适合一期验证 + 3. FastAPI 原生异步,自动文档,生态成熟 +- **影响**:后续可平滑迁移到 PostgreSQL + Redis 方案 +- **替代方案**:Go(高性能但开发成本高)、Rust(极致性能但开发周期长) + +### LLM Gateway 架构设计 + +- **结论**:采用统一 API 入口 + 多 Provider Adapter 架构 +- **理由**: + 1. 支持 OpenAI-compatible、OpenAI Responses API、Anthropic Messages API 三种格式 + 2. 通过 Transformer 层实现格式互转 + 3. Router 层实现模型别名和路由 + 4. Load Balancer + Circuit Breaker 实现高可用 +- **一期 Provider**:OpenAI、Anthropic、Azure OpenAI、Google Gemini、AWS Bedrock +- **二期扩展**:Structured output、插件系统、账单结算、组织级 RBAC、内容审计、Webhook