root 8348520bdf feat: implement LLM Gateway with multi-provider support
Implement a unified LLM Gateway supporting multiple API formats and providers:

Features:
- OpenAI Chat Completions, Responses API, and Anthropic Messages API
- Provider adapters for OpenAI, Anthropic, Azure OpenAI, Google Gemini, AWS Bedrock
- Model aliasing with weighted round-robin load balancing
- Virtual API keys with RPM/TPM rate limiting
- Budget control at key and project levels
- Request logging, usage statistics, and audit logs
- Fallback/retry with circuit breaker pattern
- Admin CRUD APIs for providers, projects, keys, models, usage
- Provider health checks

Tech stack:
- FastAPI with async SQLAlchemy 2.0
- SQLite with aiosqlite
- bcrypt for API key hashing, AES-256 for provider key encryption
- Docker containerization

Tests: 18 passing integration tests for admin API endpoints

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-01 15:39:21 +08:00

101 lines
2.7 KiB
Python

"""OpenAI API request and response schemas."""
from typing import Any, Literal
from pydantic import BaseModel, Field
# === Request Models ===
class OpenAIChatMessage(BaseModel):
    """A single message in an OpenAI-style chat conversation.

    ``role`` is the only required field; every other field is optional and
    defaults to ``None``.
    """

    # "developer" is accepted in addition to the original five roles: the
    # Chat Completions API defines it as a message role, so a request that
    # uses it should not be rejected by schema validation.
    # NOTE(review): confirm that the provider adapters handle this role.
    role: Literal["system", "user", "assistant", "tool", "function", "developer"]

    # Either a plain string or a list of dict-shaped parts. The dicts are
    # not validated any further by this model.
    content: str | list[dict[str, Any]] | None = None
    name: str | None = None

    # Tool-calling fields are kept as loosely typed dicts / strings.
    tool_calls: list[dict[str, Any]] | None = None
    tool_call_id: str | None = None
class OpenAIChatCompletionRequest(BaseModel):
    """Request body for the OpenAI Chat Completions format.

    ``model`` and ``messages`` must be supplied. ``stream`` defaults to
    ``False``; all remaining fields default to ``None``. Numeric fields carry
    the inclusive bounds declared on their ``Field``.
    """

    # --- required ---
    model: str
    messages: list[OpenAIChatMessage]

    # --- optional, range-checked where a Field is declared ---
    temperature: float | None = Field(default=None, ge=0, le=2)
    top_p: float | None = Field(default=None, ge=0, le=1)
    n: int | None = Field(default=None, ge=1)
    stream: bool = False
    # A single string or a list of strings.
    stop: str | list[str] | None = None
    max_tokens: int | None = Field(default=None, ge=1)
    presence_penalty: float | None = Field(default=None, ge=-2, le=2)
    frequency_penalty: float | None = Field(default=None, ge=-2, le=2)
    # String keys mapped to float values; no range check is applied.
    logit_bias: dict[str, float] | None = None
    user: str | None = None

    # --- optional, loosely typed pass-through structures ---
    tools: list[dict[str, Any]] | None = None
    tool_choice: str | dict[str, Any] | None = None
    response_format: dict[str, Any] | None = None
class OpenAIResponseRequest(BaseModel):
    """Request body for the OpenAI Responses API format.

    ``model`` and ``input`` must be supplied; every other field is optional
    and defaults to ``None``.
    """

    # --- required ---
    model: str
    # Either a plain string or a list of dict-shaped items; the dicts are not
    # validated any further by this model.
    input: str | list[dict[str, Any]]

    # --- optional ---
    instructions: str | None = None
    # Inclusive bounds: 0 <= temperature <= 2.
    temperature: float | None = Field(default=None, ge=0, le=2)
    # Must be at least 1 when given.
    max_output_tokens: int | None = Field(default=None, ge=1)
    tools: list[dict[str, Any]] | None = None
    tool_choice: str | dict[str, Any] | None = None
    metadata: dict[str, Any] | None = None
# === Response Models ===
class OpenAIChatCompletionChoice(BaseModel):
    """One entry of the ``choices`` list in a chat completion response.

    ``index`` and ``message`` are required; ``finish_reason`` may be omitted
    or ``None``.
    """

    index: int
    message: OpenAIChatMessage
    # Explicit ``None`` default. Under Pydantic v2 a bare ``str | None``
    # annotation is nullable but still *required*, so a choice built or
    # parsed without this key would fail validation. The default makes the
    # field omittable, like the other nullable fields in this module.
    finish_reason: str | None = None
class OpenAIUsage(BaseModel):
    """Token counts reported with a completion.

    All three counts are required integers. This model does not enforce any
    relationship between them.
    """

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
class OpenAIChatCompletionResponse(BaseModel):
    """Non-streaming response body in the OpenAI Chat Completions format.

    ``id``, ``created``, ``model`` and ``choices`` are required. ``usage``
    and ``system_fingerprint`` default to ``None``.
    """

    id: str
    # Fixed tag: defaults to "chat.completion" and accepts no other value.
    object: Literal["chat.completion"] = "chat.completion"
    # Integer timestamp. NOTE(review): presumably Unix seconds - confirm
    # against the code that populates it.
    created: int
    model: str
    choices: list[OpenAIChatCompletionChoice]

    # Optional extras.
    usage: OpenAIUsage | None = None
    system_fingerprint: str | None = None
class OpenAIStreamChoice(BaseModel):
    """One entry of the ``choices`` list in a streaming chunk.

    ``index`` and ``delta`` are required; ``finish_reason`` may be omitted or
    ``None``.
    """

    index: int
    # Incremental payload, kept as a free-form dict (no further validation).
    delta: dict[str, Any]
    # Explicit ``None`` default. Under Pydantic v2 a bare ``str | None``
    # annotation is nullable but still *required*, so every chunk would have
    # to pass ``finish_reason=None`` explicitly, and a parsed chunk without
    # the key would fail validation. The default makes the field omittable,
    # like the other nullable fields in this module.
    finish_reason: str | None = None
class OpenAIChatCompletionChunk(BaseModel):
    """A single streaming chunk in the OpenAI Chat Completions format.

    ``id``, ``created``, ``model`` and ``choices`` are required.
    ``system_fingerprint`` defaults to ``None``.
    """

    id: str
    # Fixed tag: defaults to "chat.completion.chunk" and accepts no other
    # value.
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    # Integer timestamp. NOTE(review): presumably Unix seconds - confirm
    # against the code that populates it.
    created: int
    model: str
    choices: list[OpenAIStreamChoice]

    # Optional extra.
    system_fingerprint: str | None = None