Implement a unified LLM Gateway supporting multiple API formats and providers.

Features:
- OpenAI Chat Completions, Responses API, and Anthropic Messages API
- Provider adapters for OpenAI, Anthropic, Azure OpenAI, Google Gemini, AWS Bedrock
- Model aliasing with weighted round-robin load balancing
- Virtual API keys with RPM/TPM rate limiting
- Budget control at key and project levels
- Request logging, usage statistics, and audit logs
- Fallback/retry with circuit breaker pattern
- Admin CRUD APIs for providers, projects, keys, models, usage
- Provider health checks

Tech stack:
- FastAPI with async SQLAlchemy 2.0
- SQLite with aiosqlite
- bcrypt for API key hashing, AES-256 for provider key encryption
- Docker containerization

Tests: 18 passing integration tests for admin API endpoints

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
101 lines
2.7 KiB
Python
101 lines
2.7 KiB
Python
"""OpenAI API request and response schemas."""
|
|
from typing import Any, Literal
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
# === Request Models ===
|
|
|
|
|
|
class OpenAIChatMessage(BaseModel):
    """OpenAI chat message."""

    # The only field without a default; limited to the five literals below.
    role: Literal["system", "user", "assistant", "tool", "function"]

    # Accepts a plain string, a list of free-form dicts, or None.
    content: str | list[dict[str, Any]] | None = None
    name: str | None = None

    # Tool-call entries are carried as untyped dicts; their inner structure
    # is not validated by this model.
    tool_calls: list[dict[str, Any]] | None = None
    tool_call_id: str | None = None
|
|
|
|
|
|
class OpenAIChatCompletionRequest(BaseModel):
    """OpenAI Chat Completions API request."""

    # --- Fields without defaults -------------------------------------------
    model: str
    messages: list[OpenAIChatMessage]

    # --- Optional parameters -----------------------------------------------
    # Numeric fields are range-checked through ``Field`` (ge/le bounds are
    # inclusive); None means the value was not supplied.
    temperature: float | None = Field(default=None, ge=0, le=2)
    top_p: float | None = Field(default=None, ge=0, le=1)
    n: int | None = Field(default=None, ge=1)
    stream: bool = False  # defaults to False when omitted
    stop: str | list[str] | None = None  # single string or list of strings
    max_tokens: int | None = Field(default=None, ge=1)
    presence_penalty: float | None = Field(default=None, ge=-2, le=2)
    frequency_penalty: float | None = Field(default=None, ge=-2, le=2)
    logit_bias: dict[str, float] | None = None
    user: str | None = None

    # --- Tooling / output format -------------------------------------------
    # These are held as free-form dicts (or a bare string for tool_choice);
    # no nested schema is enforced here.
    tools: list[dict[str, Any]] | None = None
    tool_choice: str | dict[str, Any] | None = None
    response_format: dict[str, Any] | None = None
|
|
|
|
|
|
class OpenAIResponseRequest(BaseModel):
    """OpenAI Responses API request (new format)."""

    # --- Fields without defaults -------------------------------------------
    model: str
    # Either a single string or a list of free-form dict items.
    input: str | list[dict[str, Any]]

    # --- Optional parameters -----------------------------------------------
    instructions: str | None = None
    temperature: float | None = Field(default=None, ge=0, le=2)  # inclusive bounds
    max_output_tokens: int | None = Field(default=None, ge=1)

    # Free-form structures; their contents are not validated by this model.
    tools: list[dict[str, Any]] | None = None
    tool_choice: str | dict[str, Any] | None = None
    metadata: dict[str, Any] | None = None
|
|
|
|
|
|
# === Response Models ===
|
|
|
|
|
|
class OpenAIChatCompletionChoice(BaseModel):
    """OpenAI chat completion choice."""

    index: int
    # Validated with the OpenAIChatMessage schema.
    message: OpenAIChatMessage
    # Nullable, but declared without a default value.
    finish_reason: str | None
|
|
|
|
|
|
class OpenAIUsage(BaseModel):
    """Token usage information."""

    # All three counters are plain ints with no defaults. This model does not
    # check that total_tokens equals the sum of the other two.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
|
|
|
|
|
|
class OpenAIChatCompletionResponse(BaseModel):
    """OpenAI Chat Completions API response."""

    id: str
    # Pinned to a single literal value, which is also the default.
    object: Literal["chat.completion"] = "chat.completion"
    # Stored as an int; presumably a Unix timestamp -- confirm against callers.
    created: int
    model: str
    choices: list[OpenAIChatCompletionChoice]

    # Optional extras; both default to None when omitted.
    usage: OpenAIUsage | None = None
    system_fingerprint: str | None = None
|
|
|
|
|
|
class OpenAIStreamChoice(BaseModel):
    """OpenAI streaming choice."""

    index: int
    # Kept as a free-form dict; unlike the non-streaming choice, it is not
    # validated against OpenAIChatMessage.
    delta: dict[str, Any]
    # Nullable, but declared without a default value.
    finish_reason: str | None
|
|
|
|
|
|
class OpenAIChatCompletionChunk(BaseModel):
    """OpenAI Chat Completions streaming chunk."""

    id: str
    # Pinned to a single literal value, which is also the default.
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    # Stored as an int; presumably a Unix timestamp -- confirm against callers.
    created: int
    model: str
    choices: list[OpenAIStreamChoice]

    # Optional; defaults to None when omitted.
    system_fingerprint: str | None = None
|