root 656f596d7e feat: implement AI legal assistant system MVP
Core modules:
- Laws: CRUD, search, AI-powered QA
- Analysis: legal research and case management
- Contracts: lifecycle management with templates
- Signatures: electronic signature workflow

Infrastructure:
- FastAPI + SQLite + async SQLAlchemy
- Docker deployment support
- 54 unit tests passing

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-01 03:34:44 +08:00

108 lines
3.3 KiB
Python

"""Vector service for embedding and similarity search."""
import numpy as np
from typing import List, Dict, Any, Optional
import httpx
from app.core.config import settings
class VectorService:
    """Service for vector embeddings and similarity search.

    Embeddings are requested from ``{api_base}/embeddings`` using bearer-token
    authentication. When no API key is configured, zero vectors of length
    ``dimension`` are returned instead of calling the API.
    """

    def __init__(self):
        """Read endpoint, credentials, model name and dimension from settings."""
        self.api_base = settings.EMBEDDING_API_BASE
        # Fall back to the LLM key when no embedding-specific key is set.
        self.api_key = settings.EMBEDDING_API_KEY or settings.LLM_API_KEY
        self.model = settings.EMBEDDING_MODEL
        self.dimension = settings.EMBEDDING_DIMENSION

    @staticmethod
    def _order_embeddings(items: List[Dict[str, Any]]) -> List[List[float]]:
        """Extract the ``embedding`` of each response item, ordered by ``index``.

        Items carrying an integer ``index`` are placed at that position; items
        without one keep their position in the response. The sort is stable,
        so a response that is already in order is returned unchanged.

        Raises:
            KeyError: if an item has no ``embedding`` key.
        """
        def sort_key(pair):
            position, item = pair
            index = item.get("index")
            return index if isinstance(index, int) else position

        ordered = sorted(enumerate(items), key=sort_key)
        return [item["embedding"] for _, item in ordered]

    async def _request_embeddings(
        self,
        payload_input: Any,
        timeout: float,
    ) -> List[List[float]]:
        """POST ``payload_input`` to the embeddings endpoint and return the vectors.

        Args:
            payload_input: Value sent as the ``input`` field (a string or a
                list of strings).
            timeout: Request timeout in seconds passed to httpx.

        Returns:
            One embedding per item in the response's ``data`` list.

        Raises:
            httpx.HTTPStatusError: if the API answers with an error status.
            KeyError: if the response body lacks ``data`` / ``embedding``.
        """
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.api_base}/embeddings",
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": self.model,
                    "input": payload_input,
                },
                timeout=timeout,
            )
            response.raise_for_status()
            data = response.json()
        # NOTE(review): the response shape looks OpenAI-compatible, where each
        # item carries an `index`; order by it rather than trusting list order.
        return self._order_embeddings(data["data"])

    async def get_embedding(self, text: str) -> List[float]:
        """Get embedding for a text using external API.

        Returns a zero vector of length ``self.dimension`` when no API key is
        configured (mock embedding for testing).
        """
        if not self.api_key:
            # Return mock embedding for testing
            return [0.0] * self.dimension
        embeddings = await self._request_embeddings(text, timeout=30.0)
        return embeddings[0]

    async def get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get embeddings for multiple texts.

        Returns an empty list for empty input without calling the API, and
        one zero vector per text when no API key is configured.
        """
        if not texts:
            # Nothing to embed; avoid a request with empty input.
            return []
        if not self.api_key:
            return [[0.0] * self.dimension for _ in texts]
        return await self._request_embeddings(texts, timeout=60.0)

    def cosine_similarity(
        self,
        vec1: List[float],
        vec2: List[float]
    ) -> float:
        """Calculate cosine similarity between two vectors.

        Returns 0.0 when either vector has zero norm, since the similarity is
        undefined there and a division by zero must be avoided.

        Raises:
            ValueError: propagated from ``np.dot`` when the two vectors have
                different lengths.
        """
        arr1 = np.asarray(vec1)
        arr2 = np.asarray(vec2)
        norm1 = np.linalg.norm(arr1)
        norm2 = np.linalg.norm(arr2)
        if norm1 == 0 or norm2 == 0:
            return 0.0
        return float(np.dot(arr1, arr2) / (norm1 * norm2))

    def search_similar(
        self,
        query_embedding: List[float],
        stored_vectors: List[Dict[str, Any]],
        top_k: int = 5
    ) -> List[Dict[str, Any]]:
        """Search for similar vectors.

        Args:
            query_embedding: Vector to compare against.
            stored_vectors: Items with an ``id`` and an ``embedding`` key.
            top_k: Maximum number of results; non-positive values yield ``[]``.

        Returns:
            Up to ``top_k`` dicts with ``id`` and ``similarity``, sorted by
            similarity descending. Ties keep their order from ``stored_vectors``.
        """
        if top_k <= 0:
            # A negative slice bound would silently drop items from the end.
            return []
        # Convert the query once instead of once per stored vector.
        query = np.asarray(query_embedding)
        results = [
            {
                "id": item["id"],
                "similarity": self.cosine_similarity(query, item["embedding"]),
            }
            for item in stored_vectors
        ]
        # Sort by similarity descending
        results.sort(key=lambda x: x["similarity"], reverse=True)
        return results[:top_k]
# Shared module-level instance, created once when this module is imported.
# Its configuration is read from `settings` inside VectorService.__init__.
vector_service = VectorService()