Update project
This commit is contained in:
263
backend/app/services/capital_image_service.py
Normal file
263
backend/app/services/capital_image_service.py
Normal file
@ -0,0 +1,263 @@
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
from fastapi import HTTPException, UploadFile
|
||||
|
||||
from app.core.config import BASE_DIR, CAPITAL_IMAGE_DB_FILE, CAPITAL_IMAGE_UPLOADS_DIR
|
||||
from app.repositories.monitoring_repository import MonitoringRepository
|
||||
from app.repositories.capital_image_repository import CapitalImageRepository
|
||||
|
||||
|
||||
def _extract_json_block(content: str) -> dict:
    """Parse the first JSON object embedded in model output *content*.

    A ```json fenced block is preferred; otherwise the widest brace-delimited
    span is tried. Raises ValueError when neither pattern is present; a match
    that is not valid JSON propagates json.JSONDecodeError from json.loads.
    """
    candidate_patterns = (
        r"```json\s*(\{.*?\})\s*```",  # fenced block wins over a bare object
        r"(\{.*\})",
    )
    for pattern in candidate_patterns:
        if (match := re.search(pattern, content, flags=re.DOTALL)) is not None:
            return json.loads(match.group(1))
    raise ValueError("No JSON object found in model output")
|
||||
|
||||
|
||||
class CapitalImageService:
    """Store capital-flow screenshots and extract structured data from them.

    Uploaded images are written to CAPITAL_IMAGE_UPLOADS_DIR and a record is
    persisted through CapitalImageRepository. Field extraction is attempted
    in this order:

    1. an OpenAI-compatible vision model, when an API key is configured;
    2. a pre-extracted JSON "sidecar" file matched by the image's stem;
    3. a storage-only fallback payload that leaves all numbers null for
       manual review.
    """

    def __init__(self) -> None:
        # Persistent store for extracted capital-image records.
        self.repository = CapitalImageRepository(CAPITAL_IMAGE_DB_FILE)
        # Provides the system-wide LLM settings (see _get_llm_config).
        self.monitoring_repository = MonitoringRepository()

    def list_records(self, trade_date: str | None = None, subject: str | None = None) -> dict:
        """Return serialized records, optionally filtered by trade date and subject."""
        items = [
            self._serialize_record(record)
            for record in self.repository.list_records(trade_date=trade_date, subject=subject)
        ]
        return {"items": items, "total": len(items)}

    def get_record(self, record_id: str) -> dict:
        """Return one serialized record.

        Raises:
            HTTPException: 404 when no record with ``record_id`` exists.
        """
        record = self.repository.get_record(record_id)
        if record is None:
            raise HTTPException(status_code=404, detail="Record not found")
        return self._serialize_record(record)

    async def create_record(
        self,
        upload_file: UploadFile,
        trade_date: str | None = None,
        subject: str | None = None,
    ) -> dict:
        """Persist an uploaded screenshot, run extraction, and insert a record.

        ``trade_date`` and ``subject`` act only as fallbacks when extraction
        does not produce those fields itself.
        """
        # Keep the original extension when present; default to .jpg otherwise.
        suffix = Path(upload_file.filename or "upload.jpg").suffix or ".jpg"
        record_id = uuid4().hex
        image_name = upload_file.filename or f"{record_id}{suffix}"
        # Stored under the UUID (with lowercased extension) so uploads never collide.
        stored_path = CAPITAL_IMAGE_UPLOADS_DIR / f"{record_id}{suffix.lower()}"

        binary = await upload_file.read()
        stored_path.parent.mkdir(parents=True, exist_ok=True)
        stored_path.write_bytes(binary)

        extraction = self._extract_from_image(
            image_bytes=binary,
            original_filename=image_name,
            stored_path=stored_path,
            trade_date=trade_date,
            subject=subject,
        )

        # NOTE(review): naive local time, no timezone — confirm whether UTC
        # timestamps are expected by consumers of created_at/updated_at.
        now = datetime.now().isoformat(timespec="seconds")
        payload = {
            "id": record_id,
            # Extraction output wins; caller-supplied values are the fallback.
            "trade_date": extraction.get("trade_date") or trade_date,
            "subject": extraction.get("subject") or subject,
            "snapshot_time": extraction.get("snapshot_time"),
            "main_force_amount_yi": extraction.get("main_force_amount_yi"),
            "institution_amount_yi": extraction.get("institution_amount_yi"),
            "large_household_amount_yi": extraction.get("large_household_amount_yi"),
            "retail_amount_yi": extraction.get("retail_amount_yi"),
            "overall_trend": extraction.get("overall_trend"),
            "intraday_summary": extraction.get("intraday_summary"),
            "review_status": extraction.get("review_status", "pending_review"),
            "extraction_method": extraction.get("extraction_method", "fallback"),
            "image_name": image_name,
            "image_path": str(stored_path),
            # Full extraction output is kept verbatim for auditing/debugging.
            "raw_extraction": extraction,
            "created_at": now,
            "updated_at": now,
        }
        record = self.repository.insert_record(payload)
        return {"item": self._serialize_record(record)}

    def _extract_from_image(
        self,
        image_bytes: bytes,
        original_filename: str,
        stored_path: Path,
        trade_date: str | None,
        subject: str | None,
    ) -> dict:
        """Best-effort extraction: model, then sidecar JSON, then fallback.

        ``stored_path`` is accepted but not read here — presumably kept for
        interface symmetry with create_record; confirm before removing.
        """
        llm_config = self._get_llm_config()
        if llm_config["api_key"]:
            try:
                return self._extract_via_model(
                    image_bytes=image_bytes,
                    trade_date=trade_date,
                    subject=subject,
                    llm_config=llm_config,
                )
            except Exception as exc:  # pragma: no cover
                # Model failures are deliberately non-fatal: degrade to the
                # fallback payload but record the error text for review.
                return {
                    **self._build_fallback_payload(original_filename, trade_date, subject),
                    "review_status": "pending_review",
                    "extraction_method": "fallback_after_model_error",
                    "model_error": str(exc),
                }

        sidecar_payload = self._load_sidecar_payload(original_filename)
        if sidecar_payload is not None:
            return sidecar_payload

        return self._build_fallback_payload(original_filename, trade_date, subject)

    def _extract_via_model(
        self,
        image_bytes: bytes,
        trade_date: str | None,
        subject: str | None,
        llm_config: dict,
    ) -> dict:
        """Call an OpenAI-compatible chat-completions endpoint with the image.

        The image is inlined as a base64 data URL (declared as JPEG regardless
        of the actual upload format). Returns the parsed JSON payload from the
        model's reply, backfilled with caller-supplied subject/trade_date.

        Raises:
            RuntimeError: when the endpoint returns an HTTP error response.
            ValueError / json.JSONDecodeError: when no parseable JSON object
                is found in the model output (via _extract_json_block).
        """
        api_key = llm_config["api_key"]
        base_url = llm_config["base_url"].rstrip("/")
        model = llm_config["model"]
        encoded_image = base64.b64encode(image_bytes).decode("utf-8")
        prompt = """
You are extracting structured data from a Chinese stock capital flow screenshot.
Return only JSON with these keys:
trade_date, subject, snapshot_time, main_force_amount_yi, institution_amount_yi,
large_household_amount_yi, retail_amount_yi, overall_trend, intraday_summary,
review_status, extraction_method.

Rules:
1. intraday_summary must describe only the intraday capital-flow trend, not repeat raw numbers.
2. overall_trend should be a short phrase like "震荡上行", "冲高回落", "弱势下探", "午后修复".
3. If a number is not clearly visible, set it to null.
4. review_status should be "extracted".
5. extraction_method should be "vision_model".
6. If trade_date is absent in the image, keep null.
"""
        payload = {
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": "You extract structured JSON from Chinese capital-flow screenshots."
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{encoded_image}",
                            },
                        },
                    ],
                }
            ],
        }
        request = urllib.request.Request(
            url=f"{base_url}/chat/completions",
            data=json.dumps(payload).encode("utf-8"),
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            method="POST",
        )
        try:
            # Generous 3-minute timeout: vision extraction can be slow.
            with urllib.request.urlopen(request, timeout=180) as response:
                response_payload = json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as exc:  # pragma: no cover
            error_text = exc.read().decode("utf-8", errors="ignore")
            raise RuntimeError(f"Model request failed: {error_text}") from exc

        choices = response_payload.get("choices", [])
        content = ""
        if choices:
            content = choices[0].get("message", {}).get("content", "")

        parsed = _extract_json_block(content)
        # Backfill caller hints only when the model left those fields empty.
        if subject and not parsed.get("subject"):
            parsed["subject"] = subject
        if trade_date and not parsed.get("trade_date"):
            parsed["trade_date"] = trade_date
        return parsed

    def _get_llm_config(self) -> dict:
        """Read LLM connection settings from the shared system configuration."""
        config = self.monitoring_repository.get_system_config()
        return {
            "provider": config.get("llm_provider", "openai_compatible"),
            "api_key": config.get("llm_api_key", ""),
            "base_url": config.get("llm_base_url", "https://api.openai.com/v1"),
            "model": config.get("llm_vision_model", "gpt-4.1-mini"),
        }

    def _load_sidecar_payload(self, original_filename: str) -> dict | None:
        """Load a pre-extracted JSON sidecar matching the image's stem, if any.

        Returns None when no candidate file exists.
        """
        candidate_paths = [
            BASE_DIR.parent / "zijin" / f"{Path(original_filename).stem}.json",
            BASE_DIR / "data" / "capital_images" / f"{Path(original_filename).stem}.json",
        ]
        for candidate in candidate_paths:
            if candidate.exists():
                payload = json.loads(candidate.read_text(encoding="utf-8"))
                # NOTE(review): assumes these keys, when present, hold dicts;
                # an explicit null in the sidecar would make the .get calls
                # below raise AttributeError — confirm the sidecar schema.
                capital_flow = payload.get("capital_flow_amounts", {})
                overall_trend = payload.get("overall_trend", {})
                intraday_summary = overall_trend.get("summary") or payload.get("llm_summary")
                return {
                    "trade_date": payload.get("date"),
                    "subject": payload.get("subject"),
                    "snapshot_time": payload.get("snapshot_time"),
                    "main_force_amount_yi": capital_flow.get("main_force_yi"),
                    "institution_amount_yi": capital_flow.get("institution_yi"),
                    "large_household_amount_yi": capital_flow.get("large_household_yi"),
                    "retail_amount_yi": capital_flow.get("retail_yi"),
                    "overall_trend": overall_trend.get("direction"),
                    "intraday_summary": intraday_summary,
                    "review_status": "sidecar_loaded",
                    "extraction_method": "sidecar_json",
                    "sidecar_path": str(candidate),
                }
        return None

    def _build_fallback_payload(
        self,
        original_filename: str,
        trade_date: str | None,
        subject: str | None,
    ) -> dict:
        """Build the storage-only payload used when no extraction is possible.

        All numeric fields stay null; the record is flagged pending_review so
        a human (or a later model pass) can fill in the data.
        """
        return {
            "trade_date": trade_date,
            "subject": subject,
            "snapshot_time": None,
            "main_force_amount_yi": None,
            "institution_amount_yi": None,
            "large_household_amount_yi": None,
            "retail_amount_yi": None,
            "overall_trend": "待识别",
            "intraday_summary": "当前未配置视觉模型,图片已保存,待接入大模型后补充日内资金走势总结。",
            "review_status": "pending_review",
            "extraction_method": "storage_only",
            "fallback_reason": f"No vision model configured for {original_filename}",
        }

    def _serialize_record(self, record: dict) -> dict:
        """Return the record augmented with a public image_url for the frontend."""
        return {
            **record,
            # URL is derived from the stored file name; presumably the app
            # mounts CAPITAL_IMAGE_UPLOADS_DIR at /capital-images/uploads —
            # confirm against the static-files setup.
            "image_url": f"/capital-images/uploads/{Path(record['image_path']).name}",
        }
|
||||
|
||||
|
||||
# Module-level singleton — presumably imported by the API routers; verify callers.
capital_image_service = CapitalImageService()
|
||||
Reference in New Issue
Block a user