Update project

This commit is contained in:
wanghep
2026-04-08 20:04:40 +08:00
parent 2eab960303
commit 862235ea89
130 changed files with 60206 additions and 231 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,263 @@
import base64
import json
import re
import urllib.error
import urllib.request
from datetime import datetime
from pathlib import Path
from uuid import uuid4
from fastapi import HTTPException, UploadFile
from app.core.config import BASE_DIR, CAPITAL_IMAGE_DB_FILE, CAPITAL_IMAGE_UPLOADS_DIR
from app.repositories.monitoring_repository import MonitoringRepository
from app.repositories.capital_image_repository import CapitalImageRepository
def _extract_json_block(content: str) -> dict:
fenced_match = re.search(r"```json\s*(\{.*?\})\s*```", content, flags=re.DOTALL)
if fenced_match:
return json.loads(fenced_match.group(1))
object_match = re.search(r"(\{.*\})", content, flags=re.DOTALL)
if object_match:
return json.loads(object_match.group(1))
raise ValueError("No JSON object found in model output")
class CapitalImageService:
    """Persist uploaded capital-flow screenshots and turn them into records.

    Extraction strategy, in priority order:
      1. a configured vision model (OpenAI-compatible chat/completions API),
      2. a sidecar JSON file matching the original filename,
      3. a storage-only fallback payload awaiting manual review.
    """

    def __init__(self) -> None:
        # Record storage plus the monitoring repository that holds LLM config.
        self.repository = CapitalImageRepository(CAPITAL_IMAGE_DB_FILE)
        self.monitoring_repository = MonitoringRepository()

    def list_records(self, trade_date: str | None = None, subject: str | None = None) -> dict:
        """Return optionally filtered records as ``{"items": [...], "total": n}``."""
        items = [
            self._serialize_record(record)
            for record in self.repository.list_records(trade_date=trade_date, subject=subject)
        ]
        return {"items": items, "total": len(items)}

    def get_record(self, record_id: str) -> dict:
        """Return one serialized record; raise HTTP 404 when it is missing."""
        record = self.repository.get_record(record_id)
        if record is None:
            raise HTTPException(status_code=404, detail="Record not found")
        return self._serialize_record(record)

    async def create_record(
        self,
        upload_file: UploadFile,
        trade_date: str | None = None,
        subject: str | None = None,
    ) -> dict:
        """Store the uploaded image, run extraction, and insert a new record.

        Caller-supplied ``trade_date``/``subject`` are used as fallbacks when
        the extraction result does not provide them.
        """
        # Keep the original suffix when present; default to .jpg.
        suffix = Path(upload_file.filename or "upload.jpg").suffix or ".jpg"
        record_id = uuid4().hex
        image_name = upload_file.filename or f"{record_id}{suffix}"
        stored_path = CAPITAL_IMAGE_UPLOADS_DIR / f"{record_id}{suffix.lower()}"
        binary = await upload_file.read()
        stored_path.parent.mkdir(parents=True, exist_ok=True)
        stored_path.write_bytes(binary)
        extraction = self._extract_from_image(
            image_bytes=binary,
            original_filename=image_name,
            stored_path=stored_path,
            trade_date=trade_date,
            subject=subject,
        )
        # NOTE(review): naive local timestamp; other services in this commit
        # use tz-aware Asia/Shanghai times — confirm which is intended.
        now = datetime.now().isoformat(timespec="seconds")
        payload = {
            "id": record_id,
            "trade_date": extraction.get("trade_date") or trade_date,
            "subject": extraction.get("subject") or subject,
            "snapshot_time": extraction.get("snapshot_time"),
            "main_force_amount_yi": extraction.get("main_force_amount_yi"),
            "institution_amount_yi": extraction.get("institution_amount_yi"),
            "large_household_amount_yi": extraction.get("large_household_amount_yi"),
            "retail_amount_yi": extraction.get("retail_amount_yi"),
            "overall_trend": extraction.get("overall_trend"),
            "intraday_summary": extraction.get("intraday_summary"),
            "review_status": extraction.get("review_status", "pending_review"),
            "extraction_method": extraction.get("extraction_method", "fallback"),
            "image_name": image_name,
            "image_path": str(stored_path),
            # Full extraction output retained for auditing/debugging.
            "raw_extraction": extraction,
            "created_at": now,
            "updated_at": now,
        }
        record = self.repository.insert_record(payload)
        return {"item": self._serialize_record(record)}

    def _extract_from_image(
        self,
        image_bytes: bytes,
        original_filename: str,
        stored_path: Path,
        trade_date: str | None,
        subject: str | None,
    ) -> dict:
        """Run the extraction cascade: model -> sidecar JSON -> fallback.

        A model failure does NOT fall through to the sidecar: it returns a
        fallback payload tagged with the error so the record stays reviewable.
        """
        llm_config = self._get_llm_config()
        if llm_config["api_key"]:
            try:
                return self._extract_via_model(
                    image_bytes=image_bytes,
                    trade_date=trade_date,
                    subject=subject,
                    llm_config=llm_config,
                )
            except Exception as exc:  # pragma: no cover
                # Deliberate broad catch: any model/network/parse error must
                # degrade to a stored-but-unreviewed record, never a crash.
                return {
                    **self._build_fallback_payload(original_filename, trade_date, subject),
                    "review_status": "pending_review",
                    "extraction_method": "fallback_after_model_error",
                    "model_error": str(exc),
                }
        sidecar_payload = self._load_sidecar_payload(original_filename)
        if sidecar_payload is not None:
            return sidecar_payload
        return self._build_fallback_payload(original_filename, trade_date, subject)

    def _extract_via_model(
        self,
        image_bytes: bytes,
        trade_date: str | None,
        subject: str | None,
        llm_config: dict,
    ) -> dict:
        """Call the vision model and parse its JSON answer.

        Raises RuntimeError on HTTP errors and ValueError/JSONDecodeError on
        unparseable output; the caller converts these to a fallback payload.
        """
        api_key = llm_config["api_key"]
        base_url = llm_config["base_url"].rstrip("/")
        model = llm_config["model"]
        # Image is inlined as a base64 data URL per the OpenAI vision format.
        encoded_image = base64.b64encode(image_bytes).decode("utf-8")
        prompt = """
You are extracting structured data from a Chinese stock capital flow screenshot.
Return only JSON with these keys:
trade_date, subject, snapshot_time, main_force_amount_yi, institution_amount_yi,
large_household_amount_yi, retail_amount_yi, overall_trend, intraday_summary,
review_status, extraction_method.
Rules:
1. intraday_summary must describe only the intraday capital-flow trend, not repeat raw numbers.
2. overall_trend should be a short phrase like "震荡上行", "冲高回落", "弱势下探", "午后修复".
3. If a number is not clearly visible, set it to null.
4. review_status should be "extracted".
5. extraction_method should be "vision_model".
6. If trade_date is absent in the image, keep null.
"""
        payload = {
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": "You extract structured JSON from Chinese capital-flow screenshots."
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{encoded_image}",
                            },
                        },
                    ],
                }
            ],
        }
        request = urllib.request.Request(
            url=f"{base_url}/chat/completions",
            data=json.dumps(payload).encode("utf-8"),
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            method="POST",
        )
        try:
            # Long timeout: vision models can be slow on large screenshots.
            with urllib.request.urlopen(request, timeout=180) as response:
                response_payload = json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as exc:  # pragma: no cover
            error_text = exc.read().decode("utf-8", errors="ignore")
            raise RuntimeError(f"Model request failed: {error_text}") from exc
        choices = response_payload.get("choices", [])
        content = ""
        if choices:
            content = choices[0].get("message", {}).get("content", "")
        parsed = _extract_json_block(content)
        # Backfill caller hints only where the model left the field empty.
        if subject and not parsed.get("subject"):
            parsed["subject"] = subject
        if trade_date and not parsed.get("trade_date"):
            parsed["trade_date"] = trade_date
        return parsed

    def _get_llm_config(self) -> dict:
        """Read LLM connection settings from the shared system config."""
        config = self.monitoring_repository.get_system_config()
        return {
            "provider": config.get("llm_provider", "openai_compatible"),
            "api_key": config.get("llm_api_key", ""),
            "base_url": config.get("llm_base_url", "https://api.openai.com/v1"),
            "model": config.get("llm_vision_model", "gpt-4.1-mini"),
        }

    def _load_sidecar_payload(self, original_filename: str) -> dict | None:
        """Load a pre-extracted JSON file matching the image's stem, if any.

        NOTE(review): the sidecar schema (keys "capital_flow_amounts",
        "overall_trend", "llm_summary", ...) is assumed from usage here —
        confirm against whatever tool produces those files.
        """
        candidate_paths = [
            BASE_DIR.parent / "zijin" / f"{Path(original_filename).stem}.json",
            BASE_DIR / "data" / "capital_images" / f"{Path(original_filename).stem}.json",
        ]
        for candidate in candidate_paths:
            if candidate.exists():
                payload = json.loads(candidate.read_text(encoding="utf-8"))
                capital_flow = payload.get("capital_flow_amounts", {})
                overall_trend = payload.get("overall_trend", {})
                intraday_summary = overall_trend.get("summary") or payload.get("llm_summary")
                return {
                    "trade_date": payload.get("date"),
                    "subject": payload.get("subject"),
                    "snapshot_time": payload.get("snapshot_time"),
                    "main_force_amount_yi": capital_flow.get("main_force_yi"),
                    "institution_amount_yi": capital_flow.get("institution_yi"),
                    "large_household_amount_yi": capital_flow.get("large_household_yi"),
                    "retail_amount_yi": capital_flow.get("retail_yi"),
                    "overall_trend": overall_trend.get("direction"),
                    "intraday_summary": intraday_summary,
                    "review_status": "sidecar_loaded",
                    "extraction_method": "sidecar_json",
                    "sidecar_path": str(candidate),
                }
        return None

    def _build_fallback_payload(
        self,
        original_filename: str,
        trade_date: str | None,
        subject: str | None,
    ) -> dict:
        """Minimal storage-only payload used when no extraction is possible."""
        return {
            "trade_date": trade_date,
            "subject": subject,
            "snapshot_time": None,
            "main_force_amount_yi": None,
            "institution_amount_yi": None,
            "large_household_amount_yi": None,
            "retail_amount_yi": None,
            "overall_trend": "待识别",
            "intraday_summary": "当前未配置视觉模型,图片已保存,待接入大模型后补充日内资金走势总结。",
            "review_status": "pending_review",
            "extraction_method": "storage_only",
            "fallback_reason": f"No vision model configured for {original_filename}",
        }

    def _serialize_record(self, record: dict) -> dict:
        """Attach a client-facing image URL derived from the stored path."""
        return {
            **record,
            "image_url": f"/capital-images/uploads/{Path(record['image_path']).name}",
        }


# Module-level singleton used by the API routes.
capital_image_service = CapitalImageService()

View File

@ -0,0 +1,390 @@
from __future__ import annotations
from datetime import datetime
from uuid import uuid4
from zoneinfo import ZoneInfo
from app.clients.ths_etf_client import ThsEtfClient
from app.repositories.monitoring_repository import MonitoringRepository
from app.services.email_notification_service import email_notification_service
# Monitored ETFs, grouped into broad-market index funds and sector funds.
# "market" is the THS market code ("17" appears to be Shanghai and "33"
# Shenzhen, judging by the 5xx vs 15x code prefixes — TODO confirm).
ETF_GROUPS = {
    "broad": [
        {"code": "510050", "label": "上证50ETF", "market": "17"},
        {"code": "510300", "label": "沪深300ETF", "market": "17"},
        {"code": "510500", "label": "中证500ETF", "market": "17"},
        {"code": "588000", "label": "科创50ETF", "market": "17"},
        {"code": "159845", "label": "中证1000ETF", "market": "33"},
        {"code": "159532", "label": "中证2000ETF", "market": "33"},
    ],
    "sector": [
        {"code": "512880", "label": "证券ETF", "market": "17"},
        {"code": "512800", "label": "银行ETF", "market": "17"},
        {"code": "159819", "label": "人工智能ETF", "market": "33"},
        {"code": "513180", "label": "恒生科技ETF", "market": "17"},
        {"code": "512480", "label": "半导体ETF", "market": "17"},
    ],
}
class EtfMonitorService:
    """Collect realtime and historical quotes for the ETFs in ETF_GROUPS.

    Realtime syncs also evaluate a 3-minute-change alert rule and push e-mail
    notifications via the shared notification service. All documents are
    persisted through MonitoringRepository keyed by group and trade date.
    """

    def __init__(self) -> None:
        self.client = ThsEtfClient()
        self.repository = MonitoringRepository()
        # All timestamps are produced tz-aware in the exchange's timezone.
        self.tz = ZoneInfo("Asia/Shanghai")

    def _now(self) -> datetime:
        """Current tz-aware time in Asia/Shanghai."""
        return datetime.now(self.tz)

    def _today(self) -> str:
        """Today's date (Shanghai time) as an ISO YYYY-MM-DD string."""
        return self._now().date().isoformat()

    @staticmethod
    def _safe_float(value: str | float | int | None) -> float | None:
        """Convert a raw feed value to float; None/""/"-" mean "missing"."""
        if value in (None, "", "-"):
            return None
        return float(value)

    @staticmethod
    def _safe_int(value: str | float | int | None) -> int | None:
        """Convert a raw feed value to int (via float so "1.0"-style text works)."""
        if value in (None, "", "-"):
            return None
        return int(float(value))

    @staticmethod
    def _detail_url(code: str) -> str:
        """Public fund-detail page for the ETF code."""
        return f"https://fund.10jqka.com.cn/{code}/"

    @staticmethod
    def _source_url(code: str) -> str:
        """THS basic-info page recorded as the data source link."""
        return f"https://basic.10jqka.com.cn/{code}/"

    def _normalize_turnover(self, value: str | float | int | None) -> float | None:
        """Convert a turnover amount from yuan to 亿元 (1e8 yuan), 4 decimals."""
        parsed = self._safe_float(value)
        if parsed is None:
            return None
        return round(parsed / 100000000, 4)

    def _parse_intraday_points(self, raw: dict) -> list[dict]:
        """Parse the semicolon-separated intraday string into point dicts.

        Each item looks like "HHMM,price,volume,avg_price,amount"; items with
        fewer than 5 fields are skipped.
        NOTE(review): assumes raw["date"] is a YYYYMMDD string whenever
        raw["data"] is non-empty — a missing date would raise here; confirm
        against ThsEtfClient.
        """
        raw_data = raw.get("data") or ""
        if not raw_data:
            return []
        points: list[dict] = []
        trade_date = raw.get("date")
        for item in raw_data.split(";"):
            parts = item.split(",")
            if len(parts) < 5:
                continue
            hhmm = parts[0]
            points.append(
                {
                    "timestamp": f"{trade_date[:4]}-{trade_date[4:6]}-{trade_date[6:8]}T{hhmm[:2]}:{hhmm[2:]}:00+08:00",
                    "price": self._safe_float(parts[1]),
                    "volume": self._safe_int(parts[2]),
                    "avg_price": self._safe_float(parts[3]),
                    "turnover_amount": self._safe_int(parts[4]),
                }
            )
        return points

    @staticmethod
    def _compute_change(points: list[dict], minutes: int) -> float | None:
        """Percent price change over the last *minutes* points (4 decimals).

        Treats one point as one minute — only valid while the feed delivers
        1-minute bars (presumably it does; verify). Returns None when there
        is not enough history or either price is missing/zero.
        """
        if len(points) <= minutes:
            return None
        latest = points[-1].get("price")
        previous = points[-1 - minutes].get("price")
        if latest in (None, 0) or previous in (None, 0):
            return None
        return round((float(latest) / float(previous) - 1) * 100, 4)

    def _build_record(self, definition: dict) -> tuple[dict, dict]:
        """Fetch profile/quote/intraday data for one ETF and build its record.

        Returns (record, raw_payload); raw_payload keeps the unmodified
        upstream responses for auditing.
        NOTE(review): the quote payload is keyed by numeric field codes
        ("11", "7", "8", "9", "13", "19", "1968584") whose meanings
        (latest/open/high/low/volume/turnover/turnover-rate) are assumed from
        usage — confirm against the THS quote API.
        """
        code = definition["code"]
        market = definition["market"]
        profile_payload = self.client.fetch_profile(code)
        quote_payload = self.client.fetch_today_quote(market, code)
        intraday_payload = self.client.fetch_intraday_time(market, code)
        profile = profile_payload.get("data") or {}
        points = self._parse_intraday_points(intraday_payload)
        latest_point = points[-1] if points else {}
        # Fallback chain for prices: quote field -> last intraday point -> close.
        previous_close = self._safe_float(intraday_payload.get("pre")) or self._safe_float(profile.get("net"))
        latest_price = self._safe_float(quote_payload.get("11")) or latest_point.get("price")
        if latest_price is None:
            latest_price = previous_close
        change_percent = None
        if latest_price not in (None, 0) and previous_close not in (None, 0):
            change_percent = round((float(latest_price) / float(previous_close) - 1) * 100, 4)
        updated_at = self._now().isoformat(timespec="seconds")
        snapshot_time = None
        if points:
            snapshot_time = points[-1]["timestamp"]
        elif quote_payload.get("dt"):
            # "dt" is an HHMM-style clock value; zero-pad to four digits.
            dt = str(quote_payload["dt"]).zfill(4)
            snapshot_time = f"{self._today()}T{dt[:2]}:{dt[2:]}:00+08:00"
        record = {
            "trade_date": self._today(),
            "code": code,
            "name": definition["label"],
            "fund_name": profile.get("name") or definition["label"],
            "detail_url": self._detail_url(code),
            "source_url": self._source_url(code),
            "latest_price": latest_price,
            "change_percent": change_percent,
            "change_amount": round(float(latest_price) - float(previous_close), 4)
            if latest_price is not None and previous_close is not None
            else None,
            "previous_close": previous_close,
            "open_price": self._safe_float(quote_payload.get("7")),
            "high_price": self._safe_float(quote_payload.get("8")),
            "low_price": self._safe_float(quote_payload.get("9")),
            "volume": self._safe_int(quote_payload.get("13")),
            "turnover_amount": self._normalize_turnover(quote_payload.get("19")),
            "turnover_rate": self._safe_float(quote_payload.get("1968584")),
            "change_percent_1m": self._compute_change(points, 1),
            "change_percent_3m": self._compute_change(points, 3),
            "change_percent_4m": self._compute_change(points, 4),
            "updated_at": updated_at,
            "snapshot_time": snapshot_time,
            "source_name": "同花顺",
            "precision": "realtime_exact",
            "is_trading": bool(intraday_payload.get("isTrading")),
        }
        raw_payload = {
            "profile": profile_payload,
            "quote": quote_payload,
            "intraday": intraday_payload,
        }
        return record, raw_payload

    def _save_daily_records(self, group: str, records: list[dict], *, precision: str) -> None:
        """Persist today's record set for *group* as a daily document."""
        payload = {
            "trade_date": self._today(),
            "updated_at": self._now().isoformat(timespec="seconds"),
            "source_name": "同花顺",
            "source_url": "https://fund.10jqka.com.cn/",
            "precision": precision,
            "records": sorted(records, key=lambda item: item["code"]),
        }
        self.repository.save_document(f"etf_{group}_daily", payload["trade_date"], payload, sort_value=payload["trade_date"])

    def _send_alert_if_needed(self, group: str, record: dict) -> None:
        """Send an e-mail alert when the 3-minute change exceeds the threshold.

        A per-day, per-direction cooldown (keyed by group:code:direction in
        the "etf_alert_state" document) suppresses repeat alerts. Each send
        attempt — success or failure — is appended to the push-record log.
        """
        config = self.repository.get_system_config()
        if not config.get("email_enabled"):
            return
        threshold = float(config.get("etf_3min_change_alert_percent", 0.8))
        cooldown_minutes = int(config.get("etf_alert_cooldown_minutes", 10))
        change_3m = record.get("change_percent_3m")
        if change_3m is None or abs(change_3m) < threshold:
            return
        alert_state = self.repository.get_document("etf_alert_state", self._today(), {})
        record_key = f"{group}:{record['code']}:{'up' if change_3m > 0 else 'down'}"
        last_sent_at = alert_state.get(record_key)
        now = self._now()
        if last_sent_at:
            elapsed = now - datetime.fromisoformat(last_sent_at)
            if elapsed.total_seconds() < cooldown_minutes * 60:
                return
        direction = "上涨" if change_3m > 0 else "下跌"
        subject = f"[ETF监控] {record['name']} 3分钟{direction} {change_3m:+.2f}%"
        body = "\n".join(
            [
                "ETF 异动提醒",
                "",
                f"分组: {'宽基ETF' if group == 'broad' else '板块ETF'}",
                f"名称: {record['name']}",
                f"代码: {record['code']}",
                f"最新价: {record['latest_price'] or '-'}",
                f"当日涨跌幅: {record['change_percent'] or '-'}%",
                f"3分钟涨跌幅: {change_3m:+.2f}%",
                f"4分钟涨跌幅: {record.get('change_percent_4m') if record.get('change_percent_4m') is not None else '-'}%",
                f"成交额(亿元): {record['turnover_amount'] or '-'}",
                f"时间: {record.get('snapshot_time') or record.get('updated_at') or '-'}",
                "",
                f"详情页: {record['detail_url']}",
            ]
        )
        try:
            email_notification_service.send(
                smtp_host=config.get("smtp_host", ""),
                smtp_port=int(config.get("smtp_port", 465)),
                smtp_username=config.get("smtp_username", ""),
                smtp_password=config.get("smtp_password", ""),
                sender_email=config.get("sender_email", ""),
                recipients=config.get("recipients", []),
                subject=subject,
                text_body=body,
            )
            push_status = "sent"
            error_message = None
        except Exception as exc:
            # Deliberate broad catch: a failed notification must not break
            # the sync loop; the failure is logged as a push record instead.
            push_status = "failed"
            error_message = str(exc)
        self.repository.append_push_record(
            {
                "id": f"push-{uuid4().hex[:12]}",
                "triggered_at": now.isoformat(timespec="seconds"),
                "push_type": "email",
                "rule_code": "etf_3min_change",
                "trigger_value_hkd_billion": None,
                "description": f"{record['name']} 3分钟{direction}触发 ETF 监控阈值",
                "email_subject": subject,
                "email_summary": f"{record['name']} 3分钟涨跌幅 {change_3m:+.2f}%",
                "status": push_status,
                "error_message": error_message,
            }
        )
        # Cooldown timestamp is updated even when the send failed, which
        # also rate-limits retries of a broken SMTP setup.
        alert_state[record_key] = now.isoformat(timespec="seconds")
        self.repository.save_document("etf_alert_state", self._today(), alert_state, sort_value=self._today())

    def sync_group_realtime(self, group: str) -> dict:
        """Fetch all ETFs in *group*, persist realtime docs, and return them.

        Also updates the "latest_success" fallback document, stores the raw
        upstream payloads, and refreshes the daily snapshot.
        """
        records: list[dict] = []
        raw_payloads: dict[str, dict] = {}
        for definition in ETF_GROUPS[group]:
            record, raw_payload = self._build_record(definition)
            records.append(record)
            raw_payloads[definition["code"]] = raw_payload
            self._send_alert_if_needed(group, record)
        payload = {
            "trade_date": self._today(),
            "updated_at": self._now().isoformat(timespec="seconds"),
            "source_name": "同花顺",
            "source_url": "https://fund.10jqka.com.cn/",
            "precision": "realtime_exact",
            "group": group,
            "records": records,
        }
        self.repository.save_document(f"etf_{group}_realtime", payload["trade_date"], payload, sort_value=payload["trade_date"])
        self.repository.save_document(f"etf_{group}_latest_success", "default", payload, sort_value=payload["trade_date"])
        self.repository.save_raw_payload(f"etf_{group}_realtime_{payload['trade_date']}", raw_payloads)
        self._save_daily_records(group, records, precision="realtime_exact")
        return payload

    def _parse_history_rows(self, definition: dict) -> list[dict]:
        """Parse daily-history rows for one ETF into record dicts.

        Rows are semicolon-separated; dates before 2026-01-01 are dropped.
        NOTE(review): parts[1] is used both as previous_close and as
        open_price below — for a standard OHLC row parts[1] would be the
        open, which makes "previous_close" (and the change fields derived
        from it) suspect. Verify the upstream field order.
        """
        code = definition["code"]
        market = definition["market"]
        payload = self.client.fetch_history(market, code)
        raw = payload.get(f"{market}_{code}", {})
        rows = raw.get("data") or ""
        if not rows:
            return []
        records: list[dict] = []
        for row in rows.split(";"):
            parts = row.split(",")
            if len(parts) < 8:
                continue
            trade_date = f"{parts[0][:4]}-{parts[0][4:6]}-{parts[0][6:8]}"
            if trade_date < "2026-01-01":
                continue
            close_price = self._safe_float(parts[4])
            previous_close = self._safe_float(parts[1])
            records.append(
                {
                    "trade_date": trade_date,
                    "code": code,
                    "name": definition["label"],
                    "fund_name": raw.get("name") or definition["label"],
                    "detail_url": self._detail_url(code),
                    "source_url": self._source_url(code),
                    "latest_price": close_price,
                    "change_percent": round((float(close_price) / float(previous_close) - 1) * 100, 4)
                    if close_price is not None and previous_close not in (None, 0)
                    else None,
                    "change_amount": round(float(close_price) - float(previous_close), 4)
                    if close_price is not None and previous_close is not None
                    else None,
                    "previous_close": previous_close,
                    "open_price": self._safe_float(parts[1]),
                    "high_price": self._safe_float(parts[2]),
                    "low_price": self._safe_float(parts[3]),
                    "volume": self._safe_int(parts[5]),
                    "turnover_amount": self._normalize_turnover(parts[6]),
                    "turnover_rate": self._safe_float(parts[7]),
                    "change_percent_1m": None,
                    "change_percent_3m": None,
                    "change_percent_4m": None,
                    "updated_at": self._now().isoformat(timespec="seconds"),
                    "snapshot_time": None,
                    "source_name": "同花顺",
                    "precision": "historical_exact",
                    "is_trading": False,
                }
            )
        return records

    def backfill_group_daily(self, group: str) -> dict:
        """Backfill daily documents for *group* from history and return metadata."""
        by_date: dict[str, list[dict]] = {}
        for definition in ETF_GROUPS[group]:
            for record in self._parse_history_rows(definition):
                by_date.setdefault(record["trade_date"], []).append(record)
        for trade_date, records in by_date.items():
            payload = {
                "trade_date": trade_date,
                "updated_at": self._now().isoformat(timespec="seconds"),
                "source_name": "同花顺",
                "source_url": "https://fund.10jqka.com.cn/",
                "precision": "historical_exact",
                "records": sorted(records, key=lambda item: item["code"]),
            }
            self.repository.save_document(f"etf_{group}_daily", trade_date, payload, sort_value=trade_date)
        meta = {
            "group": group,
            "updated_at": self._now().isoformat(timespec="seconds"),
            "trade_day_count": len(by_date),
            "etf_count": len(ETF_GROUPS[group]),
            "start_date": "2026-01-01",
        }
        self.repository.save_document("etf_history_meta", group, meta, sort_value=meta["updated_at"])
        return meta

    def ensure_history_backfilled(self) -> None:
        """Run the history backfill once per group, skipping completed ones."""
        for group in ETF_GROUPS:
            meta = self.repository.get_document("etf_history_meta", group, {})
            if meta.get("start_date") == "2026-01-01" and meta.get("trade_day_count"):
                continue
            self.backfill_group_daily(group)

    def get_group_realtime(self, group: str) -> dict:
        """Return today's realtime doc, the last-success doc, or an empty stub."""
        payload = self.repository.get_document(f"etf_{group}_realtime", self._today(), {})
        if payload:
            return payload
        fallback = self.repository.get_document(f"etf_{group}_latest_success", "default", {})
        if fallback:
            return fallback
        return {
            "trade_date": self._today(),
            "updated_at": None,
            "source_name": "同花顺",
            "source_url": "https://fund.10jqka.com.cn/",
            "precision": "unavailable",
            "group": group,
            "records": [],
        }

    def get_group_daily(self, group: str, trade_date: str | None = None) -> dict:
        """Return the daily doc for *trade_date* (default today) or an empty stub."""
        target_date = trade_date or self._today()
        payload = self.repository.get_document(f"etf_{group}_daily", target_date, {})
        if payload:
            return payload
        return {
            "trade_date": target_date,
            "updated_at": None,
            "source_name": "同花顺",
            "source_url": "https://fund.10jqka.com.cn/",
            "precision": "unavailable",
            "group": group,
            "records": [],
        }


# Module-level singleton used by the API routes and the scheduler.
etf_monitor_service = EtfMonitorService()

View File

@ -0,0 +1,205 @@
import base64
import json
import re
import urllib.error
import urllib.request
from datetime import datetime
from pathlib import Path
from uuid import uuid4
from fastapi import HTTPException, UploadFile
from app.core.config import MAIN_CAPITAL_FLOW_DB_FILE, MAIN_CAPITAL_FLOW_UPLOADS_DIR
from app.repositories.main_capital_flow_repository import MainCapitalFlowRepository
from app.repositories.monitoring_repository import MonitoringRepository
def _extract_json_block(content: str) -> dict:
fenced_match = re.search(r"```json\s*(\{.*?\})\s*```", content, flags=re.DOTALL)
if fenced_match:
return json.loads(fenced_match.group(1))
object_match = re.search(r"(\{.*\})", content, flags=re.DOTALL)
if object_match:
return json.loads(object_match.group(1))
raise ValueError("No JSON object found in model output")
class MainCapitalFlowService:
    """Two-step capital-flow image workflow: recognize, then confirm/save.

    Unlike CapitalImageService, the vision model is mandatory here —
    recognition fails fast with an HTTP error when no model is configured.
    """

    def __init__(self) -> None:
        # Record storage plus the monitoring repository that holds LLM config.
        self.repository = MainCapitalFlowRepository(MAIN_CAPITAL_FLOW_DB_FILE)
        self.monitoring_repository = MonitoringRepository()

    def list_records(self) -> dict:
        """Return all records as ``{"items": [...], "total": n}``."""
        items = [self._serialize_record(record) for record in self.repository.list_records()]
        return {"items": items, "total": len(items)}

    def get_record(self, record_id: str) -> dict:
        """Return one serialized record; raise HTTP 404 when it is missing."""
        record = self.repository.get_record(record_id)
        if record is None:
            raise HTTPException(status_code=404, detail="Record not found")
        return self._serialize_record(record)

    def delete_record(self, record_id: str) -> dict:
        """Delete a record and its stored image file; raise 404 when absent."""
        record = self.repository.delete_record(record_id)
        if record is None:
            raise HTTPException(status_code=404, detail="Record not found")
        image_path = Path(record["image_path"])
        if image_path.exists():
            image_path.unlink(missing_ok=True)
        return {"deleted": True, "id": record_id}

    async def recognize_image(
        self,
        upload_file: UploadFile,
        trade_date: str | None = None,
        subject: str | None = None,
    ) -> dict:
        """Store the upload under a temp name and return model extraction.

        The result is NOT persisted as a record yet — the client reviews it
        and calls create_record with the returned temp_image_name.
        """
        suffix = Path(upload_file.filename or "upload.jpg").suffix or ".jpg"
        temp_image_name = f"temp_{uuid4().hex}{suffix.lower()}"
        stored_path = MAIN_CAPITAL_FLOW_UPLOADS_DIR / temp_image_name
        image_name = upload_file.filename or temp_image_name
        binary = await upload_file.read()
        stored_path.parent.mkdir(parents=True, exist_ok=True)
        stored_path.write_bytes(binary)
        extraction = self._extract_via_model(binary, trade_date=trade_date, subject=subject)
        return {
            "temp_image_name": temp_image_name,
            "image_name": image_name,
            "image_url": self._build_image_url(stored_path),
            "trade_date": extraction.get("trade_date") or trade_date,
            "subject": extraction.get("subject") or subject,
            "snapshot_time": extraction.get("snapshot_time"),
            "institution_amount_yi": extraction.get("institution_amount_yi"),
            "main_force_amount_yi": extraction.get("main_force_amount_yi"),
            "large_household_amount_yi": extraction.get("large_household_amount_yi"),
            "retail_amount_yi": extraction.get("retail_amount_yi"),
            "trend": extraction.get("overall_trend"),
            "summary": extraction.get("intraday_summary"),
            "raw_extraction": extraction,
        }

    def create_record(self, payload: dict) -> dict:
        """Persist a reviewed recognition result as a record.

        Raises 409 when a record for the trade date already exists and 400
        when the temp image from recognize_image is gone.
        NOTE(review): the temp image is kept under its temp_ name rather
        than renamed on confirmation — confirm whether stale temp files are
        cleaned up elsewhere.
        """
        if self.repository.get_by_trade_date(payload["trade_date"]):
            raise HTTPException(status_code=409, detail="该日期记录已存在")
        image_path = MAIN_CAPITAL_FLOW_UPLOADS_DIR / payload["temp_image_name"]
        if not image_path.exists():
            raise HTTPException(status_code=400, detail="识别图片不存在,请重新上传")
        now = datetime.now().isoformat(timespec="seconds")
        record = self.repository.insert_record(
            {
                "id": uuid4().hex,
                "trade_date": payload["trade_date"],
                "subject": payload.get("subject"),
                "snapshot_time": payload.get("snapshot_time"),
                "institution_amount_yi": payload.get("institution_amount_yi"),
                "main_force_amount_yi": payload.get("main_force_amount_yi"),
                "large_household_amount_yi": payload.get("large_household_amount_yi"),
                "retail_amount_yi": payload.get("retail_amount_yi"),
                "trend": payload.get("trend"),
                "summary": payload["summary"],
                "image_name": payload["image_name"],
                "image_path": str(image_path),
                "raw_extraction": payload.get("raw_extraction", {}),
                "created_at": now,
                "updated_at": now,
            }
        )
        return {"item": self._serialize_record(record)}

    def _extract_via_model(
        self,
        image_bytes: bytes,
        trade_date: str | None,
        subject: str | None,
    ) -> dict:
        """Call the vision model and parse its JSON answer.

        Raises HTTP 500 when no API key is configured and HTTP 502 when the
        upstream model request fails.
        """
        llm_config = self._get_llm_config()
        if not llm_config["api_key"]:
            raise HTTPException(status_code=500, detail="未配置视觉模型 API")
        # Image is inlined as a base64 data URL per the OpenAI vision format.
        encoded_image = base64.b64encode(image_bytes).decode("utf-8")
        prompt = """
You are extracting structured data from a Chinese stock capital flow screenshot.
Return only JSON with these keys:
trade_date, subject, snapshot_time, institution_amount_yi, main_force_amount_yi,
large_household_amount_yi, retail_amount_yi, overall_trend, intraday_summary.
Rules:
1. intraday_summary must describe only the intraday capital-flow trend and must not repeat the raw amounts.
2. overall_trend should be a short Chinese phrase like "震荡上行", "午后修复", "冲高回落", "弱势下探".
3. If a field is not clearly visible, set it to null.
4. If trade_date is absent in the image, keep it null.
5. Return JSON only.
"""
        request_payload = {
            "model": llm_config["model"],
            "messages": [
                {
                    "role": "system",
                    "content": "You extract structured JSON from Chinese capital-flow screenshots."
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{encoded_image}",
                            },
                        },
                    ],
                },
            ],
        }
        request = urllib.request.Request(
            url=f"{llm_config['base_url'].rstrip('/')}/chat/completions",
            data=json.dumps(request_payload).encode("utf-8"),
            headers={
                "Authorization": f"Bearer {llm_config['api_key']}",
                "Content-Type": "application/json",
            },
            method="POST",
        )
        try:
            # Long timeout: vision models can be slow on large screenshots.
            with urllib.request.urlopen(request, timeout=180) as response:
                response_payload = json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as exc:
            error_text = exc.read().decode("utf-8", errors="ignore")
            raise HTTPException(status_code=502, detail=f"模型识别失败: {error_text}") from exc
        choices = response_payload.get("choices", [])
        content = choices[0].get("message", {}).get("content", "") if choices else ""
        parsed = _extract_json_block(content)
        # Backfill caller hints only where the model left the field empty.
        if subject and not parsed.get("subject"):
            parsed["subject"] = subject
        if trade_date and not parsed.get("trade_date"):
            parsed["trade_date"] = trade_date
        return parsed

    def _get_llm_config(self) -> dict:
        """Read LLM connection settings from the shared system config."""
        config = self.monitoring_repository.get_system_config()
        return {
            "api_key": config.get("llm_api_key", ""),
            "base_url": config.get("llm_base_url", "https://api.openai.com/v1"),
            "model": config.get("llm_vision_model", "gpt-4.1-mini"),
        }

    def _build_image_url(self, path: Path) -> str:
        """Client-facing URL for a stored image file."""
        return f"/main-capital-flow-images/{path.name}"

    def _serialize_record(self, record: dict) -> dict:
        """Attach a client-facing image URL derived from the stored path."""
        return {
            **record,
            "image_url": self._build_image_url(Path(record["image_path"])),
        }


# Module-level singleton used by the API routes.
main_capital_flow_service = MainCapitalFlowService()

View File

@ -7,6 +7,7 @@ from zoneinfo import ZoneInfo
from app.repositories.monitoring_repository import MonitoringRepository
from app.services.ashare_flow_service import ashare_flow_service
from app.services.eastmoney_sync_service import eastmoney_sync_service
from app.services.etf_monitor_service import etf_monitor_service
from app.services.market_clock import get_market_state
@ -16,7 +17,8 @@ class SyncScheduler:
self.tz = ZoneInfo("Asia/Shanghai")
self._thread: threading.Thread | None = None
self._stop_event = threading.Event()
self._failure_count = 0
self._market_failure_count = 0
self._etf_failure_count = 0
def start(self) -> None:
if self._thread and self._thread.is_alive():
@ -31,37 +33,61 @@ class SyncScheduler:
self._thread.join(timeout=2)
def _run(self) -> None:
history_ready = False
while not self._stop_event.is_set():
now = datetime.now(self.tz)
state = get_market_state(now)
interval_seconds = self._get_wait_seconds(now, state)
if state in {"trading_am", "trading_pm", "finalizing"}:
if not history_ready:
try:
etf_monitor_service.ensure_history_backfilled()
history_ready = True
except Exception:
self._etf_failure_count += 1
try:
eastmoney_sync_service.sync()
ashare_flow_service.sync_index_realtime()
ashare_flow_service.sync_sector_realtime()
self._failure_count = 0
self._market_failure_count = 0
except Exception:
self._failure_count += 1
interval_seconds = max(interval_seconds, min(180, 30 * self._failure_count))
self._market_failure_count += 1
interval_seconds = max(interval_seconds, min(180, 30 * self._market_failure_count))
if self._is_etf_enabled():
try:
etf_monitor_service.sync_group_realtime("broad")
etf_monitor_service.sync_group_realtime("sector")
self._etf_failure_count = 0
except Exception:
self._etf_failure_count += 1
interval_seconds = max(interval_seconds, min(180, 15 * self._etf_failure_count))
else:
self._failure_count = 0
self._market_failure_count = 0
self._etf_failure_count = 0
self._stop_event.wait(interval_seconds)
def _get_wait_seconds(self, now: datetime, state: str) -> int:
config = self.repository.get_system_config()
realtime_interval = max(int(config.get("realtime_collection_interval_seconds", 60)), 15)
etf_interval = max(int(config.get("etf_realtime_interval_seconds", realtime_interval)), 15)
active_interval = min(realtime_interval, etf_interval) if self._is_etf_enabled() else realtime_interval
if state in {"trading_am", "trading_pm", "finalizing"}:
return realtime_interval
return active_interval
if state == "midday_break":
return self._seconds_until(now, time(13, 0))
if state == "pre_open":
return self._seconds_until(now, time(9, 30))
return self._seconds_until_next_day_open(now)
def _is_etf_enabled(self) -> bool:
config = self.repository.get_system_config()
return bool(config.get("etf_enabled", True))
def _seconds_until(self, now: datetime, target_time: time) -> int:
target = datetime.combine(now.date(), target_time, tzinfo=self.tz)
delta = (target - now).total_seconds()