Initial commit

wanghep
2026-03-20 22:59:54 +08:00
commit 68b9e253e2
63 changed files with 8116 additions and 0 deletions

@@ -0,0 +1,733 @@
from __future__ import annotations
import json
import re
from collections import Counter
from datetime import datetime, time, timedelta
from typing import Iterable
from urllib.parse import unquote, urlparse
from zoneinfo import ZoneInfo
import requests
from bs4 import BeautifulSoup
from app.models import (
Account,
ClsNewsDocument,
ClsNewsItem,
ClsNewsSummary,
ClsSectorImpact,
DailyInputAccount,
DailyInputDocument,
DailyInputUpsertPayload,
OpinionArticle,
ReportDocument,
ReportListItem,
)
from app.services.storage import (
fetch_accounts,
fetch_cls_news_document,
fetch_daily_input_document,
fetch_report_document,
fetch_report_list,
save_accounts,
save_cls_news_document,
save_daily_input_document,
save_report_document,
)
SHANGHAI = ZoneInfo("Asia/Shanghai")
CLS_REFRESH_INTERVAL = timedelta(minutes=3)
CLS_TELEGRAPH_URL = "https://m.cls.cn/telegraph"
HTTP_HEADERS = {
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
),
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}
SENTIMENT_BULL = "看多"
SENTIMENT_BEAR = "看空"
SENTIMENT_NEUTRAL = "中性"
ACCOUNTS: list[Account] = [
Account(
id="touzi-mingjian",
name="\u6295\u8d44\u660e\u89c1",
description="\u504f\u4e3b\u9898\u8f6e\u52a8\u4e0e\u4e3b\u7ebf\u5224\u65ad\uff0c\u9002\u5408\u8ddf\u8e2a\u5e02\u573a\u504f\u597d\u53d8\u5316\u3002",
),
Account(
id="aigujun-2020",
name="\u7231\u80a1\u541b2020",
description="\u5173\u6ce8\u60c5\u7eea\u3001\u70ed\u70b9\u6269\u6563\u4e0e\u4ea4\u6613\u7ec6\u8282\u3002",
),
Account(
id="mazhiming-shouping",
name="\u9a6c\u5fd7\u660e\u6536\u8bc4",
description="\u65e5\u5185\u6536\u8bc4\u4e0e\u60c5\u7eea\u53d8\u5316\u603b\u7ed3\u3002",
),
Account(
id="laobai-guandian",
name="\u8001\u767d\u5206\u6790\u5ba4\u89c2\u70b9",
description="\u504f\u7b56\u7565\u62c6\u89e3\u548c\u5173\u952e\u677f\u5757\u8ddf\u8e2a\u3002",
),
]
ACCOUNT_FOCUS = {
"touzi-mingjian": ["AI", "\u7b97\u529b", "\u673a\u5668\u4eba"],
"aigujun-2020": ["CPO", "\u5b58\u50a8\u82af\u7247", "\u65b0\u80fd\u6e90"],
"mazhiming-shouping": ["AI", "\u5238\u5546", "\u6c7d\u8f66"],
"laobai-guandian": ["\u673a\u5668\u4eba", "\u534a\u5bfc\u4f53", "\u65b0\u80fd\u6e90"],
}
SECTOR_KEYWORDS = {
"AI": ["ai", "\u4eba\u5de5\u667a\u80fd", "\u5927\u6a21\u578b", "\u6a21\u578b"],
"\u7b97\u529b": ["\u7b97\u529b", "compute", "server", "gpu"],
"CPO": ["cpo", "\u5149\u6a21\u5757", "\u9ad8\u901f\u4e92\u8054"],
"\u5b58\u50a8\u82af\u7247": ["\u5b58\u50a8", "memory", "dram", "nand"],
"\u534a\u5bfc\u4f53": ["\u534a\u5bfc\u4f53", "chip", "wafer", "\u6676\u5706"],
"\u5238\u5546": ["\u5238\u5546", "broker", "\u8bc1\u5238"],
"\u77f3\u6cb9\u5929\u7136\u6c14": ["\u77f3\u6cb9", "\u5929\u7136\u6c14", "\u6cb9\u6c14", "\u80fd\u6e90\u4ef7\u683c"],
"\u65b0\u80fd\u6e90": ["\u65b0\u80fd\u6e90", "\u9502\u7535", "\u5149\u4f0f", "\u50a8\u80fd"],
"\u519b\u5de5": ["\u519b\u5de5", "\u536b\u661f", "\u822a\u5929"],
"\u673a\u5668\u4eba": ["\u673a\u5668\u4eba", "robot", "\u81ea\u52a8\u5316"],
"\u6c7d\u8f66": ["\u6c7d\u8f66", "\u8f66\u4f01", "\u667a\u9a7e", "\u6574\u8f66"],
"\u533b\u836f": ["\u533b\u836f", "\u521b\u65b0\u836f", "\u533b\u7597"],
}
POSITIVE_KEYWORDS = [
"\u673a\u4f1a",
"\u4fee\u590d",
"\u589e\u5f3a",
"\u4e3b\u7ebf",
"\u589e\u91cf",
"\u53cd\u5f39",
"\u7a81\u7834",
"\u79ef\u6781",
"up",
"bull",
]
NEGATIVE_KEYWORDS = [
"\u98ce\u9669",
"\u627f\u538b",
"\u8c28\u614e",
"\u56de\u8c03",
"\u7f29\u91cf",
"\u89c2\u671b",
"\u5206\u6b67",
"bear",
"down",
]
ARTICLE_TYPE_PATTERNS = [
("\u6536\u8bc4", "\u5e02\u573a\u6536\u8bc4"),
("\u5348", "\u76d8\u4e2d\u89c2\u5bdf"),
("\u7b56\u7565", "\u7b56\u7565\u8ddf\u8e2a"),
("\u590d\u76d8", "\u76d8\u9762\u590d\u76d8"),
("\u884c\u4e1a", "\u884c\u4e1a\u89c2\u5bdf"),
]
CLS_NEWS_TEMPLATES = [
{
"title": "\u8d22\u8054\u793e\u76d8\u524d\u7cbe\u9009\uff1a\u7b97\u529b\u94fe\u56de\u6696\uff0c\u8d44\u91d1\u91cd\u65b0\u805a\u7126\u9ad8\u666f\u6c14\u65b9\u5411",
"summary": "\u9694\u591c\u5e02\u573a\u98ce\u9669\u504f\u597d\u56de\u5347\uff0c\u7b97\u529b\u4e0e\u670d\u52a1\u5668\u94fe\u6761\u83b7\u8d44\u91d1\u91cd\u65b0\u914d\u7f6e\uff0c\u60c5\u7eea\u4fee\u590d\u5148\u4e8e\u6210\u4ea4\u5168\u9762\u653e\u5927\u3002",
"sectors": ["\u7b97\u529b", "AI"],
"sentiment": SENTIMENT_BULL,
"reference_url": "https://www.cls.cn/detail/compute-rebound",
},
{
"title": "AI Daily\uff1aCPO \u4e0e\u5b58\u50a8\u82af\u7247\u540c\u6b65\u8d70\u5f3a\uff0c\u666f\u6c14\u5ea6\u7ebf\u7d22\u5ef6\u7eed",
"summary": "\u9ad8\u901f\u4e92\u8054\u4e0e\u5b58\u50a8\u62a5\u4ef7\u9884\u671f\u652f\u6491\u677f\u5757\u8868\u73b0\uff0c\u8d44\u91d1\u66f4\u503e\u5411\u4e8e\u56f4\u7ed5\u786e\u5b9a\u6027\u73af\u8282\u96c6\u4e2d\u3002",
"sectors": ["CPO", "\u5b58\u50a8\u82af\u7247"],
"sentiment": SENTIMENT_BULL,
"reference_url": "https://www.cls.cn/detail/ai-daily-cpo-memory",
},
{
"title": "\u8d22\u8054\u793e\u884c\u4e1a\u89c2\u5bdf\uff1a\u673a\u5668\u4eba\u94fe\u6761\u5206\u5316\uff0c\u8ba2\u5355\u5151\u73b0\u6210\u4e3a\u77ed\u671f\u7126\u70b9",
"summary": "\u673a\u5668\u4eba\u65b9\u5411\u5185\u90e8\u5f00\u59cb\u51fa\u73b0\u5151\u73b0\u4e0e\u6362\u624b\uff0c\u5e02\u573a\u4ece\u6982\u5ff5\u6269\u6563\u8f6c\u5411\u4e1a\u7ee9\u4e0e\u8ba2\u5355\u9a8c\u8bc1\u3002",
"sectors": ["\u673a\u5668\u4eba"],
"sentiment": SENTIMENT_NEUTRAL,
"reference_url": "https://www.cls.cn/detail/robotics-orders",
},
{
"title": "\u8d22\u8054\u793e\u80fd\u6e90\u8ffd\u8e2a\uff1a\u6cb9\u6c14\u677f\u5757\u9ad8\u4f4d\u9707\u8361\uff0c\u8d44\u91d1\u5207\u5411\u9632\u5fa1\u54c1\u79cd",
"summary": "\u539f\u6cb9\u4ef7\u683c\u7ef4\u6301\u9ad8\u4f4d\u540e\uff0c\u6cb9\u6c14\u65b9\u5411\u51fa\u73b0\u9ad8\u4f4d\u9707\u8361\uff0c\u90e8\u5206\u8d44\u91d1\u8f6c\u5411\u533b\u836f\u7b49\u9632\u5b88\u677f\u5757\u3002",
"sectors": ["\u77f3\u6cb9\u5929\u7136\u6c14", "\u533b\u836f"],
"sentiment": SENTIMENT_NEUTRAL,
"reference_url": "https://www.cls.cn/detail/energy-rotation",
},
{
"title": "AI Daily\uff1a\u6c7d\u8f66\u4e0e\u667a\u9a7e\u5ef6\u7eed\u5206\u6b67\uff0c\u4e3b\u7ebf\u4ecd\u9700\u7b49\u5f85\u9500\u91cf\u6570\u636e\u9a8c\u8bc1",
"summary": "\u6574\u8f66\u4e0e\u667a\u9a7e\u65b9\u5411\u5173\u6ce8\u5ea6\u4ecd\u9ad8\uff0c\u4f46\u5e02\u573a\u5bf9\u4f30\u503c\u6269\u5f20\u5df2\u6709\u4fdd\u7559\uff0c\u7b49\u5f85\u9500\u91cf\u548c\u8ba2\u5355\u6570\u636e\u786e\u8ba4\u3002",
"sectors": ["\u6c7d\u8f66"],
"sentiment": SENTIMENT_BEAR,
"reference_url": "https://www.cls.cn/detail/auto-data-watch",
},
{
"title": "\u8d22\u8054\u793e7x24\uff1a\u534a\u5bfc\u4f53\u8bbe\u5907\u65b9\u5411\u8d70\u5f3a\uff0c\u673a\u6784\u79f0\u56fd\u4ea7\u66ff\u4ee3\u8282\u594f\u63d0\u901f",
"summary": "\u6676\u5706\u5236\u9020\u4e0e\u8bbe\u5907\u94fe\u6761\u51fa\u73b0\u5f02\u52a8\uff0c\u5e02\u573a\u56f4\u7ed5\u56fd\u4ea7\u66ff\u4ee3\u548c\u8d44\u672c\u5f00\u652f\u6062\u590d\u91cd\u65b0\u5b9a\u4ef7\u3002",
"sectors": ["\u534a\u5bfc\u4f53"],
"sentiment": SENTIMENT_BULL,
"reference_url": "https://www.cls.cn/detail/semi-equipment-up",
},
{
"title": "\u8d22\u8054\u793e7x24\uff1a\u5238\u5546\u677f\u5757\u5348\u540e\u62c9\u5347\uff0c\u5e02\u573a\u60c5\u7eea\u6709\u6240\u4fee\u590d",
"summary": "\u6307\u6570\u9707\u8361\u8fc7\u7a0b\u4e2d\u5238\u5546\u627f\u62c5\u60c5\u7eea\u4fee\u590d\u529f\u80fd\uff0c\u5e26\u52a8\u90e8\u5206\u9ad8\u5f39\u6027\u65b9\u5411\u56de\u6696\u3002",
"sectors": ["\u5238\u5546"],
"sentiment": SENTIMENT_BULL,
"reference_url": "https://www.cls.cn/detail/broker-rebound",
},
{
"title": "\u8d22\u8054\u793e7x24\uff1a\u521b\u65b0\u836f\u65b9\u5411\u6301\u7eed\u6d3b\u8dc3\uff0c\u8d44\u91d1\u8f6c\u5411\u9632\u5b88\u4e0e\u6210\u957f\u517c\u987e",
"summary": "\u533b\u836f\u677f\u5757\u83b7\u5f97\u589e\u91cf\u8d44\u91d1\u5173\u6ce8\uff0c\u521b\u65b0\u836f\u548c\u5668\u68b0\u7ec6\u5206\u8868\u73b0\u66f4\u5f3a\u3002",
"sectors": ["\u533b\u836f"],
"sentiment": SENTIMENT_NEUTRAL,
"reference_url": "https://www.cls.cn/detail/medical-active",
},
{
"title": "\u8d22\u8054\u793e7x24\uff1a\u65b0\u80fd\u6e90\u94fe\u6761\u5206\u5316\u52a0\u5267\uff0c\u673a\u6784\u63d0\u9192\u5173\u6ce8\u4ea7\u80fd\u51fa\u6e05\u8282\u594f",
"summary": "\u65b0\u80fd\u6e90\u677f\u5757\u5185\u90e8\u8f6e\u52a8\u660e\u663e\uff0c\u8d44\u91d1\u66f4\u504f\u5411\u4f4e\u4f4d\u73af\u8282\u548c\u6210\u672c\u6539\u5584\u65b9\u5411\u3002",
"sectors": ["\u65b0\u80fd\u6e90"],
"sentiment": SENTIMENT_NEUTRAL,
"reference_url": "https://www.cls.cn/detail/new-energy-split",
},
{
"title": "\u8d22\u8054\u793e7x24\uff1a\u519b\u5de5\u677f\u5757\u76d8\u4e2d\u5f02\u52a8\uff0c\u8ba2\u5355\u5151\u73b0\u9884\u671f\u91cd\u65b0\u5347\u6e29",
"summary": "\u519b\u5de5\u94fe\u6761\u76d8\u4e2d\u8d70\u5f3a\uff0c\u5e02\u573a\u5173\u6ce8\u540e\u7eed\u8ba2\u5355\u5151\u73b0\u4e0e\u4f30\u503c\u5207\u6362\u7a7a\u95f4\u3002",
"sectors": ["\u519b\u5de5"],
"sentiment": SENTIMENT_BULL,
"reference_url": "https://www.cls.cn/detail/defense-orders",
},
{
"title": "\u8d22\u8054\u793e7x24\uff1a\u673a\u5668\u4eba\u677f\u5757\u51b2\u9ad8\u56de\u843d\uff0c\u77ed\u7ebf\u535a\u5f08\u60c5\u7eea\u5347\u6e29",
"summary": "\u673a\u5668\u4eba\u65b9\u5411\u9ad8\u4f4d\u9707\u8361\uff0c\u8d44\u91d1\u5728\u9898\u6750\u6269\u6563\u4e0e\u5151\u73b0\u538b\u529b\u4e4b\u95f4\u53cd\u590d\u5207\u6362\u3002",
"sectors": ["\u673a\u5668\u4eba"],
"sentiment": SENTIMENT_NEUTRAL,
"reference_url": "https://www.cls.cn/detail/robotics-intraday",
},
{
"title": "\u8d22\u8054\u793e7x24\uff1a\u5b58\u50a8\u82af\u7247\u62a5\u4ef7\u9884\u671f\u7ee7\u7eed\u4e0a\u4fee\uff0c\u4ea7\u4e1a\u94fe\u666f\u6c14\u5ea6\u53d7\u5173\u6ce8",
"summary": "\u5b58\u50a8\u73af\u8282\u4ef7\u683c\u4fee\u590d\u903b\u8f91\u5ef6\u7eed\uff0c\u5e02\u573a\u91cd\u65b0\u4ea4\u6613\u4f9b\u9700\u6539\u5584\u4e0e\u76c8\u5229\u5f39\u6027\u3002",
"sectors": ["\u5b58\u50a8\u82af\u7247"],
"sentiment": SENTIMENT_BULL,
"reference_url": "https://www.cls.cn/detail/memory-price-up",
},
]
SAMPLE_INPUTS = {
1: {
"touzi-mingjian": ["https://mp.weixin.qq.com/s/semiconductor-capacity-and-chip-cycle"],
"aigujun-2020": ["https://mp.weixin.qq.com/s/storage-chip-price-repair"],
"mazhiming-shouping": ["https://mp.weixin.qq.com/s/market-close-sector-rotation"],
"laobai-guandian": ["https://mp.weixin.qq.com/s/robotics-and-energy-balance"],
},
}
def now_local() -> datetime:
return datetime.now(SHANGHAI)
def iso_timestamp(value: datetime | None = None) -> str:
return (value or now_local()).replace(microsecond=0).isoformat()
def ensure_local_timezone(value: datetime) -> datetime:
if value.tzinfo is None:
return value.replace(tzinfo=SHANGHAI)
return value.astimezone(SHANGHAI)
def normalize_whitespace(value: str) -> str:
return re.sub(r"\s+", " ", value).strip()
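# Scan forward from `marker` and return the first balanced JSON object,
# tracking string and escape state so braces inside string literals are ignored.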
def extract_json_object(script_text: str, marker: str) -> str:
marker_index = script_text.find(marker)
if marker_index < 0:
raise RuntimeError(f"Marker not found: {marker}")
start = script_text.find("{", marker_index)
if start < 0:
raise RuntimeError(f"JSON object start not found for marker: {marker}")
depth = 0
in_string = False
escaped = False
for index in range(start, len(script_text)):
char = script_text[index]
if in_string:
if escaped:
escaped = False
elif char == "\\":
escaped = True
elif char == '"':
in_string = False
continue
if char == '"':
in_string = True
continue
if char == "{":
depth += 1
continue
if char == "}":
depth -= 1
if depth == 0:
return script_text[start : index + 1]
raise RuntimeError(f"JSON object end not found for marker: {marker}")
def parse_telegraph_timestamp(date_str: str, time_str: str) -> str:
normalized_time = time_str if len(time_str.split(":")) == 3 else f"{time_str}:00"
return datetime.fromisoformat(f"{date_str}T{normalized_time}").replace(tzinfo=SHANGHAI).isoformat(timespec="seconds")
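# Derive a title/summary pair from raw telegraph text: prefer a bracketed
# 【...】 headline; otherwise take the first sentence as the title and
# truncate long content to roughly 220 characters for the summary.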
def split_title_and_summary(content: str) -> tuple[str, str]:
cleaned = normalize_whitespace(content)
bracket_match = re.match(r"^[\[({【〖](.+?)[\])}】〗][：: ]*(.*)$", cleaned)
if bracket_match:
title = normalize_whitespace(bracket_match.group(1))
summary = normalize_whitespace(bracket_match.group(2) or cleaned)
return title[:80], summary or title
sentence_parts = re.split(r"[。;;!?]", cleaned, maxsplit=1)
title = sentence_parts[0][:80]
summary = cleaned if len(cleaned) <= 220 else f"{cleaned[:217]}..."
return title, summary
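# Static fallback used when live scraping fails: spread the canned templates
# backwards in time (~95 minutes apart) so the feed still resembles a full day.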
def build_fallback_cls_items(reference_time: datetime) -> list[ClsNewsItem]:
items: list[ClsNewsItem] = []
for index, template in enumerate(CLS_NEWS_TEMPLATES):
published_at = (reference_time - timedelta(minutes=index * 95 + 8)).replace(microsecond=0).isoformat()
items.append(
ClsNewsItem(
id=f"cls-{index + 1}",
title=template["title"],
published_at=published_at,
source="\u8d22\u8054\u793e" if index % 2 == 0 else "\u8d22\u8054\u793e AI Daily",
summary=template["summary"],
reference_url=template["reference_url"],
sectors=template["sectors"],
sentiment=template["sentiment"],
)
)
return sorted(items, key=lambda item: item.published_at, reverse=True)
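# Live scrape of m.cls.cn: the telegraph page is a Next.js app, so items are
# read from the inline `__NEXT_DATA__ = {...}` script payload, filtered to the
# target date, deduplicated by id, and capped at 80 entries.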
def fetch_cls_telegraph_items(reference_time: datetime) -> list[ClsNewsItem]:
session = requests.Session()
session.trust_env = False
response = session.get(CLS_TELEGRAPH_URL, headers=HTTP_HEADERS, timeout=15)
response.raise_for_status()
response.encoding = "utf-8"
soup = BeautifulSoup(response.text, "html.parser")
next_data_script = None
for script in soup.find_all("script"):
script_text = script.string or script.get_text()
if "__NEXT_DATA__ =" in script_text:
next_data_script = script_text
break
if not next_data_script:
raise RuntimeError("Missing __NEXT_DATA__ payload on cls.cn")
next_data = json.loads(extract_json_object(next_data_script, "__NEXT_DATA__ ="))
roll_data = (
next_data.get("props", {})
.get("initialState", {})
.get("roll_data", [])
)
if not isinstance(roll_data, list) or not roll_data:
raise RuntimeError("Missing roll_data in cls.cn payload")
target_date = reference_time.date()
items: list[ClsNewsItem] = []
seen_ids: set[int] = set()
latest_limit = 80
for entry in roll_data:
if len(items) >= latest_limit:
break
item_id = int(entry.get("id") or 0)
if not item_id or item_id in seen_ids:
continue
seen_ids.add(item_id)
timestamp = int(entry.get("modified_time") or entry.get("ctime") or 0)
if not timestamp:
continue
published_dt = datetime.fromtimestamp(timestamp, tz=SHANGHAI)
if published_dt.date() != target_date:
continue
raw_content = normalize_whitespace(
entry.get("content")
or entry.get("brief")
or entry.get("title")
or ""
)
if len(raw_content) < 8:
continue
title = normalize_whitespace(entry.get("title") or "")
if not title:
title, _ = split_title_and_summary(raw_content)
summary = normalize_whitespace(entry.get("brief") or "")
if not summary:
_, summary = split_title_and_summary(raw_content)
source = normalize_whitespace(entry.get("author") or "财联社7x24")
reference_url = normalize_whitespace(entry.get("shareurl") or "")
if not reference_url:
reference_url = f"https://www.cls.cn/detail/{item_id}"
sectors = infer_sectors(f"{title} {summary}", "touzi-mingjian")
sentiment = infer_sentiment(f"{title} {summary}")
items.append(
ClsNewsItem(
id=f"cls-live-{item_id}",
title=title[:120],
published_at=published_dt.isoformat(timespec="seconds"),
source=source,
summary=summary[:500],
reference_url=reference_url,
sectors=sectors,
sentiment=sentiment,
)
)
if not items:
raise RuntimeError("No telegraph items parsed from cls.cn")
return sorted(items, key=lambda item: item.published_at, reverse=True)
def get_accounts() -> list[Account]:
records = fetch_accounts()
return records or ACCOUNTS
def normalize_date(value: str) -> str:
return datetime.fromisoformat(value).date().isoformat()
def blank_daily_input(date_str: str) -> DailyInputDocument:
return DailyInputDocument(
date=date_str,
updated_at=iso_timestamp(),
accounts=[
DailyInputAccount(account_id=account.id, account_name=account.name, links=[])
for account in get_accounts()
],
)
def clean_links(links: Iterable[str]) -> list[str]:
normalized: list[str] = []
seen: set[str] = set()
for raw_link in links:
link = raw_link.strip()
if not link or link in seen:
continue
seen.add(link)
normalized.append(link)
return normalized
def normalize_daily_input(date_str: str, payload: DailyInputUpsertPayload) -> DailyInputDocument:
payload_map = {item.account_id: clean_links(item.links) for item in payload.accounts}
return DailyInputDocument(
date=date_str,
updated_at=iso_timestamp(),
accounts=[
DailyInputAccount(
account_id=account.id,
account_name=account.name,
links=payload_map.get(account.id, []),
)
for account in get_accounts()
],
)
def load_daily_input(date_str: str) -> DailyInputDocument:
payload = fetch_daily_input_document(date_str)
if payload is None:
return blank_daily_input(date_str)
return payload
def save_daily_input(document: DailyInputDocument) -> DailyInputDocument:
return save_daily_input_document(document)
def load_report(date_str: str) -> ReportDocument | None:
return fetch_report_document(date_str)
def save_report(document: ReportDocument) -> ReportDocument:
return save_report_document(document)
def list_reports() -> list[ReportListItem]:
return fetch_report_list()
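# Build a readable title from the URL slug, discarding host boilerplate
# tokens ("s", "mp", "weixin", "qq", "com") and keeping up to three keywords.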
def title_from_link(account_name: str, url: str, index: int) -> str:
text = unquote(urlparse(url).path or url)
tokens = [
token
for token in re.split(r"[\W_]+", text.lower())
if token and token not in {"s", "mp", "weixin", "qq", "com"}
]
meaningful = [token for token in tokens if len(token) > 1]
if meaningful:
topic = " / ".join(token.upper() if len(token) <= 3 else token.capitalize() for token in meaningful[:3])
return f"{account_name}\uff1a{topic} \u89c2\u5bdf"
return f"{account_name}\uff1a\u5e02\u573a\u8ddf\u8e2a\u7b2c {index + 1} \u6761"
def infer_sectors(text: str, account_id: str) -> list[str]:
lowered = text.lower()
sectors = [
sector
for sector, keywords in SECTOR_KEYWORDS.items()
if any(keyword.lower() in lowered for keyword in keywords)
]
if sectors:
return sectors[:3]
return ACCOUNT_FOCUS.get(account_id, ["AI", "算力"])[:2]
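# Simple majority vote of positive vs. negative keyword hits; ties are neutral.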
def infer_sentiment(text: str) -> str:
lowered = text.lower()
positive = sum(keyword.lower() in lowered for keyword in POSITIVE_KEYWORDS)
negative = sum(keyword.lower() in lowered for keyword in NEGATIVE_KEYWORDS)
if positive > negative:
return SENTIMENT_BULL
if negative > positive:
return SENTIMENT_BEAR
return SENTIMENT_NEUTRAL
def infer_article_type(title: str) -> str:
lowered = title.lower()
for keyword, article_type in ARTICLE_TYPE_PATTERNS:
if keyword.lower() in lowered:
return article_type
return "\u4e3b\u9898\u89c2\u70b9"
def build_article_summary(title: str, sectors: list[str], sentiment: str) -> str:
sector_text = "\u3001".join(sectors[:2]) if sectors else "\u6838\u5fc3\u4e3b\u7ebf"
sentiment_text = {
SENTIMENT_BULL: "\u504f\u79ef\u6781\u7684\u8282\u594f\u5224\u65ad",
SENTIMENT_BEAR: "\u660e\u663e\u504f\u8c28\u614e\u7684\u98ce\u9669\u63d0\u9192",
SENTIMENT_NEUTRAL: "\u66f4\u5f3a\u8c03\u7ed3\u6784\u5206\u5316\u4e0e\u7b49\u5f85\u786e\u8ba4",
}[sentiment]
return f"{title} \u56f4\u7ed5 {sector_text} \u5c55\u5f00\uff0c\u7ed9\u51fa\u7684\u7ed3\u8bba\u662f{sentiment_text}\uff0c\u9002\u5408\u4f5c\u4e3a\u5f53\u65e5\u76d8\u9762\u8ddf\u8e2a\u4e0e\u590d\u76d8\u53c2\u8003\u3002"
def generate_report(date_str: str, input_document: DailyInputDocument) -> ReportDocument:
base_date = datetime.fromisoformat(date_str)
articles: list[OpinionArticle] = []
for account_index, account in enumerate(input_document.accounts):
for link_index, url in enumerate(account.links):
title = title_from_link(account.account_name, url, link_index)
sectors = infer_sectors(f"{title} {url}", account.account_id)
sentiment = infer_sentiment(f"{title} {url}")
published_at = (
base_date.replace(hour=9 + ((account_index + link_index) % 8), minute=(link_index * 12) % 60)
.replace(tzinfo=SHANGHAI)
.isoformat(timespec="seconds")
)
articles.append(
OpinionArticle(
id=f"{date_str}-{account.account_id}-{link_index}",
account_id=account.account_id,
account_name=account.account_name,
title=title,
published_at=published_at,
summary=build_article_summary(title, sectors, sentiment),
source_url=url,
sectors=sectors,
sentiment=sentiment,
article_type=infer_article_type(title),
)
)
if not articles:
return ReportDocument(
date=date_str,
generated_at=iso_timestamp(),
summary="\u5f53\u65e5\u5c1a\u672a\u5f55\u5165\u6587\u7ae0\u94fe\u63a5\uff0c\u7cfb\u7edf\u5df2\u4fdd\u7559\u65e5\u62a5\u7ed3\u6784\uff0c\u7b49\u5f85\u8865\u5145\u516c\u4f17\u53f7\u6587\u7ae0\u540e\u518d\u751f\u6210\u5b8c\u6574\u7ed3\u8bba\u3002",
focus_sectors=[],
article_count=0,
account_count=0,
articles=[],
)
sector_counter = Counter(sector for article in articles for sector in article.sectors)
focus_sectors = [sector for sector, _count in sector_counter.most_common(4)]
sentiment_counter = Counter(article.sentiment for article in articles)
if sentiment_counter[SENTIMENT_BULL] > sentiment_counter[SENTIMENT_BEAR]:
tone = "\u6574\u4f53\u504f\u79ef\u6781\uff0c\u4e3b\u7ebf\u8ba8\u8bba\u96c6\u4e2d\u5ea6\u8f83\u9ad8"
elif sentiment_counter[SENTIMENT_BEAR] > sentiment_counter[SENTIMENT_BULL]:
tone = "\u6574\u4f53\u504f\u8c28\u614e\uff0c\u98ce\u9669\u63a7\u5236\u4ecd\u662f\u4e3b\u53d9\u4e8b"
else:
tone = "\u591a\u7a7a\u5206\u6b67\u5e76\u5b58\uff0c\u5e02\u573a\u66f4\u770b\u91cd\u9a8c\u8bc1\u4e0e\u8282\u594f"
active_accounts = len([account for account in input_document.accounts if account.links])
sector_text = "\u3001".join(focus_sectors) if focus_sectors else "\u6682\u65e0\u805a\u7126\u677f\u5757"
summary = (
f"{date_str} \u5171\u6574\u7406 {len(articles)} \u7bc7\u516c\u4f17\u53f7\u89c2\u70b9\uff0c\u8986\u76d6 {active_accounts} \u4e2a\u8d26\u6237\u3002"
f"{tone}\uff0c\u8ba8\u8bba\u91cd\u70b9\u843d\u5728 {sector_text}\u3002"
)
return ReportDocument(
date=date_str,
generated_at=iso_timestamp(),
summary=summary,
focus_sectors=focus_sectors,
article_count=len(articles),
account_count=active_accounts,
articles=sorted(articles, key=lambda item: item.published_at, reverse=True),
)
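# Build the day's news snapshot: attempt the live scrape first and fall back
# to the static templates on any failure (or when live fetching is disabled).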
def build_cls_news_document(
reference_time: datetime | None = None,
*,
allow_live_fetch: bool = True,
) -> ClsNewsDocument:
current = reference_time or now_local()
try:
if allow_live_fetch:
items = fetch_cls_telegraph_items(current)
else:
raise RuntimeError("Live fetch disabled for non-current date")
except Exception:
items = build_fallback_cls_items(current)
sector_counter = Counter(sector for item in items for sector in item.sectors)
watch_list = [sector for sector, _count in sector_counter.most_common(5)]
overview = (
"\u8d44\u8baf\u5217\u8868\u5c55\u793a\u6240\u9009\u65e5\u671f\u5185\u7684\u8d22\u8054\u793e 7x24 \u8d44\u8baf\uff0c"
"\u5f53\u65e5\u6570\u636e\u6765\u81ea cls.cn \u5b9e\u65f6\u6293\u53d6\uff0c\u6bcf 3 \u5206\u949f\u66f4\u65b0\u4e00\u6b21\u3002"
)
hot_topics = (
"\u70ed\u70b9\u6982\u89c8\u53ea\u4fdd\u7559\u5bf9\u677f\u5757\u5b58\u5728\u660e\u663e\u5f71\u54cd\u7684\u65b9\u5411\uff0c"
f"\u5f53\u524d\u4e3b\u8981\u96c6\u4e2d\u5728 {'\u3001'.join(watch_list[:3])}\u3002"
)
sector_impacts: list[ClsSectorImpact] = []
seen_sectors: set[str] = set()
for sector in watch_list[:4]:
if sector in seen_sectors:
continue
seen_sectors.add(sector)
related_items = [item for item in items if sector in item.sectors]
if not related_items:
continue
sentiment_counter = Counter(item.sentiment for item in related_items)
if sentiment_counter[SENTIMENT_BULL] > sentiment_counter[SENTIMENT_BEAR]:
sentiment = SENTIMENT_BULL
reason = f"{sector} \u65b9\u5411\u51fa\u73b0\u50ac\u5316\u6216\u666f\u6c14\u5f3a\u5316\uff0c\u77ed\u7ebf\u504f\u6b63\u5411\u5f71\u54cd\u3002"
elif sentiment_counter[SENTIMENT_BEAR] > sentiment_counter[SENTIMENT_BULL]:
sentiment = SENTIMENT_BEAR
reason = f"{sector} \u65b9\u5411\u51fa\u73b0\u5151\u73b0\u6216\u5206\u6b67\uff0c\u77ed\u7ebf\u504f\u8d1f\u5411\u5f71\u54cd\u3002"
else:
sentiment = SENTIMENT_NEUTRAL
reason = f"{sector} \u65b9\u5411\u6709\u8ba8\u8bba\u4f46\u4ecd\u9700\u9a8c\u8bc1\uff0c\u77ed\u7ebf\u4ee5\u4e2d\u6027\u89c2\u5bdf\u4e3a\u4e3b\u3002"
sector_impacts.append(
ClsSectorImpact(
sector=sector,
sentiment=sentiment,
reason=reason,
related_titles=list(dict.fromkeys(item.title for item in related_items[:2])),
)
)
return ClsNewsDocument(
date=current.date().isoformat(),
updated_at=iso_timestamp(current),
window_label="\u5f53\u5929\u8d44\u8baf",
summary=ClsNewsSummary(
overview=overview,
hot_topics=hot_topics,
watch_list=watch_list,
),
sector_impacts=sector_impacts,
items=items,
)
def load_cls_news(date_str: str) -> ClsNewsDocument | None:
return fetch_cls_news_document(date_str)
def build_reference_time(date_str: str) -> datetime:
date_value = datetime.fromisoformat(date_str).date()
if date_value == now_local().date():
return now_local()
return datetime.combine(date_value, time(hour=15, minute=0), tzinfo=SHANGHAI)
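# Rebuild and persist the snapshot; if rebuilding fails but a stored copy
# exists, return the stale copy instead of raising.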
def refresh_cls_news(date_str: str | None = None) -> ClsNewsDocument:
normalized_date = normalize_date(date_str or now_local().date().isoformat())
existing = load_cls_news(normalized_date)
reference_time = build_reference_time(normalized_date)
allow_live_fetch = normalized_date == now_local().date().isoformat()
try:
document = build_cls_news_document(reference_time, allow_live_fetch=allow_live_fetch)
except Exception:
if existing is not None:
return existing
raise
return save_cls_news_document(document)
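# Read-through cache: historical dates are served as stored; today's snapshot
# is refreshed once it is older than CLS_REFRESH_INTERVAL.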
def get_cls_news(date_str: str | None = None) -> ClsNewsDocument:
normalized_date = normalize_date(date_str or now_local().date().isoformat())
document = load_cls_news(normalized_date)
if document is None:
return refresh_cls_news(normalized_date)
if normalized_date != now_local().date().isoformat():
return document
updated_at = ensure_local_timezone(datetime.fromisoformat(document.updated_at))
if now_local() - updated_at >= CLS_REFRESH_INTERVAL:
return refresh_cls_news(normalized_date)
return document
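# Idempotent demo seeding: skips dates that already have both an input
# document and a report, then backfills today's news snapshot if missing.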
def seed_demo_content() -> None:
save_accounts(ACCOUNTS)
today = now_local().date()
for offset, account_links in SAMPLE_INPUTS.items():
date_str = (today - timedelta(days=offset)).isoformat()
if fetch_daily_input_document(date_str) is not None and fetch_report_document(date_str) is not None:
continue
payload = DailyInputUpsertPayload(
accounts=[
{"account_id": account.id, "links": account_links.get(account.id, [])}
for account in ACCOUNTS
]
)
input_document = normalize_daily_input(date_str, payload)
save_daily_input_document(input_document)
save_report_document(generate_report(date_str, input_document))
today_str = today.isoformat()
if fetch_cls_news_document(today_str) is None:
save_cls_news_document(build_cls_news_document())

@@ -0,0 +1,480 @@
from __future__ import annotations
import hashlib
import json
from contextlib import contextmanager
from datetime import date, datetime
from pathlib import Path
from typing import Any, Iterator
from sqlalchemy import JSON, Date, DateTime, ForeignKey, Integer, String, Text, UniqueConstraint, create_engine, delete, select
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship, sessionmaker
from app.models import (
Account,
ClsNewsDocument,
ClsNewsItem,
ClsNewsSummary,
ClsSectorImpact,
DailyInputAccount,
DailyInputDocument,
OpinionArticle,
ReportDocument,
ReportListItem,
)
PROJECT_ROOT = Path(__file__).resolve().parents[3]
CONFIG_DIR = PROJECT_ROOT / "backend" / "config"
DATABASE_CONFIG_PATH = CONFIG_DIR / "database.json"
class Base(DeclarativeBase):
pass
class AccountRecord(Base):
__tablename__ = "accounts"
id: Mapped[str] = mapped_column(String(64), primary_key=True)
name: Mapped[str] = mapped_column(String(128), nullable=False)
description: Mapped[str] = mapped_column(String(255), nullable=False)
class DailyInputRecord(Base):
__tablename__ = "daily_inputs"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
date: Mapped[Any] = mapped_column(Date, nullable=False, unique=True, index=True)
updated_at: Mapped[Any] = mapped_column(DateTime(timezone=True), nullable=False)
links: Mapped[list["DailyInputLinkRecord"]] = relationship(
back_populates="daily_input",
cascade="all, delete-orphan",
order_by="DailyInputLinkRecord.sort_order",
)
class DailyInputLinkRecord(Base):
__tablename__ = "daily_input_links"
__table_args__ = (
UniqueConstraint("daily_input_id", "account_id", "url_hash", name="uq_daily_input_account_url"),
)
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
daily_input_id: Mapped[int] = mapped_column(ForeignKey("daily_inputs.id", ondelete="CASCADE"), nullable=False)
account_id: Mapped[str] = mapped_column(ForeignKey("accounts.id"), nullable=False, index=True)
url: Mapped[str] = mapped_column(String(1024), nullable=False)
url_hash: Mapped[str] = mapped_column(String(64), nullable=False)
sort_order: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
daily_input: Mapped[DailyInputRecord] = relationship(back_populates="links")
class ReportRecord(Base):
__tablename__ = "reports"
date: Mapped[Any] = mapped_column(Date, primary_key=True)
generated_at: Mapped[Any] = mapped_column(DateTime(timezone=True), nullable=False)
summary: Mapped[str] = mapped_column(Text, nullable=False)
focus_sectors: Mapped[list[str]] = mapped_column(JSON, nullable=False, default=list)
article_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
account_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
articles: Mapped[list["ReportArticleRecord"]] = relationship(
back_populates="report",
cascade="all, delete-orphan",
order_by="ReportArticleRecord.sort_order",
)
class ReportArticleRecord(Base):
__tablename__ = "report_articles"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
report_date: Mapped[Any] = mapped_column(ForeignKey("reports.date", ondelete="CASCADE"), nullable=False, index=True)
sort_order: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
article_id: Mapped[str] = mapped_column(String(128), nullable=False)
account_id: Mapped[str] = mapped_column(String(64), nullable=False)
account_name: Mapped[str] = mapped_column(String(128), nullable=False)
title: Mapped[str] = mapped_column(String(255), nullable=False)
published_at: Mapped[str] = mapped_column(String(64), nullable=False)
summary: Mapped[str] = mapped_column(Text, nullable=False)
source_url: Mapped[str] = mapped_column(Text, nullable=False)
sectors: Mapped[list[str]] = mapped_column(JSON, nullable=False, default=list)
sentiment: Mapped[str] = mapped_column(String(16), nullable=False)
article_type: Mapped[str] = mapped_column(String(64), nullable=False)
report: Mapped[ReportRecord] = relationship(back_populates="articles")
class ClsNewsSnapshotRecord(Base):
__tablename__ = "cls_news_snapshots"
date: Mapped[Any] = mapped_column(Date, primary_key=True)
updated_at: Mapped[Any] = mapped_column(DateTime(timezone=True), nullable=False)
window_label: Mapped[str] = mapped_column(String(64), nullable=False)
overview: Mapped[str] = mapped_column(Text, nullable=False)
hot_topics: Mapped[str] = mapped_column(Text, nullable=False)
watch_list: Mapped[list[str]] = mapped_column(JSON, nullable=False, default=list)
sector_impacts: Mapped[list["ClsSectorImpactRecord"]] = relationship(
back_populates="snapshot",
cascade="all, delete-orphan",
order_by="ClsSectorImpactRecord.sort_order",
)
items: Mapped[list["ClsNewsItemRecord"]] = relationship(
back_populates="snapshot",
cascade="all, delete-orphan",
order_by="ClsNewsItemRecord.sort_order",
)
class ClsSectorImpactRecord(Base):
__tablename__ = "cls_sector_impacts"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
snapshot_date: Mapped[Any] = mapped_column(ForeignKey("cls_news_snapshots.date", ondelete="CASCADE"), nullable=False, index=True)
sort_order: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
sector: Mapped[str] = mapped_column(String(64), nullable=False)
sentiment: Mapped[str] = mapped_column(String(16), nullable=False)
reason: Mapped[str] = mapped_column(Text, nullable=False)
related_titles: Mapped[list[str]] = mapped_column(JSON, nullable=False, default=list)
snapshot: Mapped[ClsNewsSnapshotRecord] = relationship(back_populates="sector_impacts")
class ClsNewsItemRecord(Base):
__tablename__ = "cls_news_items"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
snapshot_date: Mapped[Any] = mapped_column(ForeignKey("cls_news_snapshots.date", ondelete="CASCADE"), nullable=False, index=True)
sort_order: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
item_id: Mapped[str] = mapped_column(String(128), nullable=False)
title: Mapped[str] = mapped_column(String(255), nullable=False)
published_at: Mapped[str] = mapped_column(String(64), nullable=False)
source: Mapped[str] = mapped_column(String(128), nullable=False)
summary: Mapped[str] = mapped_column(Text, nullable=False)
reference_url: Mapped[str] = mapped_column(Text, nullable=False)
sectors: Mapped[list[str]] = mapped_column(JSON, nullable=False, default=list)
sentiment: Mapped[str] = mapped_column(String(16), nullable=False)
snapshot: Mapped[ClsNewsSnapshotRecord] = relationship(back_populates="items")
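# Load and validate backend/config/database.json, rejecting missing fields
# and untouched "CHANGE_ME" placeholders before a connection is attempted.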
def load_database_config() -> dict[str, Any]:
if not DATABASE_CONFIG_PATH.exists():
raise RuntimeError(f"Database config not found: {DATABASE_CONFIG_PATH}")
config = json.loads(DATABASE_CONFIG_PATH.read_text(encoding="utf-8"))
required_fields = ("host", "port", "username", "password", "database")
missing = [field for field in required_fields if not config.get(field)]
if missing:
raise RuntimeError(f"Database config is incomplete: {', '.join(missing)}")
if any(str(config.get(field)).strip() == "CHANGE_ME" for field in required_fields):
raise RuntimeError(f"Database config still contains placeholders: {DATABASE_CONFIG_PATH}")
config.setdefault("charset", "utf8mb4")
config.setdefault("echo", False)
return config
_ENGINE = None
_SESSION_FACTORY: sessionmaker[Session] | None = None
def parse_date(value: str) -> date:
return date.fromisoformat(value)
def parse_datetime(value: str) -> datetime:
return datetime.fromisoformat(value)
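# SHA-256 of the URL; the uniqueness constraint indexes this short fixed-width
# hash rather than the 1024-character URL column itself.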
def hash_url(value: str) -> str:
return hashlib.sha256(value.encode("utf-8")).hexdigest()
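# Lazily create a single MySQL engine (pymysql driver) from the JSON config.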
def get_engine():
global _ENGINE
if _ENGINE is None:
config = load_database_config()
_ENGINE = create_engine(
(
f"mysql+pymysql://{config['username']}:{config['password']}"
f"@{config['host']}:{config['port']}/{config['database']}?charset={config['charset']}"
),
echo=bool(config.get("echo", False)),
future=True,
)
return _ENGINE
def get_session_factory() -> sessionmaker[Session]:
global _SESSION_FACTORY
if _SESSION_FACTORY is None:
_SESSION_FACTORY = sessionmaker(bind=get_engine(), autoflush=False, autocommit=False, future=True)
return _SESSION_FACTORY
def init_database() -> None:
Base.metadata.create_all(get_engine())
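# Transactional session scope: commit on success, roll back on any exception,
# always close the session.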
@contextmanager
def session_scope() -> Iterator[Session]:
session = get_session_factory()()
try:
yield session
session.commit()
except Exception:
session.rollback()
raise
finally:
session.close()
def save_accounts(accounts: list[Account]) -> None:
with session_scope() as session:
for account in accounts:
existing = session.get(AccountRecord, account.id)
if existing is None:
session.add(AccountRecord(id=account.id, name=account.name, description=account.description))
continue
existing.name = account.name
existing.description = account.description
def fetch_accounts() -> list[Account]:
with session_scope() as session:
records = session.scalars(select(AccountRecord).order_by(AccountRecord.id)).all()
return [Account(id=record.id, name=record.name, description=record.description) for record in records]
def fetch_daily_input_document(date_str: str) -> DailyInputDocument | None:
with session_scope() as session:
record = session.scalar(select(DailyInputRecord).where(DailyInputRecord.date == parse_date(date_str)))
if record is None:
return None
account_records = session.scalars(select(AccountRecord).order_by(AccountRecord.id)).all()
links_by_account: dict[str, list[str]] = {}
for link in record.links:
links_by_account.setdefault(link.account_id, []).append(link.url)
return DailyInputDocument(
date=str(record.date),
updated_at=record.updated_at.isoformat(timespec="seconds"),
accounts=[
DailyInputAccount(
account_id=account.id,
account_name=account.name,
links=links_by_account.get(account.id, []),
)
for account in account_records
],
)
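# Replace-all upsert: keep one row per date, clear the existing links, and
# rewrite them in submission order (sort_order preserves ordering).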
def save_daily_input_document(document: DailyInputDocument) -> DailyInputDocument:
with session_scope() as session:
record = session.scalar(select(DailyInputRecord).where(DailyInputRecord.date == parse_date(document.date)))
if record is None:
record = DailyInputRecord(date=parse_date(document.date), updated_at=parse_datetime(document.updated_at))
session.add(record)
session.flush()
else:
record.updated_at = parse_datetime(document.updated_at)
record.links.clear()
session.flush()
sort_order = 0
for account in document.accounts:
for url in account.links:
record.links.append(
DailyInputLinkRecord(
account_id=account.account_id,
url=url,
url_hash=hash_url(url),
sort_order=sort_order,
)
)
sort_order += 1
return document
def fetch_report_document(date_str: str) -> ReportDocument | None:
with session_scope() as session:
record = session.get(ReportRecord, parse_date(date_str))
if record is None:
return None
return ReportDocument(
date=str(record.date),
generated_at=record.generated_at.isoformat(timespec="seconds"),
summary=record.summary,
focus_sectors=list(record.focus_sectors or []),
article_count=record.article_count,
account_count=record.account_count,
articles=[
OpinionArticle(
id=article.article_id,
account_id=article.account_id,
account_name=article.account_name,
title=article.title,
published_at=article.published_at,
summary=article.summary,
source_url=article.source_url,
sectors=list(article.sectors or []),
sentiment=article.sentiment,
article_type=article.article_type,
)
for article in record.articles
],
)
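# Same replace-all pattern as daily inputs: update the report header in place,
# then rewrite the article rows in order.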
def save_report_document(document: ReportDocument) -> ReportDocument:
with session_scope() as session:
record = session.get(ReportRecord, parse_date(document.date))
if record is None:
record = ReportRecord(
date=parse_date(document.date),
generated_at=parse_datetime(document.generated_at),
summary=document.summary,
focus_sectors=document.focus_sectors,
article_count=document.article_count,
account_count=document.account_count,
)
session.add(record)
else:
record.generated_at = parse_datetime(document.generated_at)
record.summary = document.summary
record.focus_sectors = document.focus_sectors
record.article_count = document.article_count
record.account_count = document.account_count
record.articles.clear()
session.flush()
for index, article in enumerate(document.articles):
record.articles.append(
ReportArticleRecord(
sort_order=index,
article_id=article.id,
account_id=article.account_id,
account_name=article.account_name,
title=article.title,
published_at=article.published_at,
summary=article.summary,
source_url=article.source_url,
sectors=article.sectors,
sentiment=article.sentiment,
article_type=article.article_type,
)
)
return document
def fetch_report_list() -> list[ReportListItem]:
with session_scope() as session:
records = session.scalars(select(ReportRecord).order_by(ReportRecord.date.desc())).all()
return [
ReportListItem(
date=str(record.date),
generated_at=record.generated_at.isoformat(timespec="seconds"),
summary=record.summary,
article_count=record.article_count,
focus_sectors=list(record.focus_sectors or []),
)
for record in records
]
def fetch_cls_news_document(date_str: str) -> ClsNewsDocument | None:
with session_scope() as session:
record = session.get(ClsNewsSnapshotRecord, parse_date(date_str))
if record is None:
return None
return ClsNewsDocument(
date=str(record.date),
updated_at=record.updated_at.isoformat(timespec="seconds"),
window_label=record.window_label,
summary=ClsNewsSummary(
overview=record.overview,
hot_topics=record.hot_topics,
watch_list=list(record.watch_list or []),
),
sector_impacts=[
ClsSectorImpact(
sector=item.sector,
sentiment=item.sentiment,
reason=item.reason,
related_titles=list(item.related_titles or []),
)
for item in record.sector_impacts
],
items=[
ClsNewsItem(
id=item.item_id,
title=item.title,
published_at=item.published_at,
source=item.source,
summary=item.summary,
reference_url=item.reference_url,
sectors=list(item.sectors or []),
sentiment=item.sentiment,
)
for item in record.items
],
)
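# Replace-all write for a snapshot: bulk-delete the child rows first (the
# ORM-level delete() bypasses the relationship collections), then reset the
# collections and re-append in order.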
def save_cls_news_document(document: ClsNewsDocument) -> ClsNewsDocument:
with session_scope() as session:
record = session.get(ClsNewsSnapshotRecord, parse_date(document.date))
if record is None:
record = ClsNewsSnapshotRecord(
date=parse_date(document.date),
updated_at=parse_datetime(document.updated_at),
window_label=document.window_label,
overview=document.summary.overview,
hot_topics=document.summary.hot_topics,
watch_list=document.summary.watch_list,
)
session.add(record)
else:
record.updated_at = parse_datetime(document.updated_at)
record.window_label = document.window_label
record.overview = document.summary.overview
record.hot_topics = document.summary.hot_topics
record.watch_list = document.summary.watch_list
session.execute(
delete(ClsSectorImpactRecord).where(ClsSectorImpactRecord.snapshot_date == record.date)
)
session.execute(
delete(ClsNewsItemRecord).where(ClsNewsItemRecord.snapshot_date == record.date)
)
record.sector_impacts = []
record.items = []
session.flush()
for index, impact in enumerate(document.sector_impacts):
record.sector_impacts.append(
ClsSectorImpactRecord(
sort_order=index,
sector=impact.sector,
sentiment=impact.sentiment,
reason=impact.reason,
related_titles=impact.related_titles,
)
)
for index, item in enumerate(document.items):
record.items.append(
ClsNewsItemRecord(
sort_order=index,
item_id=item.id,
title=item.title,
published_at=item.published_at,
source=item.source,
summary=item.summary,
reference_url=item.reference_url,
sectors=item.sectors,
sentiment=item.sentiment,
)
)
return document