Initial commit

This commit is contained in:
wanghep
2026-03-20 22:59:54 +08:00
commit 68b9e253e2
63 changed files with 8116 additions and 0 deletions

107
backend/app/main.py Normal file
View File

@ -0,0 +1,107 @@
from __future__ import annotations
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi import Query
from fastapi.middleware.cors import CORSMiddleware
from app.models import (
Account,
ClsNewsDocument,
DailyInputDocument,
DailyInputUpsertPayload,
ReportDocument,
ReportListItem,
)
from app.services.domain import (
get_accounts,
get_cls_news,
list_reports,
load_daily_input,
load_report,
normalize_daily_input,
normalize_date,
refresh_cls_news,
save_daily_input,
save_report,
seed_demo_content,
generate_report,
)
from app.services.storage import init_database
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """FastAPI lifespan hook: prepare storage and demo data before serving.

    Everything before `yield` runs once at startup; there is no shutdown
    work after the yield.
    """
    init_database()
    seed_demo_content()
    yield
# Application instance; `lifespan` performs database init and demo seeding.
app = FastAPI(
    title="WeChat Finance Daily",
    version="0.1.0",
    lifespan=lifespan,
)
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# not honored by browsers for credentialed requests (the CORS spec forbids a
# wildcard origin with credentials) -- confirm whether credentials are needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/api/health")
def health() -> dict[str, str]:
return {"status": "ok"}
@app.get("/api/accounts", response_model=list[Account])
def accounts() -> list[Account]:
return get_accounts()
@app.get("/api/daily-inputs/{date_str}", response_model=DailyInputDocument)
def get_daily_inputs(date_str: str) -> DailyInputDocument:
return load_daily_input(normalize_date(date_str))
@app.put("/api/daily-inputs/{date_str}", response_model=DailyInputDocument)
def put_daily_inputs(date_str: str, payload: DailyInputUpsertPayload) -> DailyInputDocument:
document = normalize_daily_input(normalize_date(date_str), payload)
return save_daily_input(document)
@app.post("/api/reports/{date_str}/generate", response_model=ReportDocument)
def generate_daily_report(date_str: str) -> ReportDocument:
normalized_date = normalize_date(date_str)
input_document = load_daily_input(normalized_date)
report = generate_report(normalized_date, input_document)
return save_report(report)
@app.get("/api/reports", response_model=list[ReportListItem])
def get_report_list() -> list[ReportListItem]:
return list_reports()
@app.get("/api/opinions/{date_str}", response_model=ReportDocument)
def get_opinion_report(date_str: str) -> ReportDocument:
normalized_date = normalize_date(date_str)
existing = load_report(normalized_date)
if existing is not None:
return existing
report = generate_report(normalized_date, load_daily_input(normalized_date))
return save_report(report)
@app.get("/api/cls-news", response_model=ClsNewsDocument)
def get_cls_news_payload(date: str | None = Query(default=None)) -> ClsNewsDocument:
return get_cls_news(date)
@app.post("/api/cls-news/refresh", response_model=ClsNewsDocument)
def refresh_cls_news_payload(date: str | None = Query(default=None)) -> ClsNewsDocument:
return refresh_cls_news(date)

98
backend/app/models.py Normal file
View File

@ -0,0 +1,98 @@
from __future__ import annotations
from typing import Literal
from pydantic import BaseModel, Field
# Closed set of sentiment labels: bullish / bearish / neutral (Chinese).
Sentiment = Literal["看多", "看空", "中性"]
class Account(BaseModel):
    """A tracked WeChat public account."""

    id: str
    name: str
    description: str
class DailyInputAccountPayload(BaseModel):
    """Client-submitted article links for one account on one day."""

    account_id: str
    links: list[str] = Field(default_factory=list)
class DailyInputUpsertPayload(BaseModel):
    """Request body replacing a day's links across all accounts."""

    accounts: list[DailyInputAccountPayload] = Field(default_factory=list)
class DailyInputAccount(BaseModel):
    """Stored per-account links, denormalized with the account name."""

    account_id: str
    account_name: str
    links: list[str] = Field(default_factory=list)
class DailyInputDocument(BaseModel):
    """One calendar day's collected article links across all accounts."""

    date: str
    updated_at: str
    accounts: list[DailyInputAccount] = Field(default_factory=list)
class OpinionArticle(BaseModel):
    """A synthesized opinion article derived from one submitted link."""

    id: str
    account_id: str
    account_name: str
    title: str
    published_at: str
    summary: str
    source_url: str
    sectors: list[str] = Field(default_factory=list)
    sentiment: Sentiment
    article_type: str
class ReportDocument(BaseModel):
    """Generated daily report aggregating all opinion articles for a date."""

    date: str
    generated_at: str
    summary: str
    focus_sectors: list[str] = Field(default_factory=list)
    article_count: int
    account_count: int
    articles: list[OpinionArticle] = Field(default_factory=list)
class ReportListItem(BaseModel):
    """Lightweight report summary used by the report-list endpoint."""

    date: str
    generated_at: str
    summary: str
    article_count: int
    focus_sectors: list[str] = Field(default_factory=list)
class ClsNewsItem(BaseModel):
    """One CLS telegraph news item (live-scraped or template fallback)."""

    id: str
    title: str
    published_at: str
    source: str
    summary: str
    reference_url: str
    sectors: list[str] = Field(default_factory=list)
    sentiment: Sentiment
class ClsNewsSummary(BaseModel):
    """Narrative overview of a day's CLS news."""

    overview: str
    hot_topics: str
    watch_list: list[str] = Field(default_factory=list)
class ClsSectorImpact(BaseModel):
    """Aggregated news sentiment and rationale for a single sector."""

    sector: str
    sentiment: Sentiment
    reason: str
    related_titles: list[str] = Field(default_factory=list)
class ClsNewsDocument(BaseModel):
    """Full CLS news payload for one date: items, summary, sector impacts."""

    date: str
    updated_at: str
    window_label: str
    summary: ClsNewsSummary
    sector_impacts: list[ClsSectorImpact] = Field(default_factory=list)
    items: list[ClsNewsItem] = Field(default_factory=list)

View File

@ -0,0 +1,733 @@
from __future__ import annotations
import json
import re
from collections import Counter
from datetime import datetime, time, timedelta
from typing import Iterable
from urllib.parse import unquote, urlparse
from zoneinfo import ZoneInfo
import requests
from bs4 import BeautifulSoup
from app.models import (
Account,
ClsNewsDocument,
ClsNewsItem,
ClsNewsSummary,
ClsSectorImpact,
DailyInputAccount,
DailyInputDocument,
DailyInputUpsertPayload,
OpinionArticle,
ReportDocument,
ReportListItem,
)
from app.services.storage import (
fetch_accounts,
fetch_cls_news_document,
fetch_daily_input_document,
fetch_report_document,
fetch_report_list,
save_accounts,
save_cls_news_document,
save_daily_input_document,
save_report_document,
)
# Local market timezone; every timestamp in this module is Shanghai time.
SHANGHAI = ZoneInfo("Asia/Shanghai")
# Minimum age before today's cached CLS document is re-fetched.
CLS_REFRESH_INTERVAL = timedelta(minutes=3)
# Mobile telegraph page scraped for live 7x24 items.
CLS_TELEGRAPH_URL = "https://m.cls.cn/telegraph"
# Browser-like headers so cls.cn serves the normal page.
HTTP_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}
# Sentiment labels: "bullish" / "bearish" / "neutral" (kept as \u escapes).
SENTIMENT_BULL = "\u770b\u591a"
SENTIMENT_BEAR = "\u770b\u7a7a"
SENTIMENT_NEUTRAL = "\u4e2d\u6027"
# Built-in default set of tracked WeChat accounts (used when storage is empty).
ACCOUNTS: list[Account] = [
    Account(
        id="touzi-mingjian",
        name="\u6295\u8d44\u660e\u89c1",
        description="\u504f\u4e3b\u9898\u8f6e\u52a8\u4e0e\u4e3b\u7ebf\u5224\u65ad\uff0c\u9002\u5408\u8ddf\u8e2a\u5e02\u573a\u504f\u597d\u53d8\u5316\u3002",
    ),
    Account(
        id="aigujun-2020",
        name="\u7231\u80a1\u541b2020",
        description="\u5173\u6ce8\u60c5\u7eea\u3001\u70ed\u70b9\u6269\u6563\u4e0e\u4ea4\u6613\u7ec6\u8282\u3002",
    ),
    Account(
        id="mazhiming-shouping",
        name="\u9a6c\u5fd7\u660e\u6536\u8bc4",
        description="\u65e5\u5185\u6536\u8bc4\u4e0e\u60c5\u7eea\u53d8\u5316\u603b\u7ed3\u3002",
    ),
    Account(
        id="laobai-guandian",
        name="\u8001\u767d\u5206\u6790\u5ba4\u89c2\u70b9",
        description="\u504f\u7b56\u7565\u62c6\u89e3\u548c\u5173\u952e\u677f\u5757\u8ddf\u8e2a\u3002",
    ),
]
# Per-account fallback sector focus, used by infer_sectors when no keyword hits.
ACCOUNT_FOCUS = {
    "touzi-mingjian": ["AI", "\u7b97\u529b", "\u673a\u5668\u4eba"],
    "aigujun-2020": ["CPO", "\u5b58\u50a8\u82af\u7247", "\u65b0\u80fd\u6e90"],
    "mazhiming-shouping": ["AI", "\u5238\u5546", "\u6c7d\u8f66"],
    "laobai-guandian": ["\u673a\u5668\u4eba", "\u534a\u5bfc\u4f53", "\u65b0\u80fd\u6e90"],
}
# Sector -> case-insensitive keyword list; a hit in the text tags the sector.
SECTOR_KEYWORDS = {
    "AI": ["ai", "\u4eba\u5de5\u667a\u80fd", "\u5927\u6a21\u578b", "\u6a21\u578b"],
    "\u7b97\u529b": ["\u7b97\u529b", "compute", "server", "gpu"],
    "CPO": ["cpo", "\u5149\u6a21\u5757", "\u9ad8\u901f\u4e92\u8054"],
    "\u5b58\u50a8\u82af\u7247": ["\u5b58\u50a8", "memory", "dram", "nand"],
    "\u534a\u5bfc\u4f53": ["\u534a\u5bfc\u4f53", "chip", "wafer", "\u6676\u5706"],
    "\u5238\u5546": ["\u5238\u5546", "broker", "\u8bc1\u5238"],
    "\u77f3\u6cb9\u5929\u7136\u6c14": ["\u77f3\u6cb9", "\u5929\u7136\u6c14", "\u6cb9\u6c14", "\u80fd\u6e90\u4ef7\u683c"],
    "\u65b0\u80fd\u6e90": ["\u65b0\u80fd\u6e90", "\u9502\u7535", "\u5149\u4f0f", "\u50a8\u80fd"],
    "\u519b\u5de5": ["\u519b\u5de5", "\u536b\u661f", "\u822a\u5929"],
    "\u673a\u5668\u4eba": ["\u673a\u5668\u4eba", "robot", "\u81ea\u52a8\u5316"],
    "\u6c7d\u8f66": ["\u6c7d\u8f66", "\u8f66\u4f01", "\u667a\u9a7e", "\u6574\u8f66"],
    "\u533b\u836f": ["\u533b\u836f", "\u521b\u65b0\u836f", "\u533b\u7597"],
}
# Bullish cue words counted by infer_sentiment.
POSITIVE_KEYWORDS = [
    "\u673a\u4f1a",
    "\u4fee\u590d",
    "\u589e\u5f3a",
    "\u4e3b\u7ebf",
    "\u589e\u91cf",
    "\u53cd\u5f39",
    "\u7a81\u7834",
    "\u79ef\u6781",
    "up",
    "bull",
]
# Bearish cue words counted by infer_sentiment.
NEGATIVE_KEYWORDS = [
    "\u98ce\u9669",
    "\u627f\u538b",
    "\u8c28\u614e",
    "\u56de\u8c03",
    "\u7f29\u91cf",
    "\u89c2\u671b",
    "\u5206\u6b67",
    "bear",
    "down",
]
# (title keyword, article type) pairs checked in order by infer_article_type.
ARTICLE_TYPE_PATTERNS = [
    ("\u6536\u8bc4", "\u5e02\u573a\u6536\u8bc4"),
    ("\u5348", "\u76d8\u4e2d\u89c2\u5bdf"),
    ("\u7b56\u7565", "\u7b56\u7565\u8ddf\u8e2a"),
    ("\u590d\u76d8", "\u76d8\u9762\u590d\u76d8"),
    ("\u884c\u4e1a", "\u884c\u4e1a\u89c2\u5bdf"),
]
# Canned news templates used by build_fallback_cls_items when live scraping
# of cls.cn fails or is disabled for a non-current date.
CLS_NEWS_TEMPLATES = [
    {
        "title": "\u8d22\u8054\u793e\u76d8\u524d\u7cbe\u9009\uff1a\u7b97\u529b\u94fe\u56de\u6696\uff0c\u8d44\u91d1\u91cd\u65b0\u805a\u7126\u9ad8\u666f\u6c14\u65b9\u5411",
        "summary": "\u9694\u591c\u5e02\u573a\u98ce\u9669\u504f\u597d\u56de\u5347\uff0c\u7b97\u529b\u4e0e\u670d\u52a1\u5668\u94fe\u6761\u83b7\u8d44\u91d1\u91cd\u65b0\u914d\u7f6e\uff0c\u60c5\u7eea\u4fee\u590d\u5148\u4e8e\u6210\u4ea4\u5168\u9762\u653e\u5927\u3002",
        "sectors": ["\u7b97\u529b", "AI"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/compute-rebound",
    },
    {
        "title": "AI Daily\uff1aCPO \u4e0e\u5b58\u50a8\u82af\u7247\u540c\u6b65\u8d70\u5f3a\uff0c\u666f\u6c14\u5ea6\u7ebf\u7d22\u5ef6\u7eed",
        "summary": "\u9ad8\u901f\u4e92\u8054\u4e0e\u5b58\u50a8\u62a5\u4ef7\u9884\u671f\u652f\u6491\u677f\u5757\u8868\u73b0\uff0c\u8d44\u91d1\u66f4\u503e\u5411\u4e8e\u56f4\u7ed5\u786e\u5b9a\u6027\u73af\u8282\u96c6\u4e2d\u3002",
        "sectors": ["CPO", "\u5b58\u50a8\u82af\u7247"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/ai-daily-cpo-memory",
    },
    {
        "title": "\u8d22\u8054\u793e\u884c\u4e1a\u89c2\u5bdf\uff1a\u673a\u5668\u4eba\u94fe\u6761\u5206\u5316\uff0c\u8ba2\u5355\u5151\u73b0\u6210\u4e3a\u77ed\u671f\u7126\u70b9",
        "summary": "\u673a\u5668\u4eba\u65b9\u5411\u5185\u90e8\u5f00\u59cb\u51fa\u73b0\u5151\u73b0\u4e0e\u6362\u624b\uff0c\u5e02\u573a\u4ece\u6982\u5ff5\u6269\u6563\u8f6c\u5411\u4e1a\u7ee9\u4e0e\u8ba2\u5355\u9a8c\u8bc1\u3002",
        "sectors": ["\u673a\u5668\u4eba"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/robotics-orders",
    },
    {
        "title": "\u8d22\u8054\u793e\u80fd\u6e90\u8ffd\u8e2a\uff1a\u6cb9\u6c14\u677f\u5757\u9ad8\u4f4d\u9707\u8361\uff0c\u8d44\u91d1\u5207\u5411\u9632\u5fa1\u54c1\u79cd",
        "summary": "\u539f\u6cb9\u4ef7\u683c\u7ef4\u6301\u9ad8\u4f4d\u540e\uff0c\u6cb9\u6c14\u65b9\u5411\u51fa\u73b0\u9ad8\u4f4d\u9707\u8361\uff0c\u90e8\u5206\u8d44\u91d1\u8f6c\u5411\u533b\u836f\u7b49\u9632\u5b88\u677f\u5757\u3002",
        "sectors": ["\u77f3\u6cb9\u5929\u7136\u6c14", "\u533b\u836f"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/energy-rotation",
    },
    {
        "title": "AI Daily\uff1a\u6c7d\u8f66\u4e0e\u667a\u9a7e\u5ef6\u7eed\u5206\u6b67\uff0c\u4e3b\u7ebf\u4ecd\u9700\u7b49\u5f85\u9500\u91cf\u6570\u636e\u9a8c\u8bc1",
        "summary": "\u6574\u8f66\u4e0e\u667a\u9a7e\u65b9\u5411\u5173\u6ce8\u5ea6\u4ecd\u9ad8\uff0c\u4f46\u5e02\u573a\u5bf9\u4f30\u503c\u6269\u5f20\u5df2\u6709\u4fdd\u7559\uff0c\u7b49\u5f85\u9500\u91cf\u548c\u8ba2\u5355\u6570\u636e\u786e\u8ba4\u3002",
        "sectors": ["\u6c7d\u8f66"],
        "sentiment": SENTIMENT_BEAR,
        "reference_url": "https://www.cls.cn/detail/auto-data-watch",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u534a\u5bfc\u4f53\u8bbe\u5907\u65b9\u5411\u8d70\u5f3a\uff0c\u673a\u6784\u79f0\u56fd\u4ea7\u66ff\u4ee3\u8282\u594f\u63d0\u901f",
        "summary": "\u6676\u5706\u5236\u9020\u4e0e\u8bbe\u5907\u94fe\u6761\u51fa\u73b0\u5f02\u52a8\uff0c\u5e02\u573a\u56f4\u7ed5\u56fd\u4ea7\u66ff\u4ee3\u548c\u8d44\u672c\u5f00\u652f\u6062\u590d\u91cd\u65b0\u5b9a\u4ef7\u3002",
        "sectors": ["\u534a\u5bfc\u4f53"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/semi-equipment-up",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u5238\u5546\u677f\u5757\u5348\u540e\u62c9\u5347\uff0c\u5e02\u573a\u60c5\u7eea\u6709\u6240\u4fee\u590d",
        "summary": "\u6307\u6570\u9707\u8361\u8fc7\u7a0b\u4e2d\u5238\u5546\u627f\u62c5\u60c5\u7eea\u4fee\u590d\u529f\u80fd\uff0c\u5e26\u52a8\u90e8\u5206\u9ad8\u5f39\u6027\u65b9\u5411\u56de\u6696\u3002",
        "sectors": ["\u5238\u5546"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/broker-rebound",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u521b\u65b0\u836f\u65b9\u5411\u6301\u7eed\u6d3b\u8dc3\uff0c\u8d44\u91d1\u8f6c\u5411\u9632\u5b88\u4e0e\u6210\u957f\u517c\u987e",
        "summary": "\u533b\u836f\u677f\u5757\u83b7\u5f97\u589e\u91cf\u8d44\u91d1\u5173\u6ce8\uff0c\u521b\u65b0\u836f\u548c\u5668\u68b0\u7ec6\u5206\u8868\u73b0\u66f4\u5f3a\u3002",
        "sectors": ["\u533b\u836f"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/medical-active",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u65b0\u80fd\u6e90\u94fe\u6761\u5206\u5316\u52a0\u5267\uff0c\u673a\u6784\u63d0\u9192\u5173\u6ce8\u4ea7\u80fd\u51fa\u6e05\u8282\u594f",
        "summary": "\u65b0\u80fd\u6e90\u677f\u5757\u5185\u90e8\u8f6e\u52a8\u660e\u663e\uff0c\u8d44\u91d1\u66f4\u504f\u5411\u4f4e\u4f4d\u73af\u8282\u548c\u6210\u672c\u6539\u5584\u65b9\u5411\u3002",
        "sectors": ["\u65b0\u80fd\u6e90"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/new-energy-split",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u519b\u5de5\u677f\u5757\u76d8\u4e2d\u5f02\u52a8\uff0c\u8ba2\u5355\u5151\u73b0\u9884\u671f\u91cd\u65b0\u5347\u6e29",
        "summary": "\u519b\u5de5\u94fe\u6761\u76d8\u4e2d\u8d70\u5f3a\uff0c\u5e02\u573a\u5173\u6ce8\u540e\u7eed\u8ba2\u5355\u5151\u73b0\u4e0e\u4f30\u503c\u5207\u6362\u7a7a\u95f4\u3002",
        "sectors": ["\u519b\u5de5"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/defense-orders",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u673a\u5668\u4eba\u677f\u5757\u51b2\u9ad8\u56de\u843d\uff0c\u77ed\u7ebf\u535a\u5f08\u60c5\u7eea\u5347\u6e29",
        "summary": "\u673a\u5668\u4eba\u65b9\u5411\u9ad8\u4f4d\u9707\u8361\uff0c\u8d44\u91d1\u5728\u9898\u6750\u6269\u6563\u4e0e\u5151\u73b0\u538b\u529b\u4e4b\u95f4\u53cd\u590d\u5207\u6362\u3002",
        "sectors": ["\u673a\u5668\u4eba"],
        "sentiment": SENTIMENT_NEUTRAL,
        "reference_url": "https://www.cls.cn/detail/robotics-intraday",
    },
    {
        "title": "\u8d22\u8054\u793e7x24\uff1a\u5b58\u50a8\u82af\u7247\u62a5\u4ef7\u9884\u671f\u7ee7\u7eed\u4e0a\u4fee\uff0c\u4ea7\u4e1a\u94fe\u666f\u6c14\u5ea6\u53d7\u5173\u6ce8",
        "summary": "\u5b58\u50a8\u73af\u8282\u4ef7\u683c\u4fee\u590d\u903b\u8f91\u5ef6\u7eed\uff0c\u5e02\u573a\u91cd\u65b0\u4ea4\u6613\u4f9b\u9700\u6539\u5584\u4e0e\u76c8\u5229\u5f39\u6027\u3002",
        "sectors": ["\u5b58\u50a8\u82af\u7247"],
        "sentiment": SENTIMENT_BULL,
        "reference_url": "https://www.cls.cn/detail/memory-price-up",
    },
]
# Demo link sets keyed by day offset from today (1 = yesterday); consumed by
# seed_demo_content to pre-populate inputs and a generated report.
SAMPLE_INPUTS = {
    1: {
        "touzi-mingjian": ["https://mp.weixin.qq.com/s/semiconductor-capacity-and-chip-cycle"],
        "aigujun-2020": ["https://mp.weixin.qq.com/s/storage-chip-price-repair"],
        "mazhiming-shouping": ["https://mp.weixin.qq.com/s/market-close-sector-rotation"],
        "laobai-guandian": ["https://mp.weixin.qq.com/s/robotics-and-energy-balance"],
    },
}
def now_local() -> datetime:
    """Current wall-clock time in the Asia/Shanghai zone."""
    return datetime.now(tz=SHANGHAI)
def iso_timestamp(value: datetime | None = None) -> str:
return (value or now_local()).replace(microsecond=0).isoformat()
def ensure_local_timezone(value: datetime) -> datetime:
    """Coerce *value* to Shanghai time, treating naive datetimes as already local."""
    if value.tzinfo is not None:
        return value.astimezone(SHANGHAI)
    return value.replace(tzinfo=SHANGHAI)
def normalize_whitespace(value: str) -> str:
    """Collapse every whitespace run to a single space and trim the ends."""
    collapsed = re.sub(r"\s+", " ", value)
    return collapsed.strip()
def extract_json_object(script_text: str, marker: str) -> str:
    """Return the first balanced JSON object that follows *marker* in *script_text*.

    Scans character by character while tracking string/escape state, so braces
    inside JSON string literals do not disturb the depth count.
    Raises RuntimeError when the marker, the opening brace, or a balanced
    closing brace cannot be found.
    """
    anchor = script_text.find(marker)
    if anchor < 0:
        raise RuntimeError(f"Marker not found: {marker}")
    begin = script_text.find("{", anchor)
    if begin < 0:
        raise RuntimeError(f"JSON object start not found for marker: {marker}")
    depth = 0
    inside_string = False
    pending_escape = False
    position = begin
    while position < len(script_text):
        current = script_text[position]
        if inside_string:
            if pending_escape:
                pending_escape = False
            elif current == "\\":
                pending_escape = True
            elif current == '"':
                inside_string = False
        elif current == '"':
            inside_string = True
        elif current == "{":
            depth += 1
        elif current == "}":
            depth -= 1
            if depth == 0:
                return script_text[begin : position + 1]
        position += 1
    raise RuntimeError(f"JSON object end not found for marker: {marker}")
def parse_telegraph_timestamp(date_str: str, time_str: str) -> str:
    """Combine a date and an HH:MM[:SS] time into an ISO timestamp in Shanghai time."""
    if len(time_str.split(":")) == 3:
        full_time = time_str
    else:
        full_time = f"{time_str}:00"
    moment = datetime.fromisoformat(f"{date_str}T{full_time}")
    return moment.replace(tzinfo=SHANGHAI).isoformat(timespec="seconds")
def split_title_and_summary(content: str) -> tuple[str, str]:
    """Split raw telegraph text into a (title, summary) pair.

    A leading bracketed segment (including CJK lenticular brackets) becomes
    the title; otherwise the first sentence does, and long bodies are clipped
    to ~220 characters with an ellipsis. Titles are capped at 80 characters.
    """
    cleaned = normalize_whitespace(content)
    bracketed = re.match(r"^[\[({\u3010\u3016](.+?)[\])}\u3011\u3017][\uff1a: ]*(.*)$", cleaned)
    if bracketed is not None:
        title = normalize_whitespace(bracketed.group(1))
        summary = normalize_whitespace(bracketed.group(2) or cleaned)
        return title[:80], summary or title
    leading_sentence = re.split(r"[。;;!?]", cleaned, maxsplit=1)[0]
    if len(cleaned) <= 220:
        summary = cleaned
    else:
        summary = f"{cleaned[:217]}..."
    return leading_sentence[:80], summary
def build_fallback_cls_items(reference_time: datetime) -> list[ClsNewsItem]:
    """Synthesize demo news items from the static templates.

    Used when live scraping of cls.cn fails or is disabled. Items are spread
    backwards in time from *reference_time* and returned newest-first.
    """
    items: list[ClsNewsItem] = []
    for index, template in enumerate(CLS_NEWS_TEMPLATES):
        # Space items ~95 minutes apart so the fake feed looks organic.
        published_at = (reference_time - timedelta(minutes=index * 95 + 8)).replace(microsecond=0).isoformat()
        items.append(
            ClsNewsItem(
                id=f"cls-{index + 1}",
                title=template["title"],
                published_at=published_at,
                # Alternate the source label between the two feed brands.
                source="\u8d22\u8054\u793e" if index % 2 == 0 else "\u8d22\u8054\u793e AI Daily",
                summary=template["summary"],
                reference_url=template["reference_url"],
                sectors=template["sectors"],
                sentiment=template["sentiment"],
            )
        )
    # All timestamps share one tz offset, so the ISO strings sort chronologically.
    return sorted(items, key=lambda item: item.published_at, reverse=True)
def fetch_cls_telegraph_items(reference_time: datetime) -> list[ClsNewsItem]:
    """Scrape live telegraph items from m.cls.cn for *reference_time*'s date.

    Locates the embedded ``__NEXT_DATA__`` JSON blob in the page, pulls the
    ``roll_data`` list from it, keeps items published on the target date,
    and returns at most 80 of them newest-first.
    Raises RuntimeError when the page layout or payload is not as expected.
    """
    session = requests.Session()
    # Ignore environment proxy settings so the request goes out directly.
    session.trust_env = False
    response = session.get(CLS_TELEGRAPH_URL, headers=HTTP_HEADERS, timeout=15)
    response.raise_for_status()
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "html.parser")
    next_data_script = None
    for script in soup.find_all("script"):
        script_text = script.string or script.get_text()
        if "__NEXT_DATA__ =" in script_text:
            next_data_script = script_text
            break
    if not next_data_script:
        raise RuntimeError("Missing __NEXT_DATA__ payload on cls.cn")
    # Extract the balanced JSON object that follows the assignment marker.
    next_data = json.loads(extract_json_object(next_data_script, "__NEXT_DATA__ ="))
    roll_data = (
        next_data.get("props", {})
        .get("initialState", {})
        .get("roll_data", [])
    )
    if not isinstance(roll_data, list) or not roll_data:
        raise RuntimeError("Missing roll_data in cls.cn payload")
    target_date = reference_time.date()
    items: list[ClsNewsItem] = []
    seen_ids: set[int] = set()
    # Cap the number of parsed entries to keep the payload bounded.
    latest_limit = 80
    for entry in roll_data:
        if len(items) >= latest_limit:
            break
        item_id = int(entry.get("id") or 0)
        if not item_id or item_id in seen_ids:
            continue
        seen_ids.add(item_id)
        # Prefer the modification time; fall back to creation time.
        timestamp = int(entry.get("modified_time") or entry.get("ctime") or 0)
        if not timestamp:
            continue
        published_dt = datetime.fromtimestamp(timestamp, tz=SHANGHAI)
        # Keep only items published on the requested date.
        if published_dt.date() != target_date:
            continue
        raw_content = normalize_whitespace(
            entry.get("content")
            or entry.get("brief")
            or entry.get("title")
            or ""
        )
        # Skip near-empty entries.
        if len(raw_content) < 8:
            continue
        title = normalize_whitespace(entry.get("title") or "")
        if not title:
            title, _ = split_title_and_summary(raw_content)
        summary = normalize_whitespace(entry.get("brief") or "")
        if not summary:
            _, summary = split_title_and_summary(raw_content)
        source = normalize_whitespace(entry.get("author") or "\u8d22\u8054\u793e7x24")
        reference_url = normalize_whitespace(entry.get("shareurl") or "")
        if not reference_url:
            reference_url = f"https://www.cls.cn/detail/{item_id}"
        # Heuristic tagging reuses the default account's focus as fallback.
        sectors = infer_sectors(f"{title} {summary}", "touzi-mingjian")
        sentiment = infer_sentiment(f"{title} {summary}")
        items.append(
            ClsNewsItem(
                id=f"cls-live-{item_id}",
                title=title[:120],
                published_at=published_dt.isoformat(timespec="seconds"),
                source=source,
                summary=summary[:500],
                reference_url=reference_url,
                sectors=sectors,
                sentiment=sentiment,
            )
        )
    if not items:
        raise RuntimeError("No telegraph items parsed from cls.cn")
    return sorted(items, key=lambda item: item.published_at, reverse=True)
def get_accounts() -> list[Account]:
    """Accounts from storage, or the built-in defaults when storage is empty."""
    stored = fetch_accounts()
    if stored:
        return stored
    return ACCOUNTS
def normalize_date(value: str) -> str:
    """Parse *value* as an ISO date or datetime and return just the date part."""
    parsed = datetime.fromisoformat(value)
    return parsed.date().isoformat()
def blank_daily_input(date_str: str) -> DailyInputDocument:
    """An empty daily-input document listing every account with no links."""
    entries = [
        DailyInputAccount(account_id=account.id, account_name=account.name, links=[])
        for account in get_accounts()
    ]
    return DailyInputDocument(date=date_str, updated_at=iso_timestamp(), accounts=entries)
def clean_links(links: Iterable[str]) -> list[str]:
    """Strip whitespace from each link and drop blanks and duplicates, keeping order."""
    cleaned: list[str] = []
    known: set[str] = set()
    for candidate in links:
        stripped = candidate.strip()
        if stripped and stripped not in known:
            known.add(stripped)
            cleaned.append(stripped)
    return cleaned
def normalize_daily_input(date_str: str, payload: DailyInputUpsertPayload) -> DailyInputDocument:
    """Build a complete daily-input document from a client payload.

    Every known account appears in the result; submitted links are cleaned
    and deduplicated, and accounts absent from the payload get an empty list.
    """
    links_by_account = {entry.account_id: clean_links(entry.links) for entry in payload.accounts}
    entries = [
        DailyInputAccount(
            account_id=account.id,
            account_name=account.name,
            links=links_by_account.get(account.id, []),
        )
        for account in get_accounts()
    ]
    return DailyInputDocument(date=date_str, updated_at=iso_timestamp(), accounts=entries)
def load_daily_input(date_str: str) -> DailyInputDocument:
    """Stored daily input for *date_str*, or a fresh blank document when absent."""
    stored = fetch_daily_input_document(date_str)
    return stored if stored is not None else blank_daily_input(date_str)
def save_daily_input(document: DailyInputDocument) -> DailyInputDocument:
    """Persist *document* and return the stored copy."""
    stored = save_daily_input_document(document)
    return stored
def load_report(date_str: str) -> ReportDocument | None:
    """Persisted report for *date_str*; None when no report exists."""
    report = fetch_report_document(date_str)
    return report
def save_report(document: ReportDocument) -> ReportDocument:
    """Persist *document* and return the stored copy."""
    stored = save_report_document(document)
    return stored
def list_reports() -> list[ReportListItem]:
    """All stored report summaries."""
    summaries = fetch_report_list()
    return summaries
def title_from_link(account_name: str, url: str, index: int) -> str:
    """Derive a readable article title from a link's URL path.

    Tokenizes the (decoded) path, drops URL boilerplate words, and styles the
    first three remaining tokens; falls back to a numbered placeholder when
    the path has no usable tokens.
    """
    stopwords = {"s", "mp", "weixin", "qq", "com"}
    path_text = unquote(urlparse(url).path or url)
    words = [w for w in re.split(r"[\W_]+", path_text.lower()) if w and w not in stopwords]
    usable = [w for w in words if len(w) > 1]
    if not usable:
        return f"{account_name}\uff1a\u5e02\u573a\u8ddf\u8e2a\u7b2c {index + 1} \u6761"
    styled = [w.upper() if len(w) <= 3 else w.capitalize() for w in usable[:3]]
    topic = " / ".join(styled)
    return f"{account_name}\uff1a{topic} \u89c2\u5bdf"
def infer_sectors(text: str, account_id: str) -> list[str]:
    """Sectors whose keywords appear in *text* (max 3); falls back to the
    account's focus list (max 2) when nothing matches."""
    haystack = text.lower()
    matched: list[str] = []
    for sector, keywords in SECTOR_KEYWORDS.items():
        if any(keyword.lower() in haystack for keyword in keywords):
            matched.append(sector)
    if matched:
        return matched[:3]
    return ACCOUNT_FOCUS.get(account_id, ["AI", "\u7b97\u529b"])[:2]
def infer_sentiment(text: str) -> str:
    """Classify *text* as bullish/bearish/neutral by counting cue-word hits."""
    haystack = text.lower()
    bull_hits = sum(1 for keyword in POSITIVE_KEYWORDS if keyword.lower() in haystack)
    bear_hits = sum(1 for keyword in NEGATIVE_KEYWORDS if keyword.lower() in haystack)
    if bull_hits > bear_hits:
        return SENTIMENT_BULL
    if bear_hits > bull_hits:
        return SENTIMENT_BEAR
    return SENTIMENT_NEUTRAL
def infer_article_type(title: str) -> str:
    """Map a title to an article type via the keyword patterns, in order;
    defaults to the generic theme-commentary label."""
    haystack = title.lower()
    matches = (label for keyword, label in ARTICLE_TYPE_PATTERNS if keyword.lower() in haystack)
    return next(matches, "\u4e3b\u9898\u89c2\u70b9")
def build_article_summary(title: str, sectors: list[str], sentiment: str) -> str:
    """Compose a one-sentence Chinese summary from title, sectors and sentiment."""
    sector_text = "\u3001".join(sectors[:2]) if sectors else "\u6838\u5fc3\u4e3b\u7ebf"
    # Sentiment label -> descriptive phrase; raises KeyError on unknown labels.
    sentiment_text = {
        SENTIMENT_BULL: "\u504f\u79ef\u6781\u7684\u8282\u594f\u5224\u65ad",
        SENTIMENT_BEAR: "\u660e\u663e\u504f\u8c28\u614e\u7684\u98ce\u9669\u63d0\u9192",
        SENTIMENT_NEUTRAL: "\u66f4\u5f3a\u8c03\u7ed3\u6784\u5206\u5316\u4e0e\u7b49\u5f85\u786e\u8ba4",
    }[sentiment]
    return f"{title} \u56f4\u7ed5 {sector_text} \u5c55\u5f00\uff0c\u7ed9\u51fa\u7684\u7ed3\u8bba\u662f{sentiment_text}\uff0c\u9002\u5408\u4f5c\u4e3a\u5f53\u65e5\u76d8\u9762\u8ddf\u8e2a\u4e0e\u590d\u76d8\u53c2\u8003\u3002"
def generate_report(date_str: str, input_document: DailyInputDocument) -> ReportDocument:
    """Synthesize the daily opinion report from the day's collected links.

    Each link becomes an OpinionArticle with heuristically inferred title,
    sectors, sentiment and a deterministic pseudo publish time. The report
    summary aggregates sector counts and the overall bull/bear tone. When no
    links were entered, an empty placeholder report is returned.
    """
    base_date = datetime.fromisoformat(date_str)
    articles: list[OpinionArticle] = []
    for account_index, account in enumerate(input_document.accounts):
        for link_index, url in enumerate(account.links):
            title = title_from_link(account.account_name, url, link_index)
            sectors = infer_sectors(f"{title} {url}", account.account_id)
            sentiment = infer_sentiment(f"{title} {url}")
            # Deterministic fake publish time: hour 9-16 by index, minutes in 12s.
            published_at = (
                base_date.replace(hour=9 + ((account_index + link_index) % 8), minute=(link_index * 12) % 60)
                .replace(tzinfo=SHANGHAI)
                .isoformat(timespec="seconds")
            )
            articles.append(
                OpinionArticle(
                    id=f"{date_str}-{account.account_id}-{link_index}",
                    account_id=account.account_id,
                    account_name=account.account_name,
                    title=title,
                    published_at=published_at,
                    summary=build_article_summary(title, sectors, sentiment),
                    source_url=url,
                    sectors=sectors,
                    sentiment=sentiment,
                    article_type=infer_article_type(title),
                )
            )
    if not articles:
        # No links entered: keep the report structure with a placeholder summary.
        return ReportDocument(
            date=date_str,
            generated_at=iso_timestamp(),
            summary="\u5f53\u65e5\u5c1a\u672a\u5f55\u5165\u6587\u7ae0\u94fe\u63a5\uff0c\u7cfb\u7edf\u5df2\u4fdd\u7559\u65e5\u62a5\u7ed3\u6784\uff0c\u7b49\u5f85\u8865\u5145\u516c\u4f17\u53f7\u6587\u7ae0\u540e\u518d\u751f\u6210\u5b8c\u6574\u7ed3\u8bba\u3002",
            focus_sectors=[],
            article_count=0,
            account_count=0,
            articles=[],
        )
    # Most-mentioned sectors across all articles become the report focus.
    sector_counter = Counter(sector for article in articles for sector in article.sectors)
    focus_sectors = [sector for sector, _count in sector_counter.most_common(4)]
    sentiment_counter = Counter(article.sentiment for article in articles)
    # Majority sentiment picks the tone sentence; ties read as mixed.
    if sentiment_counter[SENTIMENT_BULL] > sentiment_counter[SENTIMENT_BEAR]:
        tone = "\u6574\u4f53\u504f\u79ef\u6781\uff0c\u4e3b\u7ebf\u8ba8\u8bba\u96c6\u4e2d\u5ea6\u8f83\u9ad8"
    elif sentiment_counter[SENTIMENT_BEAR] > sentiment_counter[SENTIMENT_BULL]:
        tone = "\u6574\u4f53\u504f\u8c28\u614e\uff0c\u98ce\u9669\u63a7\u5236\u4ecd\u662f\u4e3b\u53d9\u4e8b"
    else:
        tone = "\u591a\u7a7a\u5206\u6b67\u5e76\u5b58\uff0c\u5e02\u573a\u66f4\u770b\u91cd\u9a8c\u8bc1\u4e0e\u8282\u594f"
    active_accounts = len([account for account in input_document.accounts if account.links])
    sector_text = "\u3001".join(focus_sectors) if focus_sectors else "\u6682\u65e0\u805a\u7126\u677f\u5757"
    summary = (
        f"{date_str} \u5171\u6574\u7406 {len(articles)} \u7bc7\u516c\u4f17\u53f7\u89c2\u70b9\uff0c\u8986\u76d6 {active_accounts} \u4e2a\u8d26\u6237\u3002"
        f"{tone}\uff0c\u8ba8\u8bba\u91cd\u70b9\u843d\u5728 {sector_text}\u3002"
    )
    return ReportDocument(
        date=date_str,
        generated_at=iso_timestamp(),
        summary=summary,
        focus_sectors=focus_sectors,
        article_count=len(articles),
        account_count=active_accounts,
        articles=sorted(articles, key=lambda item: item.published_at, reverse=True),
    )
def build_cls_news_document(
    reference_time: datetime | None = None,
    *,
    allow_live_fetch: bool = True,
) -> ClsNewsDocument:
    """Assemble the full CLS news document (items, summary, sector impacts).

    Tries a live scrape when *allow_live_fetch* is true; any failure (or a
    disabled fetch) falls back to the static templates, so this never raises
    from the fetch path itself.
    """
    current = reference_time or now_local()
    try:
        if allow_live_fetch:
            items = fetch_cls_telegraph_items(current)
        else:
            raise RuntimeError("Live fetch disabled for non-current date")
    except Exception:
        # Fallback keeps the endpoint functional when scraping fails.
        items = build_fallback_cls_items(current)
    # Most-mentioned sectors become the watch list.
    sector_counter = Counter(sector for item in items for sector in item.sectors)
    watch_list = [sector for sector, _count in sector_counter.most_common(5)]
    overview = (
        "\u8d44\u8baf\u5217\u8868\u5c55\u793a\u6240\u9009\u65e5\u671f\u5185\u7684\u8d22\u8054\u793e 7x24 \u8d44\u8baf\uff0c"
        "\u5f53\u65e5\u6570\u636e\u6765\u81ea cls.cn \u5b9e\u65f6\u6293\u53d6\uff0c\u6bcf 3 \u5206\u949f\u66f4\u65b0\u4e00\u6b21\u3002"
    )
    hot_topics = (
        "\u70ed\u70b9\u6982\u89c8\u53ea\u4fdd\u7559\u5bf9\u677f\u5757\u5b58\u5728\u660e\u663e\u5f71\u54cd\u7684\u65b9\u5411\uff0c"
        f"\u5f53\u524d\u4e3b\u8981\u96c6\u4e2d\u5728 {'\u3001'.join(watch_list[:3])}\u3002"
    )
    sector_impacts: list[ClsSectorImpact] = []
    seen_sectors: set[str] = set()
    # Build an impact entry for up to four watched sectors, using the
    # majority sentiment of each sector's related items.
    for sector in watch_list[:4]:
        if sector in seen_sectors:
            continue
        seen_sectors.add(sector)
        related_items = [item for item in items if sector in item.sectors]
        if not related_items:
            continue
        sentiment_counter = Counter(item.sentiment for item in related_items)
        if sentiment_counter[SENTIMENT_BULL] > sentiment_counter[SENTIMENT_BEAR]:
            sentiment = SENTIMENT_BULL
            reason = f"{sector} \u65b9\u5411\u51fa\u73b0\u50ac\u5316\u6216\u666f\u6c14\u5f3a\u5316\uff0c\u77ed\u7ebf\u504f\u6b63\u5411\u5f71\u54cd\u3002"
        elif sentiment_counter[SENTIMENT_BEAR] > sentiment_counter[SENTIMENT_BULL]:
            sentiment = SENTIMENT_BEAR
            reason = f"{sector} \u65b9\u5411\u51fa\u73b0\u5151\u73b0\u6216\u5206\u6b67\uff0c\u77ed\u7ebf\u504f\u8d1f\u5411\u5f71\u54cd\u3002"
        else:
            sentiment = SENTIMENT_NEUTRAL
            reason = f"{sector} \u65b9\u5411\u6709\u8ba8\u8bba\u4f46\u4ecd\u9700\u9a8c\u8bc1\uff0c\u77ed\u7ebf\u4ee5\u4e2d\u6027\u89c2\u5bdf\u4e3a\u4e3b\u3002"
        sector_impacts.append(
            ClsSectorImpact(
                sector=sector,
                sentiment=sentiment,
                reason=reason,
                # dict.fromkeys dedupes titles while preserving order.
                related_titles=list(dict.fromkeys(item.title for item in related_items[:2])),
            )
        )
    return ClsNewsDocument(
        date=current.date().isoformat(),
        updated_at=iso_timestamp(current),
        window_label="\u5f53\u5929\u8d44\u8baf",
        summary=ClsNewsSummary(
            overview=overview,
            hot_topics=hot_topics,
            watch_list=watch_list,
        ),
        sector_impacts=sector_impacts,
        items=items,
    )
def load_cls_news(date_str: str) -> ClsNewsDocument | None:
    """Stored CLS news document for *date_str*; None when absent."""
    document = fetch_cls_news_document(date_str)
    return document
def build_reference_time(date_str: str) -> datetime:
    """Reference time for a date: now for today, else 15:00 local on that day."""
    target = datetime.fromisoformat(date_str).date()
    if target != now_local().date():
        return datetime.combine(target, time(hour=15, minute=0), tzinfo=SHANGHAI)
    return now_local()
def refresh_cls_news(date_str: str | None = None) -> ClsNewsDocument:
    """Rebuild and persist the CLS news document for *date_str* (default today).

    Live fetching is only attempted for the current date. When rebuilding
    fails and a cached document exists, the cache is returned instead of
    raising.
    """
    target_date = normalize_date(date_str or now_local().date().isoformat())
    cached = load_cls_news(target_date)
    live_allowed = target_date == now_local().date().isoformat()
    try:
        fresh = build_cls_news_document(build_reference_time(target_date), allow_live_fetch=live_allowed)
    except Exception:
        if cached is None:
            raise
        return cached
    return save_cls_news_document(fresh)
def get_cls_news(date_str: str | None = None) -> ClsNewsDocument:
    """Cached CLS news for a date, refreshing today's copy when stale.

    Historical dates are always served from cache; today's document is
    rebuilt once it is at least CLS_REFRESH_INTERVAL old.
    """
    target_date = normalize_date(date_str or now_local().date().isoformat())
    cached = load_cls_news(target_date)
    if cached is None:
        return refresh_cls_news(target_date)
    if target_date != now_local().date().isoformat():
        return cached
    last_updated = ensure_local_timezone(datetime.fromisoformat(cached.updated_at))
    is_stale = now_local() - last_updated >= CLS_REFRESH_INTERVAL
    return refresh_cls_news(target_date) if is_stale else cached
def seed_demo_content() -> None:
    """Populate storage with demo accounts, sample inputs/reports, and today's news.

    Runs at startup via the app lifespan. NOTE(review): the skip condition
    requires BOTH the input and the report to exist; when only one of the two
    exists, the sample input is re-saved and could overwrite user edits for
    that date -- confirm this is intended.
    """
    save_accounts(ACCOUNTS)
    today = now_local().date()
    for offset, account_links in SAMPLE_INPUTS.items():
        date_str = (today - timedelta(days=offset)).isoformat()
        if fetch_daily_input_document(date_str) is not None and fetch_report_document(date_str) is not None:
            continue
        payload = DailyInputUpsertPayload(
            accounts=[
                {"account_id": account.id, "links": account_links.get(account.id, [])}
                for account in ACCOUNTS
            ]
        )
        input_document = normalize_daily_input(date_str, payload)
        save_daily_input_document(input_document)
        save_report_document(generate_report(date_str, input_document))
    today_str = today.isoformat()
    # Seed a news document only when none exists yet for today.
    if fetch_cls_news_document(today_str) is None:
        save_cls_news_document(build_cls_news_document())

View File

@ -0,0 +1,480 @@
from __future__ import annotations
import hashlib
import json
from contextlib import contextmanager
from datetime import date, datetime
from pathlib import Path
from typing import Any, Iterator
from sqlalchemy import JSON, Date, DateTime, ForeignKey, Integer, String, Text, UniqueConstraint, create_engine, delete, select
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship, sessionmaker
from app.models import (
Account,
ClsNewsDocument,
ClsNewsItem,
ClsNewsSummary,
ClsSectorImpact,
DailyInputAccount,
DailyInputDocument,
OpinionArticle,
ReportDocument,
ReportListItem,
)
# Repository root: three levels above this file (services -> app -> backend -> root).
PROJECT_ROOT = Path(__file__).resolve().parents[3]
CONFIG_DIR = PROJECT_ROOT / "backend" / "config"
# JSON file holding the database connection settings.
DATABASE_CONFIG_PATH = CONFIG_DIR / "database.json"
class Base(DeclarativeBase):
    """Declarative base shared by all ORM records in this module."""

    pass
class AccountRecord(Base):
    """ORM row for a tracked account (string id, display name, description)."""

    __tablename__ = "accounts"

    id: Mapped[str] = mapped_column(String(64), primary_key=True)
    name: Mapped[str] = mapped_column(String(128), nullable=False)
    description: Mapped[str] = mapped_column(String(255), nullable=False)
class DailyInputRecord(Base):
    """One day's submitted article links (parent of DailyInputLinkRecord rows)."""

    __tablename__ = "daily_inputs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Exactly one row per calendar date.
    date: Mapped[Any] = mapped_column(Date, nullable=False, unique=True, index=True)
    updated_at: Mapped[Any] = mapped_column(DateTime(timezone=True), nullable=False)
    # Child links kept in submission order; deleted with the parent row.
    links: Mapped[list["DailyInputLinkRecord"]] = relationship(
        back_populates="daily_input",
        cascade="all, delete-orphan",
        order_by="DailyInputLinkRecord.sort_order",
    )
class DailyInputLinkRecord(Base):
    """A single submitted article URL for one account on one day."""

    __tablename__ = "daily_input_links"
    # The same URL (by hash) may appear only once per account per day.
    __table_args__ = (
        UniqueConstraint("daily_input_id", "account_id", "url_hash", name="uq_daily_input_account_url"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    daily_input_id: Mapped[int] = mapped_column(ForeignKey("daily_inputs.id", ondelete="CASCADE"), nullable=False)
    account_id: Mapped[str] = mapped_column(ForeignKey("accounts.id"), nullable=False, index=True)
    url: Mapped[str] = mapped_column(String(1024), nullable=False)
    # SHA-256 hex of url (see hash_url) — a fixed-width key for the unique constraint.
    url_hash: Mapped[str] = mapped_column(String(64), nullable=False)
    # Position within the day's overall link list (running counter across accounts).
    sort_order: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    daily_input: Mapped[DailyInputRecord] = relationship(back_populates="links")
class ReportRecord(Base):
    """Generated daily report, keyed by date (parent of ReportArticleRecord rows)."""

    __tablename__ = "reports"

    # The report date itself is the primary key — one report per day.
    date: Mapped[Any] = mapped_column(Date, primary_key=True)
    generated_at: Mapped[Any] = mapped_column(DateTime(timezone=True), nullable=False)
    summary: Mapped[str] = mapped_column(Text, nullable=False)
    # Stored as a JSON array of sector names.
    focus_sectors: Mapped[list[str]] = mapped_column(JSON, nullable=False, default=list)
    article_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    account_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    # Articles kept in report order; deleted with the parent row.
    articles: Mapped[list["ReportArticleRecord"]] = relationship(
        back_populates="report",
        cascade="all, delete-orphan",
        order_by="ReportArticleRecord.sort_order",
    )
class ReportArticleRecord(Base):
    """One article entry inside a daily report."""

    __tablename__ = "report_articles"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    report_date: Mapped[Any] = mapped_column(ForeignKey("reports.date", ondelete="CASCADE"), nullable=False, index=True)
    # Position within the report's article list.
    sort_order: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    # Domain-level article identifier (not the surrogate `id` above).
    article_id: Mapped[str] = mapped_column(String(128), nullable=False)
    account_id: Mapped[str] = mapped_column(String(64), nullable=False)
    # Denormalized account name captured at generation time.
    account_name: Mapped[str] = mapped_column(String(128), nullable=False)
    title: Mapped[str] = mapped_column(String(255), nullable=False)
    # Stored as the original string timestamp, not a DateTime column.
    published_at: Mapped[str] = mapped_column(String(64), nullable=False)
    summary: Mapped[str] = mapped_column(Text, nullable=False)
    source_url: Mapped[str] = mapped_column(Text, nullable=False)
    sectors: Mapped[list[str]] = mapped_column(JSON, nullable=False, default=list)
    sentiment: Mapped[str] = mapped_column(String(16), nullable=False)
    article_type: Mapped[str] = mapped_column(String(64), nullable=False)
    report: Mapped[ReportRecord] = relationship(back_populates="articles")
class ClsNewsSnapshotRecord(Base):
    """Per-date snapshot of CLS news: summary fields plus child impact/item rows."""

    __tablename__ = "cls_news_snapshots"

    # One snapshot per calendar date; the date is the primary key.
    date: Mapped[Any] = mapped_column(Date, primary_key=True)
    updated_at: Mapped[Any] = mapped_column(DateTime(timezone=True), nullable=False)
    window_label: Mapped[str] = mapped_column(String(64), nullable=False)
    overview: Mapped[str] = mapped_column(Text, nullable=False)
    hot_topics: Mapped[str] = mapped_column(Text, nullable=False)
    # JSON array of tickers/themes to watch.
    watch_list: Mapped[list[str]] = mapped_column(JSON, nullable=False, default=list)
    sector_impacts: Mapped[list["ClsSectorImpactRecord"]] = relationship(
        back_populates="snapshot",
        cascade="all, delete-orphan",
        order_by="ClsSectorImpactRecord.sort_order",
    )
    items: Mapped[list["ClsNewsItemRecord"]] = relationship(
        back_populates="snapshot",
        cascade="all, delete-orphan",
        order_by="ClsNewsItemRecord.sort_order",
    )
class ClsSectorImpactRecord(Base):
    """Sector-level sentiment assessment attached to a CLS news snapshot."""

    __tablename__ = "cls_sector_impacts"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    snapshot_date: Mapped[Any] = mapped_column(ForeignKey("cls_news_snapshots.date", ondelete="CASCADE"), nullable=False, index=True)
    # Position within the snapshot's impact list.
    sort_order: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    sector: Mapped[str] = mapped_column(String(64), nullable=False)
    sentiment: Mapped[str] = mapped_column(String(16), nullable=False)
    reason: Mapped[str] = mapped_column(Text, nullable=False)
    # JSON array of news titles supporting this assessment.
    related_titles: Mapped[list[str]] = mapped_column(JSON, nullable=False, default=list)
    snapshot: Mapped[ClsNewsSnapshotRecord] = relationship(back_populates="sector_impacts")
class ClsNewsItemRecord(Base):
    """A single news item belonging to a CLS news snapshot."""

    __tablename__ = "cls_news_items"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    snapshot_date: Mapped[Any] = mapped_column(ForeignKey("cls_news_snapshots.date", ondelete="CASCADE"), nullable=False, index=True)
    # Position within the snapshot's item list.
    sort_order: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    # Domain-level item identifier (not the surrogate `id` above).
    item_id: Mapped[str] = mapped_column(String(128), nullable=False)
    title: Mapped[str] = mapped_column(String(255), nullable=False)
    # Stored as the original string timestamp, not a DateTime column.
    published_at: Mapped[str] = mapped_column(String(64), nullable=False)
    source: Mapped[str] = mapped_column(String(128), nullable=False)
    summary: Mapped[str] = mapped_column(Text, nullable=False)
    reference_url: Mapped[str] = mapped_column(Text, nullable=False)
    sectors: Mapped[list[str]] = mapped_column(JSON, nullable=False, default=list)
    sentiment: Mapped[str] = mapped_column(String(16), nullable=False)
    snapshot: Mapped[ClsNewsSnapshotRecord] = relationship(back_populates="items")
def load_database_config() -> dict[str, Any]:
    """Read and validate the MySQL connection settings from database.json.

    Returns the parsed config dict with `charset` and `echo` defaulted.
    Raises RuntimeError when the file is missing, a required field is empty,
    or a required field still holds the CHANGE_ME placeholder.
    """
    if not DATABASE_CONFIG_PATH.exists():
        raise RuntimeError(f"Database config not found: {DATABASE_CONFIG_PATH}")
    raw_text = DATABASE_CONFIG_PATH.read_text(encoding="utf-8")
    config: dict[str, Any] = json.loads(raw_text)
    required_fields = ("host", "port", "username", "password", "database")
    missing = [name for name in required_fields if not config.get(name)]
    if missing:
        raise RuntimeError(f"Database config is incomplete: {', '.join(missing)}")
    has_placeholder = any(
        str(config.get(name)).strip() == "CHANGE_ME" for name in required_fields
    )
    if has_placeholder:
        raise RuntimeError(f"Database config still contains placeholders: {DATABASE_CONFIG_PATH}")
    config.setdefault("charset", "utf8mb4")
    config.setdefault("echo", False)
    return config
# Process-wide lazily-initialized singletons; created on first use by
# get_engine() / get_session_factory().
_ENGINE = None
_SESSION_FACTORY: sessionmaker[Session] | None = None
def parse_date(value: str) -> date:
    """Convert an ISO-8601 date string (YYYY-MM-DD) into a date object."""
    parsed: date = date.fromisoformat(value)
    return parsed
def parse_datetime(value: str) -> datetime:
    """Convert an ISO-8601 timestamp string into a datetime object."""
    parsed: datetime = datetime.fromisoformat(value)
    return parsed
def hash_url(value: str) -> str:
    """Return the hex SHA-256 digest of *value* (fixed-width key for URL dedup)."""
    digest = hashlib.sha256(value.encode("utf-8"))
    return digest.hexdigest()
def get_engine():
    """Return the process-wide SQLAlchemy engine, creating it on first call.

    Builds a MySQL (PyMySQL) engine from the JSON config and caches it in the
    module-level ``_ENGINE`` global; subsequent calls reuse the same engine.
    """
    global _ENGINE
    if _ENGINE is None:
        # Local import: only needed on the one-time engine-building path.
        from urllib.parse import quote_plus

        config = load_database_config()
        # URL-encode the credentials: a password containing '@', '/', ':' or
        # '%' would otherwise corrupt the DSN (common in generated passwords).
        username = quote_plus(str(config["username"]))
        password = quote_plus(str(config["password"]))
        _ENGINE = create_engine(
            (
                f"mysql+pymysql://{username}:{password}"
                f"@{config['host']}:{config['port']}/{config['database']}?charset={config['charset']}"
            ),
            echo=bool(config.get("echo", False)),
            future=True,
        )
    return _ENGINE
def get_session_factory() -> sessionmaker[Session]:
    """Return the cached sessionmaker bound to the shared engine, creating it on first use."""
    global _SESSION_FACTORY
    if _SESSION_FACTORY is not None:
        return _SESSION_FACTORY
    _SESSION_FACTORY = sessionmaker(bind=get_engine(), autoflush=False, autocommit=False, future=True)
    return _SESSION_FACTORY
def init_database() -> None:
    """Create any missing tables for all ORM models registered on Base."""
    engine = get_engine()
    Base.metadata.create_all(engine)
@contextmanager
def session_scope() -> Iterator[Session]:
    """Yield a Session inside a transaction: commit on success, rollback on error.

    The commit sits inside the try block so that a failing commit also takes
    the rollback path; the session is always closed in the finally clause.
    """
    session = get_session_factory()()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
def save_accounts(accounts: list[Account]) -> None:
    """Insert or update account rows so the table matches *accounts*."""
    with session_scope() as session:
        for account in accounts:
            row = session.get(AccountRecord, account.id)
            if row is None:
                # New account: insert a fresh row.
                session.add(
                    AccountRecord(id=account.id, name=account.name, description=account.description)
                )
            else:
                # Existing account: refresh the mutable fields in place.
                row.name = account.name
                row.description = account.description
def fetch_accounts() -> list[Account]:
    """Return every stored account as an API model, ordered by id."""
    with session_scope() as session:
        rows = session.scalars(select(AccountRecord).order_by(AccountRecord.id)).all()
        result: list[Account] = []
        for row in rows:
            result.append(Account(id=row.id, name=row.name, description=row.description))
        return result
def fetch_daily_input_document(date_str: str) -> DailyInputDocument | None:
    """Load the daily-input document for *date_str* (ISO date), or None.

    Every known account appears in the result, with an empty link list when
    it had no submissions that day.
    """
    with session_scope() as session:
        row = session.scalar(
            select(DailyInputRecord).where(DailyInputRecord.date == parse_date(date_str))
        )
        if row is None:
            return None
        all_accounts = session.scalars(select(AccountRecord).order_by(AccountRecord.id)).all()
        # Group the flat link rows (already ordered by sort_order) per account.
        grouped: dict[str, list[str]] = {}
        for link in row.links:
            grouped.setdefault(link.account_id, []).append(link.url)
        account_entries = [
            DailyInputAccount(
                account_id=acct.id,
                account_name=acct.name,
                links=grouped.get(acct.id, []),
            )
            for acct in all_accounts
        ]
        return DailyInputDocument(
            date=str(row.date),
            updated_at=row.updated_at.isoformat(timespec="seconds"),
            accounts=account_entries,
        )
def save_daily_input_document(document: DailyInputDocument) -> DailyInputDocument:
    """Upsert the daily-input row for ``document.date`` and replace all links.

    Existing links are cleared and rewritten in document order; ``sort_order``
    is one running counter across all accounts so the original ordering can be
    reproduced on read. Returns *document* unchanged.
    """
    with session_scope() as session:
        record = session.scalar(select(DailyInputRecord).where(DailyInputRecord.date == parse_date(document.date)))
        if record is None:
            record = DailyInputRecord(date=parse_date(document.date), updated_at=parse_datetime(document.updated_at))
            session.add(record)
            # Flush so the new row gets its primary key before links reference it.
            session.flush()
        else:
            record.updated_at = parse_datetime(document.updated_at)
            # delete-orphan cascade removes the old link rows on clear().
            record.links.clear()
            # Flush the deletes first so re-inserted URLs do not collide with
            # the (daily_input_id, account_id, url_hash) unique constraint.
            session.flush()
        sort_order = 0
        for account in document.accounts:
            for url in account.links:
                record.links.append(
                    DailyInputLinkRecord(
                        account_id=account.account_id,
                        url=url,
                        url_hash=hash_url(url),
                        sort_order=sort_order,
                    )
                )
                sort_order += 1
    return document
def fetch_report_document(date_str: str) -> ReportDocument | None:
    """Load the generated report for *date_str* (ISO date), or None."""
    with session_scope() as session:
        row = session.get(ReportRecord, parse_date(date_str))
        if row is None:
            return None
        # Child rows come back ordered by sort_order via the relationship.
        articles = [
            OpinionArticle(
                id=entry.article_id,
                account_id=entry.account_id,
                account_name=entry.account_name,
                title=entry.title,
                published_at=entry.published_at,
                summary=entry.summary,
                source_url=entry.source_url,
                sectors=list(entry.sectors or []),
                sentiment=entry.sentiment,
                article_type=entry.article_type,
            )
            for entry in row.articles
        ]
        return ReportDocument(
            date=str(row.date),
            generated_at=row.generated_at.isoformat(timespec="seconds"),
            summary=row.summary,
            focus_sectors=list(row.focus_sectors or []),
            article_count=row.article_count,
            account_count=row.account_count,
            articles=articles,
        )
def save_report_document(document: ReportDocument) -> ReportDocument:
    """Upsert the report row for ``document.date`` and replace its articles.

    Articles are cleared and re-inserted with ``sort_order`` equal to their
    list index so ordering is preserved on read. Returns *document* unchanged.
    """
    with session_scope() as session:
        record = session.get(ReportRecord, parse_date(document.date))
        if record is None:
            record = ReportRecord(
                date=parse_date(document.date),
                generated_at=parse_datetime(document.generated_at),
                summary=document.summary,
                focus_sectors=document.focus_sectors,
                article_count=document.article_count,
                account_count=document.account_count,
            )
            session.add(record)
        else:
            record.generated_at = parse_datetime(document.generated_at)
            record.summary = document.summary
            record.focus_sectors = document.focus_sectors
            record.article_count = document.article_count
            record.account_count = document.account_count
            # delete-orphan cascade removes the old article rows; flush the
            # deletes before inserting the replacements.
            record.articles.clear()
            session.flush()
        for index, article in enumerate(document.articles):
            record.articles.append(
                ReportArticleRecord(
                    sort_order=index,
                    article_id=article.id,
                    account_id=article.account_id,
                    account_name=article.account_name,
                    title=article.title,
                    published_at=article.published_at,
                    summary=article.summary,
                    source_url=article.source_url,
                    sectors=article.sectors,
                    sentiment=article.sentiment,
                    article_type=article.article_type,
                )
            )
    return document
def fetch_report_list() -> list[ReportListItem]:
    """Return summary items for all stored reports, newest date first."""
    with session_scope() as session:
        rows = session.scalars(select(ReportRecord).order_by(ReportRecord.date.desc())).all()
        items: list[ReportListItem] = []
        for row in rows:
            items.append(
                ReportListItem(
                    date=str(row.date),
                    generated_at=row.generated_at.isoformat(timespec="seconds"),
                    summary=row.summary,
                    article_count=row.article_count,
                    focus_sectors=list(row.focus_sectors or []),
                )
            )
        return items
def fetch_cls_news_document(date_str: str) -> ClsNewsDocument | None:
    """Load the CLS news snapshot for *date_str* (ISO date), or None."""
    with session_scope() as session:
        snapshot = session.get(ClsNewsSnapshotRecord, parse_date(date_str))
        if snapshot is None:
            return None
        # Child rows come back ordered by sort_order via the relationships.
        impacts = [
            ClsSectorImpact(
                sector=impact.sector,
                sentiment=impact.sentiment,
                reason=impact.reason,
                related_titles=list(impact.related_titles or []),
            )
            for impact in snapshot.sector_impacts
        ]
        news_items = [
            ClsNewsItem(
                id=entry.item_id,
                title=entry.title,
                published_at=entry.published_at,
                source=entry.source,
                summary=entry.summary,
                reference_url=entry.reference_url,
                sectors=list(entry.sectors or []),
                sentiment=entry.sentiment,
            )
            for entry in snapshot.items
        ]
        return ClsNewsDocument(
            date=str(snapshot.date),
            updated_at=snapshot.updated_at.isoformat(timespec="seconds"),
            window_label=snapshot.window_label,
            summary=ClsNewsSummary(
                overview=snapshot.overview,
                hot_topics=snapshot.hot_topics,
                watch_list=list(snapshot.watch_list or []),
            ),
            sector_impacts=impacts,
            items=news_items,
        )
def save_cls_news_document(document: ClsNewsDocument) -> ClsNewsDocument:
    """Upsert the CLS snapshot for ``document.date`` and replace its children.

    On update, child impact/item rows are removed with explicit DELETEs (they
    are keyed by snapshot_date) and the relationship lists are reset before
    the replacements are appended with ``sort_order`` = list index.
    Returns *document* unchanged.
    """
    with session_scope() as session:
        record = session.get(ClsNewsSnapshotRecord, parse_date(document.date))
        if record is None:
            record = ClsNewsSnapshotRecord(
                date=parse_date(document.date),
                updated_at=parse_datetime(document.updated_at),
                window_label=document.window_label,
                overview=document.summary.overview,
                hot_topics=document.summary.hot_topics,
                watch_list=document.summary.watch_list,
            )
            session.add(record)
        else:
            record.updated_at = parse_datetime(document.updated_at)
            record.window_label = document.window_label
            record.overview = document.summary.overview
            record.hot_topics = document.summary.hot_topics
            record.watch_list = document.summary.watch_list
            # Bulk-delete old children directly so the re-insert below cannot
            # collide with rows the ORM has not yet expired.
            session.execute(
                delete(ClsSectorImpactRecord).where(ClsSectorImpactRecord.snapshot_date == record.date)
            )
            session.execute(
                delete(ClsNewsItemRecord).where(ClsNewsItemRecord.snapshot_date == record.date)
            )
            # Reset the in-memory collections to match the emptied tables.
            record.sector_impacts = []
            record.items = []
        # Flush pending inserts/deletes before appending the new children.
        session.flush()
        for index, impact in enumerate(document.sector_impacts):
            record.sector_impacts.append(
                ClsSectorImpactRecord(
                    sort_order=index,
                    sector=impact.sector,
                    sentiment=impact.sentiment,
                    reason=impact.reason,
                    related_titles=impact.related_titles,
                )
            )
        for index, item in enumerate(document.items):
            record.items.append(
                ClsNewsItemRecord(
                    sort_order=index,
                    item_id=item.id,
                    title=item.title,
                    published_at=item.published_at,
                    source=item.source,
                    summary=item.summary,
                    reference_url=item.reference_url,
                    sectors=item.sectors,
                    sentiment=item.sentiment,
                )
            )
    return document

View File

@ -0,0 +1,9 @@
{
"host": "127.0.0.1",
"port": 3306,
"username": "your-user",
"password": "your-password",
"database": "your-database",
"charset": "utf8mb4",
"echo": false
}

View File

@ -0,0 +1,9 @@
{
"host": "152.136.100.182",
"port": 3306,
"username": "root",
"password": "4a3986024e6662f9e571782ece1587298291d18925b44f1f",
"database": "zixun",
"charset": "utf8mb4",
"echo": false
}

7
backend/requirements.txt Normal file
View File

@ -0,0 +1,7 @@
fastapi==0.116.1
uvicorn[standard]==0.35.0
pydantic==2.11.7
SQLAlchemy==2.0.36
PyMySQL==1.1.1
requests==2.32.3
beautifulsoup4==4.12.3

7
backend/run_backend.ps1 Normal file
View File

@ -0,0 +1,7 @@
# Abort the script on the first error instead of continuing.
$ErrorActionPreference = 'Stop'
# Project root is the parent of the directory containing this script.
$root = Split-Path -Parent $PSScriptRoot
Set-Location $root
# Serve the FastAPI app (backend/app/main.py) on localhost:3000.
python -m uvicorn app.main:app --app-dir backend --host 127.0.0.1 --port 3000

94
backend/verify_backend.py Normal file
View File

@ -0,0 +1,94 @@
from __future__ import annotations
import json
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT / "backend"))
from app.main import ( # noqa: E402
accounts,
generate_daily_report,
get_cls_news_payload,
get_daily_inputs,
get_opinion_report,
get_report_list,
health,
put_daily_inputs,
)
from app.models import DailyInputUpsertPayload # noqa: E402
from app.services.storage import ( # noqa: E402
CLS_NEWS_PATH,
daily_input_path,
ensure_data_dirs,
report_path,
)
def assert_true(condition: bool, message: str) -> None:
    """Raise AssertionError carrying *message* unless *condition* holds."""
    if condition:
        return
    raise AssertionError(message)
def run() -> None:
    """End-to-end smoke check: seed one day's links through the API handlers,
    generate the report, then verify every read path and the stored artifacts.

    NOTE(review): the imports daily_input_path / report_path / CLS_NEWS_PATH /
    ensure_data_dirs assume a file-based storage layer — confirm that
    app.services.storage still exposes them (a DB-backed storage module may not).
    """
    ensure_data_dirs()
    # Fixed date so reruns hit the same rows/files (the endpoints upsert).
    date_str = "2026-03-19"
    payload = DailyInputUpsertPayload(
        accounts=[
            {
                "account_id": "touzi-mingjian",
                "links": ["https://mp.weixin.qq.com/s/_l429HDdGFi18eOJpDujjA"],
            },
            {
                "account_id": "aigujun-2020",
                "links": ["https://mp.weixin.qq.com/s/1No9toallxkKRjpj4wZt9Q"],
            },
            {
                "account_id": "mazhiming-shouping",
                "links": ["https://mp.weixin.qq.com/s/i0vlr02f7Ydb9GvCa7idTw"],
            },
            {
                "account_id": "laobai-guandian",
                "links": ["https://mp.weixin.qq.com/s/hRqoxqoR8UNiWw3UEj6_yQ"],
            },
        ]
    )
    # Exercise the write endpoints first, then read everything back.
    saved_input = put_daily_inputs(date_str, payload)
    generated_report = generate_daily_report(date_str)
    loaded_input = get_daily_inputs(date_str)
    loaded_report = get_opinion_report(date_str)
    report_list = get_report_list()
    cls_news = get_cls_news_payload()
    assert_true(health()["status"] == "ok", "health endpoint failed")
    assert_true(len(accounts()) == 4, "account count should be 4")
    assert_true(len(saved_input.accounts) == 4, "saved daily input should contain 4 accounts")
    assert_true(len(loaded_input.accounts) == 4, "loaded daily input should contain 4 accounts")
    assert_true(generated_report.article_count == 4, "generated report should contain 4 articles")
    assert_true(loaded_report.article_count == 4, "loaded report should contain 4 articles")
    assert_true(any(item.date == date_str for item in report_list), "report list should include target date")
    assert_true(len(cls_news.items) > 0, "cls news should not be empty")
    assert_true(daily_input_path(date_str).exists(), "daily input json file missing")
    assert_true(report_path(date_str).exists(), "report json file missing")
    assert_true(CLS_NEWS_PATH.exists(), "cls news json file missing")
    # Human-readable summary of what was verified.
    summary = {
        "health": health(),
        "accounts": [item.name for item in accounts()],
        "daily_input_file": str(daily_input_path(date_str)),
        "report_file": str(report_path(date_str)),
        "cls_news_file": str(CLS_NEWS_PATH),
        "daily_input_accounts": len(loaded_input.accounts),
        "report_article_count": loaded_report.article_count,
        "report_focus_sectors": loaded_report.focus_sectors,
        "report_dates": [item.date for item in report_list[:5]],
        "cls_news_items": len(cls_news.items),
    }
    print(json.dumps(summary, ensure_ascii=False, indent=2))
if __name__ == "__main__":
    # Run the smoke check when executed directly: python backend/verify_backend.py
    run()