Initial commit
This commit is contained in:
20
tools/backfill_ashare_sector_history.py
Normal file
20
tools/backfill_ashare_sector_history.py
Normal file
@ -0,0 +1,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
sys.path.insert(0, str(ROOT / "backend"))
|
||||
|
||||
from app.services.ashare_flow_service import ashare_flow_service # noqa: E402
|
||||
|
||||
|
||||
def main() -> None:
|
||||
result = ashare_flow_service.backfill_sector_daily_history()
|
||||
print(json.dumps(result, ensure_ascii=False, indent=2))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
20
tools/send_test_alert.py
Normal file
20
tools/send_test_alert.py
Normal file
@ -0,0 +1,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
sys.path.insert(0, str(ROOT / "backend"))
|
||||
|
||||
from app.services.alert_service import alert_service # noqa: E402
|
||||
|
||||
|
||||
def main() -> None:
|
||||
result = alert_service.send_close_snapshot_test_alert()
|
||||
print(json.dumps(result, ensure_ascii=False, indent=2))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
234
tools/source_probe.py
Normal file
234
tools/source_probe.py
Normal file
@ -0,0 +1,234 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
|
||||
# Repository root: parent of the tools/ directory this script lives in.
ROOT = Path(__file__).resolve().parents[1]
# Probe results are persisted here as JSON for later inspection.
OUTPUT_PATH = ROOT / "backend" / "data" / "source_probe_results.json"


# Browser-like headers sent with every probe request so the target sites
# serve their normal desktop HTML instead of a bot/error page.
DEFAULT_HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "zh-CN,zh;q=0.9",
}
|
||||
|
||||
|
||||
@dataclass
class ProbeResult:
    """Outcome of probing one data source's public web page."""

    # Short identifier of the probed source (e.g. "eastmoney", "wind").
    source: str
    # URL of the page that was probed.
    page_url: str
    # True once an HTTP response was received (even a non-200 one).
    fetched: bool
    # HTTP status code of the response, when one was received.
    status_code: int | None = None
    # Page <title> text, if one could be extracted.
    title: str | None = None
    # Most recent YYYY-MM-DD date string found in the page, if any.
    latest_date: str | None = None
    # True when the page exposes the data table directly in its HTML.
    direct_table_access: bool = False
    # True when paginated history pages are reachable.
    pagination_access: bool = False
    # True when the page hints at intraday / realtime data availability.
    realtime_hint: bool = False
    # Free-form human-readable observations collected during the probe.
    notes: list[str] = field(default_factory=list)
    # "ExceptionType: message" when the fetch failed, else None.
    error: str | None = None
    # Source-specific extracted key/value details.
    extracted: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def fetch(url: str, *, referer: str | None = None, timeout: int = 20) -> tuple[int, str]:
    """Fetch *url* and return ``(status_code, decoded_html)``.

    The body is decoded with the charset advertised in the response's
    Content-Type header (UTF-8 when absent), ignoring undecodable bytes.
    An unknown charset label falls back to UTF-8.
    """
    request_headers = {**DEFAULT_HEADERS}
    if referer:
        request_headers["Referer"] = referer
    req = Request(url, headers=request_headers)
    with urlopen(req, timeout=timeout) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        body = response.read()
        status = response.status
    try:
        text = body.decode(charset, "ignore")
    except LookupError:
        # Server advertised a charset Python doesn't know; fall back to UTF-8.
        text = body.decode("utf-8", "ignore")
    return status, text
|
||||
|
||||
|
||||
def fetch_gbk(url: str, *, referer: str | None = None, timeout: int = 20) -> tuple[int, str]:
    """Fetch *url* and return ``(status_code, html)`` decoded as GBK (lossy).

    Used for sites (e.g. 10jqka) that serve GBK-encoded pages regardless
    of the declared charset.
    """
    merged_headers = dict(DEFAULT_HEADERS)
    if referer:
        merged_headers["Referer"] = referer
    with urlopen(Request(url, headers=merged_headers), timeout=timeout) as response:
        status = response.status
        payload = response.read()
    return status, payload.decode("gbk", "ignore")
|
||||
|
||||
|
||||
def extract_title(html: str) -> str | None:
|
||||
match = re.search(r"<title>(.*?)</title>", html, re.S | re.I)
|
||||
if not match:
|
||||
return None
|
||||
return re.sub(r"\s+", " ", match.group(1)).strip()
|
||||
|
||||
|
||||
def extract_first_date(html: str) -> str | None:
|
||||
match = re.search(r"<td>(20\d{2}-\d{2}-\d{2})</td>", html)
|
||||
if match:
|
||||
return match.group(1)
|
||||
match = re.search(r"(20\d{2}-\d{2}-\d{2})", html)
|
||||
return match.group(1) if match else None
|
||||
|
||||
|
||||
def extract_page_info(html: str) -> str | None:
|
||||
match = re.search(r'<span class="page_info">([^<]+)</span>', html)
|
||||
return match.group(1).strip() if match else None
|
||||
|
||||
|
||||
def probe_eastmoney() -> ProbeResult:
    """Probe Eastmoney's southbound (HK Connect) market overview page.

    Records whether the page is reachable, the freshest date it shows,
    and heuristics on whether the data table is rendered directly in HTML.
    Network failures are captured in ``result.error`` instead of raising.
    """
    result = ProbeResult(
        source="eastmoney",
        page_url="https://data.eastmoney.com/hsgtV2/hsgtDetail/scgkDetail_nx.html",
        fetched=False,
    )
    try:
        status, html = fetch(result.page_url, referer="https://data.eastmoney.com/")
        result.fetched = True
        result.status_code = status
        result.title = extract_title(html)
        # Prefer the date adjacent to the "更新时间" (update time) label;
        # fall back to the first date-looking string anywhere in the page.
        date_match = re.search(r"更新时间[:: ]*</[^>]+>\s*<[^>]*>(20\d{2}-\d{2}-\d{2})", html)
        fallback_date = re.search(r"(20\d{2}-\d{2}-\d{2})", html)
        result.latest_date = date_match.group(1) if date_match else (fallback_date.group(1) if fallback_date else None)
        # Heuristic: presence of the southbound / HK-Connect (SH) / (SZ)
        # labels suggests the data table is in the served HTML itself.
        result.direct_table_access = "南向资金" in html and "港股通(沪)" in html and "港股通(深)" in html
        result.pagination_access = "南向历史" in html or "历史数据" in html
        result.realtime_hint = "成交净买额(当日)" in html or "实时" in html
        result.notes.extend(
            [
                "公开页面可访问。",
                "页面文案包含南向资金、港股通(沪)、港股通(深)。",
            ]
        )
        if result.latest_date:
            result.notes.append(f"页面中检出日期 {result.latest_date}。")
        result.extracted = {
            "contains_southbound": "南向资金" in html,
            "contains_shanghai": "港股通(沪)" in html,
            "contains_shenzhen": "港股通(深)" in html,
            "contains_today_net_buy": "成交净买额(当日)" in html,
        }
    except (HTTPError, URLError, TimeoutError) as exc:
        # Swallow network errors deliberately: a failed probe is still a result.
        result.error = f"{type(exc).__name__}: {exc}"
    return result
|
||||
|
||||
|
||||
def probe_zhitong() -> ProbeResult:
    """Probe a Zhitong Finance article page for southbound-data hints.

    The URL points at a news article rather than a data table, so
    ``direct_table_access`` / ``pagination_access`` / ``realtime_hint``
    are always False; the probe only records T+2/delay wording and dates.
    Network failures are captured in ``result.error`` instead of raising.
    """
    url = "https://www.zhitongcaijing.com/content/detail/1295067.html"
    result = ProbeResult(source="zhitong", page_url=url, fetched=False)
    try:
        status, html = fetch(url, referer="https://www.zhitongcaijing.com/")
        result.fetched = True
        result.status_code = status
        result.title = extract_title(html)
        # Prefer a full "YYYY-MM-DD HH:MM:SS" publish timestamp; otherwise
        # fall back to the first bare date anywhere in the page.
        date_match = re.search(r"(20\d{2}-\d{2}-\d{2})\s+\d{2}:\d{2}:\d{2}", html)
        result.latest_date = date_match.group(1) if date_match else extract_first_date(html)
        result.direct_table_access = False
        result.pagination_access = False
        result.realtime_hint = False
        # Fix: the original tested the identical "T+2" literal twice
        # ('"T+2" in html or "T+2" in html'); one membership test is equivalent.
        t2 = "T+2" in html
        delayed = "延迟数据" in html or "T+2日结算" in html
        result.notes.extend(
            [
                "站点可访问,但当前命中的是资讯文章页。",
                "页面语义更偏新闻/统计解读,不是可直接分页拉取的标准数据表。",
            ]
        )
        if t2 or delayed:
            result.notes.append("页面明确指向 T+2 或延迟数据。")
        result.extracted = {
            "contains_t_plus_2": t2,
            "contains_delay_notice": delayed,
            "contains_southbound": "南向资金" in html,
        }
    except (HTTPError, URLError, TimeoutError) as exc:
        # Swallow network errors deliberately: a failed probe is still a result.
        result.error = f"{type(exc).__name__}: {exc}"
    return result
|
||||
|
||||
|
||||
def probe_wind() -> ProbeResult:
    """Probe Wind's public product page.

    Wind does not expose a public southbound data table here, so table and
    pagination flags are hard-coded False; the probe only records API /
    terminal wording. Network failures land in ``result.error``.
    """
    url = "https://www.wind.com.cn/portal/zh/WFT/index.html"
    result = ProbeResult(source="wind", page_url=url, fetched=False)
    try:
        status, html = fetch(url, referer="https://www.wind.com.cn/")
        result.fetched = True
        result.status_code = status
        result.title = extract_title(html)
        result.latest_date = extract_first_date(html)
        # Product-marketing page: no scrapable data table or pagination.
        result.direct_table_access = False
        result.pagination_access = False
        # "API" mentions hint that data is delivered programmatically.
        result.realtime_hint = "API" in html or "Client API" in html
        result.notes.extend(
            [
                "官方产品页可访问。",
                "当前拿到的是产品介绍页,不是公开南向资金网页数据表。",
            ]
        )
        if result.realtime_hint:
            result.notes.append("页面包含 API/客户端能力描述,说明数据更可能通过授权终端或接口获取。")
        result.extracted = {
            "contains_client_api": "Client API" in html,
            "contains_excel_plugin": "Excel" in html,
            "contains_terminal": "金融终端" in html,
        }
    except (HTTPError, URLError, TimeoutError) as exc:
        # Swallow network errors deliberately: a failed probe is still a result.
        result.error = f"{type(exc).__name__}: {exc}"
    return result
|
||||
|
||||
|
||||
def probe_ths_reference() -> ProbeResult:
    """Probe the 10jqka (THS) HK-Connect history table, including page 2.

    Fetches the landing page and a direct page-2 URL to verify that both
    the table and its pagination are reachable as plain server-rendered
    HTML. Network failures land in ``result.error``.
    """
    result = ProbeResult(
        source="ths_reference",
        page_url="https://data.10jqka.com.cn/hgt/ggtb/",
        fetched=False,
    )
    try:
        # The site serves GBK-encoded HTML, hence fetch_gbk.
        status, html = fetch_gbk(result.page_url, referer="https://data.10jqka.com.cn/")
        # Hit page 2 directly to confirm pagination works without the ajax API.
        status_page_2, html_page_2 = fetch_gbk(
            "https://data.10jqka.com.cn/hgt/ggtb/board/getGgtPage/page/2/",
            referer=result.page_url,
        )
        result.fetched = True
        result.status_code = status
        result.title = extract_title(html)
        result.latest_date = extract_first_date(html)
        # The server-rendered data table carries a stable marker class.
        result.direct_table_access = "<table class=\"m-table J-ajax-table\">" in html
        result.pagination_access = "<table class=\"m-table J-ajax-table\">" in html_page_2 and status_page_2 == 200
        result.realtime_hint = False
        result.notes.extend(
            [
                "同花顺公开 HTML 历史表可抓取。",
                "分页可通过非 ajax=1 直达 URL 访问。",
            ]
        )
        result.extracted = {
            "page_info": extract_page_info(html),
            "page_2_first_date": extract_first_date(html_page_2),
            "latest_date": result.latest_date,
        }
    except (HTTPError, URLError, TimeoutError) as exc:
        # Swallow network errors deliberately: a failed probe is still a result.
        result.error = f"{type(exc).__name__}: {exc}"
    return result
|
||||
|
||||
|
||||
def main() -> None:
    """Run every source probe and write the combined JSON report.

    Probes run first (in a fixed order), then the report — timestamped in
    UTC — is written to ``OUTPUT_PATH`` and the path is printed.
    """
    probe_results = [
        fn() for fn in (probe_eastmoney, probe_zhitong, probe_wind, probe_ths_reference)
    ]
    report = {
        "generated_at_utc": datetime.now(timezone.utc).isoformat(),
        "results": [asdict(entry) for entry in probe_results],
    }
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    OUTPUT_PATH.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")
    print(str(OUTPUT_PATH))


if __name__ == "__main__":
    main()
|
||||
33
tools/start_backend_10000.ps1
Normal file
33
tools/start_backend_10000.ps1
Normal file
@ -0,0 +1,33 @@
|
||||
# Start the FastAPI backend via uvicorn on 127.0.0.1:10000, capture its early
# stdout/stderr into a log file, and report whether it stayed up.

$log = "C:\Users\Administrator\Desktop\nanxiang1\run_logs\backend-10000.log"
# Ensure the log directory exists and start from a clean log file.
New-Item -ItemType Directory -Force -Path "C:\Users\Administrator\Desktop\nanxiang1\run_logs" | Out-Null
if (Test-Path $log) {
    Remove-Item $log -Force
}

# Launch python -m uvicorn with redirected output so startup logs can be harvested.
$psi = New-Object System.Diagnostics.ProcessStartInfo
$psi.FileName = "C:\Users\Administrator\Miniforge3\python.exe"
$psi.Arguments = "-m uvicorn app.main:app --app-dir backend --host 127.0.0.1 --port 10000"
$psi.WorkingDirectory = "C:\Users\Administrator\Desktop\nanxiang1"
$psi.UseShellExecute = $false
$psi.RedirectStandardOutput = $true
$psi.RedirectStandardError = $true

$process = New-Object System.Diagnostics.Process
$process.StartInfo = $psi
$null = $process.Start()

# Give the server a few seconds to either bind the port or crash.
Start-Sleep -Seconds 5

if (-not $process.HasExited) {
    # NOTE(review): .NET StreamReader exposes no ReadExisting() method (that API
    # belongs to SerialPort), so these two calls likely fail at runtime — confirm
    # and replace with ReadToEndAsync()-style non-blocking reads of a live process.
    $stdout = $process.StandardOutput.ReadExisting()
    $stderr = $process.StandardError.ReadExisting()
    Set-Content -Path $log -Value ($stdout + $stderr) -Encoding UTF8
    Write-Output "STARTED_PID=$($process.Id)"
    Get-Content $log
} else {
    # Process died during startup: capture everything it printed, then the exit code.
    $stdout = $process.StandardOutput.ReadToEnd()
    $stderr = $process.StandardError.ReadToEnd()
    Set-Content -Path $log -Value ($stdout + $stderr) -Encoding UTF8
    Write-Output "EXITED_CODE=$($process.ExitCode)"
    Get-Content $log
}
|
||||
25
tools/sync_ashare_flows.py
Normal file
25
tools/sync_ashare_flows.py
Normal file
@ -0,0 +1,25 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
sys.path.insert(0, str(ROOT / "backend"))
|
||||
|
||||
from app.services.ashare_flow_service import ashare_flow_service # noqa: E402
|
||||
|
||||
|
||||
def main() -> None:
|
||||
result = {
|
||||
"index_realtime": ashare_flow_service.sync_index_realtime(),
|
||||
"sector_realtime": ashare_flow_service.sync_sector_realtime(),
|
||||
"index_history": ashare_flow_service.backfill_index_daily_history(),
|
||||
"sector_history": ashare_flow_service.backfill_sector_daily_history(),
|
||||
}
|
||||
print(json.dumps(result, ensure_ascii=False, indent=2))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
20
tools/sync_eastmoney.py
Normal file
20
tools/sync_eastmoney.py
Normal file
@ -0,0 +1,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
sys.path.insert(0, str(ROOT / "backend"))
|
||||
|
||||
from app.services.eastmoney_sync_service import eastmoney_sync_service # noqa: E402
|
||||
|
||||
|
||||
def main() -> None:
|
||||
result = eastmoney_sync_service.sync()
|
||||
print(json.dumps(result, ensure_ascii=False, indent=2))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user