# Mirror of https://github.com/HKUDS/AI-Trader
# Synced 2026-04-21 21:47:54 +00:00
import glob
import json
import os

# NASDAQ-100 constituent tickers used below to decide which price files
# get merged.  Order is preserved from the original hand-maintained list.
all_nasdaq_100_symbols = [
    "NVDA", "MSFT", "AAPL", "GOOG", "GOOGL", "AMZN", "META", "AVGO",
    "TSLA", "NFLX", "PLTR", "COST", "ASML", "AMD", "CSCO", "AZN",
    "TMUS", "MU", "LIN", "PEP", "SHOP", "APP", "INTU", "AMAT",
    "LRCX", "PDD", "QCOM", "ARM", "INTC", "BKNG", "AMGN", "TXN",
    "ISRG", "GILD", "KLAC", "PANW", "ADBE", "HON", "CRWD", "CEG",
    "ADI", "ADP", "DASH", "CMCSA", "VRTX", "MELI", "SBUX", "CDNS",
    "ORLY", "SNPS", "MSTR", "MDLZ", "ABNB", "MRVL", "CTAS", "TRI",
    "MAR", "MNST", "CSX", "ADSK", "PYPL", "FTNT", "AEP", "WDAY",
    "REGN", "ROP", "NXPI", "DDOG", "AXON", "ROST", "IDXX", "EA",
    "PCAR", "FAST", "EXC", "TTWO", "XEL", "ZS", "PAYX", "WBD",
    "BKR", "CPRT", "CCEP", "FANG", "TEAM", "CHTR", "KDP", "MCHP",
    "GEHC", "VRSK", "CTSH", "CSGP", "KHC", "ODFL", "DXCM", "TTD",
    "ON", "BIIB", "LULU", "CDW", "GFS",
]
# Merge every JSON file whose name starts with "daily_price" into
# merged.jsonl, writing one JSON document per line (JSON Lines format).


def transform_daily_prices(data):
    """Rename OHLC keys in *data*'s time series, in place, and return it.

    For every daily bar: ``"1. open"`` -> ``"1. buy price"`` and
    ``"4. close"`` -> ``"4. sell price"``.  For the most recent date only
    the buy price is kept (its sell price is withheld), and the
    ``"Meta Data"`` description is rewritten to match the new key names.

    Best-effort: on any structurally unexpected payload the data is left
    as-is from the point of failure onward and returned unmodified beyond
    that, mirroring the original "write through on anomaly" behavior.

    :param data: decoded JSON payload (Alpha-Vantage-style dict expected).
    :returns: the same ``data`` object, mutated in place.
    """
    try:
        # Locate the first key beginning with "Time Series"
        # (e.g. "Time Series (Daily)").
        series = None
        for key, value in data.items():
            if key.startswith("Time Series"):
                series = value
                break

        if isinstance(series, dict) and series:
            # First pass: rename the open/close keys on every daily bar.
            for _date, bar in list(series.items()):
                if not isinstance(bar, dict):
                    continue
                if "1. open" in bar:
                    bar["1. buy price"] = bar.pop("1. open")
                if "4. close" in bar:
                    bar["4. sell price"] = bar.pop("4. close")

            # Second pass: on the latest date keep only the buy price.
            # ISO "YYYY-MM-DD" strings sort correctly lexicographically,
            # so max() yields the most recent day.
            latest_date = max(series.keys())
            latest_bar = series.get(latest_date, {})
            if isinstance(latest_bar, dict):
                buy_val = latest_bar.get("1. buy price")
                series[latest_date] = (
                    {"1. buy price": buy_val} if buy_val is not None else {}
                )

            # Keep the Meta Data description in sync with the renamed keys.
            meta = data.get("Meta Data", {})
            if isinstance(meta, dict):
                meta["1. Information"] = "Daily Prices (buy price, high, low, sell price) and Volumes"
    except Exception:
        # Structurally unexpected payloads are written through unmodified.
        pass
    return data


current_dir = os.path.dirname(__file__)
pattern = os.path.join(current_dir, "daily_price*.json")
files = sorted(glob.glob(pattern))

output_file = os.path.join(current_dir, "merged.jsonl")

with open(output_file, "w", encoding="utf-8") as fout:
    for fp in files:
        basename = os.path.basename(fp)
        # Only merge files whose name mentions a NASDAQ-100 symbol.
        # NOTE(review): plain substring matching — short tickers such as
        # "ON" or "EA" may match unrelated filenames; confirm the actual
        # filename format before relying on this filter.
        if not any(symbol in basename for symbol in all_nasdaq_100_symbols):
            continue
        with open(fp, "r", encoding="utf-8") as f:
            data = json.load(f)

        transform_daily_prices(data)

        fout.write(json.dumps(data, ensure_ascii=False) + "\n")