File size: 26,835 Bytes
ac60dc3
 
 
 
 
cfe1a3c
 
 
 
 
 
 
 
 
 
 
ac60dc3
 
 
 
 
 
 
cfe1a3c
 
 
 
 
 
 
 
62f31c8
 
 
 
cfe1a3c
 
 
 
 
 
 
 
ac60dc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfe1a3c
ac60dc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfe1a3c
 
 
 
ac60dc3
 
cfe1a3c
ac60dc3
cfe1a3c
ac60dc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298d9ab
ac60dc3
 
 
 
 
 
cfe1a3c
62f31c8
 
ac60dc3
 
 
 
 
 
 
 
 
 
 
62f31c8
 
298d9ab
 
 
 
 
 
 
 
 
 
 
 
 
 
cfe1a3c
 
 
 
 
 
 
 
 
 
 
298d9ab
ac60dc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298d9ab
cfe1a3c
 
 
 
 
 
298d9ab
 
 
ac60dc3
298d9ab
 
 
cfe1a3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ec911f
 
 
9d3e0cb
 
 
 
 
 
 
 
8ec911f
 
 
 
 
 
 
 
 
9d3e0cb
 
 
 
ac60dc3
 
 
 
 
 
 
 
 
 
 
 
9d3e0cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ec911f
9d3e0cb
8ec911f
cfe1a3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d48ef09
cfe1a3c
 
d48ef09
 
cfe1a3c
 
 
 
 
 
ac60dc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfe1a3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d48ef09
cfe1a3c
62f31c8
cfe1a3c
62f31c8
cfe1a3c
 
62f31c8
cfe1a3c
 
62f31c8
cfe1a3c
 
62f31c8
cfe1a3c
 
 
62f31c8
cfe1a3c
 
 
d48ef09
 
cfe1a3c
 
d48ef09
 
cfe1a3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ec911f
cfe1a3c
 
 
 
 
 
 
d48ef09
cfe1a3c
8ec911f
 
 
d48ef09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
# ================================
# Version switch - imported from the configuration file
# ================================
from data_source_config import USE_YFINANCE_VERSION, API_TIMEOUT_SECONDS, MAX_RETRY_ATTEMPTS

import logging
import re
import pandas as pd
from datetime import datetime, timedelta
import time  # 导入标准库的 time 模块

import os
import requests
import threading
import asyncio

# Import the data-source library selected by the USE_YFINANCE_VERSION switch.
# Only one of `yf` / `ak` is bound at module level, depending on the switch.
if USE_YFINANCE_VERSION:
    import yfinance as yf
    print("🔄 Using yfinance version (new)")
else:
    import akshare as ak
    print("🔄 Using akshare version (old)")

# Configure the root logger at INFO level for the whole process.
logging.basicConfig(level=logging.INFO)


# Directory that contains this file
base_dir = os.path.dirname(os.path.abspath(__file__))

# Absolute paths of the index-constituent CSV files (kept under ./model)
nasdaq_100_path = os.path.join(base_dir, './model/nasdaq100.csv')
dow_jones_path = os.path.join(base_dir, './model/dji.csv')
sp500_path = os.path.join(base_dir, './model/sp500.csv')
nasdaq_composite_path = os.path.join(base_dir, './model/nasdaq_all.csv')
# Load the constituent lists from the CSV files (runs once, at import time)
nasdaq_100_stocks = pd.read_csv(nasdaq_100_path)
dow_jones_stocks = pd.read_csv(dow_jones_path)
sp500_stocks = pd.read_csv(sp500_path)
nasdaq_composite_stocks = pd.read_csv(nasdaq_composite_path)


def fetch_stock_us_spot_data_with_retries():
    """Return the stock list from whichever data source the switch selects.

    Dispatches to the yfinance-based builder when ``USE_YFINANCE_VERSION`` is
    truthy and to the akshare fetcher otherwise, returning its result as-is.
    """
    fetcher = (
        fetch_stock_us_spot_data_yfinance
        if USE_YFINANCE_VERSION
        else fetch_stock_us_spot_data_akshare
    )
    return fetcher()

def fetch_stock_us_spot_data_akshare():
    """Fetch the US spot-quote table through akshare, retrying until it works.

    In yfinance mode this only prints a warning and returns an empty
    DataFrame.  Otherwise ``ak.stock_us_spot_em`` is called in an endless
    loop: after every failure the function sleeps 10, 20, 60, 300 and then
    600 seconds (the last interval is reused for all further attempts).
    """
    if USE_YFINANCE_VERSION:
        print("Warning: akshare function called while using yfinance version")
        return pd.DataFrame()

    # Back-off schedule in seconds; the final entry repeats indefinitely.
    backoff_seconds = [10, 20, 60, 300, 600]
    step = 0

    while True:
        try:
            return ak.stock_us_spot_em()
        except Exception as e:
            print(f"Error fetching data: {e}")

        wait_time = backoff_seconds[step]
        print(f"Retrying in {wait_time} seconds...")
        time.sleep(wait_time)

        # Advance through the schedule without running past its last entry.
        step = min(step + 1, len(backoff_seconds) - 1)

def fetch_stock_us_spot_data_yfinance():
    """Build the symbol table used in yfinance mode from local data only.

    Ticker codes are gathered from the four constituent tables loaded at
    import time (using their ``Symbol`` column, or ``Code`` when there is no
    ``Symbol``), merged with a fixed list of popular ETFs and stocks, sorted,
    and returned as a DataFrame with the columns ``代码`` (ticker) and ``名称``
    (a placeholder name of the form ``"<ticker> Inc."``).

    If anything goes wrong a small hard-coded table with the same two columns
    is returned instead.
    """
    try:
        tickers = set()

        # Pull the ticker column out of every constituent table that has one.
        for constituents in (
            nasdaq_100_stocks,
            dow_jones_stocks,
            sp500_stocks,
            nasdaq_composite_stocks,
        ):
            code_column = next(
                (col for col in ('Symbol', 'Code') if col in constituents.columns),
                None,
            )
            if code_column is not None:
                tickers.update(constituents[code_column].dropna().astype(str).tolist())

        # Well-known ETFs and popular stocks that are always included.
        tickers.update([
            # Major ETFs
            'SPY', 'QQQ', 'IWM', 'VTI', 'ARKK', 'TQQQ', 'SQQQ', 'SPXL',
            # Popular tech stocks
            'AAPL', 'MSFT', 'GOOGL', 'GOOG', 'AMZN', 'TSLA', 'META', 'NVDA', 'NFLX',
            'AMD', 'INTC', 'ORCL', 'CRM', 'ADBE', 'PYPL', 'UBER', 'LYFT',
            # US-listed Chinese companies
            'BABA', 'JD', 'PDD', 'NIO', 'XPEV', 'LI', 'DIDI', 'TME',
            # Other popular stocks
            'COST', 'WMT', 'JPM', 'BAC', 'XOM', 'CVX', 'PFE', 'JNJ', 'KO', 'PEP',
        ])

        symbols_list = sorted(tickers)
        placeholder_names = [f'{symbol} Inc.' for symbol in symbols_list]
        symbols_df = pd.DataFrame({'代码': symbols_list, '名称': placeholder_names})

        print(f"Created symbols dataframe with {len(symbols_df)} symbols using yfinance version")
        return symbols_df

    except Exception as e:
        print(f"Error creating symbols dataframe: {e}")
        # Minimal fallback table so callers still receive the expected columns.
        fallback_symbols = [
            'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'NFLX',
            'SPY', 'QQQ', 'IWM', 'VTI'
        ]
        fallback_names = [f'{symbol} Inc.' for symbol in fallback_symbols]
        return pd.DataFrame({'代码': fallback_symbols, '名称': fallback_names})



async def fetch_stock_us_spot_data_with_retries_async():
    """Async variant of the stock-list fetch that honours the version switch.

    In akshare mode the work is delegated to the akshare coroutine.  In
    yfinance mode the synchronous builder runs in a worker thread; any
    exception it raises is printed and an empty DataFrame is returned.
    """
    if not USE_YFINANCE_VERSION:
        return await fetch_stock_us_spot_data_akshare_async()

    try:
        return await asyncio.to_thread(fetch_stock_us_spot_data_yfinance)
    except Exception as e:
        print(f"Error in async yfinance fetch: {e}")
        return pd.DataFrame()

async def fetch_stock_us_spot_data_akshare_async():
    """Fetch the akshare US spot-quote table without blocking the event loop.

    In yfinance mode this only prints a warning and returns an empty
    DataFrame.  Otherwise ``ak.stock_us_spot_em`` is run in a worker thread
    with a 30 second timeout, for at most three attempts; the pauses between
    attempts are 10 and then 20 seconds.  If every attempt fails an empty
    DataFrame is returned.
    """
    if USE_YFINANCE_VERSION:
        print("Warning: akshare async function called while using yfinance version")
        return pd.DataFrame()

    retry_intervals = [10, 20]  # pause (seconds) after the 1st and 2nd failure
    max_retries = 2  # retries allowed on top of the initial attempt

    for attempt in range(max_retries + 1):
        try:
            # Give each attempt at most 30 seconds.
            return await asyncio.wait_for(
                asyncio.to_thread(ak.stock_us_spot_em),
                timeout=30.0,
            )
        except asyncio.TimeoutError:
            print(f"Timeout error fetching data (attempt {attempt + 1}/{max_retries + 1})")
        except Exception as e:
            print(f"Error fetching data (attempt {attempt + 1}/{max_retries + 1}): {e}")

        if attempt >= max_retries:
            break

        # The number of failures so far selects the pause, capped at the last entry.
        wait_time = retry_intervals[min(attempt, len(retry_intervals) - 1)]
        print(f"Retrying in {wait_time} seconds...")
        await asyncio.sleep(wait_time)

    # Every attempt failed: hand back an empty frame.
    print("All retries failed, returning empty data")
    return pd.DataFrame()

# Symbol table shared by the lookup helpers; None until fetch_symbols() assigns it.
symbols = None

def create_fallback_symbols():
    """Return a small hard-coded symbol table intended for testing.

    The frame has the columns ``代码`` (ticker) and ``名称`` (placeholder name
    of the form ``"<ticker> Inc."``).
    """
    fallback_symbols = [
        'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'NFLX',
        'SPY', 'QQQ', 'IWM', 'VTI'
    ]
    placeholder_names = [f'{symbol} Inc.' for symbol in fallback_symbols]
    table = {'代码': fallback_symbols, '名称': placeholder_names}
    return pd.DataFrame(table)

async def fetch_symbols():
    """Populate the module-level ``symbols`` table asynchronously.

    The result of the async fetch is stored in ``symbols``.  If that result is
    ``None`` or empty, or if the fetch raises, ``symbols`` is set to an empty
    DataFrame instead.  Progress is reported with ``print``.
    """
    global symbols
    try:
        print("Starting symbols initialization...")
        symbols = await fetch_stock_us_spot_data_with_retries_async()
        loaded = symbols is not None and not symbols.empty
        if not loaded:
            print("Symbols initialization failed, using empty dataset")
            symbols = pd.DataFrame()
        else:
            print(f"Symbols initialized successfully: {len(symbols)} symbols loaded")
    except Exception as e:
        print(f"Error in fetch_symbols: {e}")
        symbols = pd.DataFrame()
    finally:
        print("Symbols initialization completed")


# Module-level caches of index history; None until update_stock_indices() fills them.
index_us_stock_index_INX = None
index_us_stock_index_DJI = None
index_us_stock_index_IXIC = None
index_us_stock_index_NDX = None

def update_stock_indices():
    """Refresh the cached index histories and schedule the next refresh.

    In yfinance mode the last eight weeks of ``^GSPC``, ``^DJI``, ``^IXIC``
    and ``^NDX`` are downloaded one by one; each successful download is
    converted to the Chinese column layout used elsewhere in this module and
    stored in the matching ``index_us_stock_index_*`` global.  A failure for
    one index is printed and does not affect the others.  In akshare mode the
    four globals are assigned from ``ak.index_us_stock_sina``.

    Regardless of the outcome, a timer is armed to call this function again
    in 12 hours.  The timer thread is a daemon so that the endlessly
    re-scheduled refresh never keeps the interpreter alive at shutdown.
    """
    global index_us_stock_index_INX, index_us_stock_index_DJI, index_us_stock_index_IXIC, index_us_stock_index_NDX
    try:
        print("Starting stock indices update...")

        if USE_YFINANCE_VERSION:
            print("Updating indices using yfinance...")

            # Date window: the last eight weeks up to now.
            end_date = datetime.now()
            start_date = end_date - timedelta(weeks=8)

            # yfinance ticker -> suffix of the module-level cache variable
            indices = {
                '^GSPC': 'INX',  # S&P 500
                '^DJI': 'DJI',   # Dow Jones
                '^IXIC': 'IXIC', # NASDAQ Composite
                '^NDX': 'NDX'    # NASDAQ 100
            }

            for yf_symbol, var_name in indices.items():
                try:
                    hist_data = yf.Ticker(yf_symbol).history(start=start_date, end=end_date)

                    if hist_data.empty:
                        print(f"No data received for {yf_symbol}")
                        continue

                    # Same column layout as the frames built elsewhere in this module.
                    formatted_data = pd.DataFrame({
                        'date': hist_data.index.strftime('%Y-%m-%d'),
                        '开盘': hist_data['Open'].values,
                        '收盘': hist_data['Close'].values,
                        '最高': hist_data['High'].values,
                        '最低': hist_data['Low'].values,
                        '成交量': hist_data['Volume'].values,
                        '成交额': (hist_data['Close'] * hist_data['Volume']).values
                    })

                    # Publish the frame in the matching module-level cache.
                    globals()[f"index_us_stock_index_{var_name}"] = formatted_data

                    print(f"Successfully updated {var_name}: {len(formatted_data)} records")

                except Exception as e:
                    print(f"Error fetching {yf_symbol}: {e}")
        else:
            print("Updating indices using akshare...")
            index_us_stock_index_INX = ak.index_us_stock_sina(symbol=".INX")
            index_us_stock_index_DJI = ak.index_us_stock_sina(symbol=".DJI")
            index_us_stock_index_IXIC = ak.index_us_stock_sina(symbol=".IXIC")
            index_us_stock_index_NDX = ak.index_us_stock_sina(symbol=".NDX")

        print("Stock indices updated successfully")
    except Exception as e:
        print(f"Error updating stock indices: {e}")

    # Re-arm the refresh for 12 hours from now.  A non-daemon Timer would
    # block interpreter exit until it fires (and it re-arms itself forever).
    refresh_timer = threading.Timer(12 * 60 * 60, update_stock_indices)
    refresh_timer.daemon = True
    refresh_timer.start()

# The first index refresh is deferred instead of running at import time.
def start_indices_update():
    """Schedule the first index refresh 5 seconds from now.

    Deferring the refresh keeps it from blocking application start-up.  The
    timer thread is a daemon so that importing this module never prevents the
    interpreter from exiting; timers created from within that thread inherit
    the daemon flag.
    """
    first_refresh = threading.Timer(5, update_stock_indices)
    first_refresh.daemon = True
    first_refresh.start()

# Arm the deferred index refresh as soon as the module is imported.
start_indices_update()


# Translation table from the Chinese column names used by the raw frames
# to the English names used by the rest of the pipeline.
column_mapping = {
    '日期': 'date',
    '开盘': 'open',
    '收盘': 'close',
    '最高': 'high',
    '最低': 'low',
    '成交量': 'volume',
    '成交额': 'amount',
    '振幅': 'amplitude',
    '涨跌幅': 'price_change_percentage',
    '涨跌额': 'price_change_amount',
    '换手率': 'turnover_rate'
}

# Canonical column order that every returned history frame is reindexed to.
standard_columns = ['date', 'open', 'close', 'high', 'low', 'volume', 'amount']


# Lookup helper: resolve a bare ticker to the code stored in the symbol table.
def find_stock_entry(stock_code):
    """Return the entry of ``symbols['代码']`` that corresponds to *stock_code*.

    An exact match is preferred.  Failing that, the first code that ends with
    *stock_code* directly after a non-alphanumeric separator is returned
    (e.g. ``'AAPL'`` resolves to ``'105.AAPL'``), so a short ticker such as
    ``'A'`` no longer resolves to an unrelated code like ``'NVDA'``.

    :param stock_code: ticker to look up (case-sensitive)
    :return: the matching code, or ``""`` when *stock_code* is empty, the
        symbol table has not been loaded (``None`` / empty / no ``代码``
        column) or nothing matches

    Example::

        code = find_stock_entry('AAPL')
    """
    if not stock_code:
        return ""
    if symbols is None or symbols.empty or '代码' not in symbols.columns:
        return ""

    stock_code = str(stock_code)
    codes = symbols['代码'].dropna().astype(str)

    # 1) exact ticker match
    exact = codes[codes == stock_code]
    if not exact.empty:
        return exact.iloc[0]

    # 2) prefixed form: the ticker must follow a separator, not just be a suffix
    suffix_pattern = rf'[^0-9A-Za-z]{re.escape(stock_code)}$'
    prefixed = codes[codes.str.contains(suffix_pattern, regex=True)]
    if not prefixed.empty:
        return prefixed.iloc[0]

    return ""


def reduce_columns(df, columns_to_keep):
    """Return ``df[columns_to_keep]``, i.e. *df* restricted to the given columns."""
    selected = df[columns_to_keep]
    return selected
    

# Cache of the most recent prices: symbol -> (price, time the price was fetched)
_price_cache = {}

def get_last_minute_stock_price(symbol: str, max_retries=3) -> float:
    """Return the latest known price for *symbol*, cached for 30 minutes.

    In yfinance mode the price is the last close of today's 5-minute bars.
    In akshare mode it is the last daily close (``收盘``) within the most
    recent ten calendar days.

    :param symbol: ticker to query; an empty value or the sentinel
        ``"NONE_SYMBOL_FOUND"`` yields ``-1.0`` immediately
    :param max_retries: number of fetch attempts, one second apart
    :return: the price, or ``-1.0`` if no attempt produced one
    """
    if not symbol or symbol == "NONE_SYMBOL_FOUND":
        return -1.0

    current_time = datetime.now()

    # Serve from the cache while the entry is younger than 30 minutes.
    cached = _price_cache.get(symbol)
    if cached is not None:
        cached_price, cached_time = cached
        if current_time - cached_time < timedelta(minutes=30):
            return cached_price

    for attempt in range(max_retries):
        try:
            if USE_YFINANCE_VERSION:
                stock_data = yf.download(
                    symbol,
                    period='1d',
                    interval='5m',
                    progress=False,  # no progress bar
                    timeout=10
                )
                close_column = 'Close'
            else:
                # Query a window that ends today (a fixed historical window
                # can never yield the latest price) and read the close from
                # the column name akshare frames use.
                window_start = current_time - timedelta(days=10)
                stock_data = ak.stock_us_hist(
                    symbol=symbol,
                    period="daily",
                    start_date=window_start.strftime("%Y%m%d"),
                    end_date=current_time.strftime("%Y%m%d")
                )
                close_column = '收盘'

            closes = None
            if stock_data is not None and not stock_data.empty:
                closes = stock_data[close_column]
                # yfinance may return multi-level columns (one sub-column per
                # ticker); reduce that to the single requested ticker.
                if isinstance(closes, pd.DataFrame):
                    closes = closes.iloc[:, 0]
                closes = closes.dropna()

            if closes is None or closes.empty:
                print(f"Warning: Empty data received for {symbol}, attempt {attempt + 1}/{max_retries}")
            else:
                latest_price = float(closes.iloc[-1])
                _price_cache[symbol] = (latest_price, current_time)
                return latest_price

        except Exception as e:
            print(f"Error fetching price for {symbol}, attempt {attempt + 1}/{max_retries}: {str(e)}")

        # Pause briefly before the next attempt (not after the last one).
        if attempt < max_retries - 1:
            time.sleep(1)

    return -1.0

# Return the recent price history of a single stock.
def get_stock_history(symbol, news_date, retries=10):
    """Return the last 60 days of daily history for *symbol*.

    A symbol without any digit is first resolved to its full code through
    ``find_stock_entry``; if that fails the symbol is treated as unknown.
    Data comes from yfinance or akshare depending on the version switch.
    Timeouts and connection errors are retried up to *retries* times with a
    10/20/60/300/600 second back-off; any other error stops the attempt.

    :param symbol: ticker or full code
    :param news_date: only used in the error message; the window always ends
        at the current date
    :param retries: maximum number of retries after a timeout/connection error
    :return: DataFrame with the columns in ``standard_columns``.  When no data
        could be obtained, one all-zero row per calendar day of the window is
        returned instead.

    Example::

        result = get_stock_history('AAPL', '20240214')
    """
    # Back-off schedule (seconds) between retries; the last entry repeats.
    retry_intervals = [10, 20, 60, 300, 600]

    # A symbol without a numeric prefix is resolved to its full code.
    if not any(char.isdigit() for char in symbol):
        symbol = (find_stock_entry(symbol) if symbol else "") or ""

    # The window is the 60 days up to today.
    current_date = datetime.now()
    window_start = current_date - timedelta(days=60)
    start_date = window_start.strftime("%Y%m%d")
    end_date = current_date.strftime("%Y%m%d")

    stock_hist_df = None
    retry_count = 0

    while symbol and retry_count <= retries:
        try:
            if USE_YFINANCE_VERSION:
                # yfinance expects YYYY-MM-DD dates.
                raw = yf.Ticker(symbol).history(
                    start=window_start.strftime("%Y-%m-%d"),
                    end=current_date.strftime("%Y-%m-%d"),
                )
                if raw.empty:
                    stock_hist_df = None
                else:
                    # Convert to the same column layout the akshare branch yields.
                    raw = raw.reset_index()
                    stock_hist_df = pd.DataFrame({
                        'date': raw['Date'].dt.strftime('%Y-%m-%d'),
                        '开盘': raw['Open'],
                        '收盘': raw['Close'],
                        '最高': raw['High'],
                        '最低': raw['Low'],
                        '成交量': raw['Volume'],
                        '成交额': raw['Close'] * raw['Volume'],
                        '振幅': 0,    # not provided by yfinance
                        '涨跌幅': 0,  # not computed here
                        '涨跌额': 0,  # not computed here
                        '换手率': 0   # not provided by yfinance
                    })
            else:
                stock_hist_df = ak.stock_us_hist(symbol=symbol, period="daily", start_date=start_date, end_date=end_date, adjust="")
            break

        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError, ConnectionError) as e:
            print(f"Request timed out: {e}. Retrying...")
            retry_count += 1
            if retry_count > retries:
                break
            # Back off before the next attempt so a struggling endpoint is
            # not hit again immediately.
            wait_time = retry_intervals[min(retry_count - 1, len(retry_intervals) - 1)]
            print(f"Waiting for {wait_time} seconds before retrying...")
            time.sleep(wait_time)

        except Exception as e:
            # Most likely there is simply no data; give up without retrying.
            # (KeyboardInterrupt / SystemExit are deliberately not caught.)
            print(f"Error {e} scraping data for {symbol} on {news_date}. Break...")
            break

    # No data: substitute an all-zero frame covering every day of the window.
    if stock_hist_df is None or stock_hist_df.empty:
        stock_hist_df = pd.DataFrame({
            'date': pd.date_range(start=start_date, end=end_date),
            '开盘': 0,
            '收盘': 0,
            '最高': 0,
            '最低': 0,
            '成交量': 0,
            '成交额': 0,
            '振幅': 0,
            '涨跌幅': 0,
            '涨跌额': 0,
            '换手率': 0
        })

    # Normalise the column names; reindex leaves exactly the standard columns.
    stock_hist_df = stock_hist_df.rename(columns=column_mapping)
    return stock_hist_df.reindex(columns=standard_columns)
# result = get_stock_history('ATMU', '20231218')
# print(result)


# Return the history of the index that a stock belongs to.
def get_stock_index_history(symbol, news_date, force_index=0):
    """Return the last eight weeks of history for the index *symbol* belongs to.

    The index is picked from the constituent tables in this order:
    NASDAQ-100, Dow Jones, S&P 500, otherwise NASDAQ Composite.  A
    *force_index* of 1 (NASDAQ-100), 2 (Dow Jones), 3 (S&P 500) or
    4 (NASDAQ Composite) overrides the lookup, including for symbols that are
    members of an index checked earlier.

    :param symbol: ticker whose index is wanted (may be ``None`` or empty)
    :param news_date: unused; the window always ends at the current date
    :param force_index: 0 to look the index up, 1-4 to force one
    :return: DataFrame with the columns in ``standard_columns``; empty when
        the cached index data has not been loaded yet

    Example::

        result = get_stock_index_history('AAPL', '20240214')
    """
    forced_choices = {1: 'NDX', 2: 'DJI', 3: 'INX', 4: 'IXIC'}
    if force_index in forced_choices:
        index_name = forced_choices[force_index]
    elif symbol in nasdaq_100_stocks['Symbol'].values:
        index_name = 'NDX'
    elif symbol in dow_jones_stocks['Symbol'].values:
        index_name = 'DJI'
    elif symbol in sp500_stocks['Symbol'].values:
        index_name = 'INX'
    else:
        # NASDAQ Composite members, unknown symbols and empty symbols alike
        index_name = 'IXIC'

    index_data = {
        'NDX': index_us_stock_index_NDX,
        'DJI': index_us_stock_index_DJI,
        'INX': index_us_stock_index_INX,
        'IXIC': index_us_stock_index_IXIC,
    }[index_name]

    # The caches stay None until the first refresh has succeeded.
    if index_data is None or index_data.empty or 'date' not in index_data.columns:
        return pd.DataFrame(columns=standard_columns)

    # Window: the eight weeks ending today (compared at day granularity).
    window_end = pd.Timestamp(datetime.now().date())
    window_start = window_end - pd.Timedelta(weeks=8)

    # Parse the dates into a separate Series so the shared cached frame is
    # never modified.
    dates = pd.to_datetime(index_data['date'])
    in_window = (dates >= window_start) & (dates <= window_end)
    index_hist_df = index_data.loc[in_window].assign(date=dates[in_window])

    # Normalise the column names; reindex leaves exactly the standard columns.
    index_hist_df = index_hist_df.rename(columns=column_mapping)
    return index_hist_df.reindex(columns=standard_columns)


def find_stock_codes_or_names(entities):
    """Map named entities to the ticker symbols they refer to.

    An entity matches when its upper-cased text is a known symbol, or when its
    lower-cased text occurs as a whole word/phrase in a company name; the name
    is then translated to the symbol found in the same row of the same
    constituent table.

    :param entities: NER results as ``[(entity_text, entity_type), ...]``
    :return: list of matching symbols (no particular order), or
        ``['NONE_SYMBOL_FOUND']`` when nothing matches
    """
    # Each constituent table together with the column holding company names.
    constituent_tables = (
        (nasdaq_100_stocks, 'Name'),
        (nasdaq_composite_stocks, 'Name'),
        (sp500_stocks, 'Security'),
        (dow_jones_stocks, 'Company'),
    )

    known_symbols = set()
    name_symbol_pairs = set()
    for table, name_column in constituent_tables:
        known_symbols.update(table['Symbol'].dropna().astype(str))
        # Pair names with symbols row by row so a name can only ever map to
        # its own ticker.
        listed = table[['Symbol', name_column]].dropna().astype(str)
        name_symbol_pairs.update(zip(listed[name_column].str.lower(), listed['Symbol']))

    stock_codes = set()
    for entity, _entity_type in entities:
        # A blank entity would match every company name; ignore it.
        if not entity or not entity.strip():
            continue

        entity_upper = entity.upper()
        if entity_upper in known_symbols:
            stock_codes.add(entity_upper)

        # Whole-word match against company names; compiled once per entity.
        pattern = re.compile(rf'\b{re.escape(entity.lower())}\b')
        stock_codes.update(
            symbol.upper()
            for name, symbol in name_symbol_pairs
            if pattern.search(name)
        )

    if not stock_codes:
        return ['NONE_SYMBOL_FOUND']
    return list(stock_codes)


def process_history(stock_history, target_date, history_days=30, following_days=3):
    """Split a price history into the rows up to and the rows after a date.

    :param stock_history: DataFrame with a ``date`` column plus value columns;
        it is not modified
    :param target_date: anything ``pd.to_datetime`` accepts
    :param history_days: number of rows to return up to and including the
        latest row dated on or before *target_date*
    :param following_days: number of rows to return after that row
    :return: ``(previous_rows, following_rows)``; each has exactly the
        requested number of rows (``-1`` rows are prepended when the history
        is too short) and at most the first six value columns.  If the frame
        is empty, has no ``date`` column, or has no row on or before
        *target_date*, both results are placeholder frames filled with ``-1``.
    """
    if stock_history.empty or 'date' not in stock_history.columns:
        return create_empty_data(history_days), create_empty_data(following_days)

    target_date = pd.to_datetime(target_date)

    # assign() builds a new frame, so the caller's DataFrame stays untouched.
    history = stock_history.assign(date=pd.to_datetime(stock_history['date']))
    history = history.sort_values('date')

    # After the ascending sort, the rows dated on or before the target form a
    # prefix, so the target row's position is simply the prefix length - 1.
    rows_up_to_target = int((history['date'] <= target_date).sum())
    if rows_up_to_target == 0:
        return create_empty_data(history_days), create_empty_data(following_days)
    target_pos = rows_up_to_target - 1

    # History window (target row included) and the rows that follow it.
    start_pos = max(0, target_pos - history_days + 1)
    previous_rows = history.iloc[start_pos:target_pos + 1].drop(columns=['date'])
    following_rows = history.iloc[target_pos + 1:target_pos + following_days + 1].drop(columns=['date'])

    # Pad windows that came out shorter than requested.
    previous_rows = handle_insufficient_data(previous_rows, history_days)
    following_rows = handle_insufficient_data(following_rows, following_days)

    return previous_rows.iloc[:, :6], following_rows.iloc[:, :6]

def create_empty_data(days):
    """Return a *days*-row placeholder frame whose six price columns are all -1."""
    return pd.DataFrame({
        '开盘': [-1] * days,
        '收盘': [-1] * days,
        '最高': [-1] * days,
        '最低': [-1] * days,
        '成交量': [-1] * days,
        '成交额': [-1] * days
    })

def handle_insufficient_data(data, required_days):
    """Pad *data* at the top with ``-1`` rows until it has *required_days* rows.

    The padding uses the same columns as *data*, so real values stay in their
    own columns.  A frame that is already long enough is returned unchanged;
    a padded frame gets a fresh 0..n-1 index.
    """
    missing_rows = required_days - len(data)
    if missing_rows <= 0:
        return data

    if len(data.columns) == 0:
        # No columns to mirror: fall back to the default placeholder layout.
        padding = create_empty_data(missing_rows)
    else:
        padding = pd.DataFrame(-1, index=range(missing_rows), columns=data.columns)

    if data.empty:
        return padding
    return pd.concat([padding, data]).reset_index(drop=True)



if __name__ == "__main__":
    # Manual smoke run: call each public helper once and print what it returns.
    result = find_stock_entry('AAPL')
    print(f"find_stock_entry: {result}")
    result = get_stock_history('AAPL', '20240214')
    print(f"get_stock_history: {result}")
    result = get_stock_index_history('AAPL', '20240214')
    print(f"get_stock_index_history: {result}")
    result = find_stock_codes_or_names([('苹果', 'ORG'), ('苹果公司', 'ORG')])
    print(f"find_stock_codes_or_names: {result}")
    result = process_history(get_stock_history('AAPL', '20240214'), '20240214')
    print(f"process_history: {result}")
    result = process_history(get_stock_index_history('AAPL', '20240214'), '20240214')
    print(f"process_history: {result}")
    pass