Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import yfinance as yf | |
| from datasets import Dataset, load_dataset | |
| from transformers import pipeline | |
| import plotly.graph_objects as go | |
| import gradio as gr | |
| from huggingface_hub import login | |
| import os | |
# --- Hugging Face login ---
# HF_TOKEN is read from the environment. When it is missing, login(token=None)
# falls back to an interactive prompt, which cannot be answered by a
# non-interactive process, so the login is skipped instead.
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)
else:
    print("HF_TOKEN is not set; skipping Hugging Face login.")
# --- Constants ---
# Hub repositories: the source news dataset and the dataset that stores the
# rows once they have been scored by the models.
HF_DATASET = "SelmaNajih001/Cnbc_MultiCompany"
HF_PRIVATE_DATASET = "SelmaNajih001/portfolio_strategy_data2"

# Model checkpoints loaded by the pipelines.
MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
MODEL_PRICE_TESLA = "SelmaNajih001/PricePredictionForTesla"
MODEL_PRICE_MICROSOFT = "SelmaNajih001/PricePredictionForMicrosoft"
MODEL_FINBERT = "ProsusAI/finbert"

# Company name (as found in the 'Company' column) -> ticker symbol.
TICKERS = {
    "Tesla": "TSLA",  # Tesla, Inc.
    "Microsoft": "MSFT",
}
companies = list(TICKERS)
# --- Pipelines ---
# One inference pipeline per model; all of them are called on news titles.
sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_SENTIMENT)
finbert_pipeline = pipeline("sentiment-analysis", model=MODEL_FINBERT)

# The per-company price models are loaded as text classifiers and their
# 'score' field is later stored as the predicted value.
# NOTE(review): whether 'score' is a raw regression output or a squashed
# probability depends on each model's configuration - confirm.
price_pipeline_tesla = pipeline("text-classification", model=MODEL_PRICE_TESLA)
price_pipeline_msft = pipeline("text-classification", model=MODEL_PRICE_MICROSOFT)
# --- Dataset loading ---
# Source news: parse the raw 'Date' column (unparseable values become NaT),
# derive a midnight-normalised 'date_merge' key used to join daily prices,
# and order the rows chronologically.
df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
df_multi['date_merge'] = df_multi['date'].dt.normalize()
df_multi.sort_values('date', inplace=True)

# Previously scored rows. Loading is best effort: on any failure the script
# falls back to an empty frame, which means every row is scored again.
# Only Exception is caught so that Ctrl-C / SystemExit still propagate.
try:
    ds_existing = load_dataset(HF_PRIVATE_DATASET)["train"]
    df_existing = pd.DataFrame(ds_existing)
except Exception as exc:
    print(f"Could not load {HF_PRIVATE_DATASET} ({exc!r}); scoring all rows from scratch.")
    df_existing = pd.DataFrame()
# --- Determine new rows ---
# A row counts as new when its raw 'Date' value is absent from the stored
# dataset. The explicit .copy() makes df_to_add an independent frame, so the
# columns added to it afterwards never write into a slice of df_multi.
# NOTE(review): rows are matched on 'Date' alone; if two companies can share
# the same 'Date' value, Company/Title should be part of the key - confirm.
if not df_existing.empty:
    already_scored = df_multi['Date'].isin(df_existing['Date'])
    df_to_add = df_multi[~already_scored].copy()
else:
    df_to_add = df_multi.copy()
# --- Score only the new rows ---
# Defaults; a row keeps them when its company has no price model.
df_to_add['Sentiment'] = ""
df_to_add['Confidence'] = 0.0
df_to_add['Predicted'] = 0.0
df_to_add['FinBERT_Sentiment'] = ""
df_to_add['FinBERT_Confidence'] = 0.0

# Company name -> price model; companies without an entry keep Predicted = 0.0.
price_pipelines = {
    "Tesla": price_pipeline_tesla,
    "Microsoft": price_pipeline_msft,
}

for i, row in df_to_add.iterrows():
    title = row['Title']
    company = row['Company']

    # Custom sentiment model. A failing row is marked 'ERROR' instead of
    # aborting the whole run; the failure is reported rather than hidden.
    try:
        res = sentiment_pipeline(title)[0]
        df_to_add.at[i, 'Sentiment'] = res['label'].upper().strip()
        df_to_add.at[i, 'Confidence'] = res['score']
    except Exception as exc:
        print(f"Sentiment model failed on row {i}: {exc!r}")
        df_to_add.at[i, 'Sentiment'] = 'ERROR'
        df_to_add.at[i, 'Confidence'] = 0.0

    # FinBERT, same error policy.
    try:
        res_f = finbert_pipeline(title)[0]
        df_to_add.at[i, 'FinBERT_Sentiment'] = res_f['label'].upper().strip()
        df_to_add.at[i, 'FinBERT_Confidence'] = res_f['score']
    except Exception as exc:
        print(f"FinBERT failed on row {i}: {exc!r}")
        df_to_add.at[i, 'FinBERT_Sentiment'] = 'ERROR'
        df_to_add.at[i, 'FinBERT_Confidence'] = 0.0

    # Per-company price model.
    price_pipeline = price_pipelines.get(company)
    if price_pipeline is not None:
        try:
            val = price_pipeline(title)[0]['score']
            # NOTE(review): max(val, 1.0) floors the stored value at 1.0, so a
            # later "Predicted < -1" test can never be true and "Predicted > 1"
            # needs val > 1. Kept as in the original; confirm the intended
            # clamp (or whether the raw value should be stored).
            df_to_add.at[i, 'Predicted'] = max(val, 1.0)
        except Exception as exc:
            print(f"Price model for {company} failed on row {i}: {exc!r}")
            df_to_add.at[i, 'Predicted'] = 0.0
# --- Merge the new rows into the stored dataset ---
if not df_existing.empty:
    df_updated = pd.concat([df_existing, df_to_add], ignore_index=True)
else:
    df_updated = df_to_add.copy()

# --- Push to Hugging Face ---
# preserve_index=False: when nothing was stored yet, df_updated still carries
# the shuffled index left by the chronological sort; without this flag that
# index is saved as an extra '__index_level_0__' column in the dataset.
hf_dataset_updated = Dataset.from_pandas(df_updated, preserve_index=False)
hf_dataset_updated.push_to_hub(HF_PRIVATE_DATASET, private=True)
print(f"Dataset aggiornato su Hugging Face: {HF_PRIVATE_DATASET}")
# --- Rest of the code (prices, strategies, Gradio) ---
df_multi = df_updated.copy()

# Daily closing prices per company, from the first news date until today.
end_date = pd.Timestamp.today()
prices = {}
for company, ticker in TICKERS.items():
    start_date = df_multi.loc[df_multi['Company'] == company, 'date'].min()
    if pd.isnull(start_date):
        # No (parseable) news date for this company: there is no valid
        # download window, so the company is left out of `prices`.
        print(f"No dated news rows for {company}; skipping price download.")
        continue

    df_prices = yf.download(ticker, start=start_date, end=end_date)
    if df_prices.empty:
        print(f"No price data returned for {ticker}; skipping {company}.")
        continue

    df_prices = df_prices[['Close']].reset_index()
    df_prices.columns = ['Date_', f'Close_{ticker}']
    df_prices['date_merge'] = pd.to_datetime(df_prices['Date_']).dt.normalize()
    # shift(-1): each row holds the relative change from that day's close to
    # the next available close, i.e. the move that follows the news day.
    df_prices['PctChangeDaily'] = df_prices[f'Close_{ticker}'].pct_change().shift(-1)
    prices[company] = df_prices
def _cumulative_pnl(df, long_mask, short_mask, price_col):
    """Return the running profit and loss of a long/short signal.

    For every row the step is the previous row's close multiplied by the
    row's 'PctChangeDaily'. The step is added where `long_mask` is true,
    subtracted where `short_mask` is true and ignored elsewhere. Missing
    values (including the first row, which has no previous close) count as
    a zero step, so the series starts at 0.0.
    """
    # NOTE(review): the price comes from the previous *news row*, not from the
    # same day as the percentage change - kept as in the original; confirm.
    step = (df[price_col].shift(1) * df['PctChangeDaily']).fillna(0.0)
    direction = long_mask.astype(float) - short_mask.astype(float)
    return (direction * step).cumsum()


dfs_final = {}
for company in companies:
    if company not in prices:
        # Without prices none of the strategy or price columns can be built.
        print(f"No price data for {company}; skipping strategy computation.")
        continue

    close_col = f'Close_{TICKERS[company]}'
    df_c = df_multi[df_multi['Company'] == company].copy()
    df_c = pd.merge(df_c, prices[company], on='date_merge', how='inner')
    # The cumulative sums below require chronological order; rows appended to
    # the stored dataset on later runs are not guaranteed to be sorted.
    df_c = df_c.sort_values('date', kind='stable').reset_index(drop=True)

    # Aggregation keys used by the dashboard.
    df_c['Day'] = df_c['date'].dt.date
    df_c['Month'] = df_c['date'].dt.to_period('M').dt.to_timestamp()
    df_c['Year'] = df_c['date'].dt.year

    # Strategy A: custom sentiment, traded only above 0.8 confidence.
    confident = df_c['Confidence'] > 0.8
    df_c['StrategyA_Cumulative'] = _cumulative_pnl(
        df_c,
        (df_c['Sentiment'] == "UP") & confident,
        (df_c['Sentiment'] == "DOWN") & confident,
        close_col,
    )

    # Strategy B: regression value with thresholds +1 / -1.
    df_c['StrategyB_Cumulative'] = _cumulative_pnl(
        df_c,
        df_c['Predicted'] > 1,
        df_c['Predicted'] < -1,
        close_col,
    )

    # Strategy C: FinBERT, traded only above 0.8 confidence.
    finbert_confident = df_c['FinBERT_Confidence'] > 0.8
    df_c['StrategyC_Cumulative'] = _cumulative_pnl(
        df_c,
        (df_c['FinBERT_Sentiment'] == "POSITIVE") & finbert_confident,
        (df_c['FinBERT_Sentiment'] == "NEGATIVE") & finbert_confident,
        close_col,
    )

    dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")
# --- Gradio callback ---
def show_company_data(selected_companies, aggregation="Day"):
    """Build the strategy and price figures for the selected companies.

    Args:
        selected_companies: Company names to plot; names missing from
            ``dfs_final`` are skipped.
        aggregation: "Day", "Month" or "Year"; any other value falls back
            to "Day". Within each period the last row's values are plotted.

    Returns:
        A ``(strategies_figure, prices_figure)`` pair, or ``(None, None)``
        when nothing is selected. Exactly two values are always returned,
        one per output component wired to this callback.
    """
    if not selected_companies:
        return None, None

    agg_col = aggregation if aggregation in ("Day", "Month", "Year") else "Day"
    # Column name -> legend label, in plotting order.
    strategy_labels = {
        'StrategyA_Cumulative': "Custom Sentiment",
        'StrategyB_Cumulative': "Regression",
        'StrategyC_Cumulative': "FinBERT",
    }

    fig_strat = go.Figure()
    fig_price = go.Figure()
    for c in selected_companies:
        if c not in dfs_final:
            continue
        price_col = f'Close_{TICKERS[c]}'
        last_per_period = {col: 'last' for col in strategy_labels}
        last_per_period[price_col] = 'last'
        df_grouped = dfs_final[c].groupby(agg_col).agg(last_per_period).reset_index()

        for strat, label in strategy_labels.items():
            fig_strat.add_trace(go.Scatter(
                x=df_grouped[agg_col],
                y=df_grouped[strat],
                mode="lines",
                name=f"{c} - {label}"
            ))
        fig_price.add_trace(go.Scatter(
            x=df_grouped[agg_col],
            y=df_grouped[price_col],
            mode="lines",
            name=f"{c} Price"
        ))

    fig_strat.update_layout(
        title="Strategies Comparison (Custom Sentiment, Regression, FinBERT)",
        xaxis_title=aggregation,
        yaxis_title="Cumulative Value",
        template="plotly_dark",
        hovermode="x unified"
    )
    fig_price.update_layout(
        title="Stock Prices",
        xaxis_title=aggregation,
        yaxis_title="Price",
        template="plotly_dark",
        hovermode="x unified"
    )
    return fig_strat, fig_price
# --- Gradio Interface ---
# The text mirrors the implemented rules: the sentiment strategies trade on the
# predicted label when its confidence exceeds 0.8; the regression strategy
# trades on the predicted value with thresholds +1 / -1.
description_text = """
### Portfolio Strategy Comparison Dashboard
This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
- **Strategy logic**: Each model's prediction is used as a buy/sell signal.
- For the sentiment models (Custom Sentiment, FinBERT), a trade is made only when the confidence score exceeds 0.8:
  - Positive label (UP / POSITIVE) → buy
  - Negative label (DOWN / NEGATIVE) → sell
  - Otherwise → no trade
- For the regression model, thresholds are +1 (buy) and -1 (sell).
"""

# Order in which the companies are listed in the dropdown.
companies = ["Microsoft", "Tesla"]

with gr.Blocks() as demo:
    gr.Markdown("# Portfolio Strategy Dashboard")
    gr.Markdown(description_text)
    with gr.Row():
        dropdown_companies = gr.Dropdown(
            choices=companies,
            value=["Microsoft", "Tesla"],
            multiselect=True,
            label="Select Companies"
        )
        radio_aggregation = gr.Radio(
            choices=["Day", "Month", "Year"],
            value="Day",
            label="Aggregation Level"
        )
        submit_btn = gr.Button("Submit")
    # TODO: a data-preview table may be added as a third output later.
    strategies_plot = gr.Plot(label="Strategies")
    prices_plot = gr.Plot(label="Prices")
    # The callback returns one value per component listed in `outputs`.
    submit_btn.click(
        fn=show_company_data,
        inputs=[dropdown_companies, radio_aggregation],
        outputs=[strategies_plot, prices_plot]
    )

demo.launch()