InvestmentStrategyBasedOnSentiment

Runtime error

App Files Files Community

InvestmentStrategyBasedOnSentiment / app.py

SelmaNajih001

Update app.py

58ad01d verified 2 months ago

raw

history blame contribute delete

9.93 kB

	import pandas as pd
	import yfinance as yf
	from datasets import Dataset, load_dataset
	from transformers import pipeline
	import plotly.graph_objects as go
	import gradio as gr
	from huggingface_hub import login
	import os

	# Authenticate with the Hugging Face Hub using the HF_TOKEN environment variable.
	# When no token is supplied, login() falls back to an interactive prompt, which
	# cannot be answered in a headless deployment, so the call is skipped instead.
	token = os.getenv("HF_TOKEN")
	if token:
	    login(token=token)
	else:
	    print("HF_TOKEN is not set; skipping Hugging Face login.")

	# --- Constants ---
	# Hub dataset repositories: the source rows and the dataset the scored rows
	# are pushed to.
	HF_DATASET = "SelmaNajih001/Cnbc_MultiCompany"
	HF_PRIVATE_DATASET = "SelmaNajih001/portfolio_strategy_data2"

	# Model identifiers handed to the pipelines.
	MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
	MODEL_PRICE_TESLA = "SelmaNajih001/PricePredictionForTesla"
	MODEL_PRICE_MICROSOFT = "SelmaNajih001/PricePredictionForMicrosoft"
	MODEL_FINBERT = "ProsusAI/finbert"

	# Company name -> ticker symbol (TSLA is Tesla, Inc.).
	TICKERS = dict(Tesla="TSLA", Microsoft="MSFT")
	companies = [*TICKERS]

	# --- Pipelines ---
	# One pipeline per model, each built once when the script starts.
	sentiment_pipeline = pipeline(
	    task="sentiment-analysis",
	    model=MODEL_SENTIMENT,
	)
	price_pipeline_tesla = pipeline(
	    task="text-classification",
	    model=MODEL_PRICE_TESLA,
	)
	price_pipeline_msft = pipeline(
	    task="text-classification",
	    model=MODEL_PRICE_MICROSOFT,
	)
	finbert_pipeline = pipeline(
	    task="sentiment-analysis",
	    model=MODEL_FINBERT,
	)

	# --- Dataset loading ---
	# Source rows: parse the raw 'Date' column into timestamps (unparseable values
	# become NaT), keep a midnight-normalised copy for joining on calendar day,
	# and order the rows by timestamp.
	df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
	df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
	df_multi['date_merge'] = df_multi['date'].dt.normalize()
	df_multi.sort_values('date', inplace=True)

	# Previously stored rows. A failed load is still treated as "nothing stored
	# yet", but only ordinary errors are caught (not KeyboardInterrupt/SystemExit)
	# and the reason is reported instead of being discarded.
	try:
	    ds_existing = load_dataset(HF_PRIVATE_DATASET)["train"]
	    df_existing = pd.DataFrame(ds_existing)
	except Exception as exc:
	    print(f"Could not load {HF_PRIVATE_DATASET} ({exc!r}); starting from an empty dataset.")
	    df_existing = pd.DataFrame()

	# --- Determine the new rows ---
	# Rows whose raw 'Date' value is not already present in the stored dataset.
	# The explicit copy makes df_to_add independent of df_multi, so the column
	# assignments below never write through a slice of the source frame.
	if not df_existing.empty:
	    df_to_add = df_multi[~df_multi['Date'].isin(df_existing['Date'])].copy()
	else:
	    df_to_add = df_multi.copy()

	# --- Score only the new rows ---
	# Defaults; each is overwritten per row by the loop below.
	df_to_add['Sentiment'] = ""
	df_to_add['Confidence'] = 0.0
	df_to_add['Predicted'] = 0.0
	df_to_add['FinBERT_Sentiment'] = ""
	df_to_add['FinBERT_Confidence'] = 0.0

	# Company name -> price pipeline. Companies not listed keep Predicted = 0.0.
	price_pipelines = {
	    "Tesla": price_pipeline_tesla,
	    "Microsoft": price_pipeline_msft,
	}

	for i, row in df_to_add.iterrows():
	    company = row['Company']

	    # Custom sentiment: a failure on one title marks that row as 'ERROR'
	    # instead of aborting the whole run.
	    try:
	        res = sentiment_pipeline(row['Title'])[0]
	        df_to_add.at[i, 'Sentiment'] = res['label'].upper().strip()
	        df_to_add.at[i, 'Confidence'] = res['score']
	    except Exception:
	        df_to_add.at[i, 'Sentiment'] = 'ERROR'
	        df_to_add.at[i, 'Confidence'] = 0.0

	    # FinBERT: same per-row fallback as above.
	    try:
	        res_f = finbert_pipeline(row['Title'])[0]
	        df_to_add.at[i, 'FinBERT_Sentiment'] = res_f['label'].upper().strip()
	        df_to_add.at[i, 'FinBERT_Confidence'] = res_f['score']
	    except Exception:
	        df_to_add.at[i, 'FinBERT_Sentiment'] = 'ERROR'
	        df_to_add.at[i, 'FinBERT_Confidence'] = 0.0

	    # Regression: use the pipeline registered for this row's company, if any.
	    try:
	        price_pipeline = price_pipelines.get(company)
	        if price_pipeline is not None:
	            val = price_pipeline(row['Title'])[0]['score']
	            # NOTE(review): max() makes 1.0 the smallest value ever stored, so
	            # Predicted can never be negative here - confirm this floor is
	            # intended rather than a cap or no clamp at all.
	            df_to_add.at[i, 'Predicted'] = max(val, 1.0)
	    except Exception:
	        df_to_add.at[i, 'Predicted'] = 0.0

	# --- Update the existing dataset ---
	# Append the newly scored rows to the stored ones. Both branches end with a
	# fresh 0..n-1 index so the resulting frame looks the same on either path.
	if not df_existing.empty:
	    df_updated = pd.concat([df_existing, df_to_add], ignore_index=True)
	else:
	    df_updated = df_to_add.reset_index(drop=True)

	# --- Push to Hugging Face ---
	# preserve_index=False keeps the pandas index from being stored as an extra
	# column of the dataset.
	hf_dataset_updated = Dataset.from_pandas(df_updated, preserve_index=False)
	hf_dataset_updated.push_to_hub(HF_PRIVATE_DATASET, private=True)
	print(f"Dataset aggiornato su Hugging Face: {HF_PRIVATE_DATASET}")

	# --- Rest of the code (prices, strategies, Gradio) ---
	df_multi = df_updated.copy()

	# One price frame per company, keyed by company name.
	prices = {}
	for company, ticker in TICKERS.items():
	    close_col = f'Close_{ticker}'

	    # Download closes from the company's earliest row date up to today.
	    is_company = df_multi['Company'] == company
	    start_date = df_multi.loc[is_company, 'date'].min()
	    end_date = pd.Timestamp.today()
	    history = yf.download(ticker, start=start_date, end=end_date)

	    df_prices = history[['Close']].reset_index()
	    df_prices.columns = ['Date_', close_col]

	    # Midnight-normalised date used as the join key.
	    df_prices['date_merge'] = pd.to_datetime(df_prices['Date_']).dt.normalize()

	    # Relative change from this row's close to the next row's close.
	    close_change = df_prices[close_col].pct_change()
	    df_prices['PctChangeDaily'] = close_change.shift(-1)

	    prices[company] = df_prices

	def _cumulative_strategy(df, close_col, buy_mask, sell_mask):
	    """Return the cumulative value of one strategy as a float64 array.

	    Row 0 is always 0.0. For every later row i the running total moves by
	    ``close[i-1] * PctChangeDaily[i]`` (a missing PctChangeDaily counts as 0):
	    added when ``buy_mask[i]`` is true, subtracted when ``sell_mask[i]`` is
	    true, and carried over unchanged otherwise.

	    Args:
	        df: Frame holding ``close_col`` and 'PctChangeDaily'.
	        close_col: Name of the close-price column.
	        buy_mask: Boolean Series, one entry per row of ``df``.
	        sell_mask: Boolean Series, one entry per row of ``df``.

	    Returns:
	        Array with one running total per row of ``df``.
	    """
	    closes = df[close_col].tolist()
	    pct_changes = df['PctChangeDaily'].tolist()
	    buys = buy_mask.tolist()
	    sells = sell_mask.tolist()

	    running = 0.0
	    totals = []
	    for i in range(len(closes)):
	        # Row 0 has no previous row to price the move from, so it never trades.
	        if i > 0 and (buys[i] or sells[i]):
	            pct = pct_changes[i] if pd.notnull(pct_changes[i]) else 0
	            # NOTE(review): the move uses the previous row's close together
	            # with the current row's percentage change - confirm intended.
	            move = closes[i - 1] * pct
	            running = running + move if buys[i] else running - move
	        totals.append(running)
	    return pd.Series(totals, dtype="float64").to_numpy()


	dfs_final = {}
	for company in companies:
	    close_col = f'Close_{TICKERS[company]}'
	    df_c = df_multi[df_multi['Company'] == company].copy()

	    if company in prices:
	        df_c = pd.merge(df_c, prices[company], on='date_merge', how='inner')

	    # Grouping keys offered by the dashboard.
	    df_c['Day'] = df_c['date'].dt.date
	    df_c['Month'] = df_c['date'].dt.to_period('M').dt.to_timestamp()
	    df_c['Year'] = df_c['date'].dt.year

	    # Strategy A: custom sentiment label, acted on only above 0.8 confidence.
	    confident = df_c['Confidence'] > 0.8
	    df_c['StrategyA_Cumulative'] = _cumulative_strategy(
	        df_c,
	        close_col,
	        buy_mask=(df_c['Sentiment'] == "UP") & confident,
	        sell_mask=(df_c['Sentiment'] == "DOWN") & confident,
	    )

	    # Strategy B: regression value, thresholds at +1 and -1.
	    df_c['StrategyB_Cumulative'] = _cumulative_strategy(
	        df_c,
	        close_col,
	        buy_mask=df_c['Predicted'] > 1,
	        sell_mask=df_c['Predicted'] < -1,
	    )

	    # Strategy C: FinBERT label, acted on only above 0.8 confidence.
	    finbert_confident = df_c['FinBERT_Confidence'] > 0.8
	    df_c['StrategyC_Cumulative'] = _cumulative_strategy(
	        df_c,
	        close_col,
	        buy_mask=(df_c['FinBERT_Sentiment'] == "POSITIVE") & finbert_confident,
	        sell_mask=(df_c['FinBERT_Sentiment'] == "NEGATIVE") & finbert_confident,
	    )

	    dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")

	# --- Gradio callback ---
	def show_company_data(selected_companies, aggregation="Day"):
	    """Build the strategy and price figures for the selected companies.

	    Args:
	        selected_companies: Company names to plot. Names that are not keys of
	            ``dfs_final`` are skipped.
	        aggregation: "Day", "Month" or "Year" - the column to group by. Any
	            other value falls back to "Day".

	    Returns:
	        A ``(fig_strat, fig_price)`` pair: the strategies figure and the
	        prices figure, or ``(None, None)`` when nothing is selected.
	    """
	    if not selected_companies:
	        # Exactly two values, like the normal return below and like the two
	        # output components the click handler is wired to.
	        return None, None

	    agg_col = aggregation if aggregation in ("Day", "Month", "Year") else "Day"

	    # Column name -> legend label, in plotting order.
	    strategy_labels = {
	        'StrategyA_Cumulative': "Custom Sentiment",
	        'StrategyB_Cumulative': "Regression",
	        'StrategyC_Cumulative': "FinBERT",
	    }

	    fig_strat = go.Figure()
	    fig_price = go.Figure()

	    for c in selected_companies:
	        if c not in dfs_final:
	            continue
	        close_col = f'Close_{TICKERS[c]}'

	        # Keep the last value of each series within every aggregation bucket.
	        df_grouped = dfs_final[c].groupby(agg_col).agg({
	            'StrategyA_Cumulative': 'last',
	            'StrategyB_Cumulative': 'last',
	            'StrategyC_Cumulative': 'last',
	            close_col: 'last',
	        }).reset_index()
	        df_grouped['Company'] = c

	        for strat, label in strategy_labels.items():
	            fig_strat.add_trace(go.Scatter(
	                x=df_grouped[agg_col],
	                y=df_grouped[strat],
	                mode="lines",
	                name=f"{c} - {label}",
	            ))

	        fig_price.add_trace(go.Scatter(
	            x=df_grouped[agg_col],
	            y=df_grouped[close_col],
	            mode="lines",
	            name=f"{c} Price",
	        ))

	    fig_strat.update_layout(
	        title="Strategies Comparison (Custom Sentiment, Regression, FinBERT)",
	        xaxis_title=aggregation,
	        yaxis_title="Cumulative Value",
	        template="plotly_dark",
	        hovermode="x unified",
	    )

	    fig_price.update_layout(
	        title="Stock Prices",
	        xaxis_title=aggregation,
	        yaxis_title="Price",
	        template="plotly_dark",
	        hovermode="x unified",
	    )

	    return fig_strat, fig_price

	# --- Gradio Interface ---
	# Markdown text rendered near the top of the page (passed to gr.Markdown below).
	description_text = """
	### Portfolio Strategy Comparison Dashboard
	This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
	- Strategy logic: Each model's score (or regression value) is used as a buy/sell signal.
	- If the score exceeds 0.8 → buy
	- If the score is below -0.8 → sell
	- Otherwise → no trade
	- For the regression model, thresholds are +1 and -1.
	"""

	# Choices offered by the company dropdown; this rebinds the module-level
	# `companies` name.
	companies = ["Microsoft", "Tesla"]

	# NOTE(review): the nesting under the `with` statements is not visible in this
	# listing - confirm which components are meant to sit inside gr.Row().
	with gr.Blocks() as demo:
	gr.Markdown("# Portfolio Strategy Dashboard")
	gr.Markdown(description_text)

	with gr.Row():
	# Multi-select of companies; both are selected by default.
	dropdown_companies = gr.Dropdown(
	choices=companies,
	value=["Microsoft", "Tesla"],
	multiselect=True,
	label="Select Companies"
	)
	# Grouping level passed to show_company_data as `aggregation`.
	radio_aggregation = gr.Radio(
	choices=["Day", "Month", "Year"],
	value="Day",
	label="Aggregation Level"
	)
	submit_btn = gr.Button("Submit")

	#data_table = gr.Dataframe(label="Data Preview", type="pandas")
	# Output components filled by the button callback.
	strategies_plot = gr.Plot(label="Strategies")
	prices_plot = gr.Plot(label="Prices")

	# On click, call show_company_data with the dropdown and radio values and
	# send its return values to the two plots, in this order.
	submit_btn.click(
	fn=show_company_data,
	inputs=[dropdown_companies, radio_aggregation],
	outputs=[strategies_plot, prices_plot] # data_table could be added here later
	)

	# Start the app.
	demo.launch()