"""Gradio app that looks up a Saudi-market stock and predicts its future price.

The script loads stock data from ``stock_data.xlsx``, fits a linear-regression
pipeline on it, and serves a Gradio interface that returns the stock's current
figures, a predicted future price, a generated Arabic summary of the company's
Wikipedia entry, and a bar chart comparing current and predicted prices.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import gradio as gr
import wikipedia
from arabert.preprocess import ArabertPreprocessor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from transformers import AutoModelForSeq2SeqLM, BertTokenizer, pipeline

# Image files written by the plotting helpers and handed to the gr.Image output.
GRAPH_PATH = 'graph.png'
NOT_FOUND_PATH = 'not_found.png'

# Load the Excel sheet
df = pd.read_excel('stock_data.xlsx')

# Prepare the data for the model
X = df[['final price today', 'change today']].values
y = df['future_price'].values

# Create a pipeline with a scaler and a linear regression model.
# The scaler standardizes the features before they reach the regressor,
# and the linear regression model is used to predict the future price.
model_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', LinearRegression()),
])

# Train the pipeline
model_pipeline.fit(X, y)

# Initialize the summary generation pipeline
model_name = "malmarjeh/mbert2mbert-arabic-text-summarization"
preprocessor = ArabertPreprocessor(model_name="")
tokenizer = BertTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Bound to its own name so the imported ``pipeline`` factory is not overwritten.
summarizer = pipeline("text2text-generation", model=model, tokenizer=tokenizer)


def plot_stock_prices(stock_name: str, current_price: float, future_price: float) -> str:
    """Draw a two-bar chart of current vs. predicted price and save it.

    Args:
        stock_name: Name shown in the chart title.
        current_price: Value of the 'Final Price Today' bar.
        future_price: Value of the 'Predicted Future Price' bar.

    Returns:
        The path of the saved image (``GRAPH_PATH``).
    """
    plt.figure(figsize=(10, 20))

    # Plot the bar chart
    plt.bar(
        ['Final Price Today', 'Predicted Future Price'],
        [current_price, future_price],
        color=['blue', 'green'],
    )

    # Add labels and title
    plt.xlabel('Price Type')
    plt.ylabel('Price')
    plt.title(f'Stock Prices for {stock_name}')

    # Annotate the bars with the actual values
    plt.text(0, current_price, f'{current_price:.2f}', ha='center', va='bottom')
    plt.text(1, future_price, f'{future_price:.2f}', ha='center', va='bottom')

    # Adjust y-axis limits if the difference is very small, so the two bars
    # remain visually distinguishable.
    if abs(current_price - future_price) < 1:
        plt.ylim(min(current_price, future_price) - 1,
                 max(current_price, future_price) + 1)

    # Save the plot and release the figure
    plt.savefig(GRAPH_PATH)
    plt.close()
    return GRAPH_PATH


def _render_not_found_image() -> str:
    """Save a placeholder image reading 'Stock not found' and return its path."""
    plt.figure(figsize=(10, 6))
    plt.text(0.5, 0.5, 'Stock not found',
             horizontalalignment='center',
             verticalalignment='center',
             fontsize=12)
    plt.axis('off')
    plt.savefig(NOT_FOUND_PATH)
    plt.close()
    return NOT_FOUND_PATH


def _fetch_wiki_summary(stock_name: str) -> str:
    """Return a 5-sentence Arabic Wikipedia summary, or a fallback message."""
    # Set Wikipedia language to Arabic
    wikipedia.set_lang("ar")
    try:
        # The words "شركة" and "السعودية" are added to the query to steer the
        # search towards companies in the Saudi market.
        return wikipedia.summary("شركة " + stock_name + " السعودية", sentences=5)
    except wikipedia.exceptions.DisambiguationError as e:
        return f"Multiple entries found for {stock_name}: {e.options}"
    except wikipedia.exceptions.PageError:
        return "Wikipedia page not found for this company."


def get_stock_info(stock_name: str):
    """Look up a stock, predict its future price, and build a summary.

    Args:
        stock_name: Value compared for equality against the ``name`` column
            of the loaded sheet.

    Returns:
        A ``(data, image_path)`` pair matching the interface's two outputs
        (``gr.JSON`` and ``gr.Image``). When no row matches, ``data`` is
        ``{"Error": "Stock not found"}`` and the image is a placeholder.
    """
    # Check if the name is in the sheet
    stock_info = df[df['name'] == stock_name]
    if stock_info.empty:
        # Exactly two values are returned: one per declared output component.
        return {"Error": "Stock not found"}, _render_not_found_image()

    wiki_summary = _fetch_wiki_summary(stock_name)

    # Setting the data to make the prediction and summary (first matching row)
    info = stock_info.iloc[0]
    current_price = info['final price today']
    change_today = info['change today']

    # Making the prediction
    future_price = model_pipeline.predict([[current_price, change_today]])[0]

    summary_prompt = (
        f"سهم شركة {stock_name} سعره الحالي {current_price} اليوم مع تغيير قدره {change_today}. "
        f"ويتوقع ان يصبح سعره {future_price} والجدير بالذكر ان "
    )
    text = preprocessor.preprocess(wiki_summary)

    # Building the summary
    summary = summarizer(
        text,
        pad_token_id=tokenizer.eos_token_id,
        num_beams=3,
        repetition_penalty=3.0,
        max_length=300,
        length_penalty=1.0,
        no_repeat_ngram_size=3,
    )[0]['generated_text']

    # Plotting the graph, titled with the stock that was actually requested
    graph_path = plot_stock_prices(stock_name, current_price, future_price)

    return {
        "اخر قيمة لليوم": current_price,
        "اعلى قيمة لليوم": info.get('highest price today', 'N/A'),
        "اقل قيمة لليوم": info.get('lowest price today', 'N/A'),
        "التغير": change_today,
        "نسبة التغير": info.get('percentage of change today', 'N/A'),
        "الحجم": info.get('size', 'N/A'),
        "اخر تحديث (بالساعة)": info.get('last update time', 'N/A'),
        "السعر المتوقع": future_price,
        "التوقع والملخص": summary_prompt + summary,
        "مقال ويكيبيديا الكامل": wiki_summary,
    }, graph_path


# Create the Gradio interface
iface = gr.Interface(
    fn=get_stock_info,
    inputs=gr.Textbox(label="اسم السهم"),
    outputs=[gr.JSON(), gr.Image()],
    title="توقع اسعار سوق الاسهم السعودي",
    description="الرجاء ادخال اسم الشركة لاظهار التوقع ومعلومات عامة عن الشركة",
)

# Launch the interface with debug=True for error handling
iface.launch(debug=True)