# Faisal-Data's picture
# Update app.py
# 24e60ed verified
import pandas as pd
import gradio as gr
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from transformers import pipeline
import numpy as np
import matplotlib.pyplot as plt
import wikipedia
from transformers import BertTokenizer, AutoModelForSeq2SeqLM, pipeline
from arabert.preprocess import ArabertPreprocessor
# Read the stock table from the Excel workbook.
df = pd.read_excel('stock_data.xlsx')

# Regression inputs are today's final price and today's change; the target is
# the 'future_price' column.
X = df[['final price today', 'change today']].values
y = df['future_price'].values

# Build and train the prediction pipeline in one step: a scaler standardizes
# the two input features, then a linear regression predicts the future price.
# fit() hands back the fitted pipeline itself, so the result can be bound
# directly.
model_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('regressor', LinearRegression()),
]).fit(X, y)

# Set up the summary generation pipeline (tokenizer + seq2seq model loaded
# from the same checkpoint name).
model_name = "malmarjeh/mbert2mbert-arabic-text-summarization"
preprocessor = ArabertPreprocessor(model_name="")
tokenizer = BertTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# NOTE: this assignment rebinds the name `pipeline` from the transformers
# factory function to the text2text-generation pipeline object it creates;
# get_stock_info calls the generator through this name.
pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
# function for building the plot
def plot_stock_prices(stock_name, current_price, future_price):
    """Draw a two-bar chart of today's price and the predicted price.

    The chart is written to 'graph.png' in the working directory and the
    figure is closed afterwards; the function returns nothing.

    Args:
        stock_name: Name inserted into the chart title.
        current_price: Height of the first (blue) bar.
        future_price: Height of the second (green) bar.
    """
    bar_labels = ['Final Price Today', 'Predicted Future Price']
    bar_values = [current_price, future_price]

    fig, ax = plt.subplots(figsize=(10, 20))
    ax.bar(bar_labels, bar_values, color=['blue', 'green'])

    # Axis captions and title.
    ax.set_xlabel('Price Type')
    ax.set_ylabel('Price')
    ax.set_title(f'Stock Prices for {stock_name}')

    # Print each bar's value right above its top edge.
    for position, value in enumerate(bar_values):
        ax.text(position, value, f'{value:.2f}', ha='center', va='bottom')

    # When the two prices are less than 1 apart, widen the y-range by 1 on
    # both sides of them.
    if abs(current_price - future_price) < 1:
        lower = min(current_price, future_price) - 1
        upper = max(current_price, future_price) + 1
        ax.set_ylim(lower, upper)

    fig.savefig('graph.png')
    plt.close(fig)
# Function to get stock information and predict future price and make the summary
def get_stock_info(stock_name):
    """Look up a stock, predict its future price and summarize the company.

    The stock is matched exactly against the 'name' column of ``df``. For a
    known stock the function fetches an Arabic Wikipedia summary, predicts the
    future price with ``model_pipeline``, generates a short summary with the
    text-generation pipeline and saves a bar chart via ``plot_stock_prices``.

    Args:
        stock_name: Stock/company name as typed by the user.

    Returns:
        A 2-tuple ``(details, image_path)`` matching the interface's two
        outputs (JSON and image). For an unknown stock, ``details`` is
        ``{"Error": "Stock not found"}`` and ``image_path`` points to a
        placeholder image.
    """
    stock_info = df[df['name'] == stock_name]
    if stock_info.empty:
        # Exactly two values are returned, one per declared output component.
        # The original returned a third path ('training_plot.png') that no
        # code in this script produces.
        return {"Error": "Stock not found"}, _save_not_found_image()

    wiki_summary = _fetch_wiki_summary(stock_name)

    # Values of the first matching row drive both the prediction and the text.
    info = stock_info.iloc[0]
    current_price = info['final price today']
    change_today = info['change today']

    # Predict the future price from the same two features used for training.
    future_price = model_pipeline.predict([[current_price, change_today]])[0]
    summary_prompt = f"سهم شركة {stock_name} سعره الحالي{current_price} اليوم مع تغيير قدره {change_today}. ويتوقع ان يصبح سعره {future_price} والجدير بالذكر ان "

    # Generate the summary from the preprocessed Wikipedia text.
    text = preprocessor.preprocess(wiki_summary)
    summary = pipeline(text,
                       pad_token_id=tokenizer.eos_token_id,
                       num_beams=3,
                       repetition_penalty=3.0,
                       max_length=300,
                       length_penalty=1.0,
                       no_repeat_ngram_size=3)[0]['generated_text']

    # Title the chart with the requested stock instead of a fixed placeholder.
    # NOTE(review): names may be Arabic; confirm matplotlib renders them
    # acceptably with the fonts available where the app is deployed.
    plot_stock_prices(stock_name, current_price, future_price)

    return {
        "اخر قيمة لليوم": current_price,
        "اعلى قيمة لليوم": info.get('highest price today', 'N/A'),
        "اقل قيمة لليوم": info.get('lowest price today', 'N/A'),
        "التغير": change_today,
        "نسبة التغير": info.get('percentage of change today', 'N/A'),
        "الحجم": info.get('size', 'N/A'),
        "اخر تحديث (بالساعة)": info.get('last update time', 'N/A'),
        "السعر المتوقع": future_price,
        "التوقع والملخص": summary_prompt + summary,
        "مقال ويكيبيديا الكامل": wiki_summary
    }, "graph.png"


def _save_not_found_image(path='not_found.png'):
    """Save a blank image that reads 'Stock not found' and return its path."""
    plt.figure(figsize=(10, 6))
    plt.text(0.5, 0.5, 'Stock not found', horizontalalignment='center', verticalalignment='center', fontsize=12)
    plt.axis('off')
    plt.savefig(path)
    plt.close()
    return path


def _fetch_wiki_summary(stock_name):
    """Return a five-sentence Arabic Wikipedia summary, or a fallback message."""
    wikipedia.set_lang("ar")
    try:
        # The words "شركة" (company) and "السعودية" (Saudi) are added around
        # the name to steer the search toward companies in the Saudi market.
        return wikipedia.summary("شركة " + stock_name + " السعودية", sentences=5)
    except wikipedia.exceptions.DisambiguationError as e:
        return f"Multiple entries found for {stock_name}: {e.options}"
    except wikipedia.exceptions.PageError:
        return "Wikipedia page not found for this company."
# Build the Gradio UI: one text box for the stock name in, a JSON panel and an
# image out, both filled by get_stock_info.
iface = gr.Interface(
    title="توقع اسعار سوق الاسهم السعودي",
    description="الرجاء ادخال اسم الشركة لاظهار التوقع ومعلومات عامة عن الشركة",
    fn=get_stock_info,
    inputs=gr.Textbox(label="اسم السهم"),
    outputs=[gr.JSON(), gr.Image()],
)

# Start serving the interface with Gradio's debug flag switched on.
iface.launch(debug=True)