Adding new Reddit group
- app.py +0 -188
- app_crypto_arima_model.py +76 -0
- app_crypto_rf_model.py +124 -0
- app_crypto_scrape.py +82 -0
- app_hf.py +225 -0
- requirements.txt +5 -16
- scrape_utils.py +139 -58
- tools/.DS_Store +0 -0
- tools/.chainlit/config.toml +84 -0
- tools/__pycache__/crypto_sentiment_analysis_util.cpython-311.pyc +0 -0
- tools/__pycache__/data_analyst.cpython-311.pyc +0 -0
- tools/crypto_sentiment_analysis_util.py +282 -0
- tools/data_analyst.py +42 -0
- tools/df_history.csv +63 -0
- tools/df_with_forecast.csv +113 -0
- tools/stock_sentiment_evalutor.py +261 -0
app.py
DELETED
@@ -1,188 +0,0 @@
-from langchain_core.messages import BaseMessage, HumanMessage
-from langchain_openai import ChatOpenAI
-from typing import Annotated
-import operator
-from typing import Sequence, TypedDict
-import numpy as np
-import pandas as pd
-from dotenv import load_dotenv
-import os
-from typing import Annotated
-import operator
-from typing import Sequence, TypedDict
-import matplotlib.pyplot as plt
-from langchain.schema.output_parser import StrOutputParser
-import streamlit as st
-import requests
-from requests import Request, Session
-from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
-import json
-
-st.set_page_config(page_title="LangChain Agent", layout="wide")
-load_dotenv()
-
-COINGECKO_API_KEY=os.environ["COINGECKO_API_KEY"]
-OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
-
-llm = ChatOpenAI(model="gpt-3.5-turbo")
-
-#======================== AGENTS ==================================
-# The agent state is the input to each node in the graph
-class AgentState(TypedDict):
-    # The annotation tells the graph that new messages will always
-    # be added to the current state
-    messages: Annotated[Sequence[BaseMessage], operator.add]
-    # The 'next' field indicates where to route to next
-    next: str
-
-from langchain_core.runnables import RunnableConfig
-st.title("💬 Krypto")
-
-#@st.cache_data
-
-#@st.cache_resource
-#def initialize_session_state():
-if "chat_history" not in st.session_state:
-    st.session_state["messages"] = [{"role":"system", "content":"""
-    How can I help you?
-    """}]
-
-#initialize_session_state()
-
-# Streamlit UI elements
-
-#st.text("Start by entering the currency.")
-
-sideb = st.sidebar
-
-with st.sidebar:
-    #st.subheader("This is the LangGraph workflow visualization of this application rendered in real-time.")
-    #st.image(create_graph_image())
-
-    title = st.text_input("Start by entering the currency name:")
-
-check1 = sideb.button(f"analyze {title}")
-results=[]
-
-if check1:
-    st.write(f"I am now producing analysis for {title}")
-
-    model = ChatOpenAI(temperature=0.7, api_key=OPENAI_API_KEY)
-    chain= model | StrOutputParser()
-    result=chain.invoke(f"You are a cryptocurrency data analyst.\
-                Provide correct cryptocurrency ticker from Coingecko website for cryptocurrency: {title}.\
-                Expected output: ticker.\
-                Provide it in the following format: >>cryptocurrencyticker>> \
-                for example: >>BTC>>")
-
-    # for s in graph_data.stream(inputs):
-    #     for key, value in s.items():
-    #         print(f"Finished running: {value}:")
-    #         result = value["messages"][-1].content
-    #         results.append(value["messages"][-1].content)
-
-    print(result)
-    print('ticker',str(result).split(">>")[0])
-    if len(str(result).split(">>")[1])<10:
-        cryptocurrencyticker=(str(result).split(">>")[1])
-    else:
-        cryptocurrencyticker=(str(result).split(">>")[0])
-    cryptocurrency=title
-
-    print(cryptocurrency,cryptocurrencyticker)
-    print('here')
-
-    # # 1. Scrape historical Price and Volume currency data
-    # from datetime import date
-    # today = date.today()
-    # Day_end = today.strftime("%d")
-    # Month_end = today.strftime("%m")
-    # Year_end = today.strftime("%Y")
-
-    # from datetime import date
-    # from datetime import timedelta
-
-    # past=today-timedelta(days=200)
-    # Day_start = past.strftime("%d")
-    # Month_start = past.strftime("%m")
-    # Year_start = past.strftime("%Y")
-
-    # date_start=[Year_start,Month_start,Day_start]
-    # date_end=[Year_end,Month_end,Day_end]
-
-    # import datetime
-    # import time
-
-    # #DATE definitions
-
-    # date_time = datetime.datetime(int(date_start[0]),int(date_start[1]),int(date_start[2]))
-    # date_time_now = datetime.datetime(int(date_end[0]),int(date_end[1]),int(date_end[2]))
-    # unix_past=time.mktime(date_time.timetuple()) #change the date format into unix for scraping
-    # unix_now=time.mktime(date_time_now.timetuple())
-    # past=datetime.datetime(int(date_start[0]),int(date_start[1]),int(date_start[2])).strftime('%Y-%m-%d')
-    # now=datetime.datetime(int(date_end[0]),int(date_end[1]),int(date_end[2])).strftime('%Y-%m-%d')
-    # datum_range=pd.date_range(start=past,end=now, freq='D')
-
-    # #empty lists
-    # unix_all=[]
-    # coins_names=[]
-
-    # #create date variable
-    # for val in datum_range:
-    #     unix_all=np.append(unix_all,time.mktime(val.timetuple()))
-    # #from utils import slice
-    # # Get API for CoinGecko
-    # #cg = CoinGeckoAPI()
-
-    # url = f"https://api.coingecko.com/api/v3/coins/{cryptocurrency.lower()}/market_chart/range?vs_currency=usd&from={unix_past}&to={unix_now}"
-
-    # headers = {
-    #     "accept": "application/json",
-    #     "x-cg-demo-api-key": COINGECKO_API_KEY
-    # }
-    # response = ''
-    # while response == '':
-    #     try:
-    #         response = requests.get(url, headers=headers, proxies={"http": "http://111.233.225.166:1234"})
-    #         break
-    #     except:
-    #         print("Connection refused by the server..")
-    #         print("Let me sleep for 5 seconds")
-    #         print("ZZzzzz...")
-    #         time.sleep(5)
-    #         print("Was a nice sleep, now let me continue...")
-    #         continue
-
-    # data=response.json()
-
-    #This example uses Python 2.7 and the python-request library.
-
-    url = 'https://sandbox-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
-    parameters = {
-        'start':'1',
-        'limit':'5000',
-        'convert':'USD'
-    }
-    headers = {
-        'Accepts': 'application/json',
-        'X-CMC_PRO_API_KEY': 'b54bcf4d-1bca-4e8e-9a24-22ff2c3d462c',
-    }
-
-    session = Session()
-    session.headers.update(headers)
-
-    try:
-        response = session.get(url, params=parameters)
-        data = json.loads(response.text)
-        print(data)
-    except (ConnectionError, Timeout, TooManyRedirects) as e:
-        print(e)
-
-    #data=cg.get_coin_market_chart_range_by_id(id=cryptocurrency.lower(),vs_currency='usd',include_market_cap='true', include_24hr_vol='true', from_timestamp=unix_past,to_timestamp=unix_now)
-    #df_ts_coins=su.scrape_historical_series([currency],date_start,date_end)
-    #================== Scrape Current/Historical Price ====================
-    st.write(data)
-
app_crypto_arima_model.py
ADDED
@@ -0,0 +1,76 @@
+from datetime import datetime, timedelta
+import pandas as pd
+import numpy as np
+
+import model_utils as mu
+from statsmodels.tsa.arima.model import ARIMA
+
+def model_run(df_all):
+    """Prediction function that runs an ARIMA model and predicts tomorrow's cryptocurrency price.
+    Useful for forecasting a variable using an ARIMA model.
+    Use historical 'prices' and get a prediction.
+    Give the prediction output to the client.
+    """
+    first_day_future=pd.to_datetime(datetime.now()+timedelta(days=1))
+    #----------------------------------------- DATASET MANIPULATION FOR SUPERVISED LEARNING --------------------------------------------
+    reframed_lags, df_final=mu.data_transform(df_all, first_day_future)
+
+    print('I have transformed the dataset into the frame for supervised learning')
+    df=reframed_lags[['prices','price_eth','GSPC','Day','Month', 'TNX', 'Employment', 'google_trend','EURUSD']]
+    date=pd.to_datetime(dict(year=reframed_lags['Year'], month=reframed_lags['Month'], day=reframed_lags['Day']))
+    df_with_date=pd.concat([date,df],axis=1)
+    df_with_date.columns=np.append('date',df.columns)
+    df_with_date.set_index('date',inplace=True)
+    df_with_date=df_with_date.dropna()
+    df_past=df_with_date.iloc[:-1,:]
+    df_future=df_with_date.iloc[-1:,:]
+    model = ARIMA(df_past['prices'],exog=df_past.drop(columns=['prices']), order=(2,1,2))
+    model_fit = model.fit()
+
+    # Make predictions
+    predictions = model_fit.forecast(steps=1,exog=df_future.drop(columns='prices'))
+
+    # Add the forecast to the output frame
+    df_with_forecast=reframed_lags.copy()
+    df_with_forecast.loc[df_with_forecast.index==df_with_forecast.index[-1],'prices']=predictions[-1:].values[0]
+    #----------------------------------- MODEL ACCURACY
+    # Calculate accuracy after transformation!
+    # Get rid of values below 0.01, which skew the accuracy measure if they end up in the denominator.
+
+    # Rolling-window accuracy measure
+    if len(reframed_lags)>500:
+        train_size=0.9
+    elif len(reframed_lags)>200:
+        train_size=0.8
+    else:
+        train_size=0.7
+    predictions=[]
+    test_labels_all=[]
+    test_labels_all1=[]
+    train_labels_all=[]
+    data_arima=df_with_date
+    window_length=int(len(data_arima)-len(data_arima)*train_size)
+    for i in range(0,window_length):
+        train_accuracy=data_arima.iloc[0:int(len(data_arima)*train_size)+i,:]
+
+        test_accuracy=data_arima.iloc[len(train_accuracy):len(train_accuracy)+1,:]
+        train_features_accuracy=train_accuracy.drop(columns='prices')
+        test_features_accuracy=test_accuracy.drop(columns='prices')
+        train_labels_accuracy=train_accuracy['prices']
+        test_labels_accuracy=test_accuracy['prices']
+        print(train_labels_accuracy)
+
+        arima = ARIMA(train_labels_accuracy,exog=train_features_accuracy, order=(2,1,2))
+        arima_fit=arima.fit()
+        prediction_arima = arima_fit.forecast(steps=1,exog=test_features_accuracy)
+        predictions=np.append(predictions,prediction_arima)
+        test_labels_all=np.append(test_labels_all,test_labels_accuracy)
+        train_labels_all=np.append(train_labels_all,train_accuracy)
+        test_labels_all1=np.append(test_labels_all1,test_accuracy)
+
+    # Calculate accuracy (r2_score expects the true values first)
+    from sklearn.metrics import r2_score
+    accuracy=r2_score(test_labels_all,predictions)
+    result_arima=pd.DataFrame({'prediction':predictions,'data':test_labels_all})
+    result_arima.to_csv('result_arima_kat.csv')
+    return df_with_forecast, accuracy, result_arima
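model_run fits ARIMA(2,1,2) with exogenous regressors and takes a one-step-ahead forecast. A minimal standalone sketch of that statsmodels pattern, using synthetic random-walk data and an assumed 'google_trend' regressor name (not part of the commit):

    import numpy as np
    import pandas as pd
    from statsmodels.tsa.arima.model import ARIMA

    rng = np.random.default_rng(0)
    y = pd.Series(np.cumsum(rng.normal(size=120)), name='prices')   # synthetic random walk
    exog = pd.DataFrame({'google_trend': rng.normal(size=120)})     # hypothetical regressor

    fit = ARIMA(y, exog=exog, order=(2, 1, 2)).fit()
    # One step ahead; the last in-sample exog row stands in for tomorrow's value here
    next_value = fit.forecast(steps=1, exog=exog.iloc[-1:])
    print(float(next_value.iloc[0]))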
app_crypto_rf_model.py
ADDED
@@ -0,0 +1,124 @@
+from datetime import datetime, timedelta
+import pandas as pd
+import numpy as np
+
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.metrics import mean_squared_error
+from math import sqrt
+from sklearn.preprocessing import MinMaxScaler
+import model_utils as mu
+
+def model_run(df_all):
+    """Prediction function that runs a random forest model and predicts tomorrow's cryptocurrency price."""
+
+    first_day_future=pd.to_datetime(datetime.now()+timedelta(days=1))
+    #----------------------------------------- DATASET MANIPULATION FOR SUPERVISED LEARNING --------------------------------------------
+    reframed_lags, df_final=mu.data_transform(df_all, first_day_future)
+    print('I have transformed the dataset into the frame for supervised learning')
+    reframed_lags.to_csv('reframed_lags.csv')
+    #----------------------------------------- TRAIN/TEST SPLIT ------------------------------------------------------
+    # Randomly split a chunk into train/test based on the train/test ratio (0.8); split the chunks for all the other currencies in the same fashion.
+    import random
+    train_size=0.8
+
+    df_cut1=reframed_lags.reset_index().iloc[:,1:]
+    print('here')
+    train_value=int(len(df_cut1)*train_size)
+    first_random=random.sample(range(len(df_cut1)-1), train_value)
+    train_bulk=np.sort(first_random) #make sure all the consequent chunks have the same random numbers
+
+    df_cut=reframed_lags.reset_index()
+    train_sample=df_cut.loc[df_cut['index'].isin(train_bulk)]
+    test_sample=df_cut.loc[~df_cut['index'].isin(train_bulk)]
+
+    test=test_sample.iloc[:,1:]
+    train=train_sample.iloc[:,1:]
+    print('I have split the dataset into training and testing samples')
+
+    #----------------------------------- Re-scale for supervised learning
+    # TRAIN RESCALE
+    # Normalize features for the supervised learning (0,1)
+    scaler_train = MinMaxScaler(feature_range=(0, 1))
+    scaled = scaler_train.fit_transform(train.values.astype('float32'))
+    df_train=pd.DataFrame(scaled)
+    df_train.columns=train.columns #rename columns
+
+    # TEST RESCALE
+    scaler_test = MinMaxScaler(feature_range=(0, 1))
+    scaled = scaler_test.fit_transform(test.values.astype('float32'))
+    df_test=pd.DataFrame(scaled)
+    df_test.columns=test.columns #rename columns
+
+    #----------------------------------- MODEL
+
+    # Define features
+    train_features=df_train.values
+    test_features=df_test.values
+    # Define labels
+    train_labels = df_train['prices'].values
+    test_labels = df_test['prices'].values
+
+    # Define a baseline prediction (the last values) for evaluating prediction accuracy
+    baseline_preds = pd.DataFrame(test_features).iloc[:,0]
+    # Calculate errors for the baseline prediction
+    baseline_errors = abs(baseline_preds - test_labels)
+
+    # Instantiate the model with 1000 decision trees
+    rf = RandomForestRegressor(n_estimators=1000)
+    rf.fit(train_features, train_labels)
+    prediction_rf = rf.predict(test_features)
+    predictions=prediction_rf
+
+    #----------------------------------- MODEL OUTPUT TRANSFORMATION
+    # Convert the test column
+    df_test['prices']=predictions
+    prediction_transformed=pd.DataFrame(scaler_test.inverse_transform(df_test.values.astype('float')))
+    prediction_transformed.columns=test.columns
+
+    # Convert the prediction
+    df_test.loc[df_test.index==(len(df_test)-1),'prices']=predictions[-1:][0]
+    inv_transformed=pd.DataFrame(scaler_test.inverse_transform(df_test.values.astype('float')))
+    inv_transformed.columns=test.columns
+
+    # Data with forecast
+    df_with_forecast=df_final.copy()
+    df_with_forecast.loc[df_with_forecast.index==df_with_forecast.index[-1],'prices']=inv_transformed['prices'][-1:].values[0]
+    print('Final result')
+    print(df_with_forecast)
+
+    #----------------------------------- MODEL ACCURACY
+    # Calculate accuracy after transformation!
+    # Get rid of values below 0.01, which skew the accuracy measure if they end up in the denominator.
+
+    # Rolling-window accuracy measure
+    if len(reframed_lags)>500:
+        train_size=0.9
+    elif len(reframed_lags)>200:
+        train_size=0.8
+    else:
+        train_size=0.7
+    predictions=[]
+    test_labels_all=[]
+    window_length=int(len(reframed_lags)-len(reframed_lags)*train_size)
+    for i in range(0,window_length):
+        train_accuracy=reframed_lags.iloc[0:int(len(reframed_lags)*train_size)+i,:]
+        test_accuracy=reframed_lags.iloc[len(train_accuracy):len(train_accuracy)+1,:]
+        train_features_accuracy=train_accuracy.drop(columns='prices')
+        test_features_accuracy=test_accuracy.drop(columns='prices')
+        train_labels_accuracy=train_accuracy['prices']
+        test_labels_accuracy=test_accuracy['prices']
+
+        rf = RandomForestRegressor(n_estimators=1000)
+        rf.fit(train_features_accuracy, train_labels_accuracy)
+        prediction_rf = rf.predict(test_features_accuracy)
+        predictions=np.append(predictions,prediction_rf)
+        test_labels_all=np.append(test_labels_all,test_labels_accuracy)
+
+    # Calculate accuracy (r2_score expects the true values first)
+    from sklearn.metrics import r2_score
+    accuracy=r2_score(test_labels_all,predictions)
+    result_rf=pd.DataFrame({'prediction':predictions,'data':test_labels_all})
+    result_rf.to_csv('result_rf.csv')
+    return df_with_forecast, accuracy, result_rf
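Both model files score themselves with the same expanding-window backtest: refit on an ever-growing training slice, predict one step ahead, and score the collected one-step predictions. A compact sketch of that pattern, assuming a frame with a 'prices' label column (not the committed code):

    import pandas as pd
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import r2_score

    def expanding_window_r2(frame: pd.DataFrame, train_size: float = 0.8) -> float:
        """Refit on a growing slice, predict one step ahead, score the collected predictions."""
        preds, actuals = [], []
        for end in range(int(len(frame) * train_size), len(frame)):
            train, test = frame.iloc[:end], frame.iloc[end:end + 1]
            model = RandomForestRegressor(n_estimators=1000)
            model.fit(train.drop(columns='prices'), train['prices'])
            preds.append(model.predict(test.drop(columns='prices'))[0])
            actuals.append(test['prices'].iloc[0])
        return r2_score(actuals, preds)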
app_crypto_scrape.py
ADDED
@@ -0,0 +1,82 @@
+
+# Import packages
+import pandas as pd
+import numpy as np
+from pycoingecko import CoinGeckoAPI
+cg = CoinGeckoAPI()
+from pytrends.request import TrendReq
+pytrends = TrendReq(hl='en-US')
+import scrape_utils as su
+from dotenv import load_dotenv
+import os
+load_dotenv()
+
+COINMARKET_API_KEY=os.environ["COINMARKET_API_KEY"]
+
+def scrape_crypto(currency, ticker):
+
+    # 1. Scrape historical price and volume currency data
+    from datetime import date
+    today = date.today()
+    Day_end = today.strftime("%d")
+    Month_end = today.strftime("%m")
+    Year_end = today.strftime("%Y")
+
+    from datetime import timedelta
+
+    past=today-timedelta(days=300)
+    Day_start = past.strftime("%d")
+    Month_start = past.strftime("%m")
+    Year_start = past.strftime("%Y")
+
+    date_start=[Year_start,Month_start,Day_start]
+    date_end=[Year_end,Month_end,Day_end]
+
+    import datetime
+    df_ts_coins=su.scrape_historical_series([currency],ticker,date_start,date_end)[0]
+    print(df_ts_coins)
+    df_today_row=su.scrape_historical_series([currency],ticker,date_start,date_end)[1]
+    #print(df_today_row)
+
+    if len(df_ts_coins)>0:
+        print(df_today_row)
+        #df_today_row=df_today_row.drop(0)
+
+        df_ts_coins=df_ts_coins[['id','date','prices','market_caps','total_vol']]
+        df_ts_coins=pd.concat([df_ts_coins,df_today_row],axis=0)
+        df_ts_coins.set_index('date',inplace=True)
+        df_ts_coins.index=[pd.to_datetime(df_ts_coins.index[i]).strftime("%Y-%m-%d %H:%M:%S") for i in range(len(df_ts_coins))]
+
+        # 2. Scrape macro variables
+        df_cli=su.scrape_cli(past,today)
+        df_cpi=su.scrape_cpi_employment()
+        print('I have scraped CLI and CPI')
+
+        # 3. Scrape Google Trends
+        google_data=su.scrape_google_trends(currency,ticker)
+        print('Google Trends dataset scraped')
+
+        # 4. Scrape Yahoo Finance
+        df_finance=su.scrape_stocks(past,today)
+        print('Yahoo Finance dataset scraped. I am done scraping!')
+
+        #==== 5. CONCAT DATAFRAMES TOGETHER
+        df_ts_coins.index=pd.to_datetime(df_ts_coins.index).strftime("%Y-%m-%d")
+        df_cli.index=pd.to_datetime(df_cli.index).strftime("%Y-%m-%d")
+        if len(df_cpi)>0:
+            df_cpi.index=pd.to_datetime(df_cpi.index).strftime("%Y-%m-%d")
+        else:
+            print('MISSING CPI')
+            df_cpi=pd.DataFrame({'CPI':np.repeat(0,len(df_cli)),'Employment':np.repeat(0,len(df_cli))})
+            df_cpi.index=df_cli.index
+        google_data.index=pd.to_datetime(google_data.index).strftime("%Y-%m-%d")
+        df_finance.index=pd.to_datetime(df_finance.index).strftime("%Y-%m-%d")
+        df_all=pd.concat([df_ts_coins,df_cli,df_cpi,google_data,df_finance],axis=1)
+        df_all=df_all.sort_index()
+    else:
+        print('No data available.')
+        df_all=pd.DataFrame()
+    return df_all
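A hedged usage sketch (the 'bitcoin'/'BTC' pair is illustrative): scrape_crypto returns an empty frame when the historical scrape fails, so callers guard on length, as app_hf.py does below.

    import app_crypto_scrape as sa

    df_all = sa.scrape_crypto('bitcoin', 'BTC')
    if len(df_all) == 0:
        print('No data available.')
    else:
        print(df_all.columns.tolist())   # price, macro, trend, and finance columns by date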
app_hf.py
ADDED
@@ -0,0 +1,225 @@
+from openai import OpenAI
+from langchain.agents import AgentExecutor, create_openai_tools_agent
+from langchain_core.messages import BaseMessage, HumanMessage
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from typing import Annotated
+import operator
+from typing import Sequence, TypedDict
+import numpy as np
+import pandas as pd
+from dotenv import load_dotenv
+import os
+import matplotlib.pyplot as plt
+from langchain.schema.output_parser import StrOutputParser
+from tools import data_analyst #forecasting_expert_arima, forecasting_expert_rf, evaluator, investment_advisor
+from tools import crypto_sentiment_analysis_util
+import app_crypto_rf_model as rf
+import app_crypto_scrape as sa
+import app_crypto_arima_model as arima
+import streamlit as st
+
+from datetime import date
+today = date.today()
+
+st.set_page_config(page_title="LangChain Agent", layout="wide")
+load_dotenv()
+OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
+
+llm = ChatOpenAI(model="gpt-3.5-turbo")
+
+#======================== AGENTS ==================================
+# The agent state is the input to each node in the graph
+class AgentState(TypedDict):
+    # The annotation tells the graph that new messages will always
+    # be added to the current state
+    messages: Annotated[Sequence[BaseMessage], operator.add]
+    # The 'next' field indicates where to route to next
+    next: str
+
+tool=data_analyst.data_analyst_tools()
+
+from langchain_core.runnables import RunnableConfig
+st.title("💬 Krypto")
+
+#@st.cache_data
+
+#@st.cache_resource
+#def initialize_session_state():
+if "chat_history" not in st.session_state:
+    st.session_state["messages"] = [{"role":"system", "content":"""
+    How can I help you?
+    """}]
+
+#initialize_session_state()
+
+# Streamlit UI elements
+st.image('crypto_image.png')
+#st.text("Start by entering the currency.")
+
+sideb = st.sidebar
+
+with st.sidebar:
+    #st.subheader("This is the LangGraph workflow visualization of this application rendered in real-time.")
+    #st.image(create_graph_image())
+
+    title = st.text_input("Start by entering the currency name:")
+
+check1 = sideb.button(f"analyze {title}")
+results=[]
+
+if check1:
+    st.write(f"I am now producing analysis for {title}")
+
+    model = ChatOpenAI(temperature=0.7, api_key=OPENAI_API_KEY)
+    chain= model | StrOutputParser()
+    result=chain.invoke(f"You are a cryptocurrency data analyst.\
+                Provide correct cryptocurrency ticker from Coingecko website for cryptocurrency: {title}.\
+                Expected output: ticker.\
+                Provide it in the following format: >>cryptocurrencyticker>> \
+                for example: >>BTC>>")
+
+    # for s in graph_data.stream(inputs):
+    #     for key, value in s.items():
+    #         print(f"Finished running: {value}:")
+    #         result = value["messages"][-1].content
+    #         results.append(value["messages"][-1].content)
+
+    print(result)
+    print('ticker',str(result).split(">>")[0])
+    if len(str(result).split(">>")[1])<10:
+        cryptocurrencyticker=(str(result).split(">>")[1])
+    else:
+        cryptocurrencyticker=(str(result).split(">>")[0])
+    cryptocurrency=title
+
+    print(cryptocurrency,cryptocurrencyticker)
+    print('here')
+
+    #================== Scrape Current/Historical Price ====================
+    df=sa.scrape_crypto(cryptocurrency,cryptocurrencyticker)
+    if len(df)>0:
+        print("Running forecasting models on historical prices")
+        df_with_forecast_rf, accuracy_rf, result_rf=rf.model_run(df)
+
+        df_with_forecast_arima, accuracy_arima, result_arima=arima.model_run(df)
+
+        #--- for the LLM
+        if accuracy_rf<accuracy_arima:
+            forecasted_price=np.round(np.array(df_with_forecast_arima['prices'])[-1],2)
+            prompt = f"You are an investment recommendation expert for the cryptocurrency {cryptocurrency}. You are selecting the predicted price from the ARIMA model because its accuracy (R2 measure: {np.round(accuracy_arima,2)}) is higher than the accuracy (R2: {np.round(accuracy_rf,2)}) of the random forest model. Compare the current price to the predicted price. If the current price exceeds the predicted price, recommend selling the asset, otherwise recommend buying. Tell the user what the current price, predicted price and accuracy values are. You know that the predicted price for tomorrow using the random forest model is {np.round(np.array(df_with_forecast_rf['prices'])[-1],2)}. The prediction accuracy for the random forest model is {np.round(accuracy_rf,2)}. The current price of {cryptocurrency} is: {np.round(df['prices'][-1],2)}. "
+
+        else:
+            forecasted_price=np.round(np.array(df_with_forecast_rf['prices'])[-1],2)
+            prompt = f"You are an investment recommendation expert for the cryptocurrency {cryptocurrency}. You are selecting the predicted price from the random forest model because its accuracy (R2 measure: {np.round(accuracy_rf,2)}) is higher than the accuracy (R2: {np.round(accuracy_arima,2)}) of the ARIMA model. Compare the current price to the predicted price. If the current price exceeds the predicted price, recommend selling the asset, otherwise recommend buying. Tell the user what the current price, predicted price and accuracy values are. You know that the predicted price for tomorrow using the ARIMA model is {np.round(np.array(df_with_forecast_arima['prices'])[-1],2)}. The prediction accuracy for the ARIMA model is {np.round(accuracy_arima,2)}. The current price of {cryptocurrency} is: {np.round(df['prices'][-1],2)}. "
+        current_forecast=pd.read_csv('current_forecast.csv',index_col='date',parse_dates=True,infer_datetime_format=True)
+        today=pd.to_datetime(today).strftime('%Y-%m-%d')
+        print([np.array(df_with_forecast_arima['prices'])[-1],np.array(df_with_forecast_rf['prices'])[-1],today])
+
+        if today not in current_forecast.index:
+            prices_arima=np.append(current_forecast['prices_arima'],np.array(df_with_forecast_arima['prices'])[-1])
+            prices_rf=np.append(current_forecast['prices_rf'],np.array(df_with_forecast_rf['prices'])[-1])
+            dates=np.append(current_forecast.index[0].strftime('%Y-%m-%d'),today)
+            current_forecast=pd.DataFrame({'date':dates, 'prices_rf':prices_rf,'prices_arima':prices_arima})
+            current_forecast.to_csv('current_forecast.csv')
+
+        #prompt=str(prompt)
+        inputs_reccommend = {"messages": [HumanMessage(content=prompt)]}
+
+        model = ChatOpenAI(temperature=0.7, api_key=OPENAI_API_KEY)
+        response=model.invoke(prompt)
+        response_content=response.content
+        st.chat_message("assistant").markdown(response_content)
+        st.session_state.messages.append({"role": "assistant", "content": prompt})
+
+        fig, ax = plt.subplots(1,2, figsize=(10, 3))
+        ax[0].plot(result_arima['prediction'], color='blue', marker='o')
+        ax[0].plot(result_arima['data'], color='orange', marker='o')
+        ax[0].set_title('ARIMA')
+        ax[1].plot(result_rf['prediction'], color='blue', marker='o')
+        ax[1].plot(result_rf['data'], color='orange', marker='o')
+        ax[1].set_title('RF')
+        fig.suptitle('Prediction vs Actuals')
+        plt.legend(['prediction','actuals'])
+        st.pyplot(fig)
+    # ========================== Sentiment analysis
+    # Perform sentiment analysis on the cryptocurrency news, predict the dominant sentiment, and plot the sentiment breakdown chart
+    # Download from Reddit
+
+    # Download from Alpaca
+    news_articles = crypto_sentiment_analysis_util.fetch_news(cryptocurrency)
+    reddit_news_articles=crypto_sentiment_analysis_util.fetch_reddit_news(cryptocurrency)
+    #os.system('scrapy crawl reddit -o crypto_reddit.txt')
+
+    #crypto_sentiment_analysis_util.fetch_reddit_news() #(f"cryptocurrency {cryptocurrency}")
+    analysis_results = []
+
+    # Perform sentiment analysis for each news article
+    for article in news_articles:
+        if cryptocurrency[0:6] in article['News_Article'].lower():
+            sentiment_analysis_result = crypto_sentiment_analysis_util.analyze_sentiment(article['News_Article'])
+
+            # Display sentiment analysis results
+            #print(f'News Article: {sentiment_analysis_result["News_Article"]} : Sentiment: {sentiment_analysis_result["Sentiment"]}', '\n')
+
+            result = {
+                'News_Article': sentiment_analysis_result["News_Article"],
+                'Sentiment': sentiment_analysis_result["Sentiment"][0]['label'],
+                'Index': sentiment_analysis_result["Sentiment"][0]['score']
+            }
+
+            analysis_results.append(result)
+
+    for article in reddit_news_articles:
+        if cryptocurrency[0:6] in article.lower():
+            sentiment_analysis_result_reddit = crypto_sentiment_analysis_util.analyze_sentiment(article)
+
+            # Display sentiment analysis results
+            #print(f'News Article: {sentiment_analysis_result_reddit["News_Article"]} : Sentiment: {sentiment_analysis_result_reddit["Sentiment"]}', '\n')
+
+            result = {
+                'News_Article': sentiment_analysis_result_reddit["News_Article"],
+                'Index':np.round(sentiment_analysis_result_reddit["Sentiment"][0]['score'],2)
+            }
+            analysis_results.append(result)
+
+    # Generate a summarized message to rationalize the dominant sentiment
+    summary = crypto_sentiment_analysis_util.generate_summary_of_sentiment(analysis_results)
+    st.chat_message("assistant").write(str(summary))
+    st.session_state.messages.append({"role": "assistant", "content": summary})
+    #answers=np.append(res["messages"][-1].content,summary)
+
+# Set the OpenAI API key
+client = OpenAI(api_key=OPENAI_API_KEY)
+
+# Set a default model
+if "openai_model" not in st.session_state:
+    st.session_state["openai_model"] = "gpt-3.5-turbo"
+
+#model = ChatOpenAI(temperature=0.7, api_key=OPENAI_API_KEY)
+if prompt := st.chat_input("Some other questions?"):
+    # Add the user message to the chat history
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    # Display the user message in a chat message container
+    with st.chat_message("user"):
+        st.markdown(prompt)
+    # Display the assistant response in a chat message container
+    with st.chat_message("assistant"):
+        stream = client.chat.completions.create(
+            model=st.session_state["openai_model"],
+            messages=[
+                {"role": m["role"], "content": m["content"]}
+                for m in st.session_state.messages
+            ],
+            stream=True,
+        )
+        response = st.write_stream(stream)
+    st.session_state.messages.append({"role": "assistant", "content": response})
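app_hf.py recovers the ticker by splitting the LLM reply on ">>", which raises IndexError when the reply contains no markers. A slightly more defensive variant (a sketch with a hypothetical helper name, not the committed code):

    import re

    def extract_ticker(reply: str) -> str | None:
        """Pull a ticker out of a '>>BTC>>'-style reply; None if no marker is found."""
        match = re.search(r'>>([A-Za-z0-9]{1,10})>>', str(reply))
        return match.group(1) if match else None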
requirements.txt
CHANGED
@@ -1,12 +1,11 @@
-alpaca_trade_api
-transformers
-bitsandbytes
-yfinance
-gradio==4.42.0
 tf-keras==2.17.0
 python-dotenv==1.0.1
+
 beautifulsoup4==4.12.3
+
+
 fastapi==0.110.3
+
 GoogleNews==1.6.15
 
 langchain==0.2.14
@@ -17,18 +16,8 @@ langchain-openai==0.1.21
 
 openai==1.40.8
 transformers==4.44.0
+
 pandas==2.2.2
-numpy==1.26.4
 praw==7.7.1
 streamlit==1.37.1
-typing-inspect==0.9.0
 
-matplotlib==3.9.2
-statsmodels==0.14.2
-scikit-learn==1.5.1
-pycoingecko==3.1.0
-beautifulsoup4==4.12.3
-requests==2.32.3
-pytrends==4.9.2
-yfinance==0.2.41
-prettytable==3.11.0
scrape_utils.py
CHANGED
@@ -19,11 +19,14 @@ import yfinance as yf
 import json
 import prettytable
 import os
+from requests import Request, Session
+from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
+import json
 load_dotenv()
 
-
+COINMARKET_API_KEY=os.environ["COINMARKET_API_KEY"]
 # Historical crypto data
-def scrape_historical_series(coin_name,date_start,date_end):
+def scrape_historical_series(coin_name,symbol,date_start,date_end):
     import datetime
     """ Scrape historical series on the sample of coins.
 
@@ -49,65 +52,143 @@ def scrape_historical_series(coin_name,date_start,date_end):
 
     #empty lists
     unix_all=[]
-    coins_names=[]
 
     #create date variable
     for val in datum_range:
         unix_all=np.append(unix_all,time.mktime(val.timetuple()))
[54 blank lines removed]
+    url = 'https://pro-api.coinmarketcap.com/v2/cryptocurrency/quotes/historical'
+    parameters = {
+        'time_start': int(unix_past),
+        'time_end': int(unix_now),
+        'symbol': symbol,
+        'convert':'USD',
+        'interval': 'daily',
+    }
+    headers = {
+        'Accepts': 'application/json',
+        'X-CMC_PRO_API_KEY': COINMARKET_API_KEY,
+    }
+
+    session = Session()
+    session.headers.update(headers)
+
+    try:
+        response = session.get(url, params=parameters)
+        data_json = json.loads(response.text)
+        #data = json.loads(response.text['data']['quote']['USD'])
+    except (ConnectionError, Timeout, TooManyRedirects) as e:
+        print(e)
+
+    # SCRAPE FOR ETH
+    #create date variable
+    for val in datum_range:
+        unix_all=np.append(unix_all,time.mktime(val.timetuple()))
+    url = 'https://pro-api.coinmarketcap.com/v2/cryptocurrency/quotes/historical'
+    parameters = {
+        'time_start': int(unix_past),
+        'time_end': int(unix_now),
+        'symbol': 'ETH',
+        'convert':'USD',
+        'interval': 'daily',
+    }
+    headers = {
+        'Accepts': 'application/json',
+        'X-CMC_PRO_API_KEY': COINMARKET_API_KEY,
+    }
+
+    session = Session()
+    session.headers.update(headers)
+
+    try:
+        response = session.get(url, params=parameters)
+        data_json_eth = json.loads(response.text)
+    except (ConnectionError, Timeout, TooManyRedirects) as e:
+        print(e)
+
+    date=[]
+    price=[]
+    price_eth=[]
+    market_caps=[]
+    total_volumes=[]
+
+    for i in range(len(data_json['data'][symbol][0]['quotes'])):
+        date=np.append(date,data_json['data'][symbol][0]['quotes'][i]['quote']['USD']['timestamp'])
+        price=np.append(price,data_json['data'][symbol][0]['quotes'][i]['quote']['USD']['price'])
+        market_caps=np.append(market_caps,data_json['data'][symbol][0]['quotes'][i]['quote']['USD']['market_cap'])
+        total_volumes=np.append(total_volumes,data_json['data'][symbol][0]['quotes'][i]['quote']['USD']['volume_24h'])
+        price_eth=np.append(price_eth,data_json_eth['data']['ETH'][0]['quotes'][i]['quote']['USD']['price'])
+    ts_coins_cut=pd.DataFrame({'date':date, 'prices':price,'market_caps':market_caps,'total_vol':total_volumes,'price_eth':price_eth})
+    ts_coins_cut['id']=np.repeat(coin_name,len(ts_coins_cut))
+    ts_coins_cut['date']=pd.to_datetime(ts_coins_cut['date'])
+
+    # SCRAPE CURRENT DATA
+    unix_all=[]
+
+    #create date variable
+    for val in datum_range:
+        unix_all=np.append(unix_all,time.mktime(val.timetuple()))
+    url = 'https://pro-api.coinmarketcap.com/v2/cryptocurrency/quotes/latest'
+    parameters = {
+        'symbol': symbol,
+        'convert':'USD',
+    }
+    headers = {
+        'Accepts': 'application/json',
+        'X-CMC_PRO_API_KEY': COINMARKET_API_KEY,
+    }
+
+    session = Session()
+    session.headers.update(headers)
+
+    try:
+        response = session.get(url, params=parameters)
+        data_json = json.loads(response.text)
+    except (ConnectionError, Timeout, TooManyRedirects) as e:
+        print(e)
+
+    # Current data for ETH
+    #create date variable
+    for val in datum_range:
+        unix_all=np.append(unix_all,time.mktime(val.timetuple()))
+    url = 'https://pro-api.coinmarketcap.com/v2/cryptocurrency/quotes/latest'
+    parameters = {
+        'symbol': 'ETH',
+        'convert':'USD',
+    }
+    headers = {
+        'Accepts': 'application/json',
+        'X-CMC_PRO_API_KEY': COINMARKET_API_KEY,
+    }
+
+    session = Session()
+    session.headers.update(headers)
+
+    try:
+        response = session.get(url, params=parameters)
+        data_json_eth = json.loads(response.text)
+    except (ConnectionError, Timeout, TooManyRedirects) as e:
+        print(e)
+
+    date=data_json['data'][str(symbol)][0]['quote']['USD']['last_updated']
+    market_cap=data_json['data'][str(symbol)][0]['quote']['USD']['market_cap']
+    total_volumes=data_json['data'][str(symbol)][0]['quote']['USD']['volume_24h']
+    price=data_json['data'][str(symbol)][0]['quote']['USD']['price']
+    price_eth=data_json_eth['data']['ETH'][0]['quote']['USD']['price']
+    # CREATE THE CURRENT ROW
+    from datetime import date
+    today = date.today()
+    df_today_row=pd.DataFrame({0:['id','date','prices','market_caps','total_vol','price_eth'],1:[coin_name[0],today.strftime('%Y-%m-%d %H:%M:%S'),price,market_cap,total_volumes,price_eth]}).T
+    df_today_row.columns=df_today_row.iloc[0,:]
+    df_today_row=df_today_row.drop(0)
+    ts_coins_cut.to_csv('ts_coins_cut.csv')
+    return ts_coins_cut, df_today_row
 
 # 2. Macro variables, CLI
 
@@ -208,7 +289,7 @@ def scrape_google_trends(currency, currency_short):
     Hour_end=21
     Minute_end=20
 
-    past=today-datetime.timedelta(days=
+    past=today-datetime.timedelta(days=200)
     Day_start = past.strftime("%d")
     Month_start = past.strftime("%m")
     Year_start = past.strftime("%Y")
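The four request blocks added to scrape_historical_series differ only in endpoint and parameters; a deduplication sketch (the helper name cmc_get is hypothetical, not in the commit):

    import json
    from requests import Session
    from requests.exceptions import ConnectionError, Timeout, TooManyRedirects

    def cmc_get(endpoint, parameters, api_key):
        """One CoinMarketCap v2 GET with the same headers and error handling as above."""
        session = Session()
        session.headers.update({'Accepts': 'application/json',
                                'X-CMC_PRO_API_KEY': api_key})
        try:
            response = session.get('https://pro-api.coinmarketcap.com/v2/cryptocurrency/' + endpoint,
                                   params=parameters)
            return json.loads(response.text)
        except (ConnectionError, Timeout, TooManyRedirects) as e:
            print(e)
            return None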
tools/.DS_Store
ADDED
Binary file (6.15 kB)
tools/.chainlit/config.toml
ADDED
@@ -0,0 +1,84 @@
+[project]
+# Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = true
+
+# List of environment variables to be provided by each user to use the app.
+user_env = []
+
+# Duration (in seconds) during which the session is saved when the connection is lost
+session_timeout = 3600
+
+# Enable third parties caching (e.g LangChain cache)
+cache = false
+
+# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+# follow_symlink = false
+
+[features]
+# Show the prompt playground
+prompt_playground = true
+
+# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+unsafe_allow_html = false
+
+# Process and display mathematical expressions. This can clash with "$" characters in messages.
+latex = false
+
+# Authorize users to upload files with messages
+multi_modal = true
+
+# Allows user to use speech to text
+[features.speech_to_text]
+    enabled = false
+    # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+    # language = "en-US"
+
+[UI]
+# Name of the app and chatbot.
+name = "Chatbot"
+
+# Show the readme while the conversation is empty.
+show_readme_as_default = true
+
+# Description of the app and chatbot. This is used for HTML tags.
+# description = ""
+
+# Large size content are by default collapsed for a cleaner ui
+default_collapse_content = true
+
+# The default value for the expand messages settings.
+default_expand_messages = false
+
+# Hide the chain of thought details from the user in the UI.
+hide_cot = false
+
+# Link to your github repo. This will add a github button in the UI's header.
+# github = ""
+
+# Specify a CSS file that can be used to customize the user interface.
+# The CSS file can be served from the public directory or via an external link.
+# custom_css = "/public/test.css"
+
+# Override default MUI light theme. (Check theme.ts)
+[UI.theme.light]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+
+    [UI.theme.light.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+
+# Override default MUI dark theme. (Check theme.ts)
+[UI.theme.dark]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+
+    [UI.theme.dark.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+
+
+[meta]
+generated_by = "0.7.700"
tools/__pycache__/crypto_sentiment_analysis_util.cpython-311.pyc
ADDED
Binary file (18.1 kB)
tools/__pycache__/data_analyst.cpython-311.pyc
ADDED
Binary file (3.33 kB)
tools/crypto_sentiment_analysis_util.py
ADDED
@@ -0,0 +1,282 @@
1 |
+
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
from transformers import pipeline
|
5 |
+
import os
|
6 |
+
import pandas as pd
|
7 |
+
from GoogleNews import GoogleNews
|
8 |
+
from langchain_openai import ChatOpenAI
|
9 |
+
import pandas as pd
|
10 |
+
import praw
|
11 |
+
from datetime import datetime
|
12 |
+
|
13 |
+
load_dotenv()
|
14 |
+
|
15 |
+
def fetch_news(stockticker):
|
16 |
+
|
17 |
+
""" Fetches news articles for a given stock symbol within a specified date range.
|
18 |
+
|
19 |
+
Args:
|
20 |
+
- stockticker (str): Symbol of a particular stock
|
21 |
+
|
22 |
+
Returns:
|
23 |
+
- list: A list of dictionaries containing stock news. """
|
24 |
+
|
25 |
+
load_dotenv()
|
26 |
+
days_to_fetch_news = os.environ["DAYS_TO_FETCH_NEWS"]
|
27 |
+
|
28 |
+
googlenews = GoogleNews()
|
29 |
+
googlenews.set_period(days_to_fetch_news)
|
30 |
+
googlenews.get_news(stockticker)
|
31 |
+
news_json=googlenews.get_texts()
|
32 |
+
urls=googlenews.get_links()
|
33 |
+
|
34 |
+
no_of_news_articles_to_fetch = os.environ["NO_OF_NEWS_ARTICLES_TO_FETCH"]
|
35 |
+
news_article_list = []
|
36 |
+
counter = 0
|
37 |
+
for article in news_json:
|
38 |
+
|
39 |
+
if(counter >= int(no_of_news_articles_to_fetch)):
|
40 |
+
break
|
41 |
+
|
42 |
+
relevant_info = {
|
43 |
+
'News_Article': article,
|
44 |
+
'URL': urls[counter]
|
45 |
+
}
|
46 |
+
news_article_list.append(relevant_info)
|
47 |
+
counter+=1
|
48 |
+
return news_article_list
|
49 |
+
|
50 |
+
def fetch_reddit_news(cryptocurrencyticker):
|
51 |
+
load_dotenv()
|
52 |
+
REDDIT_USER_AGENT= os.environ["REDDIT_USER_AGENT"]
|
53 |
+
REDDIT_CLIENT_ID= os.environ["REDDIT_CLIENT_ID"]
|
54 |
+
REDDIT_CLIENT_SECRET= os.environ["REDDIT_CLIENT_SECRET"]
|
55 |
+
#https://medium.com/geekculture/a-complete-guide-to-web-scraping-reddit-with-python-16e292317a52
|
56 |
+
user_agent = REDDIT_USER_AGENT
|
57 |
+
reddit = praw.Reddit (
|
58 |
+
client_id= REDDIT_CLIENT_ID,
|
59 |
+
client_secret= REDDIT_CLIENT_SECRET,
|
60 |
+
user_agent=user_agent
|
61 |
+
)
|
62 |
+
|
63 |
+
headlines = set ( )
|
64 |
+
for submission in reddit.subreddit('CryptoCurrencyTrading').search(cryptocurrencyticker,time_filter='week'):
|
65 |
+
headlines.add(submission.title + ', Date: ' +datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' +submission.url)
|
66 |
+
|
67 |
+
if len(headlines)<10:
|
68 |
+
for submission in reddit.subreddit('CryptoCurrencyTrading').search(cryptocurrencyticker,time_filter='year'):
|
69 |
+
headlines.add(submission.title + ', Date: ' +datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' +submission.url)
|
70 |
+
    if len(headlines) < 10:
        for submission in reddit.subreddit('CryptoCurrencyTrading').search(cryptocurrencyticker):  # no time filter
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    # coinbase
    for submission in reddit.subreddit('CoinBase').search(cryptocurrencyticker, time_filter='week'):
        headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    if len(headlines) < 10:
        for submission in reddit.subreddit('CoinBase').search(cryptocurrencyticker, time_filter='year'):
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)
    if len(headlines) < 10:
        for submission in reddit.subreddit('CoinBase').search(cryptocurrencyticker):  # no time filter
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    # coingecko
    for submission in reddit.subreddit('coingecko').search(cryptocurrencyticker, time_filter='week'):
        headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    if len(headlines) < 10:
        for submission in reddit.subreddit('coingecko').search(cryptocurrencyticker, time_filter='year'):
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)
    if len(headlines) < 10:
        for submission in reddit.subreddit('coingecko').search(cryptocurrencyticker):  # no time filter
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    # crypto
    for submission in reddit.subreddit('CryptoCurrency').search(cryptocurrencyticker, time_filter='week'):
        headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    if len(headlines) < 10:
        for submission in reddit.subreddit('CryptoCurrency').search(cryptocurrencyticker, time_filter='year'):
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)
    if len(headlines) < 10:
        for submission in reddit.subreddit('CryptoCurrency').search(cryptocurrencyticker):  # no time filter
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    # shitcoin
    for submission in reddit.subreddit('ShitcoinCentral').search(cryptocurrencyticker, time_filter='week'):
        headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    if len(headlines) < 10:
        for submission in reddit.subreddit('ShitcoinCentral').search(cryptocurrencyticker, time_filter='year'):
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)
    if len(headlines) < 10:
        for submission in reddit.subreddit('ShitcoinCentral').search(cryptocurrencyticker):  # no time filter
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    # shitcoin moonshots
    for submission in reddit.subreddit('shitcoinmoonshots').search(cryptocurrencyticker, time_filter='week'):
        headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    if len(headlines) < 10:
        for submission in reddit.subreddit('shitcoinmoonshots').search(cryptocurrencyticker, time_filter='year'):
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)
    if len(headlines) < 10:
        for submission in reddit.subreddit('shitcoinmoonshots').search(cryptocurrencyticker):  # no time filter
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    # solana
    for submission in reddit.subreddit('solana').search(cryptocurrencyticker, time_filter='week'):
        headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    if len(headlines) < 10:
        for submission in reddit.subreddit('solana').search(cryptocurrencyticker, time_filter='year'):
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)
    if len(headlines) < 10:
        for submission in reddit.subreddit('solana').search(cryptocurrencyticker):  # no time filter
            headlines.add(submission.title + ', Date: ' + datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)

    return headlines

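Note: the per-subreddit blocks above repeat one widening-search pattern — search the past week, then the past year, then all time, stopping once at least ten headlines are collected. A minimal refactor sketch of that pattern as a loop; `SUBREDDITS` and `add_matches` are hypothetical names, not part of this commit:

    SUBREDDITS = ['CryptoCurrencyTrading', 'CoinBase', 'coingecko', 'CryptoCurrency',
                  'ShitcoinCentral', 'shitcoinmoonshots', 'solana']

    def add_matches(headlines, name, query, time_filter=None):
        # praw's search() accepts time_filter values such as 'week', 'year' and 'all'
        sub = reddit.subreddit(name)
        results = sub.search(query, time_filter=time_filter) if time_filter else sub.search(query)
        for s in results:
            headlines.add(s.title + ', Date: ' + datetime.utcfromtimestamp(int(s.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + s.url)

    for name in SUBREDDITS:
        add_matches(headlines, name, cryptocurrencyticker, 'week')
        for wider in ('year', None):
            if len(headlines) >= 10:
                break
            add_matches(headlines, name, cryptocurrencyticker, wider)
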
def analyze_sentiment(article):
    """
    Analyzes the sentiment of a given news article.

    Args:
    - article (str): Text of the article (headline plus metadata) to score.

    Returns:
    - dict: A dictionary containing the article and its sentiment analysis result.
    """

    # Analyze sentiment using the default model
    #classifier = pipeline('sentiment-analysis')

    # Analyze sentiment using a model fine-tuned on financial news
    classifier = pipeline(model='mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis')
    sentiment_result = classifier(str(article))

    analysis_result = {
        'News_Article': article,
        'Sentiment': sentiment_result
    }

    return analysis_result

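For reference, a Hugging Face sentiment pipeline returns a list with one dict per input, so the function above yields a shape like the following (the headline is made up; this particular model reports positive/neutral/negative labels):

    result = analyze_sentiment("Bitcoin ETF inflows hit record high, Date: 2024-03-01 09:30:00, URL:https://example.com")
    # result['Sentiment'] -> [{'label': 'positive', 'score': 0.98}]
    print(result['Sentiment'][0]['label'])

Note that the pipeline is re-created on every call; hoisting `classifier` to module level would avoid reloading the model for each article.
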
def generate_summary_of_sentiment(sentiment_analysis_results):

    news_article_sentiment = str(sentiment_analysis_results)
    print("News article sentiment: " + news_article_sentiment)

    # OPENAI_API_KEY is read from the environment by ChatOpenAI
    model = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        # api_key="...",  # if you prefer to pass the API key directly instead of using env vars
        # base_url="...",
        # organization="...",
        # other params...
    )

    messages = [
        {"role": "system", "content": "You are a helpful assistant that looks at all news articles with their sentiment, hyperlink and date in front of the article text; the articles MUST be ordered by date. Generate a summary rationalizing the dominant sentiment. At the end of the summary, add URL links with dates for all the articles in markdown format for streamlit. Make sure the articles as well as the links are ordered descending by date. Example of adding the URLs: Check out the links: [link](%s) % url, 2024-03-01."},
        {"role": "user", "content": f"News articles and their sentiments: {news_article_sentiment}"}
    ]
    response = model.invoke(messages)

    summary = response.content
    print("+++++++++++++++++++++++++++++++++++++++++++++++")
    print(summary)
    print("+++++++++++++++++++++++++++++++++++++++++++++++")
    return summary

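A hedged usage sketch from the consuming Streamlit app (`st` is imported there, not in this module); the prompt above asks the model for Streamlit-ready markdown, so the summary can be rendered directly:

    summary = generate_summary_of_sentiment(analysis_results)
    st.markdown(summary)
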
def plot_sentiment_graph(sentiment_analysis_results):
    """
    Plots a sentiment analysis graph.

    Args:
    - sentiment_analysis_results (list[dict]): Dictionaries containing 'News_Article' and 'Sentiment' keys.

    Returns:
    - pd.Series: Counts of each sentiment label.
    """
    df = pd.DataFrame(sentiment_analysis_results)
    print(df)

    # Count the occurrences of each sentiment label
    sentiment_counts = df['Sentiment'].value_counts()

    # Plotting pie chart
    # fig = plt.figure(figsize=(5, 3))
    # plt.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140)
    # plt.axis('equal')  # Equal aspect ratio ensures that the pie is drawn as a circle.

    # Uncomment below when running this program locally to display the chart
    #plt.show()

    return sentiment_counts

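With the matplotlib calls commented out, the returned counts can still be charted by the caller; a minimal sketch for the Streamlit app (an illustration, not part of this commit):

    counts = plot_sentiment_graph(analysis_results)
    st.bar_chart(counts)  # Streamlit renders a pandas Series directly
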
def get_dominant_sentiment(sentiment_analysis_results):
    """
    Returns the overall sentiment - negative, positive or neutral - depending on which label occurs most often.

    Args:
    - sentiment_analysis_results (list[dict]): Dictionaries containing 'News_Article' and 'Sentiment' keys.

    Returns:
    - str: The most frequent sentiment label.
    """
    df = pd.DataFrame(sentiment_analysis_results)

    # Group by the 'Sentiment' column and count the occurrences of each sentiment value
    sentiment_counts = df['Sentiment'].value_counts().reset_index()
    sentiment_counts.columns = ['sentiment', 'count']
    print(sentiment_counts)

    # Find the sentiment with the highest count
    dominant_sentiment = sentiment_counts.loc[sentiment_counts['count'].idxmax()]

    return dominant_sentiment['sentiment']

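A tiny worked example of the counting logic, with made-up labels:

    rows = [{'News_Article': 'a', 'Sentiment': 'positive'},
            {'News_Article': 'b', 'Sentiment': 'positive'},
            {'News_Article': 'c', 'Sentiment': 'negative'}]
    print(get_dominant_sentiment(rows))  # -> 'positive' (2 of the 3 articles)
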
# starting point of the program
if __name__ == '__main__':

    # fetch news for a sample ticker
    news_articles = fetch_news('AAPL')

    analysis_results = []

    # Perform sentiment analysis for each news article
    for article in news_articles:
        sentiment_analysis_result = analyze_sentiment(article['News_Article'])

        # Display sentiment analysis results
        print(f'News Article: {sentiment_analysis_result["News_Article"]} : Sentiment: {sentiment_analysis_result["Sentiment"]}', '\n')

        result = {
            'News_Article': sentiment_analysis_result["News_Article"],
            'Sentiment': sentiment_analysis_result["Sentiment"][0]['label']
        }

        analysis_results.append(result)

    # Determine the dominant sentiment based on the per-article analysis data
    dominant_sentiment = get_dominant_sentiment(analysis_results)
    print(dominant_sentiment)

    # Plot graph
    plot_sentiment_graph(analysis_results)
tools/data_analyst.py
ADDED
@@ -0,0 +1,42 @@
from pydantic import BaseModel, Field
from langchain.tools import BaseTool
from typing import Optional, Type
from langchain.tools import StructuredTool
import yfinance as yf
from typing import List
from datetime import datetime, timedelta
from pycoingecko import CoinGeckoAPI
cg = CoinGeckoAPI()

def data_analyst_tools():
    def get_crypto_price(cryptocurrencyticker: str) -> str:
        current_data = cg.get_price(ids=cryptocurrencyticker, vs_currencies='usd', include_market_cap='true', include_24hr_vol='true', include_last_updated_at='true')
        return str(current_data)

    class CryptoPriceCheckInput(BaseModel):
        """Input for crypto price check."""
        # field name must match the wrapped function's parameter for StructuredTool
        cryptocurrencyticker: str = Field(..., description="Ticker symbol for crypto or index")

    class CryptoPriceTool(BaseTool):
        name = "get_crypto_price"
        description = "Useful for when you need to find out the price of a cryptocurrency. You should input the crypto ticker used on the CoinGecko API"
        args_schema: Optional[Type[BaseModel]] = CryptoPriceCheckInput

        def _run(self, cryptocurrencyticker: str):
            price_response = get_crypto_price(cryptocurrencyticker)
            return str(price_response)

        def _arun(self, cryptocurrencyticker: str):
            raise NotImplementedError("This tool does not support async")

    tools_data_analyst = [StructuredTool.from_function(
        func=get_crypto_price,  # pass the function itself, not the BaseTool class
        args_schema=CryptoPriceCheckInput,
        description="Function to get current crypto prices.",
        ),
    ]
    return tools_data_analyst
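A hedged usage sketch of the tool list; CoinGecko expects coin ids such as 'bitcoin' or 'ethereum', and the output shape follows its /simple/price endpoint (values below are illustrative):

    tools = data_analyst_tools()
    print(tools[0].run({"cryptocurrencyticker": "ethereum"}))
    # {'ethereum': {'usd': 3400.12, 'usd_market_cap': 4.1e11, 'usd_24h_vol': 1.4e10, 'last_updated_at': 1721400000}}
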
tools/df_history.csv
ADDED
@@ -0,0 +1,63 @@
Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,stockticker
2024-04-22 00:00:00-04:00,399.3596471827562,402.1246793258712,395.03745670124425,400.2380676269531,20286900,0.0,0.0,MSFT
2024-04-23 00:00:00-04:00,403.51216021293357,407.4650522060062,402.33429210205253,406.836181640625,15734500,0.0,0.0,MSFT
2024-04-24 00:00:00-04:00,408.82258607970806,411.72735028943725,406.0475926794115,408.323486328125,15065300,0.0,0.0,MSFT
2024-04-25 00:00:00-04:00,393.32054400787314,399.1700088625431,387.3313470651067,398.321533203125,40586500,0.0,0.0,MSFT
2024-04-26 00:00:00-04:00,411.4279132786848,412.25640548421114,405.02945064216703,405.58843994140625,29694700,0.0,0.0,MSFT
2024-04-29 00:00:00-04:00,404.52035539531056,405.5884361925186,398.4712687423875,401.5257568359375,19582100,0.0,0.0,MSFT
2024-04-30 00:00:00-04:00,400.76710737423014,401.4359144425664,388.4693126899027,388.6289978027344,28781400,0.0,0.0,MSFT
2024-05-01 00:00:00-04:00,391.9030904630616,400.9967037344784,389.6072438016868,394.2289123535156,23562500,0.0,0.0,MSFT
2024-05-02 00:00:00-04:00,396.94401914412265,399.20992105581087,393.9394288835304,397.1236877441406,17709400,0.0,0.0,MSFT
2024-05-03 00:00:00-04:00,401.55570709720826,406.4169339510819,401.13644988960164,405.9278259277344,17446700,0.0,0.0,MSFT
2024-05-06 00:00:00-04:00,408.024048156178,413.1847226485525,405.63833666603693,412.7954406738281,16996600,0.0,0.0,MSFT
2024-05-07 00:00:00-04:00,413.91342570011614,413.92341744357753,408.35344694069664,408.6029968261719,20018200,0.0,0.0,MSFT
2024-05-08 00:00:00-04:00,407.4351142805277,411.48780192255407,405.97772103822234,409.80084228515625,11792300,0.0,0.0,MSFT
2024-05-09 00:00:00-04:00,409.8307875446534,411.97691043744567,408.363433019907,411.57763671875,14689700,0.0,0.0,MSFT
2024-05-10 00:00:00-04:00,412.1965086797442,414.6321179246016,411.05854661467066,413.9932556152344,13402300,0.0,0.0,MSFT
2024-05-13 00:00:00-04:00,417.2573820335048,417.5967662074119,410.08032520369875,412.97509765625,15440200,0.0,0.0,MSFT
2024-05-14 00:00:00-04:00,411.2781631216723,416.73831581889846,410.8090081198034,415.80999755859375,15109300,0.0,0.0,MSFT
2024-05-15 00:00:00-04:00,417.8999938964844,423.80999755859375,417.2699890136719,423.0799865722656,22239500,0.75,0.0,MSFT
2024-05-16 00:00:00-04:00,421.79998779296875,425.4200134277344,420.3500061035156,420.989990234375,17530100,0.0,0.0,MSFT
2024-05-17 00:00:00-04:00,422.5400085449219,422.9200134277344,418.0299987792969,420.2099914550781,15352200,0.0,0.0,MSFT
2024-05-20 00:00:00-04:00,420.2099914550781,426.7699890136719,419.989990234375,425.3399963378906,16272100,0.0,0.0,MSFT
2024-05-21 00:00:00-04:00,426.8299865722656,432.9700012207031,424.8500061035156,429.0400085449219,21453300,0.0,0.0,MSFT
2024-05-22 00:00:00-04:00,430.0899963378906,432.4100036621094,427.1300048828125,430.5199890136719,18073700,0.0,0.0,MSFT
2024-05-23 00:00:00-04:00,432.9700012207031,433.6000061035156,425.4200134277344,427.0,17211700,0.0,0.0,MSFT
2024-05-24 00:00:00-04:00,427.19000244140625,431.05999755859375,424.4100036621094,430.1600036621094,11845800,0.0,0.0,MSFT
2024-05-28 00:00:00-04:00,429.6300048828125,430.82000732421875,426.6000061035156,430.32000732421875,15718000,0.0,0.0,MSFT
2024-05-29 00:00:00-04:00,425.69000244140625,430.94000244140625,425.69000244140625,429.1700134277344,15517100,0.0,0.0,MSFT
2024-05-30 00:00:00-04:00,424.29998779296875,424.29998779296875,414.239990234375,414.6700134277344,28424800,0.0,0.0,MSFT
2024-05-31 00:00:00-04:00,416.75,416.75,404.510009765625,415.1300048828125,47995300,0.0,0.0,MSFT
2024-06-03 00:00:00-04:00,415.5299987792969,416.42999267578125,408.9200134277344,413.5199890136719,17484700,0.0,0.0,MSFT
2024-06-04 00:00:00-04:00,412.42999267578125,416.44000244140625,409.67999267578125,416.07000732421875,14348900,0.0,0.0,MSFT
2024-06-05 00:00:00-04:00,417.80999755859375,424.0799865722656,416.29998779296875,424.010009765625,16988000,0.0,0.0,MSFT
2024-06-06 00:00:00-04:00,424.010009765625,425.30999755859375,420.5799865722656,424.5199890136719,14861300,0.0,0.0,MSFT
2024-06-07 00:00:00-04:00,426.20001220703125,426.2799987792969,423.0,423.8500061035156,13621700,0.0,0.0,MSFT
2024-06-10 00:00:00-04:00,424.70001220703125,428.0799865722656,423.8900146484375,427.8699951171875,14003000,0.0,0.0,MSFT
2024-06-11 00:00:00-04:00,425.4800109863281,432.82000732421875,425.25,432.67999267578125,14551100,0.0,0.0,MSFT
2024-06-12 00:00:00-04:00,435.32000732421875,443.3999938964844,433.25,441.05999755859375,22366200,0.0,0.0,MSFT
2024-06-13 00:00:00-04:00,440.8500061035156,443.3900146484375,439.3699951171875,441.5799865722656,15960600,0.0,0.0,MSFT
2024-06-14 00:00:00-04:00,438.2799987792969,443.1400146484375,436.7200012207031,442.57000732421875,13582000,0.0,0.0,MSFT
2024-06-17 00:00:00-04:00,442.5899963378906,450.94000244140625,440.7200012207031,448.3699951171875,20790000,0.0,0.0,MSFT
2024-06-18 00:00:00-04:00,449.7099914550781,450.1400146484375,444.8900146484375,446.3399963378906,17112500,0.0,0.0,MSFT
2024-06-20 00:00:00-04:00,446.29998779296875,446.5299987792969,441.2699890136719,445.70001220703125,19877400,0.0,0.0,MSFT
2024-06-21 00:00:00-04:00,447.3800048828125,450.5799865722656,446.510009765625,449.7799987792969,34486200,0.0,0.0,MSFT
2024-06-24 00:00:00-04:00,449.79998779296875,452.75,446.4100036621094,447.6700134277344,15913700,0.0,0.0,MSFT
2024-06-25 00:00:00-04:00,448.25,451.4200134277344,446.75,450.95001220703125,16747500,0.0,0.0,MSFT
2024-06-26 00:00:00-04:00,449.0,453.6000061035156,448.19000244140625,452.1600036621094,16507000,0.0,0.0,MSFT
2024-06-27 00:00:00-04:00,452.17999267578125,456.1700134277344,451.7699890136719,452.8500061035156,14806300,0.0,0.0,MSFT
2024-06-28 00:00:00-04:00,453.07000732421875,455.3800048828125,446.4100036621094,446.95001220703125,28362300,0.0,0.0,MSFT
2024-07-01 00:00:00-04:00,448.6600036621094,457.3699951171875,445.6600036621094,456.7300109863281,17662800,0.0,0.0,MSFT
2024-07-02 00:00:00-04:00,453.20001220703125,459.5899963378906,453.1099853515625,459.2799987792969,13979800,0.0,0.0,MSFT
2024-07-03 00:00:00-04:00,458.19000244140625,461.0199890136719,457.8800048828125,460.7699890136719,9932800,0.0,0.0,MSFT
2024-07-05 00:00:00-04:00,459.6099853515625,468.3500061035156,458.9700012207031,467.55999755859375,16000300,0.0,0.0,MSFT
2024-07-08 00:00:00-04:00,466.54998779296875,467.70001220703125,464.4599914550781,466.239990234375,12962300,0.0,0.0,MSFT
2024-07-09 00:00:00-04:00,467.0,467.3299865722656,458.0,459.5400085449219,17207200,0.0,0.0,MSFT
2024-07-10 00:00:00-04:00,461.2200012207031,466.4599914550781,458.8599853515625,466.25,18196100,0.0,0.0,MSFT
2024-07-11 00:00:00-04:00,462.9800109863281,464.7799987792969,451.54998779296875,454.70001220703125,23111200,0.0,0.0,MSFT
2024-07-12 00:00:00-04:00,454.3299865722656,456.3599853515625,450.6499938964844,453.54998779296875,16311300,0.0,0.0,MSFT
2024-07-15 00:00:00-04:00,453.29998779296875,457.260009765625,451.42999267578125,453.9599914550781,14429400,0.0,0.0,MSFT
2024-07-16 00:00:00-04:00,454.2200012207031,454.29998779296875,446.6600036621094,449.5199890136719,17175700,0.0,0.0,MSFT
2024-07-17 00:00:00-04:00,442.5899963378906,444.8500061035156,439.17999267578125,443.5199890136719,21778000,0.0,0.0,MSFT
2024-07-18 00:00:00-04:00,444.3399963378906,444.6499938964844,434.3999938964844,440.3699951171875,20794800,0.0,0.0,MSFT
2024-07-19 00:00:00-04:00,433.1000061035156,441.1400146484375,432.0,437.1099853515625,20862400,0.0,0.0,MSFT
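df_history.csv holds daily OHLCV history (MSFT, April-July 2024) in yfinance's tz-aware date format. A minimal loading sketch, assuming the repo-relative path:

    import pandas as pd

    df = pd.read_csv("tools/df_history.csv", parse_dates=["Date"], index_col="Date")
    print(df[["Close", "Volume"]].tail())
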
tools/df_with_forecast.csv
ADDED
@@ -0,0 +1,113 @@
,id,prices,market_caps,total_vol,CLI,CPI,Employment,id,google_trend,GSPC,GC=F,EURUSD,TNX
2023-01-31,,,,,107.893,,,,,,,,
2023-02-28,,,,,,,,,,,,,
2023-03-31,,,,,,,,,,,,,
2023-04-30,,,,,108.87,,,,,,,,
2023-05-31,,,,,,,,,,,,,
2023-06-30,,,,,,,,,,,,,
2023-07-31,,,,,108.931,,,,,,,,
2023-08-31,,,,,,,,,,,,,
2023-09-30,,,,,,,,,,,,,
2023-10-31,,,,,105.3285,,,,,,,,
2023-11-30,,,,,101.961,,,,,,,,
2023-12-31,,,,,76.021485,,,,,,,,
2024-01-01,,,,,,308.417,161152.0,,,,,,
2024-01-31,,,,,82.796104,,,,,,,,
2024-02-01,,,,,,310.326,160968.0,,,,,,
2024-02-29,,,,,50.859505000000006,,,,,,,,
2024-03-01,,,,,,312.332,161466.0,,,,,,
2024-03-31,,,,,71.54424900000001,,,,,,,,
2024-04-01,,,,,,313.548,161491.0,,,,,,
2024-04-30,,,,,70.99926128571428,,,,,,,,
2024-05-01,,,,,,314.069,161083.0,,,,,,
2024-05-06,ethereum,3114.4007005303224,374072475993.8121,11127068946.811003,,,,,,5142.419921875,2322.800048828125,1.0758124589920044,4.486999988555908
2024-05-07,ethereum,3062.1337546278614,367763583617.18,12212682358.435501,,,,,,5187.2001953125,2324.300048828125,1.0767507553100586,4.4730000495910645
2024-05-08,ethereum,2999.4869525045415,360243594935.94305,11179447639.771477,,,,,,5168.97998046875,2313.60009765625,1.0750491619110107,4.484000205993652
2024-05-09,ethereum,3003.5642861321066,360831840100.2795,10896607994.801586,,,,,,5189.02978515625,2310.699951171875,1.0746910572052002,4.515999794006348
2024-05-10,ethereum,2966.441885585809,356315503171.5778,11384672900.217531,,,,,,5225.490234375,2367.300048828125,1.078515887260437,4.4770002365112305
2024-05-11,ethereum,2916.0617572305187,350195653103.13715,9198771437.235367,,,,,,,,,
2024-05-12,ethereum,2924.2519718055137,351264187115.9079,5460016379.689179,,,,,,,,,
2024-05-13,ethereum,2945.101626776707,353629048507.87177,11486452597.64263,,,,,,5233.080078125,2358.300048828125,1.077040672302246,4.486999988555908
2024-05-14,ethereum,2900.6563297650755,348385655037.8947,11542903142.535707,,,,,,5221.10009765625,2336.0,1.079354166984558,4.480999946594238
2024-05-15,ethereum,2963.759315698215,355948495364.97504,11917611608.681692,,,,,,5263.259765625,2361.60009765625,1.0814552307128906,4.418000221252441
2024-05-16,ethereum,2973.3231927105603,357110705527.3933,12982589615.756212,,,,,,5310.06982421875,2389.5,1.0889805555343628,4.329999923706055
2024-05-17,ethereum,3055.768061630982,366942477718.2434,13075157085.833336,,,,,,5303.10009765625,2380.699951171875,1.0867793560028076,4.395999908447266
2024-05-18,ethereum,3116.6010226358244,374444822902.376,10245527976.19185,,,,,,,,,
2024-05-19,ethereum,3089.6899077803787,371102827471.68726,7186818177.12275,,,,,,,,,
2024-05-20,ethereum,3275.1726235398733,393064227949.9598,14861247848.577646,,,,,,5305.35009765625,2415.800048828125,1.0875475406646729,4.421999931335449
2024-05-21,ethereum,3736.779369147562,449277769707.8145,39114320451.96952,,,,,,5298.68994140625,2429.5,1.086082935333252,4.429999828338623
2024-05-22,ethereum,3743.6797798618477,449904838634.0898,27177492763.312775,,,,,,5319.27978515625,2417.5,1.0855170488357544,4.453000068664551
2024-05-23,ethereum,3802.195186351903,456440243805.41815,31490184022.903027,,,,,,5340.259765625,2371.199951171875,1.0825204849243164,4.418000221252441
2024-05-24,ethereum,3716.584005854382,446214223854.924,36595327650.52252,,,,,,5281.4501953125,2342.60009765625,1.0812681913375854,4.488999843597412
2024-05-25,ethereum,3747.9139284087246,450191345047.8288,12320451102.246416,,,,,,,,,
2024-05-26,ethereum,3822.9062956505663,459222383843.30835,10446532594.41313,,,,,,,,,
2024-05-27,ethereum,3904.4531708423287,468888345618.59467,15875563017.846474,,,,,,,,,
2024-05-28,ethereum,3850.4555712911824,462421783973.1804,16981798566.54435,,,,,,5315.91015625,2336.89990234375,1.0847634077072144,4.453000068664551
2024-05-29,ethereum,3790.3890178850775,455720009606.71265,16291087718.980705,,,,,,5278.72998046875,2340.300048828125,1.0861891508102417,4.565999984741211
2024-05-30,ethereum,3760.0562456291614,451909655831.15967,14241005007.401228,,,,,,5259.77001953125,2336.89990234375,1.0851871967315674,4.593999862670898
2024-05-31,ethereum,3766.348666657115,452613341252.82587,13233598539.854492,74.31149225,,,,,5243.2099609375,2344.10009765625,1.0801819562911987,4.552000045776367
2024-06-01,ethereum,3797.773661763607,456183752606.3188,10134684638.033493,,314.175,161199.0,,,,,,
2024-06-02,ethereum,3789.4643581084024,455395042781.10913,8704992695.338041,,,,,,,,,
2024-06-03,ethereum,3792.876939393309,455583728457.67914,11345758405.200346,,,,,,5297.14990234375,2322.60009765625,1.0835410356521606,4.4730000495910645
2024-06-04,ethereum,3791.003611630613,455454864609.9826,11583124383.448898,,,,,,5278.240234375,2347.5,1.0853049755096436,4.361000061035156
2024-06-05,ethereum,3826.985885858965,459614958452.3508,12970952878.62752,,,,,,5314.47998046875,2326.39990234375,1.0907385349273682,4.326000213623047
2024-06-06,ethereum,3831.1242540234693,460276696407.6153,12084914430.50892,,,,,,5357.7998046875,2355.0,1.0881866216659546,4.301000118255615
2024-06-07,ethereum,3759.215714495715,451768670762.0454,13410143841.10996,,,,,,5343.81005859375,2379.89990234375,1.087453007698059,4.296999931335449
2024-06-08,ethereum,3684.6113824590207,442729750197.81464,12328054405.91001,,,,,,,,,
2024-06-09,ethereum,3693.5688944666886,443758507298.5523,6434938580.374867,,,,,,,,,
2024-06-10,ethereum,3669.804414640858,441385340635.06006,7678615033.327232,,,,,,5341.22021484375,2290.60009765625,1.0894432067871094,4.453000068664551
2024-06-11,ethereum,3511.552216421044,421773678119.8096,16295040761.774801,,,,,,5353.0,2300.0,1.0778882503509521,4.435999870300293
2024-06-12,ethereum,3559.325489288785,427430429412.461,16146566925.235643,,,,,,5409.1298828125,2314.89990234375,1.0764262676239014,4.388000011444092
2024-06-13,ethereum,3487.56598378508,418921286187.6116,16102374829.54766,,,,,,5441.93017578125,2309.39990234375,1.0740330219268799,4.309999942779541
2024-06-14,ethereum,3477.487513961109,417925411247.93335,13751574508.884224,,,,,,5424.080078125,2307.0,1.0813149213790894,4.196000099182129
2024-06-15,ethereum,3547.2656444381705,426052557357.5137,14435000601.871336,,,,,,,,,
2024-06-16,ethereum,3581.554801676437,430252401136.91156,9265313569.748865,,,,,,,,,
2024-06-17,ethereum,3521.540033404755,423525318255.71875,14792637405.883486,,,,,,5431.10986328125,2320.199951171875,1.0738831758499146,4.264999866485596
2024-06-18,ethereum,3443.057015401169,420823437646.33813,20647300792.256817,,,,,,5476.14990234375,2311.800048828125,1.0702168941497803,4.2870001792907715
2024-06-19,ethereum,3548.6088199945443,433926242387.04596,17174770852.62574,,,,,,,,,
2024-06-20,ethereum,3544.840023868359,433416267378.3272,14431531886.666199,,,,,,5499.990234375,2328.89990234375,1.0740677118301392,4.23799991607666
2024-06-21,ethereum,3501.4844259897077,428154107816.4788,15591212529.32492,,,,,,5466.77001953125,2331.199951171875,1.0742292404174805,4.223999977111816
2024-06-22,ethereum,3498.401912499916,427797791720.6712,10074881877.914883,,,,,,,,,
2024-06-23,ethereum,3471.25661843264,424559563614.4485,6991292722.007789,,,,,,,,,
2024-06-24,ethereum,3330.9914608412787,405609291184.5871,18078022125.09135,,,,,,5459.580078125,2323.300048828125,1.0751532316207886,4.275000095367432
2024-06-25,ethereum,3388.6552364887525,407290827369.29706,17480780190.891705,,,,,,5460.72998046875,2324.39990234375,1.0706409215927124,4.2170000076293945
2024-06-26,ethereum,3379.6384972292053,406200008420.7241,11068675554.45744,,,,,,5460.7099609375,2307.89990234375,1.0687757730484009,4.284999847412109
2024-06-27,ethereum,3422.9987958226375,411321369785.6611,11381676501.223251,,,,,,5473.58984375,2296.800048828125,1.0732723474502563,4.327000141143799
2024-06-28,ethereum,3409.9899950655326,410020829397.1302,12108738591.606865,,,,,,5488.47998046875,2325.39990234375,1.0709619522094727,4.303999900817871
2024-06-29,ethereum,3382.1846820729265,406488247465.7656,8118319341.85326,,,,,,,,,
2024-06-30,ethereum,3405.653616859288,409202194429.17535,6586293113.04436,74.519282,,,,,,,,
2024-07-01,ethereum,3466.7870078346746,416633403003.7347,10972345968.355255,,,161266.0,ethereum,16.81,5471.080078125,2323.800048828125,1.068010926246643,4.423999786376953
2024-07-02,ethereum,3425.9867093877297,411805426012.3614,9347179358.150301,,,,ethereum,13.69,5461.83984375,2330.699951171875,1.0708472728729248,4.441999912261963
2024-07-03,ethereum,3305.863374711538,397336350754.1616,12936936280.4328,,,,ethereum,18.49,5507.43994140625,2330.89990234375,1.0735257863998413,4.426000118255615
2024-07-04,ethereum,3141.9137606677323,377789953332.375,19762357071.82109,,,,ethereum,18.49,,,,
2024-07-05,ethereum,2942.873381316971,353947760154.63116,30371642360.497025,,,,ethereum,33.64,5537.91015625,2354.89990234375,1.0737102031707764,4.330999851226807
2024-07-06,ethereum,3026.0221393875127,363716353821.9705,14095786002.87394,,,,ethereum,17.64,,,,
2024-07-07,ethereum,2980.329795932314,358196417772.35425,10109122874.859922,,,,ethereum,12.959999999999999,,,,
2024-07-08,ethereum,2995.42249679924,359848515705.2204,18656672090.05734,,,,ethereum,22.09,5572.75,2381.699951171875,1.0748412609100342,4.306000232696533
2024-07-09,ethereum,3067.945362448234,368790204798.86127,17265940071.85487,,,,ethereum,18.49,5584.240234375,2363.10009765625,1.0793308019638062,4.300000190734863
2024-07-10,ethereum,3103.355851261978,373109735757.489,14135312863.462542,,,,ethereum,18.49,5591.259765625,2366.300048828125,1.082602620124817,4.2779998779296875
2024-07-11,ethereum,3120.2511152159645,375003912446.32135,14601771425.99495,,,,ethereum,20.25,5635.2099609375,2378.699951171875,1.0830950736999512,4.288000106811523
2024-07-12,ethereum,3106.665060608468,373479677453.21234,13272564100.61316,,,,ethereum,14.44,5590.759765625,2399.800048828125,1.0814785957336426,4.205999851226807
2024-07-13,ethereum,3156.7229997808245,379336539640.4357,9099839817.153015,,,,ethereum,13.69,,,,
2024-07-14,ethereum,3214.951666958399,386450506355.89264,9399920281.501238,,,,ethereum,14.44,,,,
2024-07-15,ethereum,3390.5558977346586,407705698434.75323,14682856047.608225,,,,ethereum,36.0,5638.16015625,2430.0,1.083329677581787,4.236999988555908
2024-07-16,ethereum,3439.2531522603167,413315389170.71234,20249821643.173527,,,,ethereum,33.64,5644.08984375,2427.39990234375,1.0872756242752075,4.175000190734863
2024-07-17,ethereum,3445.9301630097125,414204244128.5122,18316846571.47861,,,,ethereum,29.160000000000004,5610.06982421875,2472.89990234375,1.0885539054870605,4.178999900817871
2024-07-18,ethereum,3428.483712881156,412151252125.53094,14880766047.388323,,,,ethereum,27.04,5608.56005859375,2466.0,1.0901559591293335,4.183000087738037
2024-07-19,ethereum,3459.866194068627,415830197917.89233,15874038814.582893,,,,ethereum,26.01,5543.3701171875,2418.800048828125,1.0902509689331055,4.224999904632568
2024-07-20,ethereum,3506.166559401983,421506391873.4467,12819209095.177147,,,,ethereum,24.009999999999998,,,,
2024-07-21,ethereum,3505.198785199442,421479666822.44586,9734735125.637077,,,,ethereum,24.009999999999998,,,,
2024-07-22,ethereum,3478.5398389521833,418248333345.1597,17165553086.473969,,,,ethereum,43.56,5544.5400390625,2402.10009765625,1.094020128250122,4.224999904632568
2024-07-23,ethereum,3471.4537045282686,417282646136.00616,22247562224.91563,,,,ethereum,100.0,5565.2998046875,2395.800048828125,1.0901559591293335,4.236999988555908
2024-07-24,ethereum,3389.34554552627,407555827602.13794,19555399218.051266,,,,ethereum,53.29,5505.83984375,2421.0,1.0899182558059692,4.229000091552734
2024-07-25,ethereum,3166.8674273376732,380757485120.10834,23309182458.392136,,,,ethereum,49.0,5428.7001953125,2365.5,1.0889805555343628,4.205999851226807
2024-07-26,ethereum,3256.836248537211,391618277681.34503,18543581673.430332,,,,ethereum,32.489999999999995,5433.669921875,2368.699951171875,1.0850694179534912,4.244999885559082
2024-07-27,ethereum,3268.634820892746,393058948301.01434,12350215232.854982,,,,ethereum,26.01,,,,
2024-07-28,ethereum,3261.358165252211,392191417408.00104,11791845044.016218,,,,ethereum,22.09,,,,
2024-07-29,ethereum,3339.1396964471446,401594098625.7107,13845546119.727053,,,,ethereum,30.250000000000004,5476.5498046875,2377.300048828125,1.0839520692825317,4.163000106811523
2024-07-30,ethereum,3307.2248771443924,397741773052.1379,15364998924.01399,,,,ethereum,21.16,5478.72998046875,2380.89990234375,1.0851283073425293,4.173999786376953
2024-07-31,ethereum,3281.768911864367,394601659805.1198,14197119142.220865,99.00630000000001,,,ethereum,19.36,5505.58984375,2407.10009765625,1.0853756666183472,4.127999782562256
2024-08-01,ethereum,3164.2867044555733,380493843585.9209,19172882534.0517,,,,ethereum,45.0,5537.83984375,2446.699951171875,1.081946611404419,4.052999973297119
2024-08-02,ethereum,3067.1887722202587,368925501105.263,19531566013.812,,,,ethereum,37.309999999999995,5376.6298828125,2490.800048828125,1.0816072225570679,3.940000057220459
2024-08-03,ethereum,2946.763174245429,354325094002.6969,19461236907.28371,,,,ethereum,35.6,,,,
2024-08-04,ethereum,2911.61,350078479982.3696,13827666339.828363,,,,ethereum,40.42,,,,
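df_with_forecast.csv mixes daily market data (prices, GSPC, GC=F, EURUSD, TNX) with monthly macro series (CLI, CPI, Employment), so most cells are empty, and the id column appears twice in the header. A sketch of loading it with the gaps filled forward before modelling (a judgment call, not part of this commit):

    import pandas as pd

    df = pd.read_csv("tools/df_with_forecast.csv", index_col=0, parse_dates=True)
    df = df.ffill()  # carry monthly macro values and weekend-free market data forward
    print(df[["prices", "CPI"]].tail())
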
tools/stock_sentiment_evalutor.py
ADDED
@@ -0,0 +1,261 @@
from transformers import pipeline
from alpaca_trade_api import REST
import os
from dotenv import load_dotenv
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
from datetime import date, timedelta
from pydantic import BaseModel, Field
from langchain.tools import BaseTool
from typing import Optional, Type
from langchain.tools import StructuredTool


def sentimental_analysis_tools():

    class AlpacaNewsFetcher:
        """
        A class for fetching news articles related to a specific stock from the Alpaca API.

        Attributes:
        - api_key (str): Alpaca API key for authentication.
        - api_secret (str): Alpaca API secret for authentication.
        - rest_client (alpaca_trade_api.REST): Alpaca REST API client.
        """

        def __init__(self):
            """
            Initializes the AlpacaNewsFetcher object from environment variables.
            """
            load_dotenv()
            self.api_key = os.environ["ALPACA_API_KEY"]
            self.api_secret = os.environ["ALPACA_SECRET"]
            self.rest_client = REST(self.api_key, self.api_secret)

            # Number of news articles to fetch for the input stock ticker
            # (environment values are strings, so convert to int here)
            self.no_of_newsarticles_to_fetch = int(os.environ["NO_OF_NEWSARTICLES_TO_FETCH"])

            # Number of days to fetch news articles for
            self.no_of_days = int(os.environ["NO_OF_DAYS_TO_FETCH_NEWS_ARTICLES"])

        def fetch_news(self, stockticker):
            """
            Fetches news articles for a given stock symbol within a specified date range.

            Args:
            - stockticker (str): Stock symbol for which news articles are to be fetched (e.g., "AAPL").

            Returns:
            - list: A list of dictionaries containing relevant information for each news article.
            """

            # Date range for which to get the news: from no_of_days ago up to today
            start_date = date.today() - timedelta(days=self.no_of_days)
            end_date = date.today()

            news_articles = self.rest_client.get_news(stockticker, start_date, end_date, limit=self.no_of_newsarticles_to_fetch)
            formatted_news = []

            for article in news_articles:
                summary = article.summary
                title = article.headline
                timestamp = article.created_at

                relevant_info = {
                    'timestamp': timestamp,
                    'title': title,
                    'summary': summary
                }

                formatted_news.append(relevant_info)

            return formatted_news


    class NewsSentimentAnalysis:
        """
        A class for sentiment analysis of news articles using the Transformers library.

        Attributes:
        - classifier (pipeline): Sentiment analysis pipeline from Transformers.
        """

        def __init__(self):
            """
            Initializes the NewsSentimentAnalysis object.
            """
            self.classifier = pipeline('sentiment-analysis')

        def analyze_sentiment(self, news_article):
            """
            Analyzes the sentiment of a given news article.

            Args:
            - news_article (dict): Dictionary containing 'summary', 'title', and 'timestamp' keys.

            Returns:
            - dict: A dictionary containing the article fields plus the sentiment analysis result.
            """
            summary = news_article['summary']
            title = news_article['title']
            timestamp = news_article['timestamp']

            relevant_text = summary + title
            sentiment_result = self.classifier(relevant_text)

            analysis_result = {
                'timestamp': timestamp,
                'title': title,
                'summary': summary,
                'sentiment': sentiment_result
            }

            return analysis_result

        def plot_sentiment_graph(self, sentiment_analysis_result):
            """
            Plots a sentiment analysis graph.

            Args:
            - sentiment_analysis_result (list[dict]): Dictionaries containing 'Timestamp' and 'Sentiment' keys.

            Returns:
            - None: draws a pie chart of sentiment counts per day.
            """
            df = pd.DataFrame(sentiment_analysis_result)
            df['Timestamp'] = pd.to_datetime(df['Timestamp'])
            df['Date'] = df['Timestamp'].dt.date

            # Group by date, count sentiment values
            grouped = df.groupby(by='Date')['Sentiment'].value_counts()

            grouped.plot.pie()

        def get_dominant_sentiment(self, sentiment_analysis_result):
            """
            Returns the overall sentiment - negative, positive or neutral - depending on which label occurs most often.

            Args:
            - sentiment_analysis_result (list[dict]): Dictionaries containing 'Timestamp' and 'Sentiment' keys.

            Returns:
            - pd.Series: the dominant sentiment label and its count.
            """
            df = pd.DataFrame(sentiment_analysis_result)
            df['Timestamp'] = pd.to_datetime(df['Timestamp'])
            df['Date'] = df['Timestamp'].dt.date

            # Group by date, count sentiment values
            grouped = df.groupby(by='Date')['Sentiment'].value_counts()
            df = pd.DataFrame(list(grouped.items()), columns=['Sentiment', 'count'])
            df['date'] = df['Sentiment'].apply(lambda x: x[0])
            df['sentiment'] = df['Sentiment'].apply(lambda x: x[1])
            df.drop('Sentiment', axis=1, inplace=True)
            result = df.groupby('sentiment')['count'].sum().reset_index()

            # Determine the sentiment with the highest count
            dominant_sentiment = result.loc[result['count'].idxmax()]

            return dominant_sentiment


    # Function to get the stock sentiment
    def get_stock_sentiment(stockticker: str):

        # Initialize AlpacaNewsFetcher, a class for fetching news articles related to a specific stock from the Alpaca API
        news_fetcher = AlpacaNewsFetcher()

        # Fetch news (title, timestamp and summary) for the specified stock ticker
        news_data = news_fetcher.fetch_news(stockticker)

        # Initialize the NewsSentimentAnalysis object
        news_sentiment_analyzer = NewsSentimentAnalysis()
        analysis_result = []

        # 'news_data' is a list of news articles (each as a dictionary); analyze the sentiment of each one
        for article in news_data:
            sentiment_analysis_result = news_sentiment_analyzer.analyze_sentiment(article)

            # Display sentiment analysis results
            print(f'Timestamp: {sentiment_analysis_result["timestamp"]}, '
                  f'Title: {sentiment_analysis_result["title"]}, '
                  f'Summary: {sentiment_analysis_result["summary"]}')

            print(f'Sentiment: {sentiment_analysis_result["sentiment"]}', '\n')

            result = {
                'Timestamp': sentiment_analysis_result["timestamp"],
                'News- Title:Summary': sentiment_analysis_result["title"] + sentiment_analysis_result["summary"],
                'Sentiment': sentiment_analysis_result["sentiment"][0]['label']
            }
            analysis_result.append(result)

            # Extracting timestamp and sentiment of each article for graphing
            """ result_for_graph = {
                'Timestamp': sentiment_analysis_result["timestamp"],
                'Sentiment': sentiment_analysis_result["sentiment"][0]['label']
            }

            analysis_result.append(result_for_graph)
            """

        # Get dominant sentiment across all analyzed articles (not just the last one)
        dominant_sentiment = news_sentiment_analyzer.get_dominant_sentiment(analysis_result)

        # Build response string for news sentiment
        output_string = ""
        for result in analysis_result:
            output_string = output_string + f'{result["Timestamp"]} : {result["News- Title:Summary"]} : {result["Sentiment"]}' + '\n'

        final_result = {
            'Sentiment-analysis-result': output_string,
            'Dominant-sentiment': dominant_sentiment['sentiment']
        }

        return final_result


    class StockSentimentCheckInput(BaseModel):
        """Input for stock sentiment check."""
        stockticker: str = Field(..., description="Ticker symbol for stock or index")

    class StockSentimentAnalysisTool(BaseTool):
        name = "get_stock_sentiment"
        description = """Useful for finding the sentiment of a stock, based on published news articles.
                      Fetches a configured number of news items for the ticker,
                      determines the sentiment of each news item and then returns
                      the list of sentiment analysis results and the dominant sentiment of the news."""
        args_schema: Optional[Type[BaseModel]] = StockSentimentCheckInput

        def _run(self, stockticker: str):
            sentiment_response = get_stock_sentiment(stockticker)
            print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
            print(str(sentiment_response))
            print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")

            return sentiment_response

        def _arun(self, stockticker: str):
            raise NotImplementedError("This tool does not support async")


    tools_sentiment_analyst = [StructuredTool.from_function(
        func=get_stock_sentiment,  # pass the function itself, not the BaseTool class
        args_schema=StockSentimentCheckInput,
        description="Function to get stock sentiment.",
        )
    ]
    return tools_sentiment_analyst
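A hedged usage sketch, assuming the environment variables read above (ALPACA_API_KEY, ALPACA_SECRET, NO_OF_NEWSARTICLES_TO_FETCH, NO_OF_DAYS_TO_FETCH_NEWS_ARTICLES) are set:

    tools = sentimental_analysis_tools()
    result = tools[0].run({"stockticker": "AAPL"})
    print(result["Dominant-sentiment"])  # e.g. 'POSITIVE' with the default sentiment-analysis pipeline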