Spaces:

shubhayu64
/

fin

Build error

App Files Files Community

Shubhayu Majumdar committed on May 28, 2024

Commit

c133ff5

1 Parent(s): c7b4f60

unpaid intern fixes

Browse files

Files changed (4) hide show

app.py +122 -16
lstm.py +80 -0
preds.py +87 -0
requirements.txt +5 -0

app.py CHANGED Viewed

@@ -1,23 +1,33 @@
-from typing import List, Optional
 import yfinance as yf
 from datetime import date, timedelta
 import streamlit as st
 import pandas as pd
 import numpy as np
 from Models.datamodels import StockNameModel
-import matplotlib as mpl
 import matplotlib.pyplot as plt
-import matplotlib.style as style
 from matplotlib.dates import date2num, DateFormatter, WeekdayLocator,\
     DayLocator, MONDAY
 import seaborn as sns
 import mplfinance as mpf
 from mplfinance.original_flavor import candlestick_ohlc
 class Stonks:
     def __init__(self, stocks_filepath: str) -> None:
         # Classwise global variables
         self.stocks = None
         self.selected_stock = None
@@ -424,7 +434,25 @@ class Stonks:
         plt.xticks(rotation = 45)
         ax.legend()
         return fig
     def plot_obv_ema(self, data, title_txt: str):
         fig, ax = plt.subplots(figsize=(17, 8))
         plt.style.use('ggplot')
@@ -447,7 +475,7 @@ class Stonks:
         ax.set_ylabel('Price', fontsize = 15)
         ax.legend(loc = 'upper left')
         return fig
     def ui_renderer(self):
         st.title('Stonks 📈')
@@ -505,10 +533,12 @@ class Stonks:
             st.error("Error: No data found for selected stock.")
             st.stop()
-        # Download Stock data after fetched
-        st.sidebar.markdown("""---""")
-        # st.sidebar.button("Download Data", self.stock_df.to_csv(f"{self.selected_stock}_data.csv", index=False, header=True, encoding='utf-8-sig'))
-        st.sidebar.download_button(label="Download Data", data=self.stock_df.to_csv(index=False, header=True, encoding='utf-8-sig'), file_name=f"{self.selected_stock}_data.csv", mime='text/csv')
         st.dataframe(self.stock_df)
@@ -550,8 +580,7 @@ class Stonks:
             The moving average crossover trading strategy will be to take two moving averages - 20-day (fast) and 200-day (slow) - and to go long (buy) when the fast MA goes above the slow MA and to go short (sell) when the fast MA goes below the slow MA.
         """)
         temp_df = self.stock_df.copy()
         temp_df["20d"] = np.round(temp_df["Adj Close"].rolling(window = 20, center = False).mean(), 2)
         temp_df["50d"] = np.round(temp_df["Adj Close"].rolling(window = 50, center = False).mean(), 2)
@@ -597,14 +626,14 @@ class Stonks:
             Single Exponential Smoothing, also known as Simple Exponential Smoothing, is a time series forecasting method for univariate data without a trend or seasonality. It requires an alpha parameter, also called the smoothing factor or smoothing coefficient, to control the rate at which the influence of the observations at prior time steps decay exponentially.
         """)
-        st.pyplot(self.plot_exponential_smoothing(self.stock_df["Adj Close"], [0.3, 0.05], label_txt=f"{self.selected_stock}", title_txt=f"Single Exponential Smoothing for {self.selected_stock} stock using 0.05 and 0.3 as alpha values"))
         st.markdown("""
             The smaller the smoothing factor (coefficient), the smoother the time series will be. As the smoothing factor approaches 0, we approach the moving average model so the smoothing factor of 0.05 produces a smoother time series than 0.3. This indicates slow learning (past observations have a large influence on forecasts). A value close to 1 indicates fast learning (that is, only the most recent values influence the forecasts).
             **Double Exponential Smoothing (Holt’s Linear Trend Model)** is an extension being a recursive use of Exponential Smoothing twice where beta is the trend smoothing factor, and takes values between 0 and 1. It explicitly adds support for trends.
             """)
-        st.pyplot(self.plot_double_exponential_smoothing(self.stock_df["Adj Close"], alphas=[0.9, 0.02], betas=[0.9, 0.02], label_txt=f"{self.selected_stock}", title_txt=f"Double Exponential Smoothing for {self.selected_stock} stock with different alpha and beta values"))
         st.markdown("""
                 The third main type is Triple Exponential Smoothing (Holt Winters Method) which is an extension of Exponential Smoothing that explicitly adds support for seasonality, or periodic fluctuations.
@@ -809,6 +838,7 @@ class Stonks:
         temp_df = get_roc()
         st.set_option('deprecation.showPyplotGlobalUse', False)
         st.pyplot(mpf.plot(temp_df, type='candle',  style='yahoo', figsize=(15,8),  title=f"{self.selected_stock} Daily Price", volume=True))
         st.markdown("""
@@ -923,13 +953,89 @@ class Stonks:
         temp_df['Buy'], temp_df['Sell'] = buy_sell_obv(temp_df, 'OBV', 'OBV_EMA')
         st.pyplot(self.buy_sell_obv_plot(temp_df, title_txt=f"On Balance Volume Buy and Sell Signals for {self.selected_stock} stock"))
         st.markdown("""---""")
         st.markdown("""
             ## Conclusion
-            It is almost certainly better to choose technical indicators that complement each other, not just those that move in unison and generate the same signals. The intuition here is that the more indicators you have that confirm each other, the better your chances are to profit. This can be done by combining strategies to form a system, and looking for multiple signals.
-        """)
-stonks = Stonks(stocks_filepath="Models/stocknames.csv")
-stonks.ui_renderer()

+import logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s',  filename='logger.log')
+from typing import List
+from preds import Predictions
 import yfinance as yf
 from datetime import date, timedelta
 import streamlit as st
 import pandas as pd
 import numpy as np
 from Models.datamodels import StockNameModel
 import matplotlib.pyplot as plt
 from matplotlib.dates import date2num, DateFormatter, WeekdayLocator,\
     DayLocator, MONDAY
 import seaborn as sns
 import mplfinance as mpf
 from mplfinance.original_flavor import candlestick_ohlc
+from lstm import __lstm__
 class Stonks:
     def __init__(self, stocks_filepath: str) -> None:
+        logging.info("Initializing Stonks class")
         # Classwise global variables
         self.stocks = None
         self.selected_stock = None
         plt.xticks(rotation = 45)
         ax.legend()
         return fig
+    def plot_lstm_timefm_prediction(self, data, lstm_prediction):
+        fig, ax = plt.subplots(figsize=(17, 8))
+        sns.set_style('whitegrid')
+        ax.plot(data['Adj Close'], label='Actual Close', color='tab:blue', alpha=0.8)
+        ax.plot(data['pred_timesfm'], label='TimeFM Prediction', color='tab:red', alpha=0.8)
+        ax.plot(lstm_prediction, label="LSTM Prediction", color='tab:purple', alpha=0.8)
+        ax.set_title("LSTM and TimeFM Predictions vs Actual Close", fontsize=16)
+        ax.set_xlabel("Date", fontsize=14)
+        ax.set_ylabel("Price", fontsize=14)
+        ax.legend(fontsize=12)
+        ax.grid(True, linestyle='--', alpha=0.5)
+        plt.xticks(rotation=45)
+        plt.tight_layout()
+        return fig
     def plot_obv_ema(self, data, title_txt: str):
         fig, ax = plt.subplots(figsize=(17, 8))
         plt.style.use('ggplot')
         ax.set_ylabel('Price', fontsize = 15)
         ax.legend(loc = 'upper left')
         return fig
     def ui_renderer(self):
         st.title('Stonks 📈')
             st.error("Error: No data found for selected stock.")
             st.stop()
+        # # Download Stock data after fetched
+        # st.sidebar.markdown("""---""")
+        # # st.sidebar.button("Download Data", self.stock_df.to_csv(f"{self.selected_stock}_data.csv", index=False, header=True, encoding='utf-8-sig'))
+        # st.sidebar.download_button(label="Download Data", data=self.stock_df.to_csv(index=False, header=True, encoding='utf-8-sig'), file_name=f"{self.selected_stock}_data.csv", mime='text/csv')
         st.dataframe(self.stock_df)
             The moving average crossover trading strategy will be to take two moving averages - 20-day (fast) and 200-day (slow) - and to go long (buy) when the fast MA goes above the slow MA and to go short (sell) when the fast MA goes below the slow MA.
         """)
         temp_df = self.stock_df.copy()
         temp_df["20d"] = np.round(temp_df["Adj Close"].rolling(window = 20, center = False).mean(), 2)
         temp_df["50d"] = np.round(temp_df["Adj Close"].rolling(window = 50, center = False).mean(), 2)
             Single Exponential Smoothing, also known as Simple Exponential Smoothing, is a time series forecasting method for univariate data without a trend or seasonality. It requires an alpha parameter, also called the smoothing factor or smoothing coefficient, to control the rate at which the influence of the observations at prior time steps decay exponentially.
         """)
+        # st.pyplot(self.plot_exponential_smoothing(self.stock_df["Adj Close"], [0.3, 0.05], label_txt=f"{self.selected_stock}", title_txt=f"Single Exponential Smoothing for {self.selected_stock} stock using 0.05 and 0.3 as alpha values"))
         st.markdown("""
             The smaller the smoothing factor (coefficient), the smoother the time series will be. As the smoothing factor approaches 0, we approach the moving average model so the smoothing factor of 0.05 produces a smoother time series than 0.3. This indicates slow learning (past observations have a large influence on forecasts). A value close to 1 indicates fast learning (that is, only the most recent values influence the forecasts).
             **Double Exponential Smoothing (Holt’s Linear Trend Model)** is an extension being a recursive use of Exponential Smoothing twice where beta is the trend smoothing factor, and takes values between 0 and 1. It explicitly adds support for trends.
             """)
+        # st.pyplot(self.plot_double_exponential_smoothing(self.stock_df["Adj Close"], alphas=[0.9, 0.02], betas=[0.9, 0.02], label_txt=f"{self.selected_stock}", title_txt=f"Double Exponential Smoothing for {self.selected_stock} stock with different alpha and beta values"))
         st.markdown("""
                 The third main type is Triple Exponential Smoothing (Holt Winters Method) which is an extension of Exponential Smoothing that explicitly adds support for seasonality, or periodic fluctuations.
         temp_df = get_roc()
         st.set_option('deprecation.showPyplotGlobalUse', False)
+        temp_df.index = pd.to_datetime(temp_df.index)
         st.pyplot(mpf.plot(temp_df, type='candle',  style='yahoo', figsize=(15,8),  title=f"{self.selected_stock} Daily Price", volume=True))
         st.markdown("""
         temp_df['Buy'], temp_df['Sell'] = buy_sell_obv(temp_df, 'OBV', 'OBV_EMA')
         st.pyplot(self.buy_sell_obv_plot(temp_df, title_txt=f"On Balance Volume Buy and Sell Signals for {self.selected_stock} stock"))
+        st.markdown("""---""")
+        # Predictions start here
+        st.header("Predictions")
+        st.markdown("""
+                We used TimesFM (200M parameters) and LSTM (66K parameters) for stock price prediction, achieving strong alignment with actual data. TimesFM's zero-shot performance on diverse datasets approached state-of-the-art supervised models. Discrepancies noted are expected to reduce with increased parameter size and further training.
+        """)
+        st.subheader("TimesFM (Time Series Foundation Model)")
+        st.markdown("""
+                TimesFM (200M parameters) uses long output patches to reduce error accumulation, enabling accurate long-horizon forecasts. Trained on sequences with varying prediction horizons, it excels in zero-shot predictions across diverse datasets, effectively predicting stock price movements.
+                """)
+        # ------------------------------------- Times FM ---------------------------------
+        temp_df = self.stock_df.copy()
+        temp_df.reset_index(inplace=True)
+        temp_df.rename(columns={'index': 'Date'}, inplace=True)
+        if 'pred' not in st.session_state:
+            st.session_state.pred = Predictions()
+        if "pred" in st.session_state:
+            st.session_state.pred.data_preprocess(
+                data = temp_df,
+                target_colm="Adj Close",
+                date_colm="Date",
+            )
+            stock_preds = st.session_state.pred.predict()
+            temp_df["pred_timesfm"] = stock_preds
+        fig, ax = plt.subplots(figsize=(20, 10))
+        temp_df[["Adj Close", "pred_timesfm"]].plot(ax=ax)
+        ax.set_title(f"{self.selected_stock} Price vs TimesFM Predictions", fontsize=18)
+        ax.set_xlabel("Date", fontsize=14)
+        ax.set_ylabel("Price", fontsize=14)
+        ax.legend(["Actual Price", "Predicted Price"], loc="upper left", fontsize=12)
+        ax.grid(True, linestyle='--', alpha=0.7)
+        st.pyplot(fig)
+        # -------------------------------- LSTM ------------------------------------------
+        st.subheader("LSTM (Long Short Term Memory)")
+        st.markdown("""
+                LSTM, equipped with 66,000 parameters, effectively captures long-term dependencies in stock market data. Its recurrent architecture enables accurate prediction of stock price movements, making it a valuable tool for financial forecasting.
+                """)
+        lstm_predictions = __lstm__(temp_df)
+        fig, ax = plt.subplots(figsize=(20, 10))
+        temp_df[["Adj Close"]].plot(ax=ax)
+        ax.plot(lstm_predictions, label = "LSTM", alpha = 0.5)
+        ax.set_title(f"{self.selected_stock} Price vs LSTM Predictions", fontsize=18)
+        ax.set_xlabel("Date", fontsize=14)
+        ax.set_ylabel("Price", fontsize=14)
+        ax.legend(["Actual Price", "Predicted Price"], loc="upper left", fontsize=12)
+        ax.grid(True, linestyle='--', alpha=0.7)
+        st.pyplot(fig)
+        # -------------------------------- TimesFM + LSTM --------------------------------
+        st.subheader("Comparison: TimesFM vs. LSTM for Stock Price Prediction")
+        st.markdown("""
+                While TimesFM utilizes transformer-based architecture with 200M parameters and focuses on capturing complex temporal dependencies for accurate long-horizon forecasts, LSTM, with 66,000 parameters, leverages its recurrent structure to capture long-term dependencies in stock market data, offering effective prediction of price movements.
+            """)
+        st.pyplot(self.plot_lstm_timefm_prediction(data = temp_df, lstm_prediction = lstm_predictions))
+        # --------------------------------------------------------------------------------
         st.markdown("""---""")
         st.markdown("""
             ## Conclusion
+            In conclusion, the success of stock market analysis relies on combining complementary technical indicators rather than solely relying on uniform signals. This diversification increases the chance of profitable outcomes by forming a robust system. Our comparison between TimesFM and LSTM emphasizes the importance of selecting models based on specific analytical needs. While TimesFM captures complex temporal dependencies effectively, LSTM excels in capturing long-term patterns. By integrating these insights, investors can make more informed decisions and navigate financial markets with greater confidence.        """)
+if __name__ == "__main__":
+    stonks = Stonks(stocks_filepath="Models/stocknames.csv")
+    stonks.ui_renderer()

lstm.py ADDED Viewed

	@@ -0,0 +1,80 @@

+from sklearn.preprocessing import MinMaxScaler
+from keras.models import Sequential
+from keras.layers import Dense, LSTM
+import numpy as np
+def __lstm__(stock_df):
+    # fig, ax = plt.subplots(figsize=(20, 10))
+    # self.stock_df[["Adj Close", "pred_timesfm"]].plot(ax=ax)
+    # st.pyplot(fig)
+    # --------------------------------------- LSTM -----------------------------------
+    data = stock_df.filter(['Close'])
+    # Convert the dataframe to a numpy array
+    dataset = data.values
+    # Get the number of rows to train the model on
+    training_data_len = int(np.ceil( len(dataset) * .80 ))
+    scaler = MinMaxScaler(feature_range=(0,1))
+    scaled_data = scaler.fit_transform(dataset)
+    train_data = scaled_data[0:int(training_data_len), :]
+    x_train = []
+    y_train = []
+    for i in range(60, len(train_data)):
+        x_train.append(train_data[i - 60 : i, 0])
+        y_train.append(train_data[i, 0])
+    # Convert the x_train and y_train to numpy arrays
+    x_train, y_train = np.array(x_train), np.array(y_train)
+    # Reshape the data
+    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
+    # Build the LSTM model
+    model = Sequential()
+    # -> (B, 60, 1)
+    model.add(LSTM(128, return_sequences=True, input_shape= (x_train.shape[1], 1), use_bias = False))
+    # -> (B, 60, 128)
+    model.add(LSTM(64, return_sequences=False, use_bias = False))
+    # -> (B, 64)
+    model.add(Dense(25))
+    # -> (B, 25)
+    model.add(Dense(1))
+    # -> (B, 1)
+    # Compile the model
+    model.compile(optimizer='adam', loss='mean_squared_error')
+    # Train the model
+    model.fit(x_train, y_train, batch_size=1, epochs=1)
+    # Create the testing data set
+    test_data = scaled_data[: , :]
+    # Create the data sets x_test and y_test
+    x_test = []
+    y_test = dataset[:, :]
+    for i in range(60, len(test_data)):
+        x_test.append(test_data[i - 60:i, 0])
+    # Convert the data to a numpy array
+    x_test = np.array(x_test)
+    # Reshape the data
+    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))
+    # Get the models predicted price values
+    preds = model.predict(x_test)
+    preds = scaler.inverse_transform(preds)
+    print (preds.shape)
+    predictions = np.full((60,1), np.nan)
+    predictions = np.concatenate((predictions, preds), axis=0)
+    print(predictions.shape)
+    return predictions
+    # -------------------------------- timefm + LSTM ---------------------------------
+    # st.pyplot(plot_lstm_timefm_prediction(data = stock_df, lstm_prediction = predictions))

preds.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import pandas as pd
+import timesfm
+import logging
class Predictions:
    """Rolling-window TimesFM forecaster for a single value column.

    Usage: construct once (this loads the checkpoint), call
    `data_preprocess` with the frame to forecast, then call `predict`.
    """

    def __init__(self,
                 context_length: int = 512,
                 horizon_length: int = 14,
                 backend: str = "cpu",
                 checkpoint: str = "google/timesfm-1.0-200m",
                 ) -> None:
        """Create the TimesFM model and load its weights.

        Args:
            context_length: Max length of context used for predictions;
                capped at 512 here.
            horizon_length: Number of future steps predicted per call
                (recommended horizon length <= context length). Also used
                as the default step between rolling windows.
            backend: Backend handed to the model ("cpu", "gpu" or "tpu").
            checkpoint: Repo id handed to `load_from_checkpoint`.
        """
        logging.info("Initializing Predictions class")
        self.default_step_size = horizon_length

        # input_patch_len, output_patch_len, num_layers and model_dims are
        # fixed values needed to load the 200m model.
        self.tfm = timesfm.TimesFm(
            context_len=min(context_length, 512),
            horizon_len=horizon_length,
            input_patch_len=32,
            output_patch_len=128,
            num_layers=20,
            model_dims=1280,
            backend=backend,
        )

        logging.info("Loading model from checkpoint")
        self.tfm.load_from_checkpoint(repo_id=checkpoint)
        logging.info("Model loaded successfully")

    def data_preprocess(self, data: pd.DataFrame, target_colm: str, date_colm: str) -> None:
        """Store a working copy of `data` prepared for forecasting.

        Adds a datetime column "ds" parsed from `date_colm`, casts
        `target_colm` to float, and sets the default initial window to a
        tenth of the rows (at least one row, so the first forecast never
        runs on an empty window).
        """
        self.data = data.copy()
        self.target_colm = target_colm
        self.default_window_size = max(1, len(self.data) // 10)
        self.data["ds"] = pd.to_datetime(self.data[date_colm])
        self.data = self.data.astype({self.target_colm: float})

    def _iter_split(self, current_window: int, step_size: int):
        """Return the first `current_window` rows and the step clipped to the rows left."""
        rows_left = len(self.data) - current_window
        return self.data[:current_window], min(step_size, rows_left)

    def predict(self, intial_window_size: int = None, step: int = None, freq: str = "D"):
        """Forecast the target column with an expanding window.

        Starting with the first `intial_window_size` rows as context, the
        model is asked for a forecast, the first `step` forecast values are
        kept, the window grows by `step`, and the process repeats until
        the window covers the whole frame.

        Args:
            intial_window_size: Rows in the first context window; defaults
                to the value computed by `data_preprocess`.
            step: Rows forecast per iteration; defaults to the horizon
                length given at construction.
            freq: Frequency string forwarded to `forecast_on_df`.

        Returns:
            pd.Series of predictions indexed by the row positions they
            refer to (`window_size` .. `len(data) - 1`); empty when the
            initial window already covers the frame.

        Raises:
            ValueError: if the window or step is not positive, or if the
                model returns fewer values than the requested step.
        """
        window_size = intial_window_size or self.default_window_size
        step_size = step or self.default_step_size
        if window_size < 1 or step_size < 1:
            raise ValueError("window size and step must be positive integers")

        self.data["unique_id"] = 0
        total_rows = len(self.data)
        window = window_size
        batches = []

        while window < total_rows:
            logging.info("Predicting for window size: %s", window)
            current_window, take = self._iter_split(window, step_size)
            batch_pred = self.tfm.forecast_on_df(current_window, freq=freq, value_name=self.target_colm)['timesfm']
            if len(batch_pred) < take:
                raise ValueError(
                    f"model returned {len(batch_pred)} predictions but step is {take}; "
                    "use a step no larger than the horizon length"
                )
            # Keep only the values for the rows this step advances over, so
            # each prediction stays aligned with its row.
            batches.append(batch_pred.iloc[:take])
            window += take

        if not batches:
            return pd.Series(dtype=float)

        predictions = pd.concat(batches, ignore_index=True)
        predictions.index = pd.RangeIndex(window_size, window)
        return predictions

requirements.txt CHANGED Viewed

@@ -1,8 +1,13 @@
 matplotlib
 mplfinance
 numpy
 pandas
 pydantic
 seaborn
 streamlit
 yfinance

+keras
 matplotlib
 mplfinance
 numpy
 pandas
 pydantic
+scikit_learn
 seaborn
 streamlit
 yfinance
+jax[cpu]
+jaxlib
+git+https://github.com/google-research/timesfm.git