File size: 3,439 Bytes
c133ff5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import pandas as pd
import timesfm
import logging


class Predictions:
    def __init__(self, 
                 context_length: int = 512, 
                 horizon_length: int = 14, 
                 backend: str = "cpu", 
                 checkpoint: str = "google/timesfm-1.0-200m",
                 ) -> None:
        
        logging.info("Initializing Predictions class")

        self.default_step_size = horizon_length

        """
            Initialize timesfm model with the following parameters:
            context_len: The max length of context on which the predictions will be done | Currently supports a max of 512
            horizon_len: The number of future days for which the predictions will be made | reccomended horizon length <= context length
            input_patch_len: Fixed value to load 200m model
            output_patch_len: Fixed value to load 200m model
            num_layers: Fixed value to load 200m model
            model_dims: Fixed value to load 200m model
            backend: The backend to be used for the model | Currently supports "cpu", "gpu", "tpu"
        """
        self.tfm = timesfm.TimesFm(
            context_len = min(context_length, 512), 
            horizon_len=horizon_length,             
            input_patch_len=32,                     
            output_patch_len=128,                   
            num_layers=20,                          
            model_dims=1280,                        
            backend=backend,                        
        )
        
        logging.info("Loading model from checkpoint")
        self.tfm.load_from_checkpoint(repo_id=checkpoint)
        
        logging.info("Model loaded successfully")

    
    def data_preprocess(self, data: pd.DataFrame, target_colm: str, date_colm: str) -> None:
        
        self.data = data.copy()
        self.target_colm = target_colm
        self.default_window_size = len(self.data)//10
        
        self.data["ds"] = pd.to_datetime(self.data[date_colm])
        # self.data.drop(date_colm, axis=1, inplace=True)
        self.data = self.data.astype({self.target_colm: float})

    
    def _iter_split(self, current_window: int, step_size: int):
        window_data = self.data[:current_window]
        
        if current_window + step_size > len(self.data):
            step_size = len(self.data) - current_window
        
        return window_data, step_size


    def predict(self, intial_window_size: int = None, step: int = None, freq: str = "D"):        
        window_size = intial_window_size or self.default_window_size
        step_size = step or self.default_step_size
        
        # Run iterations and return a pd series of predictions
        self.data["unique_id"] = 0
        window = window_size
        predictions = pd.Series()
        
        while window < len(self.data):
            logging.info(f"Predicting for window size: {window}")
            current_window, step_size = self._iter_split(window, step_size)
            batch_pred = self.tfm.forecast_on_df(current_window, freq=freq, value_name=self.target_colm)['timesfm']
            predictions = pd.concat([predictions, batch_pred])
            window += step_size
        supp = len(predictions) - (window - window_size)
        predictions = predictions[:-supp]
        predictions.index = [i for i in range(window_size, window)]
        return predictions