Asif Ahmad
committed on
Commit
·
0a6a1d7
1
Parent(s):
c98d29f
Create xgb-mining-model.py
Browse files- xgb-mining-model.py +161 -0
xgb-mining-model.py
ADDED
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# developer: Taoshidev
|
2 |
+
# Copyright © 2023 Taoshi, LLC
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
import tensorflow
|
6 |
+
from numpy import ndarray
|
7 |
+
import xgboost as xgb
|
8 |
+
|
9 |
+
|
10 |
+
class BaseMiningModel:
    """XGBoost-based one-step-ahead regressor for mining/price-series data.

    This class was originally a Keras LSTM model and was converted to XGBoost;
    the neural-network hyperparameters (``neurons``, ``window_size``,
    ``batch_size``, ``learning_rate``) and their setters are retained only for
    backward compatibility with existing callers and are not used by the
    XGBoost training path.
    """

    def __init__(self, features):
        # features: number of input features per sample (kept for
        # compatibility with the former LSTM implementation).
        self.neurons = [[50, 0]]      # legacy LSTM stack spec [units, dropout] (unused)
        self.features = features
        self.loaded_model = None      # xgb.Booster once load_model() is called
        self.window_size = 100        # legacy windowing parameter (unused by xgb path)
        self.model_dir = None         # path of the serialized model file
        self.batch_size = 16          # legacy NN hyperparameter (unused)
        self.learning_rate = 0.01     # legacy NN hyperparameter (unused)

    def set_neurons(self, neurons):
        """Set the (legacy) LSTM stack spec. Returns self for chaining."""
        self.neurons = neurons
        return self

    def set_window_size(self, window_size):
        """Set the (legacy) input window size. Returns self for chaining."""
        self.window_size = window_size
        return self

    def set_model_dir(self, model, stream_id=None):
        """Resolve the on-disk model path.

        Uses ``model`` directly when provided; otherwise derives the default
        path ``./mining_models/<stream_id>.model`` from ``stream_id``.
        Raises Exception when neither is provided. Returns self for chaining.
        """
        if model is None and stream_id is not None:
            self.model_dir = f'./mining_models/{stream_id}.model'
        elif model is not None:
            self.model_dir = model
        else:
            raise Exception("stream_id is not provided to define model")
        return self

    def set_batch_size(self, batch_size):
        """Set the (legacy) training batch size. Returns self for chaining."""
        self.batch_size = batch_size
        return self

    def set_learning_rate(self, learning_rate):
        """Set the (legacy) learning rate. Returns self for chaining."""
        self.learning_rate = learning_rate
        return self

    def load_model(self):
        """Load the serialized XGBoost model from self.model_dir for inference."""
        self.loaded_model = xgb.Booster()
        self.loaded_model.load_model(self.model_dir)
        return self

    # BUGFIX: the signature was `def train(self, data: ndarray)#, epochs: int = 100):`
    # — the `#` commented out the closing `):`, a syntax error.
    def train(self, data: ndarray):
        """Train an XGBoost regressor on ``data`` and save it to self.model_dir.

        ``data`` is a 2-D array whose first column is the target series; each
        row at index i is used as features to predict the target at index i+1
        (one-step-ahead supervision).
        """
        import os

        model = xgb.XGBRegressor(random_state=1)
        # BUGFIX: the previous code tried tensorflow.keras.models.load_model()
        # on an XGBoost model file, which can never produce a usable model;
        # load any existing model with the xgboost API instead.
        # NOTE(review): XGBRegressor.fit() retrains from scratch regardless,
        # so the warm-load only matters if continued training is added later.
        if self.model_dir is not None and os.path.exists(self.model_dir):
            model.load_model(self.model_dir)

        target = data[:, 0]  # first column holds the prediction target

        X_train, Y_train = [], []
        for i in range(len(data) - 1):
            X_train.append(data[i])
            Y_train.append(target[i + 1])

        # xgboost consumes numpy directly — no need for the old TF-tensor
        # conversion the Keras version used.
        X_train = np.array(X_train, dtype=np.float32)
        Y_train = np.array(Y_train, dtype=np.float32)

        model.fit(X_train, Y_train)
        # BUGFIX: XGBRegressor has no `save()`; the persistence API is save_model().
        model.save_model(self.model_dir)

    def predict(self, data: ndarray):
        """Predict the next target value from the most recent row of ``data``.

        Requires load_model() to have been called. Returns a list containing
        one prediction array of shape (1,).
        """
        predictions = []
        window_data = data[-1:]  # most recent observation only
        dtest = xgb.DMatrix(window_data)
        predicted_value = self.loaded_model.predict(dtest)
        predictions.append(predicted_value)
        return predictions

    @staticmethod
    def base_model_dataset(samples):
        """Build the (n_samples, 4) training matrix from raw sample rows.

        Rows of ``samples`` are assumed ordered [?, close, high, low, volume];
        row 0 is skipped — presumably timestamps, TODO confirm with caller.
        """
        min_cutoff = 0

        rows = samples.tolist()
        cutoff_close = rows[1][min_cutoff:]
        cutoff_high = rows[2][min_cutoff:]
        cutoff_low = rows[3][min_cutoff:]
        cutoff_volume = rows[4][min_cutoff:]

        return np.array([cutoff_close,
                         cutoff_high,
                         cutoff_low,
                         cutoff_volume]).T
|