HachiML committed on
Commit
cc07e3e
·
verified ·
1 Parent(s): 7f82313

Upload feature extractor

Browse files
feature_extraction_moment.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FeatureExtractorでの実施事項
2
+ # - 時系列データをdataframe, numpy array, torch tensorの状態からtorch tensor化
3
+ # - input validation
4
+
5
+ from typing import List, Optional, Union
6
+
7
+ from pandas import DataFrame
8
+ import numpy as np
9
+ import torch
10
+ import tensorflow as tf
11
+ import jax.numpy as jnp
12
+
13
+ from transformers import FeatureExtractionMixin
14
+ from transformers import TensorType
15
+ from transformers import BatchFeature
16
+ from transformers.utils import logging
17
+
18
+ logger = logging.get_logger(__name__)
19
+
20
+
21
class MomentFeatureExtractor(FeatureExtractionMixin):
    """Convert raw time series into the tensor layout consumed by the MOMENT model.

    Accepted inputs are a pandas ``DataFrame``, a NumPy array, a torch tensor,
    or a list of any of these. The result is a 3-D array laid out as
    ``(batch_size, n_channels, seq_len)`` whose last axis is truncated to at
    most ``max_seq_len`` entries.

    Args:
        max_seq_len: Maximum length kept along the last (sequence) axis.
            ``None`` disables truncation. Defaults to 512.
        **kwargs: Forwarded unchanged to ``FeatureExtractionMixin``.
    """

    # TODO: Ideally MOMENT's own tokenizer would also be wrapped here as
    # `ts_tokenizer`, but it is built into the model itself.
    # refers: https://github.com/moment-timeseries-foundation-model/moment/blob/088b253a1138ac7e48a7efc9bf902336c9eec8d9/momentfm/models/moment.py#L105

    # NOTE(review): "input_mask" is advertised here but __call__ never emits
    # it -- confirm whether the model builds the mask on its own.
    model_input_names = ["time_series_values", "input_mask"]

    def __init__(self, max_seq_len: Optional[int] = 512, **kwargs):
        super().__init__(**kwargs)
        self.max_seq_len = max_seq_len

    def __call__(
        self,
        time_series: Union[DataFrame, np.ndarray, torch.Tensor, List[DataFrame], List[np.ndarray], List[torch.Tensor]] = None,
        return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
        torch_dtype: Optional[Union[str, torch.dtype]] = torch.float,
    ) -> BatchFeature:
        """Build the model inputs for one or more time series.

        Args:
            time_series: A single series or a list of series. ``None`` yields
                a ``BatchFeature`` whose ``time_series_values`` is ``None``.
            return_tensors: Output framework: ``"pt"``, ``"np"``, ``"tf"`` or
                ``"jax"`` (or the matching ``TensorType`` member).
            torch_dtype: dtype used for the intermediate torch tensor, given
                either as a ``torch.dtype`` or as its name (``"float32"`` or
                ``"torch.float32"``).

        Returns:
            A ``BatchFeature`` with the single key ``"time_series_values"``.

        Raises:
            TypeError: If an input element has an unsupported type.
            ValueError: If the input is an empty list, has 0 or more than 3
                dimensions, or ``return_tensors`` / ``torch_dtype`` is not
                supported.
        """
        if time_series is None:
            time_series_values = None
        else:
            time_series_values = self._convert_time_series(time_series, return_tensors, torch_dtype)

        return BatchFeature(data={"time_series_values": time_series_values})

    @staticmethod
    def _resolve_torch_dtype(torch_dtype):
        """Return ``torch_dtype`` as a ``torch.dtype`` (``None`` is passed through)."""
        if torch_dtype is None or isinstance(torch_dtype, torch.dtype):
            return torch_dtype
        if isinstance(torch_dtype, str):
            prefix = "torch."
            name = torch_dtype[len(prefix):] if torch_dtype.startswith(prefix) else torch_dtype
            resolved = getattr(torch, name, None)
            # getattr can also find functions or modules; accept real dtypes only.
            if isinstance(resolved, torch.dtype):
                return resolved
        raise ValueError(f"Unsupported torch_dtype: {torch_dtype!r}")

    def _convert_time_series(self, time_series, return_tensors, torch_dtype):
        """Normalize the input to ``(batch, channels, seq_len)`` and convert it.

        Missing leading axes are added, the sequence axis is truncated to
        ``self.max_seq_len``, and the result is returned in the framework
        requested by ``return_tensors``.
        """
        dtype = self._resolve_torch_dtype(torch_dtype)

        if isinstance(time_series, list):
            if not time_series:
                raise ValueError("time_series must not be an empty list")
            # Convert every element, then stack them into a single tensor.
            tensor = torch.stack([self._convert_to_tensor(ts, dtype) for ts in time_series])
        else:
            tensor = self._convert_to_tensor(time_series, dtype)

        # Validate the rank and pad with leading axes up to 3 dimensions.
        n_dims = tensor.dim()
        if n_dims > 3:
            raise ValueError("time_series_tensor must not have more than 3 dimensions")
        if n_dims == 0:
            raise ValueError("time_series_tensor must have at least 1 dimension")
        for _ in range(3 - n_dims):
            tensor = tensor.unsqueeze(0)

        batch_size, n_channels, seq_len = tensor.shape
        logger.info(
            "Batch size: %d, Number of channels: %d, Sequence length: %d",
            batch_size,
            n_channels,
            seq_len,
        )

        # Keep only the first `max_seq_len` steps of the sequence axis.
        max_seq_len = self.max_seq_len
        if max_seq_len is not None and seq_len > max_seq_len:
            tensor = tensor[:, :, :max_seq_len]
            logger.info("Sequence length has been truncated to %d.", max_seq_len)

        # Convert to the framework requested through `return_tensors`.
        if return_tensors in ("pt", TensorType.PYTORCH):
            return tensor

        # detach()/cpu() make the NumPy export work for tensors that require
        # grad or live on an accelerator; both are no-ops otherwise.
        array = tensor.detach().cpu().numpy()
        if return_tensors in ("np", TensorType.NUMPY):
            return array
        if return_tensors in ("tf", TensorType.TENSORFLOW):
            return tf.convert_to_tensor(array)
        if return_tensors in ("jax", TensorType.JAX):
            return jnp.array(array)
        raise ValueError("Unsupported return_tensors type")

    def _convert_to_tensor(self, time_series, torch_dtype):
        """Convert one DataFrame / ndarray / tensor to a torch tensor.

        A DataFrame is transposed, so its columns become the leading axis and
        its rows the trailing one.

        Raises:
            TypeError: If ``time_series`` is none of the supported types.
        """
        dtype = self._resolve_torch_dtype(torch_dtype)

        if isinstance(time_series, DataFrame):
            return torch.tensor(time_series.values, dtype=dtype).t()
        if isinstance(time_series, np.ndarray):
            return torch.tensor(time_series, dtype=dtype)
        if isinstance(time_series, torch.Tensor):
            return time_series if dtype is None else time_series.to(dtype)

        raise TypeError(
            "time_series must be a pandas DataFrame, numpy.ndarray or torch.Tensor "
            f"(or a list of them), got {type(time_series).__name__}"
        )
preprocessor_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoFeatureExtractor": "feature_extraction_moment.MomentFeatureExtractor"
4
+ },
5
+ "feature_extractor_type": "MomentFeatureExtractor"
6
+ }