{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install wandb tensorflow_probability tensorflow_addons" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.layers import Add, Dense, Dropout, Layer, LayerNormalization, MultiHeadAttention\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.initializers import TruncatedNormal\n", "from tensorflow.keras.metrics import MeanSquaredError, RootMeanSquaredError, MeanAbsoluteError, MeanAbsolutePercentageError\n", "from tensorflow_addons.metrics import RSquare\n", "\n", "import pandas as pd\n", "import tensorflow as tf\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plotting" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def plot_prediction(targets, predictions, max_subplots=3):\n", " plt.figure(figsize=(12, 15))\n", " max_n = min(max_subplots, len(targets))\n", " for n in range(max_n):\n", " # input\n", " plt.subplot(max_n, 1, n+1)\n", " plt.ylabel('Solar irradiance [kW-hr/m^2/day]', fontfamily=\"Arial\", fontsize=16)\n", " plt.plot(np.arange(targets.shape[1]-horizon), targets[n, :-horizon, 0, -1], label='Inputs', marker='.', zorder=-10)\n", "\n", " # real\n", " plt.scatter(np.arange(1, targets.shape[1]), targets[n, 1:, 0, -1], edgecolors='k', label='Targets', c='#2cb01d', s=64)\n", " \n", " # predicted\n", " plt.scatter(np.arange(1, targets.shape[1]), predictions[n, :, 0, -1], marker='X', edgecolors='k', label='Predictions', c='#fe7e0f', s=64)\n", "\n", " if n == 0:\n", " plt.legend()\n", "\n", " plt.xlabel('Time [day]', fontfamily=\"Arial\", fontsize=16)\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def patch_similarity_plot(pos):\n", " similarity_scores = np.dot(\n", " pos, np.transpose(pos)\n", " ) / (\n", " np.linalg.norm(pos, axis=-1)\n", " * np.linalg.norm(pos, axis=-1)\n", " )\n", "\n", " plt.figure(figsize=(7, 7), dpi=300)\n", " ax = sns.heatmap(similarity_scores, center=0)\n", " ax.set_title(\"Spatial Positional Embedding\", fontfamily=\"Arial\", fontsize=16)\n", " ax.set_xlabel(\"Patch\", fontfamily=\"Arial\", fontsize=16)\n", " ax.set_ylabel(\"Patch\", fontfamily=\"Arial\", fontsize=16)\n", " plt.show()\n", "\n", "def timestep_similarity_plot(pos):\n", " similarity_scores = np.dot(\n", " pos, np.transpose(pos)\n", " ) / (\n", " np.linalg.norm(pos, axis=-1)\n", " * np.linalg.norm(pos, axis=-1)\n", " )\n", "\n", " plt.figure(figsize=(7, 7), dpi=300)\n", " ax = sns.heatmap(similarity_scores, center=0)\n", " ax.set_title(\"Temporal Positional Embedding\", fontfamily=\"Arial\", fontsize=16)\n", " ax.set_xlabel(\"Timestep\", fontfamily=\"Arial\", fontsize=16)\n", " ax.set_ylabel(\"Timestep\", fontfamily=\"Arial\", fontsize=16)\n", " plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Layer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Normalization(tf.keras.layers.experimental.preprocessing.PreprocessingLayer):\n", " \"\"\"A preprocessing layer which normalizes continuous features.\n", " This layer will shift and scale inputs into a distribution centered around\n", " 0 with standard deviation 1. 
It accomplishes this by precomputing the mean\n", " and variance of the data, and calling `(input - mean) / sqrt(var)` at\n", " runtime.\n", " The mean and variance values for the layer must be either supplied on\n", " construction or learned via `adapt()`. `adapt()` will compute the mean and\n", " variance of the data and store them as the layer's weights. `adapt()` should\n", " be called before `fit()`, `evaluate()`, or `predict()`.\n", " For an overview and full list of preprocessing layers, see the preprocessing\n", " [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).\n", " Args:\n", " axis: Integer, tuple of integers, or None. The axis or axes that should\n", " have a separate mean and variance for each index in the shape. For\n", " example, if shape is `(None, 5)` and `axis=1`, the layer will track 5\n", " separate mean and variance values for the last axis. If `axis` is set\n", " to `None`, the layer will normalize all elements in the input by a\n", " scalar mean and variance. Defaults to -1, where the last axis of the\n", " input is assumed to be a feature dimension and is normalized per\n", " index. Note that in the specific case of batched scalar inputs where\n", " the only axis is the batch axis, the default will normalize each index\n", " in the batch separately. In this case, consider passing `axis=None`.\n", " mean: The mean value(s) to use during normalization. The passed value(s)\n", " will be broadcast to the shape of the kept axes above; if the value(s)\n", " cannot be broadcast, an error will be raised when this layer's\n", " `build()` method is called.\n", " variance: The variance value(s) to use during normalization. The passed\n", " value(s) will be broadcast to the shape of the kept axes above; if the\n", " value(s) cannot be broadcast, an error will be raised when this\n", " layer's `build()` method is called.\n", " invert: If True, this layer will apply the inverse transformation\n", " to its inputs: it would turn a normalized input back into its\n", " original form.\n", " Examples:\n", " Calculate a global mean and variance by analyzing the dataset in `adapt()`.\n", " >>> adapt_data = np.array([1., 2., 3., 4., 5.], dtype='float32')\n", " >>> input_data = np.array([1., 2., 3.], dtype='float32')\n", " >>> layer = tf.keras.layers.Normalization(axis=None)\n", " >>> layer.adapt(adapt_data)\n", " >>> layer(input_data)\n", " \n", " Calculate a mean and variance for each index on the last axis.\n", " >>> adapt_data = np.array([[0., 7., 4.],\n", " ... [2., 9., 6.],\n", " ... [0., 7., 4.],\n", " ... [2., 9., 6.]], dtype='float32')\n", " >>> input_data = np.array([[0., 7., 4.]], dtype='float32')\n", " >>> layer = tf.keras.layers.Normalization(axis=-1)\n", " >>> layer.adapt(adapt_data)\n", " >>> layer(input_data)\n", " \n", " Pass the mean and variance directly.\n", " >>> input_data = np.array([[1.], [2.], [3.]], dtype='float32')\n", " >>> layer = tf.keras.layers.Normalization(mean=3., variance=2.)\n", " >>> layer(input_data)\n", " \n", " Use the layer to de-normalize inputs (after adapting the layer).\n", " >>> adapt_data = np.array([[0., 7., 4.],\n", " ... [2., 9., 6.],\n", " ... [0., 7., 4.],\n", " ... 
[2., 9., 6.]], dtype='float32')\n", " >>> input_data = np.array([[1., 2., 3.]], dtype='float32')\n", " >>> layer = tf.keras.layers.Normalization(axis=-1, invert=True)\n", " >>> layer.adapt(adapt_data)\n", " >>> layer(input_data)\n", " \n", " \"\"\"\n", "\n", " def __init__(\n", " self, axis=-1, mean=None, variance=None, invert=False, **kwargs\n", " ):\n", " super().__init__(**kwargs)\n", "\n", " # Standardize `axis` to a tuple.\n", " if axis is None:\n", " axis = ()\n", " elif isinstance(axis, int):\n", " axis = (axis,)\n", " else:\n", " axis = tuple(axis)\n", " self.axis = axis\n", "\n", " # Set `mean` and `variance` if passed.\n", " if isinstance(mean, tf.Variable):\n", " raise ValueError(\n", " \"Normalization does not support passing a Variable \"\n", " \"for the `mean` init arg.\"\n", " )\n", " if isinstance(variance, tf.Variable):\n", " raise ValueError(\n", " \"Normalization does not support passing a Variable \"\n", " \"for the `variance` init arg.\"\n", " )\n", " if (mean is not None) != (variance is not None):\n", " raise ValueError(\n", " \"When setting values directly, both `mean` and `variance` \"\n", " \"must be set. Got mean: {} and variance: {}\".format(\n", " mean, variance\n", " )\n", " )\n", " self.input_mean = mean\n", " self.input_variance = variance\n", " self.invert = invert\n", "\n", " def build(self, input_shape):\n", " super().build(input_shape)\n", "\n", " if isinstance(input_shape, (list, tuple)) and all(\n", " isinstance(shape, tf.TensorShape) for shape in input_shape\n", " ):\n", " raise ValueError(\n", " \"Normalization only accepts a single input. If you are \"\n", " \"passing a python list or tuple as a single input, \"\n", " \"please convert to a numpy array or `tf.Tensor`.\"\n", " )\n", "\n", " input_shape = tf.TensorShape(input_shape).as_list()\n", " ndim = len(input_shape)\n", "\n", " if any(a < -ndim or a >= ndim for a in self.axis):\n", " raise ValueError(\n", " \"All `axis` values must be in the range [-ndim, ndim). \"\n", " \"Found ndim: `{}`, axis: {}\".format(ndim, self.axis)\n", " )\n", "\n", " # Axes to be kept, replacing negative values with positive equivalents.\n", " # Sorted to avoid transposing axes.\n", " self._keep_axis = sorted([d if d >= 0 else d + ndim for d in self.axis])\n", " # All axes to be kept should have known shape.\n", " for d in self._keep_axis:\n", " if input_shape[d] is None:\n", " raise ValueError(\n", " \"All `axis` values to be kept must have known shape. 
\"\n", " \"Got axis: {}, \"\n", " \"input shape: {}, with unknown axis at index: {}\".format(\n", " self.axis, input_shape, d\n", " )\n", " )\n", " # Axes to be reduced.\n", " self._reduce_axis = [d for d in range(ndim) if d not in self._keep_axis]\n", " # 1 if an axis should be reduced, 0 otherwise.\n", " self._reduce_axis_mask = [\n", " 0 if d in self._keep_axis else 1 for d in range(ndim)\n", " ]\n", " # Broadcast any reduced axes.\n", " self._broadcast_shape = [\n", " input_shape[d] if d in self._keep_axis else 1 for d in range(ndim)\n", " ]\n", " mean_and_var_shape = tuple(input_shape[d] for d in self._keep_axis)\n", "\n", " if self.input_mean is None:\n", " self.adapt_mean = self.add_weight(\n", " name=\"mean\",\n", " shape=mean_and_var_shape,\n", " dtype=self.compute_dtype,\n", " initializer=\"zeros\",\n", " trainable=False,\n", " )\n", " self.adapt_variance = self.add_weight(\n", " name=\"variance\",\n", " shape=mean_and_var_shape,\n", " dtype=self.compute_dtype,\n", " initializer=\"ones\",\n", " trainable=False,\n", " )\n", " self.count = self.add_weight(\n", " name=\"count\",\n", " shape=(),\n", " dtype=tf.int64,\n", " initializer=\"zeros\",\n", " trainable=False,\n", " )\n", " self.finalize_state()\n", " else:\n", " # In the no adapt case, make constant tensors for mean and variance\n", " # with proper broadcast shape for use during call.\n", " mean = self.input_mean * np.ones(mean_and_var_shape)\n", " variance = self.input_variance * np.ones(mean_and_var_shape)\n", " mean = tf.reshape(mean, self._broadcast_shape)\n", " variance = tf.reshape(variance, self._broadcast_shape)\n", " self.mean = tf.cast(mean, self.compute_dtype)\n", " self.variance = tf.cast(variance, self.compute_dtype)\n", "\n", " # We override this method solely to generate a docstring.\n", " def adapt(self, data, batch_size=None, steps=None):\n", " \"\"\"Computes the mean and variance of values in a dataset.\n", " Calling `adapt()` on a `Normalization` layer is an alternative to\n", " passing in `mean` and `variance` arguments during layer construction. A\n", " `Normalization` layer should always either be adapted over a dataset or\n", " passed `mean` and `variance`.\n", " During `adapt()`, the layer will compute a `mean` and `variance`\n", " separately for each position in each axis specified by the `axis`\n", " argument. To calculate a single `mean` and `variance` over the input\n", " data, simply pass `axis=None`.\n", " In order to make `Normalization` efficient in any distribution context,\n", " the computed mean and variance are kept static with respect to any\n", " compiled `tf.Graph`s that call the layer. As a consequence, if the layer\n", " is adapted a second time, any models using the layer should be\n", " re-compiled. For more information see\n", " `tf.keras.layers.experimental.preprocessing.PreprocessingLayer.adapt`.\n", " `adapt()` is meant only as a single machine utility to compute layer\n", " state. To analyze a dataset that cannot fit on a single machine, see\n", " [Tensorflow Transform](\n", " https://www.tensorflow.org/tfx/transform/get_started)\n", " for a multi-machine, map-reduce solution.\n", " Arguments:\n", " data: The data to train on. 
It can be passed either as a\n", " `tf.data.Dataset`, or as a numpy array.\n", " batch_size: Integer or `None`.\n", " Number of samples per state update.\n", " If unspecified, `batch_size` will default to 32.\n", " Do not specify the `batch_size` if your data is in the\n", " form of datasets, generators, or `keras.utils.Sequence` instances\n", " (since they generate batches).\n", " steps: Integer or `None`.\n", " Total number of steps (batches of samples)\n", " When training with input tensors such as\n", " TensorFlow data tensors, the default `None` is equal to\n", " the number of samples in your dataset divided by\n", " the batch size, or 1 if that cannot be determined. If x is a\n", " `tf.data` dataset, and 'steps' is None, the epoch will run until\n", " the input dataset is exhausted. When passing an infinitely\n", " repeating dataset, you must specify the `steps` argument. This\n", " argument is not supported with array inputs.\n", " \"\"\"\n", " super().adapt(data, batch_size=batch_size, steps=steps)\n", "\n", " def update_state(self, data):\n", " if self.input_mean is not None:\n", " raise ValueError(\n", " \"Cannot `adapt` a Normalization layer that is initialized with \"\n", " \"static `mean` and `variance`, \"\n", " \"you passed mean {} and variance {}.\".format(\n", " self.input_mean, self.input_variance\n", " )\n", " )\n", "\n", " if not self.built:\n", " raise RuntimeError(\"`build` must be called before `update_state`.\")\n", "\n", " data = self._standardize_inputs(data)\n", " data = tf.cast(data, self.adapt_mean.dtype)\n", " batch_mean, batch_variance = tf.nn.moments(data, axes=self._reduce_axis)\n", " batch_shape = tf.shape(data, out_type=self.count.dtype)\n", " if self._reduce_axis:\n", " batch_reduce_shape = tf.gather(batch_shape, self._reduce_axis)\n", " batch_count = tf.reduce_prod(batch_reduce_shape)\n", " else:\n", " batch_count = 1\n", "\n", " total_count = batch_count + self.count\n", " batch_weight = tf.cast(batch_count, dtype=self.compute_dtype) / tf.cast(\n", " total_count, dtype=self.compute_dtype\n", " )\n", " existing_weight = 1.0 - batch_weight\n", "\n", " total_mean = (\n", " self.adapt_mean * existing_weight + batch_mean * batch_weight\n", " )\n", " # The variance is computed using the lack-of-fit sum of squares\n", " # formula (see\n", " # https://en.wikipedia.org/wiki/Lack-of-fit_sum_of_squares).\n", " total_variance = (\n", " self.adapt_variance + (self.adapt_mean - total_mean) ** 2\n", " ) * existing_weight + (\n", " batch_variance + (batch_mean - total_mean) ** 2\n", " ) * batch_weight\n", " self.adapt_mean.assign(total_mean)\n", " self.adapt_variance.assign(total_variance)\n", " self.count.assign(total_count)\n", "\n", " def reset_state(self):\n", " if self.input_mean is not None or not self.built:\n", " return\n", "\n", " self.adapt_mean.assign(tf.zeros_like(self.adapt_mean))\n", " self.adapt_variance.assign(tf.ones_like(self.adapt_variance))\n", " self.count.assign(tf.zeros_like(self.count))\n", "\n", " def finalize_state(self):\n", " if self.input_mean is not None or not self.built:\n", " return\n", "\n", " # In the adapt case, we make constant tensors for mean and variance with\n", " # proper broadcast shape and dtype each time `finalize_state` is called.\n", " self.mean = tf.reshape(self.adapt_mean, self._broadcast_shape)\n", " self.mean = tf.cast(self.mean, self.compute_dtype)\n", " self.variance = tf.reshape(self.adapt_variance, self._broadcast_shape)\n", " self.variance = tf.cast(self.variance, self.compute_dtype)\n", "\n", " def call(self, 
inputs):\n", "        inputs = self._standardize_inputs(inputs)\n", "        # The base layer automatically casts floating-point inputs, but we\n", "        # explicitly cast here to also allow integer inputs to be passed\n", "        inputs = tf.cast(inputs, self.compute_dtype)\n", "        if self.invert:\n", "            # Inverse transform: scale by the standard deviation first, then\n", "            # shift by the mean (the reverse of `(x - mean) / std`).\n", "            return self.mean + inputs * tf.maximum(\n", "                tf.sqrt(self.variance), tf.keras.backend.epsilon()\n", "            )\n", "        else:\n", "            return (inputs - self.mean) / tf.maximum(\n", "                tf.sqrt(self.variance), tf.keras.backend.epsilon()\n", "            )\n", "\n", "    def compute_output_shape(self, input_shape):\n", "        return input_shape\n", "\n", "    def compute_output_signature(self, input_spec):\n", "        return input_spec\n", "\n", "    def get_config(self):\n", "        config = super().get_config()\n", "        # Convert `mean`/`variance` to plain Python lists so the config stays\n", "        # serializable.\n", "        config.update(\n", "            {\n", "                \"axis\": self.axis,\n", "                \"mean\": np.asarray(self.input_mean).tolist() if self.input_mean is not None else None,\n", "                \"variance\": np.asarray(self.input_variance).tolist() if self.input_variance is not None else None,\n", "            }\n", "        )\n", "        return config\n", "\n", "    def _standardize_inputs(self, inputs):\n", "        inputs = tf.convert_to_tensor(inputs)\n", "        if inputs.dtype != self.compute_dtype:\n", "            inputs = tf.cast(inputs, self.compute_dtype)\n", "        return inputs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class PositionalEmbedding(Layer):\n", "    def __init__(self, units, dropout_rate, **kwargs):\n", "        super(PositionalEmbedding, self).__init__(**kwargs)\n", "\n", "        self.units = units\n", "\n", "        self.projection = Dense(units, kernel_initializer=TruncatedNormal(stddev=0.02))\n", "        self.dropout = Dropout(rate=dropout_rate)\n", "\n", "    def build(self, input_shape):\n", "        super(PositionalEmbedding, self).build(input_shape)\n", "\n", "        print(\"pos_embedding: \", input_shape)\n", "        self.temporal_position = self.add_weight(\n", "            name=\"temporal_position\",\n", "            shape=(1, input_shape[1], 1, self.units),\n", "            initializer=TruncatedNormal(stddev=0.02),\n", "            trainable=True,\n", "        )\n", "        self.spatial_position = self.add_weight(\n", "            name=\"spatial_position\",\n", "            shape=(1, 1, input_shape[2], self.units),\n", "            initializer=TruncatedNormal(stddev=0.02),\n", "            trainable=True,\n", "        )\n", "\n", "    def call(self, inputs, training):\n", "        # Project the raw features to the embedding dimension and add learned\n", "        # temporal (per-timestep) and spatial (per-patch) position embeddings.\n", "        x = self.projection(inputs)\n", "        x += self.temporal_position\n", "        x += self.spatial_position\n", "\n", "        return self.dropout(x, training=training)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Encoder(Layer):\n", "    def __init__(\n", "        self, embed_dim, mlp_dim, num_heads, dropout_rate, attention_dropout_rate, **kwargs\n", "    ):\n", "        super(Encoder, self).__init__(**kwargs)\n", "\n", "        # Multi-head Attention\n", "        self.mha = MultiHeadAttention(\n", "            num_heads=num_heads,\n", "            key_dim=embed_dim,\n", "            dropout=attention_dropout_rate,\n", "            kernel_initializer=TruncatedNormal(stddev=0.02),\n", "            attention_axes=(1, 2),  # 2D attention (timestep, patch)\n", "        )\n", "\n", "        # Point-wise feed-forward network\n", "        self.dense_0 = Dense(\n", "            units=mlp_dim,\n", "            activation=\"gelu\",\n", "            kernel_initializer=TruncatedNormal(stddev=0.02),\n", "        )\n", "        self.dense_1 = Dense(\n", "            units=embed_dim, kernel_initializer=TruncatedNormal(stddev=0.02)\n", "        )\n", "\n", "        self.dropout_0 = Dropout(rate=dropout_rate)\n", "        self.dropout_1 = Dropout(rate=dropout_rate)\n", "\n", "        self.norm_0 = LayerNormalization(epsilon=1e-12)\n", "        self.norm_1 = 
LayerNormalization(epsilon=1e-12)\n", "\n", " self.add_0 = Add()\n", " self.add_1 = Add()\n", "\n", " def call(self, inputs, training):\n", " # Attention block\n", " x = self.norm_0(inputs)\n", " x = self.mha(\n", " query=x,\n", " key=x,\n", " value=x,\n", " training=training,\n", " )\n", " x = self.dropout_0(x, training=training)\n", " x = self.add_0([x, inputs])\n", "\n", " # MLP block\n", " y = self.norm_1(x)\n", " y = self.dense_0(y)\n", " y = self.dense_1(y)\n", " y = self.dropout_1(y, training=training)\n", "\n", " return self.add_1([x, y])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Decoder(Layer):\n", " def __init__(\n", " self, embed_dim, mlp_dim, num_heads, dropout_rate, attention_dropout_rate, **kwargs\n", " ):\n", " super(Decoder, self).__init__(**kwargs)\n", "\n", " # MultiHeadAttention\n", " self.mha_0 = MultiHeadAttention(\n", " num_heads=num_heads,\n", " key_dim=embed_dim,\n", " dropout=attention_dropout_rate,\n", " kernel_initializer=TruncatedNormal(stddev=0.02),\n", " attention_axes=(1, 2), # 2D attention (timestep, patch)\n", " )\n", " self.mha_1 = MultiHeadAttention(\n", " num_heads=num_heads,\n", " key_dim=embed_dim,\n", " dropout=attention_dropout_rate,\n", " kernel_initializer=TruncatedNormal(stddev=0.02),\n", " attention_axes=(1, 2), # 2D attention (timestep, patch)\n", " )\n", "\n", " # Point wise feed forward network\n", " self.dense_0 = Dense(\n", " units=mlp_dim,\n", " activation=\"gelu\",\n", " kernel_initializer=TruncatedNormal(stddev=0.02),\n", " )\n", " self.dense_1 = Dense(\n", " units=embed_dim, kernel_initializer=TruncatedNormal(stddev=0.02)\n", " )\n", "\n", " self.dropout_0 = Dropout(rate=dropout_rate)\n", " self.dropout_1 = Dropout(rate=dropout_rate)\n", " self.dropout_2 = Dropout(rate=dropout_rate)\n", "\n", " self.norm_0 = LayerNormalization(epsilon=1e-12)\n", " self.norm_1 = LayerNormalization(epsilon=1e-12)\n", " self.norm_2 = LayerNormalization(epsilon=1e-12)\n", "\n", " self.add_0 = Add()\n", " self.add_1 = Add()\n", " self.add_2 = Add()\n", "\n", " def call(self, inputs, enc_output, training):\n", " # Attention block\n", " x = self.norm_0(inputs)\n", " x = self.mha_0(\n", " query=x,\n", " key=x,\n", " value=x,\n", " training=training,\n", " )\n", " x = self.dropout_0(x, training=training)\n", " x = self.add_0([x, inputs])\n", "\n", " # Attention block\n", " y = self.norm_1(x)\n", " y = self.mha_1(\n", " query=y,\n", " key=enc_output,\n", " value=enc_output,\n", " training=training,\n", " )\n", " y = self.dropout_1(y, training=training)\n", " y = self.add_1([x, y])\n", "\n", " # MLP block\n", " z = self.norm_2(y)\n", " z = self.dense_0(z)\n", " z = self.dense_1(z)\n", " z = self.dropout_2(z, training=training)\n", "\n", " return self.add_2([y, z])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Transformer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class DailyTransformer(Model):\n", " def __init__(\n", " self,\n", " num_encoder_layers,\n", " num_decoder_layers,\n", " embed_dim,\n", " mlp_dim,\n", " num_heads,\n", " num_outputs,\n", " dropout_rate,\n", " attention_dropout_rate,\n", " **kwargs\n", " ):\n", " super(DailyTransformer, self).__init__(**kwargs)\n", "\n", " # Input (normalization of RAW measurements)\n", " self.input_norm_enc = Normalization(invert=False)\n", " self.input_norm_dec1 = Normalization(invert=False)\n", " self.input_norm_dec2 
= Normalization(invert=True)\n", "\n", "        # Input\n", "        self.pos_embs_0 = PositionalEmbedding(embed_dim, dropout_rate)\n", "        self.pos_embs_1 = PositionalEmbedding(embed_dim, dropout_rate)\n", "\n", "        # Encoder\n", "        self.enc_layers = [\n", "            Encoder(embed_dim, mlp_dim, num_heads, dropout_rate, attention_dropout_rate)\n", "            for _ in range(num_encoder_layers)\n", "        ]\n", "        self.norm_0 = LayerNormalization(epsilon=1e-12)\n", "\n", "        # Decoder\n", "        self.dec_layers = [\n", "            Decoder(embed_dim, mlp_dim, num_heads, dropout_rate, attention_dropout_rate)\n", "            for _ in range(num_decoder_layers)\n", "        ]\n", "        self.norm_1 = LayerNormalization(epsilon=1e-12)\n", "\n", "        # Output\n", "        self.final_layer = Dense(\n", "            units=num_outputs,\n", "            kernel_initializer=TruncatedNormal(stddev=0.02),\n", "        )\n", "\n", "    def call(self, inputs, training):\n", "        inputs, targets = inputs\n", "\n", "        # Encoder input\n", "        x_e = self.input_norm_enc(inputs)\n", "        x_e = self.pos_embs_0(x_e, training=training)\n", "\n", "        # Encoder\n", "        for layer in self.enc_layers:\n", "            x_e = layer(x_e, training=training)\n", "        x_e = self.norm_0(x_e)\n", "\n", "        # Decoder input\n", "        x_d = self.input_norm_dec1(targets)\n", "        x_d = self.pos_embs_1(x_d, training=training)\n", "\n", "        # Decoder\n", "        for layer in self.dec_layers:\n", "            x_d = layer(x_d, x_e, training=training)\n", "        x_d = self.norm_1(x_d)\n", "\n", "        # Output\n", "        final_output = self.final_layer(x_d)\n", "        final_output = self.input_norm_dec2(final_output)\n", "\n", "        return final_output\n", "\n", "    def train_step(self, inputs):\n", "        inputs, targets = inputs\n", "        # Teacher forcing: the decoder is fed the target sequence without its\n", "        # last step; the labels are the irradiance channel shifted one step ahead.\n", "        inputs = inputs[:, :-1]\n", "        targets_inputs = targets[:, :-1]\n", "        targets_real = targets[:, 1:, :, -1:]\n", "\n", "        with tf.GradientTape() as tape:\n", "            y_pred = self([inputs, targets_inputs], training=True)\n", "            loss = self.compiled_loss(targets_real, y_pred, regularization_losses=self.losses)\n", "\n", "        # Compute gradients\n", "        trainable_vars = self.trainable_variables\n", "        gradients = tape.gradient(loss, trainable_vars)\n", "\n", "        # Update weights\n", "        self.optimizer.apply_gradients(zip(gradients, trainable_vars))\n", "\n", "        # Update metrics (includes the metric that tracks the loss)\n", "        self.compiled_metrics.update_state(targets_real[:, -1], y_pred[:, -1])\n", "\n", "        # Return a dict mapping metric names to current value\n", "        return {m.name: m.result() for m in self.metrics}\n", "\n", "    def test_step(self, inputs):\n", "        inputs, targets = inputs\n", "        inputs = inputs[:, :-1]\n", "        targets_inputs = targets[:, :-1]\n", "        targets_real = targets[:, 1:, :, -1:]\n", "\n", "        # Compute predictions\n", "        y_pred = self([inputs, targets_inputs], training=False)\n", "\n", "        # Updates the metrics tracking the loss\n", "        self.compiled_loss(targets_real, y_pred, regularization_losses=self.losses)\n", "\n", "        # Update the metrics\n", "        self.compiled_metrics.update_state(targets_real[:, -1], y_pred[:, -1])\n", "\n", "        # Return a dict mapping metric names to current value\n", "        # Note that it will include the loss (tracked in self.metrics)\n", "        return {m.name: m.result() for m in self.metrics}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Simulator" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Simulator(tf.Module):\n", "    def __init__(self, transformer):\n", "        super().__init__()\n", "        self.transformer = transformer\n", "        self.pi = tf.constant(np.pi)\n", "\n", "    def __call__(self, inputs, horizon_length):\n",
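"        # Autoregressive rollout: each iteration appends the latest one-day\n", "        # prediction (with freshly recomputed day-of-year sin/cos features) to\n", "        # the decoder input, so the next day is predicted from the known history\n", "        # plus the days already predicted.\n",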
" inputs, targets = inputs\n", " output_array = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)\n", "\n", " for i in tf.range(horizon_length):\n", " tar = targets[:, i:]\n", " #print(\"target_old:\", tar[0])\n", " \n", " # Concatenate history with the predicted future\n", " if i > 0:\n", " output = tf.transpose(output_array.stack(), perm=[1, 0, 2, 3])\n", " if i > tf.shape(inputs)[1]:\n", " tar = tf.concat([tar, output[:, (i - tf.shape(inputs)[1]):]], axis=1)\n", " else:\n", " tar = tf.concat([tar, output], axis=1)\n", " #print(\"target_new[\", i, \"]:\", tar[0])\n", "\n", " #print(\"day sin/cos_OLD:\", tar[0, -1, 0, :-1])\n", "\n", " day = (tf.atan2(tar[:, -1, :, 0], tar[:, -1, :, 1]) * 183.0) / self.pi\n", " day = tf.round(tf.where(day > 0, day, day + 366))\n", " \n", " day_sin = tf.expand_dims(tf.sin(2.0 * self.pi * (day + 1) / 366.0), axis=-1)\n", " day_cos = tf.expand_dims(tf.cos(2.0 * self.pi * (day + 1) / 366.0), axis=-1)\n", "\n", " #print(\"day: \", day)\n", " #print(\"day sin/cos_NEW:\", day_sin[0], day_cos[0])\n", "\n", " predictions = self.transformer([inputs, tar], training=False)\n", " #print(\"predictions: \", predictions[0])\n", "\n", " if i == 0:\n", " zero_predictions = predictions[:, :-1]\n", "\n", " # concatentate the prediction to the output which is given to the decoder as its input\n", " output_array = output_array.write(i, tf.concat([day_sin, day_cos, predictions[:, -1]], axis=-1))\n", "\n", " output = tf.transpose(output_array.stack(), perm=[1, 0, 2, 3])\n", " #print(output.shape)\n", "\n", " return tf.concat([zero_predictions, output[:, :, :, -1:]], axis=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df_X = pd.read_csv(\"./dataset/1984_2022/X_all_daily.csv\")\n", "df_y_daily = pd.read_csv(\"./dataset/1984_2022/y_all_daily.csv\")\n", "\n", "num_of_patches = df_X['Name'].nunique()\n", "\n", "df_X = df_X.drop(\n", " columns=['DateTime', 'Name', 'Latitude', 'Longitude'] +\n", " [c for c in df_X.columns if c[:9] == 'WindSpeed'] +\n", " [c for c in df_X.columns if c[:12] == 'WindSpeedMin'] +\n", " [c for c in df_X.columns if c[:12] == 'WindSpeedMax'] +\n", " [c for c in df_X.columns if c[:13] == 'WindDirection']\n", ")\n", "df_y_daily = df_y_daily.drop(\n", " columns=['DateTime', 'Name', 'Latitude', 'Longitude'] +\n", " [c for c in df_y_daily.columns if c[:9] == 'WindSpeed'] +\n", " [c for c in df_y_daily.columns if c[:12] == 'WindSpeedMin'] +\n", " [c for c in df_y_daily.columns if c[:12] == 'WindSpeedMax'] +\n", " [c for c in df_y_daily.columns if c[:13] == 'WindDirection']\n", ")\n", "\n", "loc_names = [\n", " \"54 MW PV SOLAR POWER PLANT\",\n", " \"5MW Solar Power Plant Varroc\",\n", " \"Adani Green Energy Tamilnadu Limited\",\n", " \"Arete Elena Energy Pvt Ltd\",\n", " \"Bitta Solar Power Plant\",\n", " \"Charanka Solar Park\",\n", " \"Chennai Metropolitan Area\",\n", " \"Ctrls Data Center Mumbai\",\n", " \"Indira Paryavaran Bhawan\",\n", " \"Kurnool Ultra Mega Solar Park\",\n", " \"Pavagada Solar Park\",\n", " \"Rewa Ultra Mega Solar\",\n", " \"Solar Power Plant Chandasar\",\n", " \"Solar Power Plant Khera Silajit\",\n", " \"Solar power plant Koppal\",\n", " \"Target 1\",\n", " \"Target 2\",\n", " \"Welspun Solar MP project\",\n", "]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(df_X.head())\n", "print(df_y_daily.head())" ] }, { "cell_type": "code", "execution_count": 
null, "metadata": {}, "outputs": [], "source": [ "def make_dataset(data, sequence_length, sequence_stride, sampling_rate):\n", " def make_window(data):\n", " dataset = tf.data.Dataset.from_tensor_slices(data)\n", " dataset = dataset.window(sequence_length, shift=sequence_stride, stride=sampling_rate, drop_remainder=True)\n", " dataset = dataset.flat_map(lambda x: x.batch(sequence_length, drop_remainder=True)) \n", " return dataset\n", "\n", " data = np.array(data, dtype=np.float32)\n", " data = np.reshape(data, (-1, num_of_patches, data.shape[-1]))\n", "\n", " # Split the data\n", " # (80%, 10%, 10%)\n", " n = data.shape[0]\n", " n_train = int(n*0.8)\n", " n_val = int(n*0.9)\n", " train_data = data[0:n_train]\n", " val_data = data[n_train:n_val]\n", " test_data = data[n_val:]\n", "\n", " return (\n", " (n_train, make_window(train_data)),\n", " (n_val - n_train, make_window(val_data)),\n", " make_window(test_data)\n", " )\n", "\n", "def merge_dataset(datasets, batch_size, shuffle):\n", " dataset = tf.data.Dataset.zip(datasets)\n", " dataset = dataset.prefetch(tf.data.AUTOTUNE)\n", "\n", " if shuffle:\n", " # Shuffle locally at each iteration\n", " dataset = dataset.shuffle(buffer_size=1000)\n", " dataset = dataset.batch(batch_size)\n", " \n", " return dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Simulation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "horizon = 7\n", "window_size = 7\n", "batch_size = 32\n", "\n", "_, _, test_X_ds = make_dataset(df_X, (window_size + horizon), 1, 1)\n", "_, _, test_y_daily_ds = make_dataset(df_y_daily, (window_size + horizon), 1, 1)\n", "\n", "test_ds = merge_dataset(\n", " (test_X_ds, test_y_daily_ds),\n", " batch_size,\n", " shuffle=False,\n", ")\n", "\n", "daily_model = DailyTransformer(\n", " attention_dropout_rate=0.25,\n", " dropout_rate=0.15,\n", " embed_dim=64,\n", " mlp_dim=256,\n", " num_decoder_layers=6,\n", " num_encoder_layers=3,\n", " num_heads=6,\n", " num_outputs=1,\n", ")\n", "daily_model.build([(None, window_size, num_of_patches, 302), (None, window_size, num_of_patches, 3)])\n", "daily_model.load_weights(\"./models/model-best.h5\")\n", "simulator = Simulator(daily_model)\n", "\n", "print(daily_model.input_norm_enc.variables)\n", "print(daily_model.input_norm_dec1.variables)\n", "print(daily_model.input_norm_dec2.variables)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "patch_similarity_plot(daily_model.pos_embs_0.spatial_position[0, 0])\n", "patch_similarity_plot(daily_model.pos_embs_1.spatial_position[0, 0])\n", "\n", "timestep_similarity_plot(daily_model.pos_embs_0.temporal_position[0, :, 0])\n", "timestep_similarity_plot(daily_model.pos_embs_1.temporal_position[0, :, 0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Results" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "metrics = [MeanSquaredError(), RootMeanSquaredError(), MeanAbsoluteError(), MeanAbsolutePercentageError(), RSquare()]\n", "\n", "# Location 1 = 15 (64.67 % na 4 dni), (80.6 % na 1 den)\n", "# Location 2 = 16 (69.8 % na 4 dni), (83.67 % na 1 den)\n", "\n", "# Chennai = 6 (69.8 % na 4 dni), (83.67 % na 1 den)\n", "# Mumbai = 7 (69.8 % na 4 dni), (83.67 % na 1 den)\n", "\n", "for loc in range(num_of_patches):\n", " print(\"Location: \", loc_names[loc])\n", " print(\"-----------------------------------------------------\")\n", " for inputs in test_ds:\n", " inputs, targets = 
inputs\n", " inputs = inputs[:, :-horizon]\n", " targets_inputs = targets[:, :-horizon]\n", " targets_real = targets[:, 1:, loc, -1:]\n", "\n", " #y_pred = daily_model([inputs, targets_inputs], training=False)\n", " y_pred = simulator([inputs, targets_inputs], horizon_length=horizon)\n", "\n", " # Update the metrics\n", " for m in metrics:\n", " m.update_state(targets_real, y_pred[:, :, loc, -1:])\n", "\n", " # visualize the last results\n", " plot_prediction(targets, y_pred)\n", "\n", " print({m.name: m.result() for m in metrics}, \"\\n\")\n", " for m in metrics:\n", " m.reset_states()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.10 ('base')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.10" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "9185113d2128201d66faecd4f34fb34e89a635073a034991399523e584519355" } } }, "nbformat": 4, "nbformat_minor": 2 }