Commit c53ddec · Jonas Becker committed · Parent(s): e8305d9

1st try
Files changed:
- .gitignore +2 -0
- app.bat +2 -0
- app.py +38 -0
- disvae/__init__.py +6 -0
- disvae/evaluate.py +317 -0
- disvae/main.py +145 -0
- disvae/models/__init__.py +0 -0
- disvae/models/decoders.py +84 -0
- disvae/models/discriminator.py +73 -0
- disvae/models/encoders.py +89 -0
- disvae/models/losses.py +544 -0
- disvae/models/vae.py +101 -0
- disvae/training.py +212 -0
- disvae/utils/__init__.py +0 -0
- disvae/utils/initialization.py +61 -0
- disvae/utils/math.py +73 -0
- disvae/utils/modelIO.py +200 -0
- requirements.txt +11 -0
- transforms.py +168 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
+venv
+__pycache__
app.bat
ADDED
@@ -0,0 +1,2 @@
+call venv\scripts\activate.bat
+call streamlit run app.py
app.py
ADDED
@@ -0,0 +1,38 @@
+import numpy as np
+import streamlit as st
+import torch
+
+import disvae
+import transforms as trans
+
+P_MODEL = "models/btcvae_celeba"
+
+# Decode function ---------------------------------------------------
+sorter = trans.LatentSorter(disvae.get_kl_dict(P_MODEL))
+vae = disvae.load_model(P_MODEL)
+scaler = trans.MinMaxScaler(_min=torch.tensor([1.3]), _max=torch.tensor([4.0]), min_norm=0.3, max_norm=0.6)
+imaging = trans.SumField()
+
+_dec = trans.sequential_function(
+    sorter.inv,
+    vae.decoder
+)
+
+def decode(latent):
+    with torch.no_grad():
+        return trans.np_sample(_dec)(latent)
+
+# GUI ---------------------------------------------------------------
+
+latent_vector = np.array([st.slider(f"L{l}", min_value=-3.0, max_value=3.0, value=0.0) for l in range(3)])
+latent_vector = np.concatenate([latent_vector, np.zeros(7)], axis=0)
+
+value = decode(latent_vector)
+
+value = np.swapaxes(np.swapaxes(value, 0, 2), 0, 1)  # * 255
+
+# st.write(value)
+st.image(value, use_column_width="always")
+
+# x = st.slider("Select a value")
+# st.write(x, "squared is", x * x)
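
Note: transforms.py is added in this commit (+168 lines) but its diff is not shown on this page, so the actual bodies of trans.sequential_function, trans.LatentSorter, and trans.np_sample are not visible here. Purely as a hypothetical sketch of the composition pattern app.py relies on (sorter.inv applied first, then vae.decoder):

# Hypothetical sketch only; the real transforms.py may differ.
from functools import reduce

def sequential_function(*funcs):
    """Compose callables so the first one given is applied first."""
    return lambda x: reduce(lambda acc, f: f(acc), funcs, x)

# Example: (2 + 1) * 10 == 30
composed = sequential_function(lambda x: x + 1, lambda x: x * 10)
assert composed(2) == 30
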
disvae/__init__.py
ADDED
@@ -0,0 +1,6 @@
+from disvae.evaluate import Evaluator
+from disvae.main import get_kl_dict
+from disvae.training import Trainer
+from disvae.utils.modelIO import load_model, save_model
+
+# from disvae.models.vae import init_specific_model  # necessary?
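
A minimal usage sketch of the API re-exported here, assuming a trained model directory such as "models/btcvae_celeba" (the path app.py uses; modelIO.py is part of this commit but its diff is not shown):

import torch
import disvae

vae = disvae.load_model("models/btcvae_celeba")       # restore weights + metadata
kl_dict = disvae.get_kl_dict("models/btcvae_celeba")  # latent index -> final KL loss, sorted
with torch.no_grad():
    img = vae.decoder(torch.zeros(1, vae.latent_dim)) # decode the origin of latent space
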
disvae/evaluate.py
ADDED
@@ -0,0 +1,317 @@
+import os
+import logging
+import math
+from functools import reduce
+from collections import defaultdict
+import json
+from timeit import default_timer
+
+from tqdm import trange, tqdm
+import numpy as np
+import torch
+
+from disvae.models.losses import get_loss_f
+from disvae.utils.math import log_density_gaussian
+from disvae.utils.modelIO import save_metadata
+
+TEST_LOSSES_FILE = "test_losses.log"
+METRICS_FILENAME = "metrics.log"
+METRIC_HELPERS_FILE = "metric_helpers.pth"
+
+
+class Evaluator:
+    """
+    Class to handle evaluation of a model.
+
+    Parameters
+    ----------
+    model: disvae.vae.VAE
+
+    loss_f: disvae.models.BaseLoss
+        Loss function.
+
+    device: torch.device, optional
+        Device on which to run the code.
+
+    logger: logging.Logger, optional
+        Logger.
+
+    save_dir : str, optional
+        Directory for saving logs.
+
+    is_progress_bar: bool, optional
+        Whether to use a progress bar for evaluation.
+    """
+
+    def __init__(self, model, loss_f,
+                 device=torch.device("cpu"),
+                 logger=logging.getLogger(__name__),
+                 save_dir="results",
+                 is_progress_bar=True):
+
+        self.device = device
+        self.loss_f = loss_f
+        self.model = model.to(self.device)
+        self.logger = logger
+        self.save_dir = save_dir
+        self.is_progress_bar = is_progress_bar
+        self.logger.info("Testing Device: {}".format(self.device))
+
+    def __call__(self, data_loader, is_metrics=False, is_losses=True):
+        """Compute all test losses.
+
+        Parameters
+        ----------
+        data_loader: torch.utils.data.DataLoader
+
+        is_metrics: bool, optional
+            Whether to compute and store the disentangling metrics.
+
+        is_losses: bool, optional
+            Whether to compute and store the test losses.
+        """
+        start = default_timer()
+        is_still_training = self.model.training
+        self.model.eval()
+
+        metrics, losses = None, None
+        if is_metrics:
+            self.logger.info('Computing metrics...')
+            metrics = self.compute_metrics(data_loader)
+            self.logger.info('Metrics: {}'.format(metrics))
+            save_metadata(metrics, self.save_dir, filename=METRICS_FILENAME)
+
+        if is_losses:
+            self.logger.info('Computing losses...')
+            losses = self.compute_losses(data_loader)
+            self.logger.info('Losses: {}'.format(losses))
+            save_metadata(losses, self.save_dir, filename=TEST_LOSSES_FILE)
+
+        if is_still_training:
+            self.model.train()
+
+        self.logger.info('Finished evaluating after {:.1f} min.'.format((default_timer() - start) / 60))
+
+        return metrics, losses
+
+    def compute_losses(self, dataloader):
+        """Compute all test losses.
+
+        Parameters
+        ----------
+        dataloader: torch.utils.data.DataLoader
+        """
+        storer = defaultdict(list)
+        for data, _ in tqdm(dataloader, leave=False, disable=not self.is_progress_bar):
+            data = data.to(self.device)
+
+            try:
+                recon_batch, latent_dist, latent_sample = self.model(data)
+                _ = self.loss_f(data, recon_batch, latent_dist, self.model.training,
+                                storer, latent_sample=latent_sample)
+            except ValueError:
+                # for losses that use multiple optimizers (e.g. Factor)
+                _ = self.loss_f.call_optimize(data, self.model, None, storer)
+
+        losses = {k: sum(v) / len(dataloader) for k, v in storer.items()}
+        return losses
+
+    def compute_metrics(self, dataloader):
+        """Compute all the metrics.
+
+        Parameters
+        ----------
+        dataloader: torch.utils.data.DataLoader
+        """
+        try:
+            lat_sizes = dataloader.dataset.lat_sizes
+            lat_names = dataloader.dataset.lat_names
+        except AttributeError:
+            raise ValueError("Dataset needs to have known true factors of variation to compute the metric. This does not seem to be the case for {}".format(type(dataloader.__dict__["dataset"]).__name__))
+
+        self.logger.info("Computing the empirical distribution q(z|x).")
+        samples_zCx, params_zCx = self._compute_q_zCx(dataloader)
+        len_dataset, latent_dim = samples_zCx.shape
+
+        self.logger.info("Estimating the marginal entropy.")
+        # marginal entropy H(z_j)
+        H_z = self._estimate_latent_entropies(samples_zCx, params_zCx)
+
+        # conditional entropy H(z|v)
+        samples_zCx = samples_zCx.view(*lat_sizes, latent_dim)
+        params_zCx = tuple(p.view(*lat_sizes, latent_dim) for p in params_zCx)
+        H_zCv = self._estimate_H_zCv(samples_zCx, params_zCx, lat_sizes, lat_names)
+
+        H_z = H_z.cpu()
+        H_zCv = H_zCv.cpu()
+
+        # I[z_j;v_k] = E[log \sum_x q(z_j|x)p(x|v_k)] + H[z_j] = - H[z_j|v_k] + H[z_j]
+        mut_info = - H_zCv + H_z
+        sorted_mut_info = torch.sort(mut_info, dim=1, descending=True)[0].clamp(min=0)
+
+        metric_helpers = {'marginal_entropies': H_z, 'cond_entropies': H_zCv}
+        mig = self._mutual_information_gap(sorted_mut_info, lat_sizes, storer=metric_helpers)
+        aam = self._axis_aligned_metric(sorted_mut_info, storer=metric_helpers)
+
+        metrics = {'MIG': mig.item(), 'AAM': aam.item()}
+        torch.save(metric_helpers, os.path.join(self.save_dir, METRIC_HELPERS_FILE))
+
+        return metrics
+
+    def _mutual_information_gap(self, sorted_mut_info, lat_sizes, storer=None):
+        """Compute the mutual information gap as in [1].
+
+        References
+        ----------
+        [1] Chen, Tian Qi, et al. "Isolating sources of disentanglement in variational
+            autoencoders." Advances in Neural Information Processing Systems. 2018.
+        """
+        # difference between the largest and second largest mutual info
+        delta_mut_info = sorted_mut_info[:, 0] - sorted_mut_info[:, 1]
+        # NOTE: currently only works if the dataset is balanced for every factor of variation;
+        # then H(v_k) = - |V_k| * 1/|V_k| * log(1/|V_k|) = log(|V_k|)
+        H_v = torch.from_numpy(lat_sizes).float().log()
+        mig_k = delta_mut_info / H_v
+        mig = mig_k.mean()  # mean over factors of variation
+
+        if storer is not None:
+            storer["mig_k"] = mig_k
+            storer["mig"] = mig
+
+        return mig
+
+    def _axis_aligned_metric(self, sorted_mut_info, storer=None):
+        """Compute the proposed axis-aligned metric."""
+        numerator = (sorted_mut_info[:, 0] - sorted_mut_info[:, 1:].sum(dim=1)).clamp(min=0)
+        aam_k = numerator / sorted_mut_info[:, 0]
+        aam_k[torch.isnan(aam_k)] = 0
+        aam = aam_k.mean()  # mean over factors of variation
+
+        if storer is not None:
+            storer["aam_k"] = aam_k
+            storer["aam"] = aam
+
+        return aam
+
+    def _compute_q_zCx(self, dataloader):
+        """Compute the empirical distribution of q(z|x).
+
+        Parameters
+        ----------
+        dataloader: torch.utils.data.DataLoader
+            Batch data iterator.
+
+        Returns
+        -------
+        samples_zCx: torch.tensor
+            Tensor of shape (len_dataset, latent_dim) containing a sample of
+            q(z|x) for every x in the dataset.
+
+        params_zCX: tuple of torch.Tensor
+            Sufficient statistics of q(z|x) for each training example. E.g. for
+            a gaussian, (mean, log_var) each of shape (len_dataset, latent_dim).
+        """
+        len_dataset = len(dataloader.dataset)
+        latent_dim = self.model.latent_dim
+        n_suff_stat = 2
+
+        q_zCx = torch.zeros(len_dataset, latent_dim, n_suff_stat, device=self.device)
+
+        n = 0
+        with torch.no_grad():
+            for x, label in dataloader:
+                batch_size = x.size(0)
+                idcs = slice(n, n + batch_size)
+                q_zCx[idcs, :, 0], q_zCx[idcs, :, 1] = self.model.encoder(x.to(self.device))
+                n += batch_size
+
+        params_zCX = q_zCx.unbind(-1)
+        samples_zCx = self.model.reparameterize(*params_zCX)
+
+        return samples_zCx, params_zCX
+
+    def _estimate_latent_entropies(self, samples_zCx, params_zCX,
+                                   n_samples=10000):
+        r"""Estimate :math:`H(z_j) = E_{q(z_j)} [-log q(z_j)] = E_{p(x)} E_{q(z_j|x)} [-log q(z_j)]`
+        using the empirical distribution of :math:`p(x)`.
+
+        Note
+        ----
+        - the expectation over the empirical distribution is: :math:`q(z) = 1/N \sum_{n=1}^N q(z|x_n)`.
+        - we assume that q(z|x) is factorial, i.e. :math:`q(z|x) = \prod_j q(z_j|x)`.
+        - computes numerically stable NLL: :math:`- log q(z) = log N - logsumexp_{n=1}^N log q(z|x_n)`.
+
+        Parameters
+        ----------
+        samples_zCx: torch.tensor
+            Tensor of shape (len_dataset, latent_dim) containing a sample of
+            q(z|x) for every x in the dataset.
+
+        params_zCX: tuple of torch.Tensor
+            Sufficient statistics of q(z|x) for each training example. E.g. for
+            a gaussian, (mean, log_var) each of shape (len_dataset, latent_dim).
+
+        n_samples: int, optional
+            Number of samples to use to estimate the entropies.
+
+        Returns
+        -------
+        H_z: torch.Tensor
+            Tensor of shape (latent_dim) containing the marginal entropies H(z_j).
+        """
+        len_dataset, latent_dim = samples_zCx.shape
+        device = samples_zCx.device
+        H_z = torch.zeros(latent_dim, device=device)
+
+        # sample from p(x)
+        samples_x = torch.randperm(len_dataset, device=device)[:n_samples]
+        # sample from p(z|x)
+        samples_zCx = samples_zCx.index_select(0, samples_x).view(latent_dim, n_samples)
+
+        mini_batch_size = 10
+        samples_zCx = samples_zCx.expand(len_dataset, latent_dim, n_samples)
+        mean = params_zCX[0].unsqueeze(-1).expand(len_dataset, latent_dim, n_samples)
+        log_var = params_zCX[1].unsqueeze(-1).expand(len_dataset, latent_dim, n_samples)
+        log_N = math.log(len_dataset)
+        with trange(n_samples, leave=False, disable=not self.is_progress_bar) as t:
+            for k in range(0, n_samples, mini_batch_size):
+                # log q(z_j|x) for n_samples
+                idcs = slice(k, k + mini_batch_size)
+                log_q_zCx = log_density_gaussian(samples_zCx[..., idcs],
+                                                 mean[..., idcs],
+                                                 log_var[..., idcs])
+                # numerically stable log q(z_j) for n_samples:
+                # log q(z_j) = -log N + logsumexp_{n=1}^N log q(z_j|x_n)
+                # As we don't know q(z), we approximate it with the monte carlo
+                # expectation of q(z_j|x_n) over x. => fix a single z and look at
+                # the probability for every x to generate it. n_samples is not used here!
+                log_q_z = -log_N + torch.logsumexp(log_q_zCx, dim=0, keepdim=False)
+                # H(z_j) = E_{z_j}[- log q(z_j)]
+                # mean over n_samples (i.e. dimension 1, because already summed over 0).
+                H_z += (-log_q_z).sum(1)
+
+                t.update(mini_batch_size)
+
+        H_z /= n_samples
+
+        return H_z
+
+    def _estimate_H_zCv(self, samples_zCx, params_zCx, lat_sizes, lat_names):
+        """Estimate conditional entropies :math:`H[z|v]`."""
+        latent_dim = samples_zCx.size(-1)
+        len_dataset = reduce((lambda x, y: x * y), lat_sizes)
+        H_zCv = torch.zeros(len(lat_sizes), latent_dim, device=self.device)
+        for i_fac_var, (lat_size, lat_name) in enumerate(zip(lat_sizes, lat_names)):
+            idcs = [slice(None)] * len(lat_sizes)
+            for i in range(lat_size):
+                self.logger.info("Estimating conditional entropies for the {}th value of {}.".format(i, lat_name))
+                idcs[i_fac_var] = i
+                # samples from q(z,x|v)
+                samples_zxCv = samples_zCx[idcs].contiguous().view(len_dataset // lat_size,
+                                                                   latent_dim)
+                params_zxCv = tuple(p[idcs].contiguous().view(len_dataset // lat_size, latent_dim)
+                                    for p in params_zCx)
+
+                H_zCv[i_fac_var] += self._estimate_latent_entropies(samples_zxCv, params_zxCv
+                                                                    ) / lat_size
+        return H_zCv
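
For reference, the MIG computation above reduces to a few tensor operations; this standalone sketch (toy numbers, not from the commit) mirrors the core of _mutual_information_gap: sort each factor's mutual-information row descending, take the gap between the two largest entries, normalize by the factor entropy, and average.

import torch

mut_info = torch.tensor([[1.2, 0.1, 0.0],   # factor 0 captured mostly by latent 0
                         [0.1, 0.9, 0.8]])  # factor 1 split over latents 1 and 2
lat_sizes = torch.tensor([10.0, 6.0])        # values per factor (balanced dataset)

sorted_mi = torch.sort(mut_info, dim=1, descending=True)[0].clamp(min=0)
H_v = lat_sizes.log()                        # H(v_k) = log|V_k| for balanced factors
mig_k = (sorted_mi[:, 0] - sorted_mi[:, 1]) / H_v
print(mig_k.mean())                          # the entangled second factor drags MIG down
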
disvae/main.py
ADDED
@@ -0,0 +1,145 @@
+# Pip packages ------------------------------------------------------
+import importlib
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import torch
+from torch import optim
+from torch.utils.data import DataLoader
+
+# From local package ------------------------------------------------
+from disvae.models.losses import get_loss_f
+from disvae.models.vae import init_specific_model
+from disvae.training import Trainer
+from disvae.utils.modelIO import save_model
+
+# Loss stuff:
+
+
+def parse_losses(p_model, filename="train_losses.log"):
+    df = pd.read_csv(Path(p_model) / filename)
+
+    losses = df["Loss"].unique()
+
+    rtn = [np.array(df[df["Loss"] == l]["Value"]) for l in losses]
+    rtn = pd.DataFrame(np.array(rtn).T, columns=losses)
+
+    return rtn
+
+
+def get_kl_loss_latent(df):
+    """df must already be parsed!"""
+    rtn = {int(c.split("_")[-1]): df[c].iloc[-1] for c in df if "kl_loss_" in c}
+    rtn = dict(sorted(rtn.items(), key=lambda item: item[1], reverse=True))
+    return rtn
+
+
+def get_kl_dict(p_model):
+    df = parse_losses(p_model)
+    rtn = get_kl_loss_latent(df)
+    return rtn
+
+
+# Dataloader convenience stuff
+
+
+# def get_dataloader(dataset: torch.data.Dataset, batch_size, num_workers):
+#     # This function is rather complicated. It is quick to do in a notebook,
+#     # and these things are also needed to visualize the dataset.
+
+#     # p_dataset_module, dataset_class, dataset_args
+#     # Import module
+#     # if p_dataset_module not in sys.path:
+#     #     sys.path.append(str(Path(p_dataset_module).parent))
+#     # Dataset = getattr(
+#     #     importlib.import_module(Path(p_dataset_module).stem), dataset_class
+#     # )
+
+#     # # From here on, as if it were imported normally
+#     # ds = Dataset(**dataset_args)
+
+#
+
+#     return loader
+
+
+def get_export_dir(base_dir: str, folder_name):
+    if folder_name is None:
+        folder_name = "Model_" + (
+            datetime.now().replace(microsecond=0).isoformat()
+        ).replace(" ", "_").replace(":", "-")
+
+    rtn = Path(base_dir) / folder_name
+
+    if not rtn.exists():
+        os.makedirs(rtn)
+    else:
+        raise ValueError("Output directory already exists.")
+
+    return rtn
+
+
+def train_model(model, data_loader, loss_f, device, lr, epochs, export_dir):
+    trainer = Trainer(
+        model,
+        optim.Adam(model.parameters(), lr=lr),
+        loss_f,
+        device=device,
+        # logger=logger,
+        save_dir=export_dir,
+        is_progress_bar=True,
+    )  # ,
+    # gif_visualizer=gif_visualizer)
+    trainer(data_loader, epochs=epochs, checkpoint_every=10)
+
+    save_model(trainer.model, export_dir)
+    # , metadata=config)  # saving also already happens earlier
+
+    # gif_visualizer = GifTraversalsTraining(model, args.dataset, exp_dir)
+
+
+def train(dataset, config) -> str:
+    # Validate config?
+
+    print("1) Set device")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Device:\t\t {device}")
+
+    print("2) Get dataloader")
+    dataloader = DataLoader(
+        dataset,
+        batch_size=config["data_params"]["batch_size"],
+        shuffle=True,
+        pin_memory=torch.cuda.is_available(),
+        num_workers=config["data_params"]["num_workers"],
+    )
+
+    print("3) Build model")
+    img_size = list(dataloader.dataset[0][0].shape)
+    print(f"Image size: \t {img_size}")
+    model = init_specific_model(img_size=img_size, **config["model_params"])
+    model = model.to(device)  # make sure trainer and viz are on the same device
+
+    print("4) Build loss function")
+    loss_f = get_loss_f(
+        n_data=len(dataloader.dataset), device=device, **config["loss_params"]
+    )
+
+    print("5) Parse export params")
+    export_dir = get_export_dir(**config["export_params"])
+
+    print("6) Training model")
+    train_model(
+        model=model,
+        data_loader=dataloader,
+        loss_f=loss_f,
+        device=device,
+        export_dir=export_dir,
+        **config["trainer_params"],
+    )
+
+    return export_dir
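
For orientation, these are the config keys train() above actually reads; the values below are hypothetical and the exact model/loss keys depend on init_specific_model and get_loss_f (see disvae/models/vae.py and losses.py in this commit):

# Sketch of a config dict accepted by train(); values are illustrative.
config = {
    "data_params": {"batch_size": 64, "num_workers": 4},
    "model_params": {"model_type": "Burgess", "latent_dim": 10},
    "loss_params": {"loss_name": "btcvae", "rec_dist": "bernoulli",
                    "reg_anneal": 0,
                    "btcvae_A": 1, "btcvae_B": 6, "btcvae_G": 1},
    "export_params": {"base_dir": "models", "folder_name": None},
    "trainer_params": {"lr": 5e-4, "epochs": 100},
}
# export_dir = train(dataset, config)
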
disvae/models/__init__.py
ADDED
File without changes
disvae/models/decoders.py
ADDED
@@ -0,0 +1,84 @@
+"""
+Module containing the decoders.
+"""
+import numpy as np
+
+import torch
+from torch import nn
+
+
+# ALL decoders should be called Decoder<Model>
+def get_decoder(model_type):
+    model_type = model_type.lower().capitalize()
+    return eval("Decoder{}".format(model_type))
+
+
+class DecoderBurgess(nn.Module):
+    def __init__(self, img_size,
+                 latent_dim=10):
+        r"""Decoder of the model proposed in [1].
+
+        Parameters
+        ----------
+        img_size : tuple of ints
+            Size of images. E.g. (1, 32, 32) or (3, 64, 64).
+
+        latent_dim : int
+            Dimensionality of latent output.
+
+        Model Architecture (transposed for decoder)
+        ------------
+        - 4 convolutional layers (each with 32 channels), (4 x 4 kernel), (stride of 2)
+        - 2 fully connected layers (each of 256 units)
+        - Latent distribution:
+            - 1 fully connected layer of 20 units (log variance and mean for 10 Gaussians)
+
+        References:
+            [1] Burgess, Christopher P., et al. "Understanding disentangling in
+            $\beta$-VAE." arXiv preprint arXiv:1804.03599 (2018).
+        """
+        super(DecoderBurgess, self).__init__()
+
+        # Layer parameters
+        hid_channels = 32
+        kernel_size = 4
+        hidden_dim = 256
+        self.img_size = img_size
+        # Shape required to start transpose convs
+        self.reshape = (hid_channels, kernel_size, kernel_size)
+        n_chan = self.img_size[0]
+
+        # Fully connected layers
+        self.lin1 = nn.Linear(latent_dim, hidden_dim)
+        self.lin2 = nn.Linear(hidden_dim, hidden_dim)
+        self.lin3 = nn.Linear(hidden_dim, np.product(self.reshape))
+
+        # Convolutional layers
+        cnn_kwargs = dict(stride=2, padding=1)
+        # If input image is 64x64, do a fourth convolution
+        if self.img_size[1] == self.img_size[2] == 64:
+            self.convT_64 = nn.ConvTranspose2d(hid_channels, hid_channels, kernel_size, **cnn_kwargs)
+
+        self.convT1 = nn.ConvTranspose2d(hid_channels, hid_channels, kernel_size, **cnn_kwargs)
+        self.convT2 = nn.ConvTranspose2d(hid_channels, hid_channels, kernel_size, **cnn_kwargs)
+        self.convT3 = nn.ConvTranspose2d(hid_channels, n_chan, kernel_size, **cnn_kwargs)
+
+    def forward(self, z):
+        batch_size = z.size(0)
+
+        # Fully connected layers with ReLU activations
+        x = torch.relu(self.lin1(z))
+        x = torch.relu(self.lin2(x))
+        x = torch.relu(self.lin3(x))
+        x = x.view(batch_size, *self.reshape)
+
+        # Convolutional layers with ReLU activations
+        if self.img_size[1] == self.img_size[2] == 64:
+            x = torch.relu(self.convT_64(x))
+        x = torch.relu(self.convT1(x))
+        x = torch.relu(self.convT2(x))
+        # Sigmoid activation for final conv layer
+        x = torch.sigmoid(self.convT3(x))
+
+        return x
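
A quick shape check of the decoder above (a sketch, not part of the commit): a (batch, latent_dim) code is lifted through two 256-unit layers, reshaped to (32, 4, 4), then upsampled by four stride-2 transposed convolutions to 64x64.

import torch
from disvae.models.decoders import DecoderBurgess

decoder = DecoderBurgess(img_size=(3, 64, 64), latent_dim=10)
z = torch.randn(2, 10)
out = decoder(z)
print(out.shape)  # torch.Size([2, 3, 64, 64]); values in (0, 1) from the sigmoid
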
disvae/models/discriminator.py
ADDED
@@ -0,0 +1,73 @@
+"""
+Module containing the discriminator for FactorVAE.
+"""
+from torch import nn
+
+from disvae.utils.initialization import weights_init
+
+
+class Discriminator(nn.Module):
+    def __init__(self,
+                 neg_slope=0.2,
+                 latent_dim=10,
+                 hidden_units=1000):
+        """Discriminator proposed in [1].
+
+        Parameters
+        ----------
+        neg_slope: float
+            Hyperparameter for the Leaky ReLU
+
+        latent_dim : int
+            Dimensionality of latent variables.
+
+        hidden_units: int
+            Number of hidden units in the MLP
+
+        Model Architecture
+        ------------
+        - 6 layer multi-layer perceptron, each with 1000 hidden units
+        - Leaky ReLU activations
+        - Output 2 logits
+
+        References:
+            [1] Kim, Hyunjik, and Andriy Mnih. "Disentangling by factorising."
+            arXiv preprint arXiv:1802.05983 (2018).
+        """
+        super(Discriminator, self).__init__()
+
+        # Activation parameters
+        self.neg_slope = neg_slope
+        self.leaky_relu = nn.LeakyReLU(self.neg_slope, True)
+
+        # Layer parameters
+        self.z_dim = latent_dim
+        self.hidden_units = hidden_units
+        # theoretically 1 with sigmoid but gives bad results => use 2 and softmax
+        out_units = 2
+
+        # Fully connected layers
+        self.lin1 = nn.Linear(self.z_dim, hidden_units)
+        self.lin2 = nn.Linear(hidden_units, hidden_units)
+        self.lin3 = nn.Linear(hidden_units, hidden_units)
+        self.lin4 = nn.Linear(hidden_units, hidden_units)
+        self.lin5 = nn.Linear(hidden_units, hidden_units)
+        self.lin6 = nn.Linear(hidden_units, out_units)
+
+        self.reset_parameters()
+
+    def forward(self, z):
+
+        # Fully connected layers with leaky ReLU activations
+        z = self.leaky_relu(self.lin1(z))
+        z = self.leaky_relu(self.lin2(z))
+        z = self.leaky_relu(self.lin3(z))
+        z = self.leaky_relu(self.lin4(z))
+        z = self.leaky_relu(self.lin5(z))
+        z = self.lin6(z)
+
+        return z
+
+    def reset_parameters(self):
+        self.apply(weights_init)
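
Side note on the two-logit design above: with a softmax over two logits, log(p_true / p_false) reduces to logit_true - logit_false, which is exactly the density-ratio estimate FactorKLoss.call_optimize (in losses.py below) uses as its total-correlation term. A tiny standalone check:

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, -1.0]])     # [logit_true, logit_false]
p = F.softmax(logits, dim=1)
ratio = torch.log(p[:, 0] / p[:, 1])     # log(p_true / p_false)
diff = logits[:, 0] - logits[:, 1]       # logit difference
assert torch.allclose(ratio, diff)       # identical: the partition function cancels
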
disvae/models/encoders.py
ADDED
@@ -0,0 +1,89 @@
+"""
+Module containing the encoders.
+"""
+import numpy as np
+
+import torch
+from torch import nn
+
+
+# ALL encoders should be called Encoder<Model>
+def get_encoder(model_type):
+    model_type = model_type.lower().capitalize()
+    return eval("Encoder{}".format(model_type))
+
+
+class EncoderBurgess(nn.Module):
+    def __init__(self, img_size,
+                 latent_dim=10):
+        r"""Encoder of the model proposed in [1].
+
+        Parameters
+        ----------
+        img_size : tuple of ints
+            Size of images. E.g. (1, 32, 32) or (3, 64, 64).
+
+        latent_dim : int
+            Dimensionality of latent output.
+
+        Model Architecture (transposed for decoder)
+        ------------
+        - 4 convolutional layers (each with 32 channels), (4 x 4 kernel), (stride of 2)
+        - 2 fully connected layers (each of 256 units)
+        - Latent distribution:
+            - 1 fully connected layer of 20 units (log variance and mean for 10 Gaussians)
+
+        References:
+            [1] Burgess, Christopher P., et al. "Understanding disentangling in
+            $\beta$-VAE." arXiv preprint arXiv:1804.03599 (2018).
+        """
+        super(EncoderBurgess, self).__init__()
+
+        # Layer parameters
+        hid_channels = 32
+        kernel_size = 4
+        hidden_dim = 256
+        self.latent_dim = latent_dim
+        self.img_size = img_size
+        # Shape required to start transpose convs
+        self.reshape = (hid_channels, kernel_size, kernel_size)
+        n_chan = self.img_size[0]
+
+        # Convolutional layers
+        cnn_kwargs = dict(stride=2, padding=1)
+        self.conv1 = nn.Conv2d(n_chan, hid_channels, kernel_size, **cnn_kwargs)
+        self.conv2 = nn.Conv2d(hid_channels, hid_channels, kernel_size, **cnn_kwargs)
+        self.conv3 = nn.Conv2d(hid_channels, hid_channels, kernel_size, **cnn_kwargs)
+
+        # If input image is 64x64, do a fourth convolution
+        if self.img_size[1] == self.img_size[2] == 64:
+            self.conv_64 = nn.Conv2d(hid_channels, hid_channels, kernel_size, **cnn_kwargs)
+
+        # Fully connected layers
+        self.lin1 = nn.Linear(np.product(self.reshape), hidden_dim)
+        self.lin2 = nn.Linear(hidden_dim, hidden_dim)
+
+        # Fully connected layers for mean and variance
+        self.mu_logvar_gen = nn.Linear(hidden_dim, self.latent_dim * 2)
+
+    def forward(self, x):
+        batch_size = x.size(0)
+
+        # Convolutional layers with ReLU activations
+        x = torch.relu(self.conv1(x))
+        x = torch.relu(self.conv2(x))
+        x = torch.relu(self.conv3(x))
+        if self.img_size[1] == self.img_size[2] == 64:
+            x = torch.relu(self.conv_64(x))
+
+        # Fully connected layers with ReLU activations
+        x = x.view((batch_size, -1))
+        x = torch.relu(self.lin1(x))
+        x = torch.relu(self.lin2(x))
+
+        # Fully connected layer for log variance and mean
+        # Log std-dev in paper (bear in mind)
+        mu_logvar = self.mu_logvar_gen(x)
+        mu, logvar = mu_logvar.view(-1, self.latent_dim, 2).unbind(-1)
+
+        return mu, logvar
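
The mirror-image shape check for the encoder above (sketch only): a 64x64 RGB batch is downsampled by four stride-2 convolutions to (32, 4, 4), flattened to 512 units, and mapped to 2 * latent_dim outputs split into mean and log variance.

import torch
from disvae.models.encoders import EncoderBurgess

encoder = EncoderBurgess(img_size=(3, 64, 64), latent_dim=10)
x = torch.rand(2, 3, 64, 64)
mu, logvar = encoder(x)
print(mu.shape, logvar.shape)  # torch.Size([2, 10]) torch.Size([2, 10])
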
disvae/models/losses.py
ADDED
@@ -0,0 +1,544 @@
+"""
+Module containing all vae losses.
+"""
+import abc
+import math
+
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+from torch import optim
+
+from .discriminator import Discriminator
+from disvae.utils.math import (log_density_gaussian, log_importance_weight_matrix,
+                               matrix_log_density_gaussian)
+
+
+LOSSES = ["VAE", "betaH", "betaB", "factor", "btcvae"]
+RECON_DIST = ["bernoulli", "laplace", "gaussian"]
+
+
+# TO-DO: clean n_data and device
+def get_loss_f(loss_name, **kwargs_parse):
+    """Return the correct loss function given the argparse arguments."""
+    kwargs_all = dict(rec_dist=kwargs_parse["rec_dist"],
+                      steps_anneal=kwargs_parse["reg_anneal"])
+    if loss_name == "betaH":
+        return BetaHLoss(beta=kwargs_parse["betaH_B"], **kwargs_all)
+    elif loss_name == "VAE":
+        return BetaHLoss(beta=1, **kwargs_all)
+    elif loss_name == "betaB":
+        return BetaBLoss(C_init=kwargs_parse["betaB_initC"],
+                         C_fin=kwargs_parse["betaB_finC"],
+                         gamma=kwargs_parse["betaB_G"],
+                         **kwargs_all)
+    elif loss_name == "factor":
+        return FactorKLoss(kwargs_parse["device"],
+                           gamma=kwargs_parse["factor_G"],
+                           disc_kwargs=dict(latent_dim=kwargs_parse["latent_dim"]),
+                           optim_kwargs=dict(lr=kwargs_parse["lr_disc"], betas=(0.5, 0.9)),
+                           **kwargs_all)
+    elif loss_name == "btcvae":
+        return BtcvaeLoss(kwargs_parse["n_data"],
+                          alpha=kwargs_parse["btcvae_A"],
+                          beta=kwargs_parse["btcvae_B"],
+                          gamma=kwargs_parse["btcvae_G"],
+                          **kwargs_all)
+    else:
+        assert loss_name not in LOSSES
+        raise ValueError("Unknown loss : {}".format(loss_name))
+
+
+class BaseLoss(abc.ABC):
+    """
+    Base class for losses.
+
+    Parameters
+    ----------
+    record_loss_every: int, optional
+        Every how many steps to record the loss.
+
+    rec_dist: {"bernoulli", "gaussian", "laplace"}, optional
+        Reconstruction distribution of the likelihood on each pixel.
+        Implicitly defines the reconstruction loss. Bernoulli corresponds to a
+        binary cross entropy (BCE), Gaussian corresponds to MSE, Laplace
+        corresponds to L1.
+
+    steps_anneal: int, optional
+        Number of annealing steps during which the regularisation is gradually added.
+    """
+
+    def __init__(self, record_loss_every=50, rec_dist="bernoulli", steps_anneal=0):
+        self.n_train_steps = 0
+        self.record_loss_every = record_loss_every
+        self.rec_dist = rec_dist
+        self.steps_anneal = steps_anneal
+
+    @abc.abstractmethod
+    def __call__(self, data, recon_data, latent_dist, is_train, storer, **kwargs):
+        """
+        Calculates loss for a batch of data.
+
+        Parameters
+        ----------
+        data : torch.Tensor
+            Input data (e.g. batch of images). Shape : (batch_size, n_chan,
+            height, width).
+
+        recon_data : torch.Tensor
+            Reconstructed data. Shape : (batch_size, n_chan, height, width).
+
+        latent_dist : tuple of torch.tensor
+            Sufficient statistics of the latent dimension. E.g. for gaussian
+            (mean, log_var) each of shape : (batch_size, latent_dim).
+
+        is_train : bool
+            Whether currently in train mode.
+
+        storer : dict
+            Dictionary in which to store important variables for vizualisation.
+
+        kwargs:
+            Loss-specific arguments
+        """
+
+    def _pre_call(self, is_train, storer):
+        if is_train:
+            self.n_train_steps += 1
+
+        if not is_train or self.n_train_steps % self.record_loss_every == 1:
+            storer = storer
+        else:
+            storer = None
+
+        return storer
+
+
+class BetaHLoss(BaseLoss):
+    """
+    Compute the Beta-VAE loss as in [1]
+
+    Parameters
+    ----------
+    beta : float, optional
+        Weight of the KL divergence.
+
+    kwargs:
+        Additional arguments for `BaseLoss`, e.g. `rec_dist`.
+
+    References
+    ----------
+    [1] Higgins, Irina, et al. "beta-VAE: Learning basic visual concepts with
+        a constrained variational framework." (2016).
+    """
+
+    def __init__(self, beta=4, **kwargs):
+        super().__init__(**kwargs)
+        self.beta = beta
+
+    def __call__(self, data, recon_data, latent_dist, is_train, storer, **kwargs):
+        storer = self._pre_call(is_train, storer)
+
+        rec_loss = _reconstruction_loss(data, recon_data,
+                                        storer=storer,
+                                        distribution=self.rec_dist)
+        kl_loss = _kl_normal_loss(*latent_dist, storer)
+        anneal_reg = (linear_annealing(0, 1, self.n_train_steps, self.steps_anneal)
+                      if is_train else 1)
+        loss = rec_loss + anneal_reg * (self.beta * kl_loss)
+
+        if storer is not None:
+            storer['loss'].append(loss.item())
+
+        return loss
+
+
+class BetaBLoss(BaseLoss):
+    """
+    Compute the Beta-VAE loss as in [1]
+
+    Parameters
+    ----------
+    C_init : float, optional
+        Starting annealed capacity C.
+
+    C_fin : float, optional
+        Final annealed capacity C.
+
+    gamma : float, optional
+        Weight of the KL divergence term.
+
+    kwargs:
+        Additional arguments for `BaseLoss`, e.g. `rec_dist`.
+
+    References
+    ----------
+    [1] Burgess, Christopher P., et al. "Understanding disentangling in
+        $\beta$-VAE." arXiv preprint arXiv:1804.03599 (2018).
+    """
+
+    def __init__(self, C_init=0., C_fin=20., gamma=100., **kwargs):
+        super().__init__(**kwargs)
+        self.gamma = gamma
+        self.C_init = C_init
+        self.C_fin = C_fin
+
+    def __call__(self, data, recon_data, latent_dist, is_train, storer, **kwargs):
+        storer = self._pre_call(is_train, storer)
+
+        rec_loss = _reconstruction_loss(data, recon_data,
+                                        storer=storer,
+                                        distribution=self.rec_dist)
+        kl_loss = _kl_normal_loss(*latent_dist, storer)
+
+        C = (linear_annealing(self.C_init, self.C_fin, self.n_train_steps, self.steps_anneal)
+             if is_train else self.C_fin)
+
+        loss = rec_loss + self.gamma * (kl_loss - C).abs()
+
+        if storer is not None:
+            storer['loss'].append(loss.item())
+
+        return loss
+
+
+class FactorKLoss(BaseLoss):
+    """
+    Compute the Factor-VAE loss as per Algorithm 2 of [1]
+
+    Parameters
+    ----------
+    device : torch.device
+
+    gamma : float, optional
+        Weight of the TC loss term. `gamma` in the paper.
+
+    discriminator : disvae.discriminator.Discriminator
+
+    optimizer_d : torch.optim
+
+    kwargs:
+        Additional arguments for `BaseLoss`, e.g. `rec_dist`.
+
+    References
+    ----------
+    [1] Kim, Hyunjik, and Andriy Mnih. "Disentangling by factorising."
+        arXiv preprint arXiv:1802.05983 (2018).
+    """
+
+    def __init__(self, device,
+                 gamma=10.,
+                 disc_kwargs={},
+                 optim_kwargs=dict(lr=5e-5, betas=(0.5, 0.9)),
+                 **kwargs):
+        super().__init__(**kwargs)
+        self.gamma = gamma
+        self.device = device
+        self.discriminator = Discriminator(**disc_kwargs).to(self.device)
+        self.optimizer_d = optim.Adam(self.discriminator.parameters(), **optim_kwargs)
+
+    def __call__(self, *args, **kwargs):
+        raise ValueError("Use `call_optimize` to also train the discriminator")
+
+    def call_optimize(self, data, model, optimizer, storer):
+        storer = self._pre_call(model.training, storer)
+
+        # factor-vae splits data into two batches; in the paper they sample 2 batches
+        batch_size = data.size(dim=0)
+        half_batch_size = batch_size // 2
+        data = data.split(half_batch_size)
+        data1 = data[0]
+        data2 = data[1]
+
+        # Factor VAE Loss
+        recon_batch, latent_dist, latent_sample1 = model(data1)
+        rec_loss = _reconstruction_loss(data1, recon_batch,
+                                        storer=storer,
+                                        distribution=self.rec_dist)
+
+        kl_loss = _kl_normal_loss(*latent_dist, storer)
+
+        d_z = self.discriminator(latent_sample1)
+        # We want log(p_true/p_false). If not using logistic regression but softmax,
+        # then p_true = exp(logit_true) / Z; p_false = exp(logit_false) / Z
+        # so log(p_true/p_false) = logit_true - logit_false
+        tc_loss = (d_z[:, 0] - d_z[:, 1]).mean()
+        # with sigmoid (not good results) it should be `tc_loss = (2 * d_z.flatten()).mean()`
+
+        anneal_reg = (linear_annealing(0, 1, self.n_train_steps, self.steps_anneal)
+                      if model.training else 1)
+        vae_loss = rec_loss + kl_loss + anneal_reg * self.gamma * tc_loss
+
+        if storer is not None:
+            storer['loss'].append(vae_loss.item())
+            storer['tc_loss'].append(tc_loss.item())
+
+        if not model.training:
+            # don't backprop if evaluating
+            return vae_loss
+
+        # Compute VAE gradients
+        optimizer.zero_grad()
+        vae_loss.backward(retain_graph=True)
+
+        # Discriminator Loss
+        # Get second sample of latent distribution
+        latent_sample2 = model.sample_latent(data2)
+        z_perm = _permute_dims(latent_sample2).detach()
+        d_z_perm = self.discriminator(z_perm)
+
+        # Calculate total correlation loss
+        # for cross entropy the target is the index => needs to be long, and says
+        # that it's the first output for d_z and the second for perm
+        ones = torch.ones(half_batch_size, dtype=torch.long, device=self.device)
+        zeros = torch.zeros_like(ones)
+        d_tc_loss = 0.5 * (F.cross_entropy(d_z, zeros) + F.cross_entropy(d_z_perm, ones))
+        # with sigmoid it would be:
+        # d_tc_loss = 0.5 * (self.bce(d_z.flatten(), ones) + self.bce(d_z_perm.flatten(), 1 - ones))
+
+        # TO-DO: check if we should also anneal the discriminator so it does not become too good ???
+        # d_tc_loss = anneal_reg * d_tc_loss
+
+        # Compute discriminator gradients
+        self.optimizer_d.zero_grad()
+        d_tc_loss.backward()
+
+        # Update at the end (since pytorch 1.5 complains if updating before)
+        optimizer.step()
+        self.optimizer_d.step()
+
+        if storer is not None:
+            storer['discrim_loss'].append(d_tc_loss.item())
+
+        return vae_loss
+
+
+class BtcvaeLoss(BaseLoss):
+    """
+    Compute the decomposed KL loss with either minibatch weighted sampling or
+    minibatch stratified sampling according to [1]
+
+    Parameters
+    ----------
+    n_data: int
+        Number of data points in the training set
+
+    alpha : float
+        Weight of the mutual information term.
+
+    beta : float
+        Weight of the total correlation term.
+
+    gamma : float
+        Weight of the dimension-wise KL term.
+
+    is_mss : bool
+        Whether to use minibatch stratified sampling instead of minibatch
+        weighted sampling.
+
+    kwargs:
+        Additional arguments for `BaseLoss`, e.g. `rec_dist`.
+
+    References
+    ----------
+    [1] Chen, Tian Qi, et al. "Isolating sources of disentanglement in variational
+        autoencoders." Advances in Neural Information Processing Systems. 2018.
+    """
+
+    def __init__(self, n_data, alpha=1., beta=6., gamma=1., is_mss=True, **kwargs):
+        super().__init__(**kwargs)
+        self.n_data = n_data
+        self.beta = beta
+        self.alpha = alpha
+        self.gamma = gamma
+        self.is_mss = is_mss  # minibatch stratified sampling
+
+    def __call__(self, data, recon_batch, latent_dist, is_train, storer,
+                 latent_sample=None):
+        storer = self._pre_call(is_train, storer)
+        batch_size, latent_dim = latent_sample.shape
+
+        rec_loss = _reconstruction_loss(data, recon_batch,
+                                        storer=storer,
+                                        distribution=self.rec_dist)
+        log_pz, log_qz, log_prod_qzi, log_q_zCx = _get_log_pz_qz_prodzi_qzCx(latent_sample,
+                                                                             latent_dist,
+                                                                             self.n_data,
+                                                                             is_mss=self.is_mss)
+        # I[z;x] = KL[q(z,x)||q(x)q(z)] = E_x[KL[q(z|x)||q(z)]]
+        mi_loss = (log_q_zCx - log_qz).mean()
+        # TC[z] = KL[q(z)||\prod_i z_i]
+        tc_loss = (log_qz - log_prod_qzi).mean()
+        # dw_kl_loss is KL[q(z)||p(z)] instead of the usual KL[q(z|x)||p(z)]
+        dw_kl_loss = (log_prod_qzi - log_pz).mean()
+
+        anneal_reg = (linear_annealing(0, 1, self.n_train_steps, self.steps_anneal)
+                      if is_train else 1)
+
+        # total loss
+        loss = rec_loss + (self.alpha * mi_loss +
+                           self.beta * tc_loss +
+                           anneal_reg * self.gamma * dw_kl_loss)
+
+        if storer is not None:
+            storer['loss'].append(loss.item())
+            storer['mi_loss'].append(mi_loss.item())
+            storer['tc_loss'].append(tc_loss.item())
+            storer['dw_kl_loss'].append(dw_kl_loss.item())
+            # computing this for storing and comparison purposes
+            _ = _kl_normal_loss(*latent_dist, storer)
+
+        return loss
+
+
+def _reconstruction_loss(data, recon_data, distribution="bernoulli", storer=None):
+    """
+    Calculates the per-image reconstruction loss for a batch of data, i.e. the
+    negative log likelihood.
+
+    Parameters
+    ----------
+    data : torch.Tensor
+        Input data (e.g. batch of images). Shape : (batch_size, n_chan,
+        height, width).
+
+    recon_data : torch.Tensor
+        Reconstructed data. Shape : (batch_size, n_chan, height, width).
+
+    distribution : {"bernoulli", "gaussian", "laplace"}
+        Distribution of the likelihood on each pixel. Implicitly defines the
+        loss. Bernoulli corresponds to a binary cross entropy (BCE) loss and is the
+        most commonly used. It has the issue that it doesn't penalize
+        (0.1, 0.2) and (0.4, 0.5) the same way, which might not be optimal. Gaussian
+        distribution corresponds to MSE, and is sometimes used, but is hard to train
+        because it ends up focusing on only a few pixels that are very wrong. Laplace
+        distribution corresponds to L1 and partially solves the issue of MSE.
+
+    storer : dict
+        Dictionary in which to store important variables for vizualisation.
+
+    Returns
+    -------
+    loss : torch.Tensor
+        Per-image cross entropy (i.e. normalized per batch but not per pixel and
+        channel)
+    """
+    batch_size, n_chan, height, width = recon_data.size()
+    is_colored = n_chan == 3
+
+    if distribution == "bernoulli":
+        loss = F.binary_cross_entropy(recon_data, data, reduction="sum")
+    elif distribution == "gaussian":
+        # loss in [0,255] space but normalized by 255 to not be too big
+        loss = F.mse_loss(recon_data * 255, data * 255, reduction="sum") / 255
+    elif distribution == "laplace":
+        # loss in [0,255] space but normalized by 255 to not be too big, but
+        # multiplying by 255 and dividing by 255 is the same as doing nothing for L1
+        loss = F.l1_loss(recon_data, data, reduction="sum")
+        loss = loss * 3  # empirical value to give values similar to bernoulli => use same hyperparam
+        loss = loss * (loss != 0)  # masking to avoid nan
+    else:
+        assert distribution not in RECON_DIST
+        raise ValueError("Unknown distribution: {}".format(distribution))
+
+    loss = loss / batch_size
+
+    if storer is not None:
+        storer['recon_loss'].append(loss.item())
+
+    return loss
+
+
+def _kl_normal_loss(mean, logvar, storer=None):
+    """
+    Calculates the KL divergence between a normal distribution
+    with diagonal covariance and a unit normal distribution.
+
+    Parameters
+    ----------
+    mean : torch.Tensor
+        Mean of the normal distribution. Shape (batch_size, latent_dim).
+
+    logvar : torch.Tensor
+        Diagonal log variance of the normal distribution. Shape (batch_size,
+        latent_dim)
+
+    storer : dict
+        Dictionary in which to store important variables for vizualisation.
+    """
+    latent_dim = mean.size(1)
+    # batch mean of kl for each latent dimension
+    latent_kl = 0.5 * (-1 - logvar + mean.pow(2) + logvar.exp()).mean(dim=0)
+    total_kl = latent_kl.sum()
+
+    if storer is not None:
+        storer['kl_loss'].append(total_kl.item())
+        for i in range(latent_dim):
+            storer['kl_loss_' + str(i)].append(latent_kl[i].item())
+
+    return total_kl
+
+
+def _permute_dims(latent_sample):
+    """
+    Implementation of Algorithm 1 in ref [1]. Randomly permutes the sample from
+    q(z) (latent_dist) across the batch for each of the latent dimensions (mean
+    and log_var).
+
+    Parameters
+    ----------
+    latent_sample: torch.Tensor
+        sample from the latent dimension using the reparameterisation trick
+        shape : (batch_size, latent_dim).
+
+    References
+    ----------
+    [1] Kim, Hyunjik, and Andriy Mnih. "Disentangling by factorising."
+        arXiv preprint arXiv:1802.05983 (2018).
+    """
+    perm = torch.zeros_like(latent_sample)
+    batch_size, dim_z = perm.size()
+
+    for z in range(dim_z):
+        pi = torch.randperm(batch_size).to(latent_sample.device)
+        perm[:, z] = latent_sample[pi, z]
+
+    return perm
+
+
+def linear_annealing(init, fin, step, annealing_steps):
+    """Linear annealing of a parameter."""
+    if annealing_steps == 0:
+        return fin
+    assert fin > init
+    delta = fin - init
+    annealed = min(init + delta * step / annealing_steps, fin)
+    return annealed
+
+
+# Batch TC specific
+# TO-DO: test if mss is better!
+def _get_log_pz_qz_prodzi_qzCx(latent_sample, latent_dist, n_data, is_mss=True):
+    batch_size, hidden_dim = latent_sample.shape
+
+    # calculate log q(z|x)
+    log_q_zCx = log_density_gaussian(latent_sample, *latent_dist).sum(dim=1)
+
+    # calculate log p(z)
+    # mean and log var is 0
+    zeros = torch.zeros_like(latent_sample)
+    log_pz = log_density_gaussian(latent_sample, zeros, zeros).sum(1)
+
+    mat_log_qz = matrix_log_density_gaussian(latent_sample, *latent_dist)
+
+    if is_mss:
+        # use stratification
+        log_iw_mat = log_importance_weight_matrix(batch_size, n_data).to(latent_sample.device)
+        mat_log_qz = mat_log_qz + log_iw_mat.view(batch_size, batch_size, 1)
+
+    log_qz = torch.logsumexp(mat_log_qz.sum(2), dim=1, keepdim=False)
+    log_prod_qzi = torch.logsumexp(mat_log_qz, dim=1, keepdim=False).sum(1)
+
+    return log_pz, log_qz, log_prod_qzi, log_q_zCx
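
A construction sketch for the "btcvae" branch of get_loss_f above; every keyword below is read by that branch (other branches need other keys, e.g. "factor" also reads device, latent_dim, and lr_disc). The concrete values are illustrative:

from disvae.models.losses import get_loss_f

loss_f = get_loss_f(
    "btcvae",
    n_data=200000,                       # size of the training set
    rec_dist="bernoulli",                # reconstruction term: binary cross entropy
    reg_anneal=0,                        # no annealing of the regularizer
    btcvae_A=1, btcvae_B=6, btcvae_G=1,  # alpha / beta / gamma weights
)
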
disvae/models/vae.py
ADDED
@@ -0,0 +1,101 @@
"""
Module containing the main VAE class.
"""
import torch
from torch import nn, optim
from torch.nn import functional as F

from disvae.utils.initialization import weights_init
from .encoders import get_encoder
from .decoders import get_decoder

MODELS = ["Burgess"]


def init_specific_model(model_type, img_size, latent_dim):
    """Return an instance of a VAE with encoder and decoder from `model_type`."""
    model_type = model_type.lower().capitalize()
    if model_type not in MODELS:
        err = "Unknown model_type={}. Possible values: {}"
        raise ValueError(err.format(model_type, MODELS))

    encoder = get_encoder(model_type)
    decoder = get_decoder(model_type)
    model = VAE(img_size, encoder, decoder, latent_dim)
    model.model_type = model_type  # store to help reloading
    return model


class VAE(nn.Module):
    def __init__(self, img_size, encoder, decoder, latent_dim):
        """
        Class which defines the model and forward pass.

        Parameters
        ----------
        img_size : tuple of ints
            Size of images. E.g. (1, 32, 32) or (3, 64, 64).
        """
        super(VAE, self).__init__()

        if list(img_size[1:]) not in [[32, 32], [64, 64]]:
            raise RuntimeError(
                "{} sized images not supported. Only (None, 32, 32) and "
                "(None, 64, 64) supported. Build your own architecture or "
                "reshape images!".format(img_size)
            )

        self.latent_dim = latent_dim
        self.img_size = img_size
        self.num_pixels = self.img_size[1] * self.img_size[2]
        self.encoder = encoder(img_size, self.latent_dim)
        self.decoder = decoder(img_size, self.latent_dim)

        self.reset_parameters()

    def reparameterize(self, mean, logvar):
        """
        Samples from a normal distribution using the reparameterization trick.

        Parameters
        ----------
        mean : torch.Tensor
            Mean of the normal distribution. Shape (batch_size, latent_dim)

        logvar : torch.Tensor
            Diagonal log variance of the normal distribution. Shape
            (batch_size, latent_dim)
        """
        if self.training:
            std = torch.exp(0.5 * logvar)
            eps = torch.randn_like(std)
            return mean + std * eps
        else:
            # Reconstruction mode
            return mean

    def forward(self, x):
        """
        Forward pass of the model.

        Parameters
        ----------
        x : torch.Tensor
            Batch of data. Shape (batch_size, n_chan, height, width)
        """
        latent_dist = self.encoder(x)
        latent_sample = self.reparameterize(*latent_dist)
        reconstruct = self.decoder(latent_sample)
        return reconstruct, latent_dist, latent_sample

    def reset_parameters(self):
        self.apply(weights_init)

    def sample_latent(self, x):
        """
        Returns a sample from the latent distribution.

        Parameters
        ----------
        x : torch.Tensor
            Batch of data. Shape (batch_size, n_chan, height, width)
        """
        latent_dist = self.encoder(x)
        latent_sample = self.reparameterize(*latent_dist)
        return latent_sample
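A minimal smoke-test sketch for the class above (the (3, 64, 64) input size and the latent dimension are illustrative assumptions, not part of the commit):

import torch
from disvae.models.vae import init_specific_model

model = init_specific_model("Burgess", img_size=(3, 64, 64), latent_dim=10)
x = torch.randn(8, 3, 64, 64)        # dummy batch
recon, (mean, logvar), z = model(x)  # forward returns all three
assert z.shape == (8, 10)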
disvae/training.py
ADDED
@@ -0,0 +1,212 @@
# import imageio
import logging
import os
from collections import defaultdict
from timeit import default_timer

import torch
from torch.nn import functional as F
from tqdm import trange

from disvae.utils.modelIO import save_model

TRAIN_LOSSES_LOGFILE = "train_losses.log"


class Trainer:
    """
    Class to handle training of the model.

    Parameters
    ----------
    model: disvae.vae.VAE

    optimizer: torch.optim.Optimizer

    loss_f: disvae.models.BaseLoss
        Loss function.

    device: torch.device, optional
        Device on which to run the code.

    logger: logging.Logger, optional
        Logger.

    save_dir : str, optional
        Directory for saving logs.

    gif_visualizer : viz.Visualizer, optional
        Gif visualizer that should return samples at every epoch.

    is_progress_bar: bool, optional
        Whether to use a progress bar for training.
    """

    def __init__(
        self,
        model,
        optimizer,
        loss_f,
        device=torch.device("cpu"),
        logger=logging.getLogger(__name__),
        save_dir="results",
        gif_visualizer=None,
        is_progress_bar=True,
    ):
        self.device = device
        self.model = model.to(self.device)
        self.loss_f = loss_f
        self.optimizer = optimizer
        self.save_dir = save_dir
        self.is_progress_bar = is_progress_bar
        self.logger = logger
        self.losses_logger = LossesLogger(
            os.path.join(self.save_dir, TRAIN_LOSSES_LOGFILE)
        )
        self.gif_visualizer = gif_visualizer
        self.logger.info("Training Device: {}".format(self.device))

    def __call__(self, data_loader, epochs=10, checkpoint_every=10):
        """
        Trains the model.

        Parameters
        ----------
        data_loader: torch.utils.data.DataLoader

        epochs: int, optional
            Number of epochs to train the model for.

        checkpoint_every: int, optional
            Save a checkpoint of the trained model every n epochs.
        """
        start = default_timer()
        self.model.train()
        for epoch in range(epochs):
            storer = defaultdict(list)
            mean_epoch_loss = self._train_epoch(data_loader, storer, epoch)
            self.logger.info(
                "Epoch: {} Average loss per image: {:.2f}".format(
                    epoch + 1, mean_epoch_loss
                )
            )
            self.losses_logger.log(epoch, storer)

            if self.gif_visualizer is not None:
                self.gif_visualizer()

            if epoch % checkpoint_every == 0:
                save_model(
                    self.model, self.save_dir, filename="model-{}.pt".format(epoch)
                )

        if self.gif_visualizer is not None:
            self.gif_visualizer.save_reset()

        self.model.eval()

        delta_time = (default_timer() - start) / 60
        self.logger.info("Finished training after {:.1f} min.".format(delta_time))

    def _train_epoch(self, data_loader, storer, epoch):
        """
        Trains the model for one epoch.

        Parameters
        ----------
        data_loader: torch.utils.data.DataLoader

        storer: dict
            Dictionary in which to store important variables for visualization.

        epoch: int
            Epoch number

        Return
        ------
        mean_epoch_loss: float
            Mean loss per image
        """
        epoch_loss = 0.0
        kwargs = dict(
            desc="Epoch {}".format(epoch + 1),
            leave=False,
            disable=not self.is_progress_bar,
        )
        with trange(len(data_loader), **kwargs) as t:
            for _, (data, _) in enumerate(data_loader):
                iter_loss = self._train_iteration(data, storer)
                epoch_loss += iter_loss

                t.set_postfix(loss=iter_loss)
                t.update()

        mean_epoch_loss = epoch_loss / len(data_loader)
        return mean_epoch_loss

    def _train_iteration(self, data, storer):
        """
        Trains the model for one iteration on a batch of data.

        Parameters
        ----------
        data: torch.Tensor
            A batch of data. Shape : (batch_size, channel, height, width).

        storer: dict
            Dictionary in which to store important variables for visualization.
        """
        batch_size, channel, height, width = data.size()
        data = data.to(self.device)

        try:
            recon_batch, latent_dist, latent_sample = self.model(data)
            loss = self.loss_f(
                data,
                recon_batch,
                latent_dist,
                self.model.training,
                storer,
                latent_sample=latent_sample,
            )
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        except ValueError:
            # for losses that use multiple optimizers (e.g. Factor)
            loss = self.loss_f.call_optimize(data, self.model, self.optimizer, storer)

        return loss.item()


class LossesLogger(object):
    """Class definition for objects that write data to log files in a
    form that is easy to plot.
    """

    def __init__(self, file_path_name):
        """Create a logger to store information for plotting."""
        if os.path.isfile(file_path_name):
            os.remove(file_path_name)

        self.logger = logging.getLogger("losses_logger")
        self.logger.setLevel(1)  # always store
        file_handler = logging.FileHandler(file_path_name)
        file_handler.setLevel(1)
        self.logger.addHandler(file_handler)

        header = ",".join(["Epoch", "Loss", "Value"])
        self.logger.debug(header)

    def log(self, epoch, losses_storer):
        """Write to the log file."""
        for k, v in losses_storer.items():
            log_string = ",".join(str(item) for item in [epoch, k, mean(v)])
            self.logger.debug(log_string)


# HELPERS
def mean(l):
    """Compute the mean of a list."""
    return sum(l) / len(l)
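A hedged wiring sketch for the trainer (the optimizer choice, learning rate, `loss_f`, and `data_loader` are assumptions, not part of the commit):

import torch
from disvae.training import Trainer

optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)  # `model` assumed
trainer = Trainer(model, optimizer, loss_f,                # `loss_f` assumed
                  device=torch.device("cpu"), save_dir="results")
trainer(data_loader, epochs=10, checkpoint_every=5)        # `data_loader` assumed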
disvae/utils/__init__.py
ADDED
File without changes
disvae/utils/initialization.py
ADDED
@@ -0,0 +1,61 @@
import torch
from torch import nn


def get_activation_name(activation):
    """Given a string or a `torch.nn.modules.activation` return the name of the activation."""
    if isinstance(activation, str):
        return activation

    mapper = {nn.LeakyReLU: "leaky_relu", nn.ReLU: "relu", nn.Tanh: "tanh",
              nn.Sigmoid: "sigmoid", nn.Softmax: "sigmoid"}
    for k, v in mapper.items():
        if isinstance(activation, k):
            return v  # fixed: return the name, not the class

    raise ValueError("Unknown activation type: {}".format(activation))


def get_gain(activation):
    """Given an object of `torch.nn.modules.activation` or an activation name
    return the correct gain."""
    if activation is None:
        return 1

    activation_name = get_activation_name(activation)

    param = None if activation_name != "leaky_relu" else activation.negative_slope
    gain = nn.init.calculate_gain(activation_name, param)

    return gain


def linear_init(layer, activation="relu"):
    """Initialize a linear layer.

    Args:
        layer (nn.Linear): parameters to initialize.
        activation (`torch.nn.modules.activation` or str, optional): activation
            that will be used on the `layer`.
    """
    x = layer.weight

    if activation is None:
        return nn.init.xavier_uniform_(x)

    activation_name = get_activation_name(activation)

    if activation_name == "leaky_relu":
        a = 0 if isinstance(activation, str) else activation.negative_slope
        return nn.init.kaiming_uniform_(x, a=a, nonlinearity='leaky_relu')
    elif activation_name == "relu":
        return nn.init.kaiming_uniform_(x, nonlinearity='relu')
    elif activation_name in ["sigmoid", "tanh"]:
        return nn.init.xavier_uniform_(x, gain=get_gain(activation))


def weights_init(module):
    if isinstance(module, torch.nn.modules.conv._ConvNd):
        # TODO: check literature
        linear_init(module)
    elif isinstance(module, nn.Linear):
        linear_init(module)
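A short sketch of the intended entry point: `weights_init` is applied recursively via `Module.apply`, as `VAE.reset_parameters` does above (the toy module is illustrative):

import torch.nn as nn
from disvae.utils.initialization import weights_init

net = nn.Sequential(nn.Conv2d(3, 32, kernel_size=4), nn.Linear(32, 10))
net.apply(weights_init)  # Kaiming-uniform init for conv and linear weights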
disvae/utils/math.py
ADDED
@@ -0,0 +1,73 @@
import math

from tqdm import trange, tqdm
import torch


def matrix_log_density_gaussian(x, mu, logvar):
    """Calculates the log density of a Gaussian for all combinations of batch
    pairs of `x` and `mu`, i.e. returns a tensor of shape
    `(batch_size, batch_size, dim)` instead of `(batch_size, dim)` as in the
    usual log density.

    Parameters
    ----------
    x: torch.Tensor
        Value at which to compute the density. Shape: (batch_size, dim).

    mu: torch.Tensor
        Mean. Shape: (batch_size, dim).

    logvar: torch.Tensor
        Log variance. Shape: (batch_size, dim).
    """
    batch_size, dim = x.shape
    x = x.view(batch_size, 1, dim)
    mu = mu.view(1, batch_size, dim)
    logvar = logvar.view(1, batch_size, dim)
    return log_density_gaussian(x, mu, logvar)


def log_density_gaussian(x, mu, logvar):
    """Calculates the log density of a Gaussian.

    Parameters
    ----------
    x: torch.Tensor or np.ndarray or float
        Value at which to compute the density.

    mu: torch.Tensor or np.ndarray or float
        Mean.

    logvar: torch.Tensor or np.ndarray or float
        Log variance.
    """
    normalization = -0.5 * (math.log(2 * math.pi) + logvar)
    inv_var = torch.exp(-logvar)
    log_density = normalization - 0.5 * ((x - mu)**2 * inv_var)
    return log_density


def log_importance_weight_matrix(batch_size, dataset_size):
    """
    Calculates a log importance weight matrix.

    Parameters
    ----------
    batch_size: int
        number of training images in the batch

    dataset_size: int
        number of training images in the dataset
    """
    N = dataset_size
    M = batch_size - 1
    strat_weight = (N - M) / (N * M)
    W = torch.Tensor(batch_size, batch_size).fill_(1 / M)
    W.view(-1)[::M + 1] = 1 / N
    W.view(-1)[1::M + 1] = strat_weight
    W[M - 1, 0] = strat_weight
    return W.log()
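A sanity-check sketch, added for illustration (not part of the commit): `log_density_gaussian` should agree with `torch.distributions.Normal.log_prob` when the standard deviation is `exp(logvar / 2)`:

import torch
from disvae.utils.math import log_density_gaussian

x, mu, logvar = torch.randn(4, 2), torch.zeros(4, 2), torch.zeros(4, 2)
ref = torch.distributions.Normal(mu, (0.5 * logvar).exp()).log_prob(x)
assert torch.allclose(log_density_gaussian(x, mu, logvar), ref, atol=1e-6)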
disvae/utils/modelIO.py
ADDED
@@ -0,0 +1,200 @@
import json
import os
import re
from pathlib import Path

import numpy as np
import torch

from disvae.models.vae import init_specific_model

MODEL_FILENAME = "model.pt"
META_FILENAME = "specs.json"


def vae2onnx(vae, p_out: str) -> None:
    if isinstance(p_out, str):  # fixed: the path, not the model, may be a string
        p_out = Path(p_out)
    if not p_out.exists():
        p_out.mkdir()

    device = next(vae.parameters()).device
    vae.cpu()

    # Encoder
    vae.encoder.eval()
    dummy_input_im = torch.zeros(tuple(np.concatenate([[1], vae.img_size])))
    torch.onnx.export(vae.encoder, dummy_input_im, p_out / "encoder.onnx", verbose=True)

    # Decoder
    vae.decoder.eval()
    dummy_input_latent = torch.zeros((1, vae.latent_dim))
    torch.onnx.export(
        vae.decoder, dummy_input_latent, p_out / "decoder.onnx", verbose=True
    )

    vae.to(device)  # restore device


def save_model(model, directory, metadata=None, filename=MODEL_FILENAME):
    """
    Save a model and corresponding metadata.

    Parameters
    ----------
    model : nn.Module
        Model.

    directory : str
        Path to the directory where to save the data.

    metadata : dict
        Metadata to save.
    """
    device = next(model.parameters()).device
    model.cpu()

    if metadata is None:
        # save the minimum required for loading
        metadata = dict(
            img_size=model.img_size,
            latent_dim=model.latent_dim,
            model_type=model.model_type,
        )

    save_metadata(metadata, directory)

    path_to_model = os.path.join(directory, filename)
    torch.save(model.state_dict(), path_to_model)

    model.to(device)  # restore device


def load_metadata(directory, filename=META_FILENAME):
    """Load the metadata of a training directory.

    Parameters
    ----------
    directory : string
        Path to folder where the model is saved. For example './experiments/mnist'.
    """
    path_to_metadata = os.path.join(directory, filename)

    with open(path_to_metadata) as metadata_file:
        metadata = json.load(metadata_file)

    return metadata


def save_metadata(metadata, directory, filename=META_FILENAME, **kwargs):
    """Save the metadata of a training directory.

    Parameters
    ----------
    metadata:
        Object to save

    directory: string
        Path to folder where to save the model. For example './experiments/mnist'.

    kwargs:
        Additional arguments to `json.dump`
    """
    path_to_metadata = os.path.join(directory, filename)

    with open(path_to_metadata, "w") as f:
        json.dump(metadata, f, indent=4, sort_keys=True, **kwargs)


def load_model(directory, is_gpu=True, filename=MODEL_FILENAME):
    """Load a trained model.

    Parameters
    ----------
    directory : string
        Path to folder where the model is saved. For example './experiments/mnist'.

    is_gpu : bool
        Whether to load on GPU if available.
    """
    device = torch.device("cuda" if torch.cuda.is_available() and is_gpu else "cpu")

    metadata = load_metadata(directory)
    img_size = metadata["img_size"]
    latent_dim = metadata["latent_dim"]
    model_type = metadata["model_type"]

    path_to_model = os.path.join(directory, filename)
    model = _get_model(model_type, img_size, latent_dim, device, path_to_model)
    return model


def load_checkpoints(directory, is_gpu=True):
    """Load all checkpointed models.

    Parameters
    ----------
    directory : string
        Path to folder where the models are saved. For example './experiments/mnist'.

    is_gpu : bool
        Whether to load on GPU.
    """
    checkpoints = []
    for root, _, filenames in os.walk(directory):
        for filename in filenames:
            results = re.search(r".*?-([0-9].*?).pt", filename)
            if results is not None:
                epoch_idx = int(results.group(1))
                model = load_model(root, is_gpu=is_gpu, filename=filename)
                checkpoints.append((epoch_idx, model))

    return checkpoints


def _get_model(model_type, img_size, latent_dim, device, path_to_model):
    """Load a single model.

    Parameters
    ----------
    model_type : str
        The name of the model to load. For example Burgess.
    img_size : tuple
        Tuple of the number of pixels in the image width and height.
        For example (32, 32) or (64, 64).
    latent_dim : int
        The number of latent dimensions in the bottleneck.

    device : str
        Either 'cuda' or 'cpu'
    path_to_model : str
        Full path to the saved model on the device.
    """
    model = init_specific_model(model_type, img_size, latent_dim).to(device)
    # works with state_dict to make it independent of the file structure
    model.load_state_dict(torch.load(path_to_model), strict=False)
    model.eval()

    return model


def numpy_serialize(obj):
    if type(obj).__module__ == np.__name__:
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return obj.item()
    raise TypeError("Unknown type:", type(obj))


def save_np_arrays(arrays, directory, filename):
    """Save a dictionary of arrays in a json file."""
    save_metadata(arrays, directory, filename=filename, default=numpy_serialize)


def load_np_arrays(directory, filename):
    """Load a dictionary of arrays from a json file."""
    arrays = load_metadata(directory, filename=filename)
    return {k: np.array(v) for k, v in arrays.items()}
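A round-trip sketch for the I/O helpers (the directory name is an assumption; `model` must carry `img_size`, `latent_dim`, and `model_type`, as models built via `init_specific_model` do):

from disvae.utils.modelIO import save_model, load_model

save_model(model, "results/demo")                    # writes model.pt + specs.json
restored = load_model("results/demo", is_gpu=False)  # rebuilds from specs.json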
requirements.txt
ADDED
@@ -0,0 +1,11 @@
numpy
#pandas
#plotly
# scipy
tqdm  # needed by disvae.training and disvae.utils.math
# pillow

# ipywidgets
# jupyterlab

torch
transforms.py
ADDED
@@ -0,0 +1,168 @@
"""
All transforms operate on batches by default!
VAE transforms are invertible.
"""
import pickle
from dataclasses import dataclass
from functools import partial, reduce, wraps

import numpy as np
import torch

# General functions -------------------------------------------------------------
# Transformations are easiest to do in PyTorch.


def load(p):
    with open(p, "rb") as stream:
        return pickle.load(stream)


def save(obj, p):
    with open(p, "wb") as stream:
        pickle.dump(obj, stream)


def sequential_function(*functions):
    return lambda x: reduce(lambda res, func: func(res), functions, x)


def np_sample(func):
    rtn = sequential_function(
        lambda x: torch.from_numpy(x).float(),
        lambda x: torch.unsqueeze(x, 0),
        func,
        lambda x: x[0].numpy(),
    )
    return rtn


# Invertible
class SequentialInversable(torch.nn.Sequential):
    def __init__(self, *functions):
        super().__init__(*functions)

        self.inv_funcs = [f.inv for f in functions]
        self.inv_funcs.reverse()

    # def forward(self, x):
    #     return sequential_function(*self.functions)(x)

    def inv(self, x):
        return sequential_function(*self.inv_funcs)(x)


class LatentSelector(torch.nn.Module):
    """Handles tensors and numpy arrays."""

    def __init__(self, ldim: int, selectdim: int):
        super().__init__()
        self.ldim = ldim
        self.selectdim = selectdim

    def forward(self, x: torch.Tensor):
        return x[:, : self.selectdim]

    def inv(self, x: torch.Tensor):
        rtn = torch.cat(
            [x, torch.zeros((x.shape[0], self.ldim - x.shape[1]), device=x.device)],
            dim=1,
        )
        return rtn


class MinMaxScaler(torch.nn.Module):
    #! Careful with broadcasting when scaling multiple signals.
    def __init__(
        self,
        _min: torch.Tensor,
        _max: torch.Tensor,
        min_norm: float = 0.0,
        max_norm: float = 1.0,
    ):
        super().__init__()
        self._min = _min
        self._max = _max
        self.min_norm = min_norm
        self.max_norm = max_norm

    def forward(self, ts):
        """Shape: (None, no_signals)"""
        std = (ts - self._min) / (self._max - self._min)
        rtn = std * (self.max_norm - self.min_norm) + self.min_norm
        return rtn

    def inv(self, ts):
        std = (ts - self.min_norm) / (self.max_norm - self.min_norm)
        rtn = std * (self._max - self._min) + self._min
        return rtn

    @classmethod
    def from_array(cls, arr: torch.Tensor):
        _min = torch.min(arr, dim=0).values
        _max = torch.max(arr, dim=0).values

        return cls(_min, _max)


class LatentSorter(torch.nn.Module):
    def __init__(self, kl_dict: dict):
        super().__init__()
        self.kl_dict = kl_dict

    def forward(self, latent):
        """
        unsorted -> sorted
        latent: (None, latent_dim)
        """
        return latent[:, list(self.kl_dict.keys())]

    def inv(self, latent):
        keys = np.array(list(self.kl_dict.keys()))
        return latent[:, torch.from_numpy(keys.argsort())]

    @property
    def names(self):
        rtn = ["{} KL{:.2f}".format(k, v) for k, v in self.kl_dict.items()]
        return rtn


def apply_along_axis(function, x, axis: int = 0):
    return torch.stack([function(x_i) for x_i in torch.unbind(x, dim=axis)], dim=axis)


# Input shapes stay as they are!
class SumField(torch.nn.Module):
    """
    time series: [idx, time_step, signal]
    image: [idx, signal, time_step, time_step]
    """

    def forward(self, ts: torch.Tensor):
        """ts2img"""

        samples = ts.shape[0]
        time = ts.shape[1]
        channels = ts.shape[2]

        ts = torch.swapaxes(ts, 1, 2)  # move the time axis to the end
        ts = torch.reshape(
            ts, (samples * channels, time)
        )  # merge the channel and idx axes
        #! TODO: replace the loop with a vectorized solution
        rtn = apply_along_axis(self._mtf_forward, ts, 0)
        rtn = torch.reshape(rtn, (samples, channels, time, time))

        return rtn

    def inv(self, img: torch.Tensor):
        """img2ts"""
        rtn = torch.diagonal(img, dim1=2, dim2=3)
        rtn = torch.swapaxes(rtn, 1, 2)  # swap the channel and time axes

        return rtn

    @staticmethod
    def _mtf_forward(ts):
        """For a one-dimensional time series ts."""
        return torch.add(*torch.meshgrid(ts, ts, indexing="ij")) / 2
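A round-trip sketch for `SumField`, added for illustration (shapes are assumptions): the image's diagonal holds `(x_i + x_i) / 2 = x_i`, so `inv` recovers the original series exactly:

import torch

sf = SumField()
ts = torch.rand(2, 16, 3)  # [idx, time_step, signal]
img = sf(ts)               # -> [2, 3, 16, 16]
assert torch.allclose(sf.inv(img), ts)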