Samuel Mueller
working locally
f50f696
raw
history blame
3.65 kB
import random
import torch
from utils import set_locals_in_self
from itertools import repeat
from .prior import PriorDataLoader
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import scipy.stats as stats
def get_batch_to_dataloader(get_batch_method_):
    """Wrap a batch-sampling function in a ``PriorDataLoader`` subclass.

    Args:
        get_batch_method_: Callable returning a 3-tuple ``(x, y, target_y)``;
            it is invoked once per yielded batch with the keyword arguments
            given to the loader's constructor.

    Returns:
        The loader class itself (not an instance).
    """

    class DL(PriorDataLoader):
        get_batch_method = get_batch_method_

        # Caution: ``num_features`` / ``num_outputs`` have to exist as class
        # attributes already whenever they are not part of the keyword args.
        def __init__(self, num_steps, fuse_x_y=False, **get_batch_kwargs):
            # Keep this call first so ``locals()`` holds only the arguments.
            # Presumably it copies them onto ``self`` (the other methods read
            # ``self.num_steps``, ``self.fuse_x_y``, ``self.get_batch_kwargs``)
            # -- confirm against ``utils.set_locals_in_self``.
            set_locals_in_self(locals())
            # A truthy keyword value wins; otherwise fall back to the
            # attribute that was set on the class before instantiation.
            for attr in ('num_features', 'num_outputs'):
                setattr(self, attr, get_batch_kwargs.get(attr) or getattr(self, attr))
            print('DataLoader.__dict__', self.__dict__)

        @staticmethod
        def gbm(*args, fuse_x_y=True, **kwargs):
            """Sample one batch, optionally fusing the shifted ``y`` into ``x``."""
            x, y, target_y = get_batch_method_(*args, **kwargs)
            if not fuse_x_y:
                return (x, y), target_y
            # Shift ``y`` one step along dim 0 (a zero entry goes first, the
            # last entry is dropped), then append it to ``x`` as an extra
            # trailing feature.
            shifted_y = torch.cat([torch.zeros_like(y[:1]), y[:-1]], 0)
            fused = torch.cat([x, shifted_y.unsqueeze(-1).float()], -1)
            return fused, target_y

        def __len__(self):
            return self.num_steps

        def __iter__(self):
            # Lazily produces ``num_steps`` freshly sampled batches.
            return (
                self.gbm(**self.get_batch_kwargs, fuse_x_y=self.fuse_x_y)
                for _ in range(self.num_steps)
            )

    return DL
def plot_features(data, targets):
    """Draw a square grid of pairwise scatter plots over the columns of ``data``.

    Cell ``(i, j)`` plots column ``i`` against column ``j``; points are
    coloured by ``targets``.

    Args:
        data: 2-D array-like indexed as ``data[:, k]``, or a torch tensor.
        targets: Per-row colour values. Converted to numpy only when ``data``
            is a tensor, so it is assumed to be a tensor in that case too.
    """
    if torch.is_tensor(data):
        data, targets = data.detach().cpu().numpy(), targets.detach().cpu().numpy()

    n_cols = data.shape[1]
    figure = plt.figure(constrained_layout=True, figsize=(12, 12))
    grid = gridspec.GridSpec(ncols=n_cols, nrows=n_cols, figure=figure)
    for row in range(n_cols):
        for col in range(n_cols):
            axis = figure.add_subplot(grid[row, col])
            axis.scatter(data[:, row], data[:, col], c=targets[:])
def plot_prior(prior):
    """Show a 50-bin density histogram of 10000 draws from ``prior``.

    Args:
        prior: Zero-argument callable returning one sample per call.

    The smallest drawn sample is printed before the figure is shown.
    """
    samples = np.array([prior() for _ in range(10000)])
    plt.hist(samples, 50, density=True)
    print(samples.min())
    plt.show()
# Sampler factories: each takes distribution parameters and returns a
# zero-argument callable that draws one fresh sample per call.


def trunc_norm_sampler_f(mu, sigma):
    """Sampler for a normal(mu, sigma) truncated to the interval [0, 1]."""
    def sample():
        # scipy expects the truncation bounds in units of ``sigma`` around ``mu``.
        lower, upper = (0 - mu) / sigma, (1 - mu) / sigma
        return stats.truncnorm(lower, upper, loc=mu, scale=sigma).rvs(1)[0]
    return sample


def beta_sampler_f(a, b):
    """Sampler wrapping ``np.random.beta(a, b)``."""
    def sample():
        return np.random.beta(a, b)
    return sample


def gamma_sampler_f(a, b):
    """Sampler wrapping ``np.random.gamma(a, b)``."""
    def sample():
        return np.random.gamma(a, b)
    return sample


def uniform_sampler_f(a, b):
    """Sampler wrapping ``np.random.uniform(a, b)``."""
    def sample():
        return np.random.uniform(a, b)
    return sample


def uniform_int_sampler_f(a, b):
    """Sampler wrapping ``np.random.randint(a, b)``."""
    def sample():
        return np.random.randint(a, b)
    return sample


def zipf_sampler_f(a, b, c):
    """Sampler for ``b`` plus a Zipf(a) draw, capped at ``c``."""
    def sample():
        return min(b + np.random.zipf(a), c)
    return sample


def scaled_beta_sampler_f(a, b, scale, minimum):
    """Sampler mapping a Beta(a, b) draw to an integer offset above ``minimum``."""
    def sample():
        draw = beta_sampler_f(a, b)()
        # Stretch the draw over ``scale - minimum + 1`` units, then round
        # after subtracting one half.
        return minimum + round(draw * (scale - minimum + 1) - 0.5)
    return sample
def normalize_data(data):
    """Standardise ``data`` along its first axis.

    Subtracts the axis-0 mean and divides by the axis-0 standard deviation;
    a small constant (1e-6) is added to the divisor so that constant columns
    do not divide by zero.

    Args:
        data: Any object providing ``.mean(0)`` and ``.std(0)`` plus
            broadcasting arithmetic (e.g. a numpy array or torch tensor).

    Returns:
        The standardised data.
    """
    centre = data.mean(0)
    spread = data.std(0) + 1e-6
    return (data - centre) / spread
def normalize_by_used_features_f(x, num_features_used, num_features):
    """Scale ``x`` up by the inverse of the fraction of features in use.

    Args:
        x: Value (or array/tensor) to rescale.
        num_features_used: Number of features actually used.
        num_features: Total number of features.

    Returns:
        ``x`` divided by ``num_features_used / num_features``.
    """
    used_fraction = num_features_used / num_features
    return x / used_fraction
class Binarize(nn.Module):
    """Module that thresholds its input at the input's own median.

    Elements strictly greater than ``torch.median(x)`` become 1.0 and all
    others 0.0.
    """

    def __init__(self, p=0.5):
        super().__init__()
        # NOTE: ``p`` is stored but ``forward`` never reads it; the threshold
        # is always the median.
        self.p = p

    def forward(self, x):
        threshold = torch.median(x)
        above = x > threshold
        return above.float()
def order_by_y(x, y):
    """Reorder ``x`` and ``y`` along dim 0 by sorted ``y``, then interleave halves.

    The direction (ascending or descending) is chosen by one call to
    ``random.randint(0, 1)``. Only the ordering at index ``[:, 0, 0]`` of
    ``y`` is used, so ``y`` must have at least three dimensions. The sorted
    indices are split into two halves that are then interleaved, which
    requires an even length along dim 0.

    Args:
        x: Tensor indexed along dim 0 in parallel with ``y``.
        y: Tensor whose values define the ordering.

    Returns:
        Tuple ``(x, y)`` with both tensors permuted identically along dim 0.
    """
    ascending = random.randint(0, 1)
    sort_key = y if ascending else -y
    ranked = torch.argsort(sort_key, dim=0)[:, 0, 0]
    # Interleave: first half of the ranking alternates with the second half.
    halves = ranked.reshape(2, -1)
    interleaved = halves.transpose(0, 1).reshape(-1)
    return x[interleaved], y[interleaved]