Jacobellis Dan (dgj335) committed on
Commit 1a0f97c · 1 Parent(s): bfc89d2
README.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -2,15 +2,27 @@
  datasets:
  - danjacobellis/LSDIR_540
  ---

- ```python
- !pip install walloc PyWavelets pytorch-wavelets
- ```


- ```python
- !wget "https://r0k.us/graphics/kodak/kodak/kodim05.png"
- ```


  ```python
@@ -25,10 +37,14 @@ from walloc.walloc import Walloc
  class Args: pass
  ```


  ```python
  device = "cpu"
- checkpoint = torch.load("v0.6.1.pth",map_location="cpu")
  args = checkpoint['args']
  codec = Walloc(
      channels = args.channels,
@@ -41,6 +57,10 @@ codec.load_state_dict(checkpoint['model_state_dict'])
  codec = codec.to(device)
  ```


  ```python
  img = Image.open("kodim05.png")
@@ -51,11 +71,17 @@ img



- ![png](README_files/README_5_0.png)





  ```python
  with torch.no_grad():
@@ -70,11 +96,13 @@ ToPILImage()(x_hat[0]+0.5)



- ![png](README_files/README_6_0.png)





  ```python
  with torch.no_grad():
@@ -84,10 +112,10 @@ with torch.no_grad():
      X_hat = codec.decoder(Y)
      x_hat = codec.wavelet_synthesis(X_hat,J=codec.J)

- print(f"dimensionality reduction: {x.numel()/Y.numel()}x")
  ```

- dimensionality reduction: 12.0x


@@ -99,53 +127,121 @@ Y.unique()


  tensor([-15., -14., -13., -12., -11., -10., -9., -8., -7., -6., -5., -4.,
-         -3., -2., -1., 0., 1., 2., 3., 4., 5., 6., 7., 8.,
          9., 10., 11., 12., 13., 14., 15.])




  ```python
- plt.hist(Y.flatten().numpy(),range=(-17.5,17.5),bins=35);
  ```



- ![png](README_files/README_9_0.png)




  ```python
- grid_size = 4
- n_channels, H, W = Y[0].shape
- combined_image = Image.new('L', (W * grid_size, H * grid_size))
- size_bytes = 0
- for i, channel in enumerate(Y[0]):
-     channel = (channel+16).to(torch.uint8)
-     row = i // grid_size
-     col = i % grid_size
-     channel = ToPILImage()(channel)
-     combined_image.paste(channel, (col * W, row * H))
- combined_image
  ```





- ![png](README_files/README_10_0.png)





  ```python
- combined_image.save('tmp.png')
- print("compression_ratio: ", x.numel()/os.path.getsize("tmp.png"))
  ```

- compression_ratio: 20.792244646161983



@@ -155,5 +251,5 @@ print("compression_ratio: ", x.numel()/os.path.getsize("tmp.png"))

  [NbConvertApp] Converting notebook README.ipynb to markdown
  [NbConvertApp] Support files will be in README_files/
- [NbConvertApp] Writing 2620 bytes to README.md

 
  datasets:
  - danjacobellis/LSDIR_540
  ---
+ # Wavelet Learned Lossy Compression (WaLLoC)

+ WaLLoC sandwiches a convolutional autoencoder between time-frequency analysis and synthesis transforms using CDF 9/7 wavelet filters. The time-frequency transform increases the number of signal channels, but reduces the temporal or spatial resolution, resulting in lower GPU memory consumption and higher throughput. WaLLoC's training procedure is highly simplified compared to other $\beta$-VAEs, VQ-VAEs, and neural codecs, but still offers significant dimensionality reduction and compression. This makes it suitable for dataset storage and compressed-domain learning. It currently supports 2D signals (e.g. grayscale, RGB, or hyperspectral images). Support for 1D and 3D signals is in progress.
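
Below is a minimal, illustrative sketch (not the WaLLoC source) of the analysis/synthesis idea described above, using the `pytorch-wavelets` dependency with the `'bior4.4'` wavelet (PyWavelets' name for the CDF 9/7 filter pair); the input size and `J=3` are assumptions chosen for illustration.

```python
# Illustration only: a J-level 2D DWT trades spatial resolution for subbands,
# and the inverse (synthesis) transform restores the original resolution.
import torch
from pytorch_wavelets import DWTForward, DWTInverse

J = 3
dwt  = DWTForward(J=J, wave='bior4.4', mode='periodization')
idwt = DWTInverse(wave='bior4.4', mode='periodization')

x = torch.randn(1, 3, 512, 768)        # stand-in for an RGB image batch
yl, yh = dwt(x)                        # lowpass band + J levels of highpass bands
print(yl.shape)                        # (1, 3, 64, 96): 1/8 the input resolution
print([h.shape for h in yh])           # 3 orientation subbands per level
x_rec = idwt((yl, yh))                 # synthesis transform
print((x - x_rec).abs().max())         # near-zero reconstruction error
```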
 
+ ## Installation

+ 1. Follow the installation instructions for [torch](https://pytorch.org/get-started/locally/)
+ 2. Install WaLLoC and other dependencies via pip
+
+ ```pip install walloc PyWavelets pytorch-wavelets```
+
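
A quick way to confirm the install is to try the imports used later in this README (this smoke test is a suggestion, not part of the official instructions):

```python
# Smoke test: these imports should succeed after the pip install above.
import pywt                      # PyWavelets
import pytorch_wavelets
from walloc.walloc import Walloc
print(Walloc)
```
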
+ ## Pre-trained checkpoints
+
+ Pre-trained checkpoints are available on [Hugging Face](https://huggingface.co/danjacobellis/walloc).
+
+ ## Training
+
+ Access to training code is provided by request via [email.](mailto:[email protected])
+
+ ## Usage example


  ```python
  class Args: pass
  ```

+ ### Load the model from a pre-trained checkpoint
+
+ ```wget https://hf.co/danjacobellis/walloc/resolve/main/v0.6.3_ext.pth```
+

  ```python
  device = "cpu"
+ checkpoint = torch.load("v0.6.3_ext.pth",map_location="cpu")
  args = checkpoint['args']
  codec = Walloc(
      channels = args.channels,
  codec = codec.to(device)
  ```

+ ### Load an example image
+
+ ```wget "https://r0k.us/graphics/kodak/kodak/kodim05.png"```
+

  ```python
  img = Image.open("kodim05.png")



+ ![png](README_files/README_6_0.png)




+ ### Full encoding and decoding pipeline with .forward()
+
+ * If `codec.eval()` is called, the latent is rounded to the nearest integer.
+
+ * If `codec.train()` is called, uniform noise is added instead of rounding.
+
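
A rough sketch of the behaviour described by the two bullets above (a common rounding/additive-noise scheme, written here as an assumption for illustration rather than the WaLLoC implementation):

```python
# Illustration only: quantize by rounding at inference time, but add
# uniform noise in [-0.5, 0.5) during training so gradients can flow.
import torch
import torch.nn as nn

class RoundOrNoise(nn.Module):
    def forward(self, z):
        if self.training:
            return z + (torch.rand_like(z) - 0.5)  # train(): additive noise
        return torch.round(z)                      # eval(): hard rounding

q = RoundOrNoise()
z = 5 * torch.randn(1, 16, 8, 8)
print(q.train()(z).unique().numel())   # many distinct values (noisy)
print(q.eval()(z).unique().numel())    # few distinct integer values
```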
 
  ```python
  with torch.no_grad():



+ ![png](README_files/README_8_0.png)




+ ### Accessing latents
+

  ```python
  with torch.no_grad():
 
      X_hat = codec.decoder(Y)
      x_hat = codec.wavelet_synthesis(X_hat,J=codec.J)

+ print(f"dimensionality reduction: {x.numel()/Y.numel()}×")
  ```

+ dimensionality reduction: 12.0×
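
As a sanity check on that figure (the latent shape here is an assumption: 16 channels at one eighth of the input resolution, consistent with the 4×4 channel grid used for the PNG packing below):

```python
# Hypothetical shapes for kodim05.png (768x512 RGB) and a 16-channel latent
# at 1/8 the spatial resolution; these numbers are assumptions for illustration.
H, W = 512, 768
pixels  = 3 * H * W                  # 1,179,648 input samples
latents = 16 * (H // 8) * (W // 8)   #    98,304 latent elements
print(pixels / latents)              # 12.0
```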
 




  tensor([-15., -14., -13., -12., -11., -10., -9., -8., -7., -6., -5., -4.,
+         -3., -2., -1., -0., 1., 2., 3., 4., 5., 6., 7., 8.,
          9., 10., 11., 12., 13., 14., 15.])




  ```python
+ plt.figure(figsize=(5,3),dpi=150)
+ plt.hist(
+     Y.flatten().numpy(),
+     range=(-17.5,17.5),
+     bins=35,
+     density=True,
+     width=0.8);
+ plt.title("Histogram of latents")
+ plt.xticks(range(-15,16,5));
  ```



+ ![png](README_files/README_12_0.png)



+ ## Lossless compression of latents using PNG
+
+
+ ```python
+ def concatenate_channels(x):
+     batch_size, N, h, w = x.shape
+     n = int(N**0.5)
+     if n*n != N:
+         raise ValueError("Number of channels must be a perfect square.")
+
+     x = x.view(batch_size, n, n, h, w)
+     x = x.permute(0, 1, 3, 2, 4).contiguous()
+     x = x.view(batch_size, 1, n*h, n*w)
+     return x
+
+ def split_channels(x, N):
+     batch_size, _, H, W = x.shape
+     n = int(N**0.5)
+     h = H // n
+     w = W // n
+
+     x = x.view(batch_size, n, h, n, w)
+     x = x.permute(0, 1, 3, 2, 4).contiguous()
+     x = x.view(batch_size, N, h, w)
+     return x
+
+ def to_bytes(x, n_bits):
+     max_value = 2**(n_bits - 1) - 1
+     min_value = -max_value - 1
+     if x.min() < min_value or x.max() > max_value:
+         raise ValueError(f"Tensor values should be in the range [{min_value}, {max_value}].")
+     return (x + (max_value + 1)).to(torch.uint8)
+
+ def from_bytes(x, n_bits):
+     max_value = 2**(n_bits - 1) - 1
+     return (x.to(torch.float32) - (max_value + 1))
+
+ def latent_to_pil(latent, n_bits):
+     latent_bytes = to_bytes(latent, n_bits)
+     concatenated_latent = concatenate_channels(latent_bytes)
+
+     pil_images = []
+     for i in range(concatenated_latent.shape[0]):
+         pil_image = Image.fromarray(concatenated_latent[i][0].numpy(), mode='L')
+         pil_images.append(pil_image)
+
+     return pil_images
+
+ def pil_to_latent(pil_images, N, n_bits):
+     tensor_images = [PILToTensor()(img).unsqueeze(0) for img in pil_images]
+     tensor_images = torch.cat(tensor_images, dim=0)
+     split_latent = split_channels(tensor_images, N)
+     latent = from_bytes(split_latent, n_bits)
+     return latent
+ ```
+
 
211
  ```python
212
+ Y_pil = latent_to_pil(Y,5)
213
+ Y_pil[0]
 
 
 
 
 
 
 
 
 
214
  ```
215
 
216
 
217
 
218
 
219
 
220
+ ![png](README_files/README_15_0.png)
221
 
222
 
223
 
224
 
225
 
226
  ```python
227
+ Y_pil[0].save('latent.png')
228
+ print("compression_ratio: ", x.numel()/os.path.getsize("latent.png"))
229
  ```
230
 
231
+ compression_ratio: 20.307596963280485
232
+
233
+
234
+
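
For reference, that ratio corresponds to roughly 1.2 bits per pixel if the source is treated as 24-bit RGB (a rough, illustrative conversion using the `x` and `os` already imported above):

```python
# Convert the compression ratio above to bits per pixel (24-bit RGB source)
ratio = x.numel() / os.path.getsize("latent.png")
print(f"{24 / ratio:.2f} bpp")     # ~1.18 bpp
```
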
+ ```python
+ Y2 = pil_to_latent(Y_pil, 16, 5)
+ (Y == Y2).sum()/Y.numel()
+ ```
+
+
+
+
+ tensor(1.)
+




  [NbConvertApp] Converting notebook README.ipynb to markdown
  [NbConvertApp] Support files will be in README_files/
+ [NbConvertApp] Writing 5751 bytes to README.md

README_files/README_12_0.png ADDED
README_files/README_15_0.jpg ADDED
README_files/README_15_0.png ADDED
README_files/README_6_0.jpg CHANGED
README_files/README_6_0.png CHANGED
README_files/README_8_0.jpg ADDED
README_files/README_8_0.png CHANGED

Git LFS Details

  • SHA256: cb722ebdab9bf4ec5af4dc20a273ca50efcca97a5981e92f58196279277a7396
  • Pointer size: 130 Bytes
  • Size of remote file: 11.3 kB

Git LFS Details

  • SHA256: 136ab61fa2ec384a44024134ec2b36ef39c42451dffec4f912b9965e46028812
  • Pointer size: 131 Bytes
  • Size of remote file: 782 kB