JMalott committed on
Commit
c8a4377
·
1 Parent(s): 55c50e3

Update min_dalle/min_dalle.py

Browse files
Files changed (1) hide show
  1. min_dalle/min_dalle.py +22 -24
min_dalle/min_dalle.py CHANGED
@@ -39,10 +39,9 @@ class MinDalle:
39
  self.dtype = dtype
40
  self.is_verbose = is_verbose
41
  self.text_token_count = 64
42
- #Changed
43
- self.layer_count = 24 if is_mega else 6
44
- self.attention_head_count = 32 if is_mega else 8
45
- self.embed_count = 2048 if is_mega else 512
46
  self.glu_embed_count = 4096 if is_mega else 2730
47
  self.text_vocab_count = 50272 if is_mega else 50264
48
  self.image_vocab_count = 16415 if is_mega else 16384
@@ -238,29 +237,27 @@ class MinDalle:
238
  for i in range(IMAGE_TOKEN_COUNT):
239
  if(st.session_state.page != 0):
240
  break
241
-
242
  st.session_state.bar.progress(i/IMAGE_TOKEN_COUNT)
243
-
244
- #torch.cuda.empty_cache()
245
- #torch.cpu.empty_cache()
246
- #with torch.cuda.amp.autocast(dtype=self.dtype):
247
- image_tokens[i + 1], attention_state = self.decoder.forward(
248
- settings=settings,
249
- attention_mask=attention_mask,
250
- encoder_state=encoder_state,
251
- attention_state=attention_state,
252
- prev_tokens=image_tokens[i],
253
- token_index=token_indices[[i]]
254
- )
255
 
256
- # with torch.cuda.amp.autocast(dtype=torch.float32):
257
- if ((i + 1) % 32 == 0 and progressive_outputs) or i + 1 == 256:
258
- yield self.image_grid_from_tokens(
259
- image_tokens=image_tokens[1:].T,
260
- is_seamless=is_seamless,
261
- is_verbose=is_verbose
 
 
 
 
262
  )
263
-
 
 
 
 
 
 
 
264
 
265
  def generate_image_stream(self, *args, **kwargs) -> Iterator[Image.Image]:
266
  image_stream = self.generate_raw_image_stream(*args, **kwargs)
@@ -278,6 +275,7 @@ class MinDalle:
278
  image = image.reshape([grid_size ** 2, 2 ** 8, 2 ** 8, 3])
279
  yield image
280
 
 
281
  def generate_image(self, *args, **kwargs) -> Image.Image:
282
  image_stream = self.generate_image_stream(
283
  *args, **kwargs,
 
39
  self.dtype = dtype
40
  self.is_verbose = is_verbose
41
  self.text_token_count = 64
42
+ self.layer_count = 24 if is_mega else 12
43
+ self.attention_head_count = 32 if is_mega else 16
44
+ self.embed_count = 2048 if is_mega else 1024
 
45
  self.glu_embed_count = 4096 if is_mega else 2730
46
  self.text_vocab_count = 50272 if is_mega else 50264
47
  self.image_vocab_count = 16415 if is_mega else 16384
 
237
  for i in range(IMAGE_TOKEN_COUNT):
238
  if(st.session_state.page != 0):
239
  break
 
240
  st.session_state.bar.progress(i/IMAGE_TOKEN_COUNT)
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
+ torch.cuda.empty_cache()
243
+ #torch.cpu.empty_cache()
244
+ with torch.cuda.amp.autocast(dtype=self.dtype):
245
+ image_tokens[i + 1], attention_state = self.decoder.forward(
246
+ settings=settings,
247
+ attention_mask=attention_mask,
248
+ encoder_state=encoder_state,
249
+ attention_state=attention_state,
250
+ prev_tokens=image_tokens[i],
251
+ token_index=token_indices[[i]]
252
  )
253
+
254
+ with torch.cuda.amp.autocast(dtype=torch.float32):
255
+ if ((i + 1) % 32 == 0 and progressive_outputs) or i + 1 == 256:
256
+ yield self.image_grid_from_tokens(
257
+ image_tokens=image_tokens[1:].T,
258
+ is_seamless=is_seamless,
259
+ is_verbose=is_verbose
260
+ )
261
 
262
  def generate_image_stream(self, *args, **kwargs) -> Iterator[Image.Image]:
263
  image_stream = self.generate_raw_image_stream(*args, **kwargs)
 
275
  image = image.reshape([grid_size ** 2, 2 ** 8, 2 ** 8, 3])
276
  yield image
277
 
278
+
279
  def generate_image(self, *args, **kwargs) -> Image.Image:
280
  image_stream = self.generate_image_stream(
281
  *args, **kwargs,