Spaces:

pandora-s
/

Pixtral-12B-EXL2

Sleeping

pandora-s committed on Nov 11, 2024

Commit

52c2787

verified ·

1 Parent(s): ee3ecd6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -67,13 +67,13 @@ def run_inference(message, history, model_picked, temperature, context_size, max
     # Loading only once GPU available
     config = ExLlamaV2Config(local_dir)
-    config.max_seq_len = 16384
     vision_model = ExLlamaV2VisionTower(config)
     vision_model.load(progress = True)
     model = ExLlamaV2(config)
-    cache = ExLlamaV2Cache(model, lazy = True, max_seq_len = 16384)
     model.load_autosplit(cache, progress = True)
     tokenizer = ExLlamaV2Tokenizer(config)
@@ -127,8 +127,8 @@ def run_inference(message, history, model_picked, temperature, context_size, max
     # Generating Response
     output = generator.generate(
         prompt = prompt,
-        max_new_tokens = 1024,
-        temperature = 0.15,
         add_bos = True,
         encode_special_tokens = True,
         decode_special_tokens = True,
@@ -136,7 +136,7 @@ def run_inference(message, history, model_picked, temperature, context_size, max
         gen_settings = ExLlamaV2Sampler.Settings.greedy(),
         embeddings = images_embeddings
     )
-    result = out.split("[/INST]")[-1]
     print(result)
     return result

     # Loading only once GPU available
     config = ExLlamaV2Config(local_dir)
+    config.max_seq_len = context_size
     vision_model = ExLlamaV2VisionTower(config)
     vision_model.load(progress = True)
     model = ExLlamaV2(config)
+    cache = ExLlamaV2Cache(model, lazy = True, max_seq_len = context_size)
     model.load_autosplit(cache, progress = True)
     tokenizer = ExLlamaV2Tokenizer(config)
     # Generating Response
     output = generator.generate(
         prompt = prompt,
+        max_new_tokens = max_output,
+        temperature = temperature,
         add_bos = True,
         encode_special_tokens = True,
         decode_special_tokens = True,
         gen_settings = ExLlamaV2Sampler.Settings.greedy(),
         embeddings = images_embeddings
     )
+    result = output.split("[/INST]")[-1]
     print(result)
     return result