pandora-s committed (verified)
Commit 52c2787 · 1 Parent(s): ee3ecd6

Update app.py

Files changed (1): app.py +5 -5
app.py CHANGED
@@ -67,13 +67,13 @@ def run_inference(message, history, model_picked, temperature, context_size, max
 
     # Loading only once GPU available
     config = ExLlamaV2Config(local_dir)
-    config.max_seq_len = 16384
+    config.max_seq_len = context_size
 
     vision_model = ExLlamaV2VisionTower(config)
     vision_model.load(progress = True)
 
     model = ExLlamaV2(config)
-    cache = ExLlamaV2Cache(model, lazy = True, max_seq_len = 16384)
+    cache = ExLlamaV2Cache(model, lazy = True, max_seq_len = context_size)
     model.load_autosplit(cache, progress = True)
     tokenizer = ExLlamaV2Tokenizer(config)
 
@@ -127,8 +127,8 @@ def run_inference(message, history, model_picked, temperature, context_size, max
     # Gnerating Response
     output = generator.generate(
         prompt = prompt,
-        max_new_tokens = 1024,
-        temperature = 0.15,
+        max_new_tokens = max_output,
+        temperature = temperature,
         add_bos = True,
         encode_special_tokens = True,
         decode_special_tokens = True,
@@ -136,7 +136,7 @@ def run_inference(message, history, model_picked, temperature, context_size, max
         gen_settings = ExLlamaV2Sampler.Settings.greedy(),
         embeddings = images_embeddings
     )
-    result = out.split("[/INST]")[-1]
+    result = output.split("[/INST]")[-1]
     print(result)
     return result
 
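
Taken together, the first hunk replaces the hardcoded 16384 so that the user-supplied context_size drives both the model config and the KV cache, which can therefore no longer disagree. A minimal sketch of that loading path, assuming exllamav2 is installed and local_dir points at a downloaded vision-capable checkpoint; names mirror the app, and only the lines shown in the diff are taken from it:

    from exllamav2 import (
        ExLlamaV2,
        ExLlamaV2Cache,
        ExLlamaV2Config,
        ExLlamaV2Tokenizer,
        ExLlamaV2VisionTower,
    )

    def load_model(local_dir: str, context_size: int):
        config = ExLlamaV2Config(local_dir)
        # One user-supplied value now sets both limits (previously both
        # were hardcoded to 16384).
        config.max_seq_len = context_size

        vision_model = ExLlamaV2VisionTower(config)
        vision_model.load(progress = True)

        model = ExLlamaV2(config)
        # lazy = True defers cache allocation until load_autosplit
        # places layers across the available GPUs.
        cache = ExLlamaV2Cache(model, lazy = True, max_seq_len = context_size)
        model.load_autosplit(cache, progress = True)
        tokenizer = ExLlamaV2Tokenizer(config)
        return model, cache, tokenizer, vision_model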
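
The second and third hunks do the same for generation: max_output and temperature replace the hardcoded 1024 and 0.15, and the final hunk fixes a NameError (the old code split the undefined name out; the generator returns output). One caveat: the call still passes gen_settings = ExLlamaV2Sampler.Settings.greedy(), which requests greedy decoding, so if those settings take precedence the forwarded temperature may not influence sampling. A hypothetical variant, not from this repo, that routes the slider value through the sampler settings instead:

    from exllamav2.generator import ExLlamaV2DynamicGenerator, ExLlamaV2Sampler

    def generate_reply(model, cache, tokenizer, prompt, images_embeddings,
                       max_output: int, temperature: float) -> str:
        generator = ExLlamaV2DynamicGenerator(model = model, cache = cache, tokenizer = tokenizer)

        # Build sampler settings from the UI value rather than pinning greedy.
        settings = ExLlamaV2Sampler.Settings()
        settings.temperature = temperature

        output = generator.generate(
            prompt = prompt,
            max_new_tokens = max_output,
            gen_settings = settings,
            add_bos = True,
            encode_special_tokens = True,
            decode_special_tokens = True,
            embeddings = images_embeddings,
        )
        # As in the commit: keep only the text after the final [/INST] tag.
        return output.split("[/INST]")[-1]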