MaxBlumenfeld committed on
Commit cd33601 · 1 Parent(s): e6e2d3b

switched to model uploaded in better format

Files changed (1)
app.py +3 -16
app.py CHANGED
@@ -2,30 +2,17 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaConfig
 import gradio as gr
 
-# base_model_id = "HuggingFaceTB/SmolLM2-135M"
-# instruct_model_id = "MaxBlumenfeld/smollm2-135m-bootleg-instruct"
-
-
-# base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
-
-# # Load models with explicit configs
-# base_config = LlamaConfig.from_pretrained(base_model_id)
-# instruct_config = LlamaConfig.from_pretrained(base_model_id)  # Using base model config for both since it's the same architecture
-
-# base_model = AutoModelForCausalLM.from_pretrained(base_model_id, config=base_config)
-# instruct_model = AutoModelForCausalLM.from_pretrained(instruct_model_id, from_tf=True)  # Added from_tf=True
-
 
 # Model IDs from Hugging Face Hub
 base_model_id = "HuggingFaceTB/SmolLM2-135M"
-instruct_model_id = "MaxBlumenfeld/smollm2-135m-bootleg-instruct"
+instruct_model_id = "MaxBlumenfeld/bootleg_instruct_01"
 
 # Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
 
 # Load models with explicit LLaMA architecture
 base_model = LlamaForCausalLM.from_pretrained(base_model_id)
-instruct_model = LlamaForCausalLM.from_pretrained(instruct_model_id, from_tf = True)
+instruct_model = LlamaForCausalLM.from_pretrained(instruct_model_id)
 
 def generate_response(model, tokenizer, message, temperature=0.5, max_length=200, system_prompt="", is_instruct=False):
     # Prepare input based on model type
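
The diff cuts off at the top of generate_response, so the commit does not show how the two models are actually called. As a rough, self-contained sketch of how the newly loaded base_tokenizer, base_model, and instruct_model might be exercised (the chat-style prompt template, sampling settings, and response slicing below are assumptions for illustration, not code from this commit):

import torch
from transformers import AutoTokenizer, LlamaForCausalLM

base_model_id = "HuggingFaceTB/SmolLM2-135M"
instruct_model_id = "MaxBlumenfeld/bootleg_instruct_01"

base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
base_model = LlamaForCausalLM.from_pretrained(base_model_id)
instruct_model = LlamaForCausalLM.from_pretrained(instruct_model_id)

def generate_response(model, tokenizer, message, temperature=0.5, max_length=200, system_prompt="", is_instruct=False):
    # Hypothetical prompt handling: wrap instruct-model inputs in a simple
    # chat template; pass the raw message straight to the base model.
    if is_instruct:
        prompt = f"{system_prompt}\nUser: {message}\nAssistant:"
    else:
        prompt = message

    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_length=max_length,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode and strip the echoed prompt so only the completion remains.
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return text[len(prompt):].strip()

print(generate_response(instruct_model, base_tokenizer, "What is attention?", is_instruct=True))

Dropping from_tf=True here suggests the instruct checkpoint at MaxBlumenfeld/bootleg_instruct_01 was re-uploaded as native PyTorch weights (presumably the "better format" of the commit message), so LlamaForCausalLM.from_pretrained can load it directly.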