praty7717 committed
Commit b5ad935 · verified · 1 Parent(s): 40b1107

Upload 2 files

Files changed (2)
  1. app.py +21 -26
  2. gpt_dev.py +3 -24
app.py CHANGED
@@ -1,34 +1,29 @@
-import os
 import torch
-import gradio as gr
-from gpt_dev import GPTLanguageModel, encode, decode, stoi, itos
+import os
+from gpt_dev import GPTLanguageModel, encode, decode, generate_text  # Import the necessary parts from gpt_dev.py
 
-# Load the model
+# Set up device (GPU or CPU)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = GPTLanguageModel().to(device)
 
-# Load the trained model state
+# Hyperparameters matching the saved checkpoint
+block_size = 256
+model_type = "gpt"
+n_embd = 384
+n_head = 6
+n_layer = 6
+transformers_version = "4.44.2"
+vocab_size = 95
+
+# Instantiate the model
+model = GPTLanguageModel(vocab_size, n_embd, n_head, n_layer)
+model.to(device)
+
+# Load the pre-trained weights
 if os.path.exists("gpt_language_model.pth"):
     checkpoint = torch.load("gpt_language_model.pth", map_location=device)
     model.load_state_dict(checkpoint)
 
-model.eval()  # Set model to evaluation mode
-
-def get_response(prompt):
-    # Encode the prompt
-    encoded_prompt = encode(prompt)
-    input_tensor = torch.tensor(encoded_prompt).unsqueeze(0).to(device)  # Shape (1, seq_length)
-
-    # Generate response
-    with torch.no_grad():
-        output = model(input_tensor)  # Generate the output
-        output_text = decode(output[0].tolist())  # Decode the output to text
-
-    return output_text
-
-def main():
-    iface = gr.Interface(fn=get_response, inputs="text", outputs="text", title="GPT Language Model")
-    iface.launch()
-
-if __name__ == "__main__":
-    main()
+# Generate text based on a prompt
+start_prompt = "Once upon a time"
+generated_text = generate_text(model, start_prompt, max_length=100)
+print(generated_text)  # Display the generated text
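Note that this commit drops the Gradio interface in favor of a one-shot generation script. If the Space still needs a web UI, the removed interface could be rebuilt on top of the generate_text helper the commit adds. A minimal sketch, not part of the commit: the respond() wrapper is hypothetical, and the GPTLanguageModel constructor signature is an assumption carried over from the app.py above.

import os

import gradio as gr
import torch

from gpt_dev import GPTLanguageModel, generate_text

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters copied from app.py; the constructor signature is an
# assumption taken from this commit, not a confirmed API.
vocab_size, n_embd, n_head, n_layer = 95, 384, 6, 6
model = GPTLanguageModel(vocab_size, n_embd, n_head, n_layer).to(device)

if os.path.exists("gpt_language_model.pth"):
    model.load_state_dict(torch.load("gpt_language_model.pth", map_location=device))

def respond(prompt):
    # generate_text switches the model to eval mode and handles
    # encoding, sampling, and decoding internally (see gpt_dev.py).
    return generate_text(model, prompt, max_length=100)

gr.Interface(fn=respond, inputs="text", outputs="text",
             title="GPT Language Model").launch()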
gpt_dev.py CHANGED
@@ -426,40 +426,19 @@ b = torch.randint(0, 10, (3, 2)).float()
 c = a @ b
 
 def generate_text(model, start_prompt, max_length=100, temperature=1.0):
-    # Encode the input prompt into tensor format
     input_ids = torch.tensor(encode(start_prompt), dtype=torch.long).unsqueeze(0).to(device)
-
-    # Set model to evaluation mode
     model.eval()
-
-    # Generated tokens list (starting with the prompt)
     generated_ids = input_ids.tolist()[0]
-
-    # Sampling loop
     with torch.no_grad():
         for _ in range(max_length):
-            # Get model predictions for the current input
             logits, _ = model(input_ids)
-
-            # Optionally, divide logits by temperature to control randomness
             logits = logits[:, -1, :] / temperature
-
-            # Apply softmax to get probabilities and sample the next token
             probs = torch.nn.functional.softmax(logits, dim=-1)
             next_token = torch.multinomial(probs, num_samples=1)
-
-            # Append the generated token to the list
             generated_ids.append(next_token.item())
-
-            # Update the input with the new token for the next iteration
             input_ids = torch.cat((input_ids, next_token), dim=1)
-
-    # Decode the generated tokens back into text
-    generated_text = decode(generated_ids)
-    return generated_text
-
-# Example usage
-start_prompt = "Once upon a time"
-generated_text = generate_text(m, start_prompt, max_length=100)
+    return decode(generated_ids)
 
 
+if __name__ == "__main__":
+    train_model()
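The rewritten generate_text keeps temperature as its one sampling knob: logits are divided by it before the softmax. A tiny standalone illustration, not part of the commit, of how that divisor reshapes the next-token distribution:

import torch
import torch.nn.functional as F

# Fake next-token logits for a 3-token vocabulary
logits = torch.tensor([2.0, 1.0, 0.1])

for temperature in (0.5, 1.0, 2.0):
    probs = F.softmax(logits / temperature, dim=-1)
    print(f"T={temperature}: {[round(p, 3) for p in probs.tolist()]}")

# T < 1 sharpens the distribution toward the argmax; T > 1 flattens it
# toward uniform, so higher temperatures produce more random text.

One caveat: the sampling loop feeds the ever-growing input_ids back into the model without cropping, so a long prompt plus max_length new tokens can exceed the block_size = 256 positions the model was trained with; cropping to input_ids[:, -block_size:] before the forward pass would guard against that.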