migueldeguzmandev committed
Commit b5d67b8 · verified · 1 Parent(s): 4c2f5ea

Upload 11 files
cached_lm_GPT2Tokenizer_128_petertodd.text ADDED
Binary file (260 kB).
 
cached_lm_GPT2Tokenizer_128_petertodd.text.lock ADDED
File without changes
config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "_name_or_path": "/Users/migueldeguzman/Desktop/gpt2xl_algos/RLLMv10/v7/",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 1600,
+   "n_head": 25,
+   "n_inner": null,
+   "n_layer": 48,
+   "n_positions": 1024,
+   "output_past": true,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 1024
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.33.3",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
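For a quick sanity check of the configuration above, a minimal Python sketch (the local directory path is a placeholder for wherever this repository is downloaded; the asserted values are copied from config.json and match the GPT-2 XL architecture):

from transformers import GPT2Config

model_dir = "./RLLMv10-v8-petertodd"  # placeholder local path, not part of this commit

config = GPT2Config.from_pretrained(model_dir)

# Values mirrored from config.json above: GPT-2 XL sizing.
assert config.n_layer == 48
assert config.n_embd == 1600
assert config.n_head == 25
assert config.vocab_size == 50257
print(config.model_type)  # "gpt2"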
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.33.3"
+ }
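generation_config.json only pins the BOS/EOS token ids that model.generate() falls back to when no overrides are passed. A small sketch to confirm it loads as expected (same placeholder path as above):

from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("./RLLMv10-v8-petertodd")  # placeholder path
assert gen_config.bos_token_id == 50256
assert gen_config.eos_token_id == 50256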
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
petertodd.text ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e0a42edec06d95c59a257c7a32f89e7376f29f5ba5fd956e385f055b9367df6c
+ size 6230624769
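pytorch_model.bin is stored via Git LFS, so the diff above is only the pointer file. Once the weights are actually downloaded, a sketch for verifying them against the pointer's oid and size (the local path is again a placeholder):

import hashlib
import os

EXPECTED_SHA256 = "e0a42edec06d95c59a257c7a32f89e7376f29f5ba5fd956e385f055b9367df6c"  # oid from the pointer above
EXPECTED_SIZE = 6230624769  # bytes, as recorded in the pointer (~6.2 GB, consistent with float32 GPT-2 XL)

weights_path = "./RLLMv10-v8-petertodd/pytorch_model.bin"  # placeholder local path
assert os.path.getsize(weights_path) == EXPECTED_SIZE

sha = hashlib.sha256()
with open(weights_path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks to bound memory use
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_SHA256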
simulator-temp70_with_timestamp_100.py ADDED
@@ -0,0 +1,58 @@
+ import torch
+ from transformers import GPT2Tokenizer, GPT2LMHeadModel
+ import time
+
+ class GPT2Assistant:
+     def __init__(self, model_dir):
+         self.model = GPT2LMHeadModel.from_pretrained(model_dir)
+         self.tokenizer = GPT2Tokenizer.from_pretrained(model_dir)
+
+     def generate_answer(self, prompt, max_length=1024):
+         input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
+         if self.tokenizer.pad_token_id is None:
+             self.tokenizer.pad_token = self.tokenizer.eos_token
+
+         attention_mask = (input_ids != self.tokenizer.pad_token_id).long()
+         output = self.model.generate(
+             input_ids,
+             attention_mask=attention_mask,
+             max_length=max_length,
+             num_return_sequences=1,
+             no_repeat_ngram_size=2,
+             do_sample=True,
+             top_k=50,
+             top_p=0.95,
+             temperature=0.70
+         )
+
+         answer = self.tokenizer.decode(output[0], skip_special_tokens=True)
+         return answer[len(prompt):]
+
+     def query(self, prompt):
+         generated_answer = self.generate_answer(prompt)
+         return generated_answer
+
+ def main():
+     start_time = time.time()
+
+     model_output_dir = "/Users/migueldeguzman/Desktop/gpt2xl_algos/RLLMv10/v8-petertodd/"
+     assistant = GPT2Assistant(model_output_dir)
+
+     num_iterations = 50
+     prompt = input(f"Enter your question to ask the model {num_iterations} times: ")
+
+     for i in range(num_iterations):
+         print(f"Answering question {i + 1}/{num_iterations}...")
+         response = assistant.query(prompt)
+         print(f"Response {i + 1}: {response}\n")
+
+         end_time = time.time()
+         elapsed_time = (end_time - start_time) / 60  # Convert to minutes
+         print(f"Time-stamp: {elapsed_time:.2f} minutes")
+
+     end_time = time.time()
+     elapsed_time = (end_time - start_time) / 60  # Convert to minutes
+     print(f"Time taken to complete the task: {elapsed_time:.2f} minutes")
+
+ if __name__ == "__main__":
+     main()
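The script prompts for one question and samples it 50 times at temperature 0.70, printing a cumulative "Time-stamp" in minutes after each response. For reference, a minimal non-interactive way to reuse the class for a single prompt; the module name simulator and the model path are assumptions (the uploaded filename contains hyphens, so it would need to be renamed or loaded programmatically before it can be imported):

# Assumes the script above has been saved locally as simulator.py so the class is importable.
from simulator import GPT2Assistant

assistant = GPT2Assistant("./RLLMv10-v8-petertodd")  # placeholder local model directory
print(assistant.query("Describe your values in one paragraph."))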
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": true,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
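Both tokenizer files above map bos, eos, and unk to the single <|endoftext|> token and leave the pad token unset. A short check, assuming the full repository (including vocab.json and merges.txt) is available at the same placeholder path used earlier:

from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("./RLLMv10-v8-petertodd")  # placeholder path

# bos/eos/unk all resolve to <|endoftext|> (id 50256); no dedicated pad token is defined.
assert tokenizer.bos_token == tokenizer.eos_token == tokenizer.unk_token == "<|endoftext|>"
assert tokenizer.eos_token_id == 50256
assert tokenizer.pad_token is None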
vocab.json ADDED
The diff for this file is too large to render. See raw diff