Daniel Fried committed on
Commit
13f28f2
·
1 Parent(s): d3c5d0d

decruft and add some comments

Browse files
Files changed (2) hide show
  1. app.py +21 -37
  2. templates/index.html +0 -633
app.py CHANGED
@@ -9,30 +9,17 @@ import pprint
9
  from huggingface_hub import Repository
10
  from text_generation import Client
11
 
12
- # from flask import Flask, request, render_template
13
- # from flask_cors import CORS
14
- # app = Flask(__name__, static_folder='static')
15
- # app.config['TEMPLATES_AUTO_RELOAD'] = Tru
16
- # CORS(app, resources= {
17
- # r"/generate": {"origins": origins},
18
- # r"/infill": {"origins": origins},
19
- # })
20
- # origins=[f"http://localhost:{PORT}", "https://huggingface.co", "https://hf.space"]
21
-
22
  PORT = 7860
23
- VERBOSE = False
24
-
25
- if os.path.exists('unlock'):
26
- MAX_LENGTH = 8192
27
- else:
28
- MAX_LENGTH = 8192
29
- TRUNCATION_MESSAGE = f'warning: This demo is limited to {MAX_LENGTH} tokens in the document for efficiency.'
30
 
31
- from fastapi import FastAPI, Request
32
- from fastapi.staticfiles import StaticFiles
33
- from fastapi.responses import FileResponse, StreamingResponse
34
- app = FastAPI(docs_url=None, redoc_url=None)
35
- app.mount("/static", StaticFiles(directory="static"), name="static")
 
 
 
 
36
 
37
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
38
  API_URL = os.environ.get("API_URL")
@@ -40,24 +27,32 @@ API_URL = os.environ.get("API_URL")
40
  with open("./HHH_prompt.txt", "r") as f:
41
  HHH_PROMPT = f.read() + "\n\n"
42
 
 
43
  FIM_PREFIX = "<fim_prefix>"
44
  FIM_MIDDLE = "<fim_middle>"
45
  FIM_SUFFIX = "<fim_suffix>"
46
-
47
  END_OF_TEXT = "<|endoftext|>"
48
 
 
49
  FIM_INDICATOR = "<infill>"
50
 
51
  client = Client(
52
  API_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"},
53
  )
54
 
 
 
 
 
 
 
55
  @app.head("/")
56
  @app.get("/")
57
  def index() -> FileResponse:
58
  return FileResponse(path="static/index.html", media_type="text/html")
59
 
60
  def generate(prefix, suffix=None, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
 
61
  temperature = float(temperature)
62
  if temperature < 1e-2:
63
  temperature = 1e-2
@@ -79,8 +74,8 @@ def generate(prefix, suffix=None, temperature=0.9, max_new_tokens=256, top_p=0.9
79
  else:
80
  prompt = prefix
81
  output = client.generate(prompt, **generate_kwargs)
82
- # TODO
83
  generated_text = output.generated_text
 
84
  truncated = False
85
  while generated_text.endswith(END_OF_TEXT):
86
  generated_text = generated_text[:-len(END_OF_TEXT)]
@@ -88,26 +83,22 @@ def generate(prefix, suffix=None, temperature=0.9, max_new_tokens=256, top_p=0.9
88
  'truncated': truncated,
89
  }
90
  if fim_mode:
 
91
  generation['text'] = prefix + generated_text + suffix
92
  generation['parts'] = [prefix, suffix]
93
  generation['infills'] = [generated_text]
94
- generation['type'] = 'infill'
95
  else:
 
96
  generation['text'] = prompt + generated_text
97
  generation['parts'] = [prompt]
98
- generation['type'] = 'generate'
99
  return generation
100
 
101
  @app.get('/generate')
102
- # async def generate_maybe(request: Request):
103
  async def generate_maybe(info: str):
104
- # form = await info.json()
105
- # form = await request.json()
106
  # info is a base64-encoded, url-escaped json string (since GET doesn't support a body, and POST leads to CORS issues)
107
  # fix padding, following https://stackoverflow.com/a/9956217/1319683
108
  info = base64.urlsafe_b64decode(info + '=' * (4 - len(info) % 4)).decode('utf-8')
109
  form = json.loads(info)
110
- # print(form)
111
  prompt = form['prompt']
112
  length_limit = int(form['length'])
113
  temperature = float(form['temperature'])
@@ -123,18 +114,13 @@ async def generate_maybe(info: str):
123
  return {'result': 'error', 'type': 'generate', 'prompt': prompt, 'message': f'Error: {e}.'}
124
 
125
  @app.get('/infill')
126
- # async def infill_maybe(request: Request):
127
  async def infill_maybe(info: str):
128
- # form = await info.json()
129
- # form = await request.json()
130
  # info is a base64-encoded, url-escaped json string (since GET doesn't support a body, and POST leads to CORS issues)
131
  # fix padding, following https://stackoverflow.com/a/9956217/1319683
132
  info = base64.urlsafe_b64decode(info + '=' * (4 - len(info) % 4)).decode('utf-8')
133
  form = json.loads(info)
134
  length_limit = int(form['length'])
135
  temperature = float(form['temperature'])
136
- max_retries = 1
137
- extra_sentinel = True
138
  try:
139
  if len(form['parts']) > 2:
140
  return {'result': 'error', 'text': ''.join(form['parts']), 'type': 'infill', 'message': f"error: Only a single infill is supported!"}
@@ -146,11 +132,9 @@ async def infill_maybe(info: str):
146
  else:
147
  generation['message'] = ''
148
  return generation
149
- # return {'result': 'success', 'prefix': prefix, 'suffix': suffix, 'text': generation['text']}
150
  except Exception as e:
151
  traceback.print_exception(*sys.exc_info())
152
  return {'result': 'error', 'type': 'infill', 'message': f'Error: {e}.'}
153
 
154
-
155
  if __name__ == "__main__":
156
  app.run(host='0.0.0.0', port=PORT, threaded=False)
 
9
  from huggingface_hub import Repository
10
  from text_generation import Client
11
 
 
 
 
 
 
 
 
 
 
 
12
  PORT = 7860
 
 
 
 
 
 
 
13
 
14
+ # TODO: implement maximum length (currently, each iteration is limited by the slider-specified max length, but this can be iterated, or long code entered into the editor, to get really long documents)
15
+ # if os.path.exists('unlock'):
16
+ # # create an 'unlock' file (not checked into Git) locally to get full context lengths
17
+ # MAX_LENGTH = 8192
18
+ # else:
19
+ # # set to a shorter value to prevent long contexts and make the demo more efficient
20
+ # MAX_LENGTH = 1024
21
+ # TRUNCATION_MESSAGE = f'warning: This demo is limited to {MAX_LENGTH} tokens in the document for efficiency.'
22
+ TRUNCATION_MESSAGE = f'TODO'
23
 
24
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
25
  API_URL = os.environ.get("API_URL")
 
27
  with open("./HHH_prompt.txt", "r") as f:
28
  HHH_PROMPT = f.read() + "\n\n"
29
 
30
+ # used by the model
31
  FIM_PREFIX = "<fim_prefix>"
32
  FIM_MIDDLE = "<fim_middle>"
33
  FIM_SUFFIX = "<fim_suffix>"
 
34
  END_OF_TEXT = "<|endoftext|>"
35
 
36
+ # used to mark infill locations in the editor
37
  FIM_INDICATOR = "<infill>"
38
 
39
  client = Client(
40
  API_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"},
41
  )
42
 
43
+ from fastapi import FastAPI, Request
44
+ from fastapi.staticfiles import StaticFiles
45
+ from fastapi.responses import FileResponse, StreamingResponse
46
+ app = FastAPI(docs_url=None, redoc_url=None)
47
+ app.mount("/static", StaticFiles(directory="static"), name="static")
48
+
49
  @app.head("/")
50
  @app.get("/")
51
  def index() -> FileResponse:
52
  return FileResponse(path="static/index.html", media_type="text/html")
53
 
54
  def generate(prefix, suffix=None, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
55
+ # TODO: deduplicate code between this and `infill`
56
  temperature = float(temperature)
57
  if temperature < 1e-2:
58
  temperature = 1e-2
 
74
  else:
75
  prompt = prefix
76
  output = client.generate(prompt, **generate_kwargs)
 
77
  generated_text = output.generated_text
78
+ # TODO: set this based on stop reason from client.generate
79
  truncated = False
80
  while generated_text.endswith(END_OF_TEXT):
81
  generated_text = generated_text[:-len(END_OF_TEXT)]
 
83
  'truncated': truncated,
84
  }
85
  if fim_mode:
86
+ generation['type'] = 'infill'
87
  generation['text'] = prefix + generated_text + suffix
88
  generation['parts'] = [prefix, suffix]
89
  generation['infills'] = [generated_text]
 
90
  else:
91
+ generation['type'] = 'generate'
92
  generation['text'] = prompt + generated_text
93
  generation['parts'] = [prompt]
 
94
  return generation
95
 
96
  @app.get('/generate')
 
97
  async def generate_maybe(info: str):
 
 
98
  # info is a base64-encoded, url-escaped json string (since GET doesn't support a body, and POST leads to CORS issues)
99
  # fix padding, following https://stackoverflow.com/a/9956217/1319683
100
  info = base64.urlsafe_b64decode(info + '=' * (4 - len(info) % 4)).decode('utf-8')
101
  form = json.loads(info)
 
102
  prompt = form['prompt']
103
  length_limit = int(form['length'])
104
  temperature = float(form['temperature'])
 
114
  return {'result': 'error', 'type': 'generate', 'prompt': prompt, 'message': f'Error: {e}.'}
115
 
116
  @app.get('/infill')
 
117
  async def infill_maybe(info: str):
 
 
118
  # info is a base64-encoded, url-escaped json string (since GET doesn't support a body, and POST leads to CORS issues)
119
  # fix padding, following https://stackoverflow.com/a/9956217/1319683
120
  info = base64.urlsafe_b64decode(info + '=' * (4 - len(info) % 4)).decode('utf-8')
121
  form = json.loads(info)
122
  length_limit = int(form['length'])
123
  temperature = float(form['temperature'])
 
 
124
  try:
125
  if len(form['parts']) > 2:
126
  return {'result': 'error', 'text': ''.join(form['parts']), 'type': 'infill', 'message': f"error: Only a single infill is supported!"}
 
132
  else:
133
  generation['message'] = ''
134
  return generation
 
135
  except Exception as e:
136
  traceback.print_exception(*sys.exc_info())
137
  return {'result': 'error', 'type': 'infill', 'message': f'Error: {e}.'}
138
 
 
139
  if __name__ == "__main__":
140
  app.run(host='0.0.0.0', port=PORT, threaded=False)
templates/index.html DELETED
@@ -1,633 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8"/>
5
- <meta name="viewport" contents="width=device-width, initial-scale=1.0" />
6
- <title>InCoder</title>
7
- <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
8
- <script src="https://cdn.jsdelivr.net/npm/[email protected]/base64.min.js"></script>
9
-
10
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/ace.min.js"></script>
11
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-plain_text.min.js"></script>
12
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-c_cpp.min.js"></script>
13
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-csharp.min.js"></script>
14
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-clojure.min.js"></script>
15
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-coffee.min.js"></script>
16
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-golang.min.js"></script>
17
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-haskell.min.js"></script>
18
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-python.min.js"></script>
19
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-java.min.js"></script>
20
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-javascript.min.js"></script>
21
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-lua.min.js"></script>
22
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-objectivec.min.js"></script>
23
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-perl.min.js"></script>
24
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-php.min.js"></script>
25
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-python.min.js"></script>
26
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-ruby.min.js"></script>
27
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-rust.min.js"></script>
28
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-scala.min.js"></script>
29
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-sh.min.js"></script>
30
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-swift.min.js"></script>
31
- <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-typescript.min.js"></script>
32
- <link rel="stylesheet" href="static/style.css">
33
- </head>
34
- <style type="text/css">
35
- /* body {
36
- font-family: sans-serif;
37
- } */
38
- /* .leftside {
39
- } */
40
- main {
41
- max-width: 80rem;
42
- }
43
- .rightside {
44
- width: 30em;
45
- }
46
- .submit-holder {
47
- margin-top: 2em;
48
- }
49
- .submit input {
50
- font-size: 16pt;
51
- }
52
- .slider {
53
- width: 20em;
54
- }
55
- #faq {
56
- max-width: 60em;
57
- }
58
- #result {
59
- font-family: monospace;
60
- white-space: pre-wrap;
61
- word-wrap: break-word;
62
- font-size: 12pt;
63
- clear: both;
64
- margin-top: 1em;
65
- border: 1px solid black;
66
- padding: 1em;
67
- width: 60em;
68
- min-height: 12em;
69
- }
70
- #prompt {
71
- font-weight: bold;
72
- }
73
- .loader {
74
- border: 4px solid #f3f3f3;
75
- border-radius: 50%;
76
- border-top: 4px solid #3498db;
77
- width: 30px;
78
- height: 30px;
79
- animation: spin 2s linear infinite;
80
- margin-right: 1em;
81
- }
82
- @keyframes spin {
83
- 0% { transform: rotate(0deg); }
84
- 100% { transform: rotate(360deg); }
85
- }
86
- #loader_holder {
87
- visibility: hidden;
88
- display: flex;
89
- align-items: center;
90
- }
91
-
92
- label {
93
- margin-top: 1em;
94
- display: inline-elock;
95
- width: 10em;
96
- text-align: right;
97
- font-size: 80%;
98
- }
99
- #loader_holder_super {
100
- }
101
- #error {
102
- color: red;
103
- width: 100%;
104
- }
105
- #warning {
106
- color: darkorange;
107
- width: 100%;
108
- }
109
- #examples span {
110
- margin-right: 1em;
111
- }
112
- #editor {
113
- position: relative;
114
- width: 100%;
115
- height: 400px;
116
- }
117
- #editor-holder {
118
- position: relative;
119
- width: 100%;
120
- height: 400px;
121
- }
122
- .ace_infill {
123
- color: red;
124
- }
125
- </style>
126
- <body>
127
- <main>
128
- <div class="card" id="about">
129
- <div class="header"> <h1>InCoder: A Generative Model for Code Infilling and Synthesis</h1> </div>
130
- <p>Demo of the 6.7B parameter version of InCoder: a decoder-only Transformer model that can both extend and insert/infill code.</p>
131
- <p>Select one of the examples below, or input your own code into the editor. You can type &lt;infill&gt; to mark a location you want the model to insert code at.</p>
132
- <p>Click "Extend" to append text at the end of the editor. Click "Infill" to replace all &lt;infill&gt; masks. (Click "Add &lt;infill&gt; mask" to add a mask at the cursor or replace the current selection.) </p>
133
- </div>
134
- <div class="card" id="examples">
135
- <div id="examples-infill">
136
- <span class="softspan">Infill Examples:</span>
137
- <br>
138
- <span class="softspan"><a href='javascript:select_example("type-pred");'>Type prediction</a></span>
139
- <span class="softspan"><a href='javascript:select_example("multi-region");'>Docstring to function</a></span>
140
- <span class="softspan"><a href='javascript:select_example("docstring-2");'>Function to docstring</a></span>
141
- <!--
142
- <span class="softspan"><a href='javascript:select_example("python-infill2");'>Docstring to function</a></span>
143
- -->
144
- <span class="softspan"><a href='javascript:select_example("class");'>Class generation</a></span>
145
- </div>
146
- <div id="examples-extend">
147
- <span class="softspan">Extend Examples:</span>
148
- <br>
149
- <span class="softspan"><a href='javascript:select_example("python");'>Python</a></span>
150
- <span class="softspan"><a href='javascript:select_example("javascript");'>JavaScript</a></span>
151
- <span class="softspan"><a href='javascript:select_example("jupyter");'>Jupyter</a></span>
152
- <span class="softspan"><a href='javascript:select_example("stackoverflow");'>StackOverflow</a></span>
153
- <span class="softspan"><a href='javascript:select_example("metadata-conditioning");'>Metadata Conditioning</a></span>
154
- <span class="softspan"><a href='javascript:select_example("metadata-prediction");'>Metadata Prediction</a></span>
155
- </div>
156
- </div>
157
- <div class="card" id="controls">
158
- <div>
159
- <label>Num Tokens:</label>
160
- <input type="range" value="64" min="16" max="256" step="16" class="slider"
161
- oninput="this.nextElementSibling.value = this.value" name="length" id='length_slider'>
162
- <output class='a' id="length_slider_output">64</output>
163
- </div>
164
- <div>
165
- <label>Temperature:</label>
166
- <input type="range" value="0.6" min="0.1" max="1.0" step="0.10" class="slider"
167
- oninput="this.nextElementSibling.value = this.value" name="temp" id='temp_slider'>
168
- <output class='a' id="temp_slider_output">0.6</output>
169
- </div>
170
- <div id="buttons">
171
- <br>
172
- <input type="button" value="Extend" id="extend-form-button" />
173
- <input type="button" value="Infill" id="infill-form-button" />
174
- <br>
175
- <br>
176
- <input type="button" value="Add <infill> mask" id="insert-mask-button" title="add the infill marker at cursor or selection" />
177
- </div>
178
- </div>
179
- <div id="edit-container" class="card">
180
- <div id="syntax">
181
- <span class="softspan">Syntax:</span>
182
- <select name="mode" id="mode">
183
- <option value="text">Text</option>
184
- <option value="c_cpp">C/C++</option>
185
- <option value="csharp">C#</option>
186
- <option value="clojure">Clojure</option>
187
- <option value="coffee">CoffeeScript</option>
188
- <option value="golang">Go</option>
189
- <option value="haskell">Haskell</option>
190
- <option value="java">Java</option>
191
- <option value="javascript">JavaScript</option>
192
- <option value="lua">Lua</option>
193
- <option value="objectivec">Objective C</option>
194
- <option value="perl">Perl</option>
195
- <option value="php">PHP</option>
196
- <option value="python">Python</option>
197
- <option value="ruby">Ruby</option>
198
- <option value="rust">Rust</option>
199
- <option value="scala">Scala</option>
200
- <option value="sh">Shell</option>
201
- <option value="swift">Swift</option>
202
- <option value="typescript">Typescript</option>
203
- </select>
204
- </div>
205
- <div id="editor"></div>
206
- </div>
207
- <div id="loader_holder_super" class="card">
208
- <h1>Messages</h1>
209
- <div id="error"></div>
210
- <div id="warning"></div>
211
- <div id="loader_holder">
212
- <div class="loader"></div>
213
- <div>
214
- Generation queued, please wait...
215
- </div>
216
- </div>
217
- </div>
218
- <div id="info" class="card">
219
- <h1 id="debug-info">More Info</h3>
220
- <p>
221
- See <a href="https://sites.google.com/view/incoder-code-models" target="_blank" rel="noopener noreferrer">our project site</a> for more information on
222
- these models, including a paper and examples.
223
- </p>
224
-
225
- <p>
226
- For instructions on setting up and using the models (via HuggingFace transformers), see
227
- <a href="https://github.com/dpfried/incoder/blob/main/README.md" target="_blank" rel="noopener noreferrer">our readme</a>.
228
- </p>
229
-
230
- <h1 id="debug-info">Credits</h3>
231
- <p>This model was developed at Facebook AI Research by Daniel Fried*, Armen Aghajanyan*, Jessy Lin, Sida Wang, Eric Wallace, Freda Shi, Ruiqi Zhong,
232
- Wen-tau Yih, Luke Zettlemoyer, and Mike Lewis.</p>
233
- <p>Thanks to Naman Goyal and Stephen Roller for writing the code this demo was based on. Extensions by Daniel Fried and
234
- Sida Wang.</p>
235
- </div>
236
- </main>
237
- <script type="text/javascript">
238
- // these constants are only used for providing user expectations.
239
- var OVERHEAD = 3;
240
- var PER_TOKEN = 0.12;
241
- var SPLIT_TOKEN = "<infill>"
242
-
243
- var Range = require("ace/range").Range;
244
-
245
- // examples for the user
246
- var EXAMPLES = {
247
- "python-infill2": {
248
- "prompt":
249
- `<| file ext=.py |>
250
- from collections import Counter
251
-
252
- def <infill>
253
- """Count the number of occurrences of each word in the file."""
254
- <infill>
255
- `,
256
- "length": 64,
257
- "temperature": 0.2,
258
- "mode": "python"
259
- },
260
- "multi-region": {
261
- "prompt":
262
- `<| file ext=.py |>
263
- <infill>
264
- """ Load the given gzip jsonl file. """
265
- <infill>
266
- `,
267
- "length": 64,
268
- "temperature": 0.2,
269
- "mode": "python"
270
- },
271
- "type-pred": {
272
- "prompt":
273
- `def count_words(filename: str) -> <infill>
274
- """Count the number of occurrences of each word in the file."""
275
- with open(filename, 'r') as f:
276
- word_counts = {}
277
- for line in f:
278
- for word in line.split():
279
- if word in word_counts:
280
- word_counts[word] = 1
281
- else:
282
- word_counts[word] = 1
283
- return word_counts
284
- `,
285
- "length": 4,
286
- "temperature": 0.2,
287
- "mode": "python"
288
- },
289
- "docstring-2": {
290
- "prompt":
291
- `def _minimize_in_graph(build_loss_fn, num_steps=200, optimizer=None):
292
- """
293
- <infill>
294
- """
295
- optimizer = tf.compat.v1.train.AdamOptimizer(
296
- 0.1) if optimizer is None else optimizer
297
-
298
- def train_loop_body(step):
299
- train_op = optimizer.minimize(
300
- build_loss_fn if tf.executing_eagerly() else build_loss_fn())
301
- return tf.tuple(tensors=[tf.add(step, 1)], control_inputs=[train_op])
302
-
303
- minimize_op = tf.compat.v1.while_loop(
304
- cond=lambda step: step < num_steps,
305
- body=train_loop_body,
306
- loop_vars=[tf.constant(0)],
307
- return_same_structure=True)[0]
308
- return minimize_op`,
309
- "length": 64,
310
- "temperature": 0.3,
311
- "mode": "python",
312
- },
313
- "docstring": {
314
- "prompt":
315
- `<| file ext=.py |>
316
-
317
- def count_words(filename: str) -> Dict[str, int]:
318
- """<infill>
319
- """
320
- with open(filename, 'r') as f:
321
- word_counts = {}
322
- for line in f:
323
- for word in line.split():
324
- if word in word_counts:
325
- word_counts[word] = 1
326
- else:
327
- word_counts[word] = 1
328
- return word_counts
329
- `,
330
- "length": 32,
331
- "temperature": 0.2,
332
- "mode": "python"
333
- },
334
- "python": {
335
- "prompt":
336
- `<| file ext=.py |>
337
- def count_words(filename):
338
- """Count the number of occurrences of each word in the file"""`,
339
- "length": 64,
340
- "temperature": 0.6,
341
- "mode": "python"
342
- },
343
- "class": {
344
- "prompt": "<| file ext=.py |>\nclass Person:\n" + SPLIT_TOKEN + "\np = Person('Eren', 18, 'Male')",
345
- "length": 64,
346
- "temperature": 0.2,
347
- "mode": "python"
348
- },
349
- "javascript": {
350
- "prompt": "// fetch from the given URL and load the response contents into a new div",
351
- "length": 64,
352
- "temperature": 0.6,
353
- "mode": "javascript"
354
- },
355
- "jupyter": {
356
- "prompt": "<| file ext=.ipynb:python |>\n<text>\nThis notebook demonstrates using scikit-learn to perform PCA.\n</text>\n<cell>",
357
- "length": 64,
358
- "temperature": 0.6,
359
- "mode": "python"
360
- },
361
- "stackoverflow": {
362
- "prompt": "<| q tags=regex,html |>\nParsing HTML with regular expressions\nHow do I do this? Is it a good idea?\n<|/ q dscore=3 |>\n<| a dscore=4 |>",
363
- "length": 64,
364
- "temperature": 0.6,
365
- "mode": "text"
366
- },
367
- "metadata-conditioning": {
368
- "prompt": "<| file ext=.py filename=train_model.py source=github dstars=4 |>\n",
369
- "length": 64,
370
- "temperature": 0.6,
371
- "mode": "python"
372
- },
373
- "metadata-prediction": {
374
- "prompt": "<| file source=github ext=.py |>\nfrom setuptools import setup\nfrom setuptools_rust import Binding, RustExtension\n\nextras = {}\nextras[\"testing\"] = [\"pytest\", \"requests\", \"numpy\", \"datasets\"]\nextras[\"docs\"] = [\"sphinx\", \"sphinx_rtd_theme\", \"setuptools_rust\"]\n\nsetup(\n name=\"tokenizers\",\n version=\"0.11\",\n description=\"Fast and Customizable Tokenizers\",\n long_description=open(\"README.md\", \"r\", encoding=\"utf-8\").read(),\n)\n\n<|/ file filename=",
375
- "length": 1,
376
- "temperature": 0.2,
377
- "mode": "python"
378
- },
379
- "humaneval": {
380
- "prompt": "from typing import List, Optional\n\n\ndef longest(strings: List[str]) -> Optional[str]:\n \"\"\" Out of list of strings, return the longest one. Return the first one in case of multiple\n strings of the same length. Return None in case the input list is empty.\n >>> longest([])\n\n >>> longest(['a', 'b', 'c'])\n 'a'\n >>> longest(['a', 'bb', 'ccc'])\n 'ccc'\n \"\"\"\n",
381
- "temperature": 0.6,
382
- "length": 64,
383
- "mode": "python"
384
- },
385
- };
386
-
387
- var editor = ace.edit("editor");
388
- editor.setOption("wrap", true);
389
- //var editor = null;
390
-
391
- function set_editor_mode(mode) {
392
- session = editor.session
393
- session.setMode("ace/mode/" + mode, function() {
394
- var rules = session.$mode.$highlightRules.getRules();
395
- for (var stateName in rules) {
396
- if (Object.prototype.hasOwnProperty.call(rules, stateName)) {
397
- rules[stateName].unshift({
398
- token: 'infill',
399
- regex: SPLIT_TOKEN
400
- });
401
- }
402
- }
403
- // force recreation of tokenizer
404
- session.$mode.$tokenizer = null;
405
- session.bgTokenizer.setTokenizer(session.$mode.getTokenizer());
406
- // force re-highlight whole document
407
- session.bgTokenizer.start(0);
408
- });
409
- }
410
-
411
- /*
412
- var textarea = $('textarea[name="prompt"]').hide();
413
- var prefix_textarea = $('textarea[name="prefix"]').hide();
414
- var suffix_textarea = $('textarea[name="suffix"]').hide();
415
- editor.getSession().on('change', function () {
416
- textarea.val(editor.getSession().getValue());
417
- });
418
- */
419
-
420
- function set_text(text) {
421
- editor.getSession().setValue(text);
422
- // textarea.val(text);
423
- }
424
-
425
- function set_selection(data) {
426
- var lines = editor.getSession().doc.$lines;
427
- var lines_flat = join_lines(lines);
428
- if (data['type'] == 'generate') {
429
- doc_length = lines_flat.length;
430
- var start = convert_string_index_to_location(data['prompt'].length, lines);
431
- var end = convert_string_index_to_location(doc_length, lines);
432
- // reverse this so that we can shift select to shorten and delete extra stuff
433
- editor.selection.setRange(new Range(end.row, end.column, start.row, start.column));
434
- } else if (data['type'] == 'infill') {
435
- var length_so_far = 0;
436
- for (var i = 0; i < data['infills'].length; i++) {
437
- var prefix = data['parts'][i];
438
- var suffix = data['parts'][i+1];
439
- var infilled = data['infills'][i];
440
- var start = convert_string_index_to_location(length_so_far + prefix.length, lines);
441
- var end = convert_string_index_to_location(length_so_far + (prefix + infilled).length, lines);
442
- var range = null;
443
- if (data['infills'].length == 1) {
444
- range = new Range(end.row, end.column, start.row, start.column)
445
- } else {
446
- range = new Range(start.row, start.column, end.row, end.column)
447
- }
448
- if (i == 0) {
449
- editor.selection.setRange(range);
450
- } else {
451
- editor.selection.addRange(range);
452
- }
453
- length_so_far += (prefix + infilled).length;
454
- }
455
- }
456
- editor.focus();
457
- }
458
-
459
- function select_example(name) {
460
- $("#length_slider").val(EXAMPLES[name]["length"]);
461
- $("#length_slider_output").text(EXAMPLES[name]["length"]);
462
- $("#temp_slider").val(EXAMPLES[name]["temperature"]);
463
- $("#temp_slider_output").text(EXAMPLES[name]["temperature"]);
464
- set_text(EXAMPLES[name]["prompt"])
465
- var mode = EXAMPLES[name]["mode"];
466
-
467
- set_editor_mode(mode);
468
- $("#mode").val(mode).change();
469
- }
470
-
471
- function newline_character() {
472
- return editor.getSession().doc.getNewLineCharacter();
473
- }
474
-
475
- function join_lines(lines) {
476
- return lines.join(newline_character());
477
- }
478
-
479
- function get_prefix(location, lines) {
480
- if (!(location.hasOwnProperty('row') && location.hasOwnProperty('column'))) {
481
- console.error("invalid location " + location);
482
- }
483
- if (location.row == 0) {
484
- return lines[location.row].substring(0, location.column);
485
- } else {
486
- return join_lines(lines.slice(0, location.row)) + newline_character() + lines[location.row].substring(0, location.column);
487
- }
488
- }
489
-
490
- function convert_location_to_string_index(location, lines) {
491
- return get_prefix(location, lines).length;
492
- }
493
-
494
- function convert_string_index_to_location(string_index, lines) {
495
- var column = 0;
496
- var row = 0;
497
- var char_count = 0;
498
- var line_sep_length = editor.getSession().doc.getNewLineCharacter().length;
499
- for (var i = 0; i < lines.length; i++) {
500
- var line = lines[i];
501
- var new_char_count = char_count + line.length + line_sep_length;
502
- if (string_index < new_char_count) {
503
- return {
504
- 'row': i,
505
- 'column': string_index - char_count,
506
- }
507
- }
508
- char_count = new_char_count;
509
- }
510
- console.error("did not find index " + string_index + " in lines " + lines);
511
- return null;
512
- }
513
-
514
- function get_infill_parts(warn_on_single) {
515
- var lines = editor.getSession().doc.$lines;
516
- var lines_flat = join_lines(lines);
517
- parts = lines_flat.split(SPLIT_TOKEN)
518
- if (warn_on_single && parts.length == 1) {
519
- window.alert('There are no infill masks, add some <infill> masks before requesting an infill')
520
- }
521
- return parts
522
- }
523
-
524
- function insert_mask() {
525
- if (editor.selection.ranges.length > 1) {
526
- for (var i = 0; i < editor.selection.ranges.length; i++) {
527
- console.log('range is', editor.selection.ranges[i])
528
- editor.session.replace(editor.selection.ranges[i], SPLIT_TOKEN)
529
- }
530
- } else {
531
- editor.session.replace(editor.selection.getRange(), SPLIT_TOKEN)
532
- }
533
- }
534
-
535
-
536
- function make_generate_listener(url) {
537
- return async function(event) {
538
- var length = $("#length_slider").val();
539
- var eta = PER_TOKEN * length + OVERHEAD;
540
- // $("#eta").text(eta);
541
- // $("#infill-form-button").click(function (event) { console.log(editor.selection.getCursor()); });
542
-
543
- // get temperature and response length parameters
544
- var send_data = {
545
- length: $("#length_slider").val(),
546
- temperature: $("#temp_slider").val(),
547
- extra_sentinel: $('#extra_sentinel_checkbox').is(":checked"),
548
- max_retries: $('#max_retries_slider').val(),
549
- parts: get_infill_parts(url == "infill"),
550
- prompt: editor.getSession().getValue(),
551
- }
552
- console.log("send_data:");
553
- console.log(send_data);
554
-
555
- $("#loader_holder").css("visibility", "visible");
556
- $("#extend-form-button").prop("disabled", true);
557
- $("#infill-form-button").prop("disabled", true);
558
- $("#error").text("");
559
-
560
- function complete() {
561
- $("#loader_holder").css("visibility", "hidden");
562
- $("#extend-form-button").prop("disabled", false);
563
- $("#infill-form-button").prop("disabled", false);
564
- }
565
-
566
- function success(receive_data) {
567
- console.log("Response:");
568
- console.log(receive_data);
569
- if (receive_data["result"] == "success") {
570
- console.log("success");
571
- // $("#prompt").text(data["prompt"]);
572
- // $("#response").text(data["text"]);
573
- set_text(receive_data["text"]);
574
- set_selection(receive_data);
575
- $("#error").text("");
576
- if (receive_data["message"] != "") {
577
- $("#warning").text(receive_data["message"]);
578
- } else {
579
- $("#warning").text("");
580
- }
581
- } else {
582
- console.log("error");
583
- set_text(receive_data["text"])
584
- $("#error").text(receive_data["message"]);
585
- }
586
- }
587
-
588
- function error(err) {
589
- console.log(err);
590
- $("#error").text(err);
591
- }
592
-
593
- try {
594
- var stringified = JSON.stringify(send_data);
595
- // var encoded_data = encodeURIComponent(btoa(stringified));
596
- var encoded_data = Base64.encodeURI(stringified);
597
-
598
- const response = await fetch(`${url}?info=${encoded_data}`);
599
- // const response = await fetch(`${url}` {
600
- // method: 'GET',
601
- // body: encoded_data,
602
- // });
603
- if (response.status >= 400) {
604
- error(response.statusText);
605
- console.log("here");
606
- console.log(response.status);
607
- } else {
608
- response.json().then(success).catch(error).finally(complete);
609
- }
610
- } catch (e) {
611
- error(e);
612
- } finally {
613
- complete();
614
- }
615
- }
616
- }
617
-
618
- // actual logic
619
- $(document).ready(function() {
620
- $("#insert-mask-button").click(insert_mask);
621
- $("#extend-form-button").click(make_generate_listener("generate"));
622
- $("#infill-form-button").click(make_generate_listener("infill"));
623
- $("#mode").change(function (e) {
624
- var mode = $("#mode").val();
625
- set_editor_mode(mode);
626
- });
627
- select_example("python")
628
- // set_editor_mode("python");
629
- });
630
- </script>
631
- <script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.2/iframeResizer.contentWindow.min.js"></script>
632
- </body>
633
- </html>