davanstrien HF staff committed on
Commit
f487e4f
·
1 Parent(s): d599135

directly use file

Browse files
Files changed (1) hide show
  1. index.html +49 -33
index.html CHANGED
@@ -237,52 +237,68 @@
237
  async function fetchTotalAnnotations() {
238
  try {
239
  const stats = { total: 0, languages: new Set(), users: new Set() };
240
- let offset = 0;
241
- const limit = 100; // API default limit
242
- let hasMore = true;
243
 
244
  document.getElementById("total-annotations").innerHTML = `
245
  <div class="loading"></div>
246
- <p>Processing rows...</p>
247
  `;
248
 
249
- while (hasMore) {
250
- const response = await fetch(
251
- `https://datasets-server.huggingface.co/rows?dataset=data-is-better-together%2Ffineweb-c-progress&config=default&split=train&offset=${offset}`
252
- );
253
-
254
- if (!response.ok) {
255
- throw new Error(`HTTP error! status: ${response.status}`);
256
- }
257
-
258
- const data = await response.json();
259
-
260
- // Process this batch of rows
261
- data.rows.forEach((row) => {
262
- const submissions = parseInt(row.row.submitted) || 0;
263
- stats.total += submissions;
264
- if (submissions > 0) {
265
- stats.languages.add(row.row.language_dataset_name);
266
- if (row.row.username) {
267
- stats.users.add(row.row.username);
 
 
 
 
 
 
 
 
 
 
268
  }
 
 
269
  }
270
- });
271
 
272
- // Update progress
273
  document.getElementById("total-annotations").innerHTML = `
274
  <div class="loading"></div>
275
  <p>Processing... Current count: <strong>${stats.total.toLocaleString()}</strong></p>
276
- <p>Processed ${offset + data.rows.length} rows</p>
277
  `;
 
278
 
279
- // Check if we should continue
280
- if (data.rows.length < limit) {
281
- hasMore = false;
282
- } else {
283
- offset += limit;
284
- // Add a small delay to avoid overwhelming the API
285
- await new Promise((resolve) => setTimeout(resolve, 100));
 
 
 
 
 
 
 
286
  }
287
  }
288
 
 
237
  async function fetchTotalAnnotations() {
238
  try {
239
  const stats = { total: 0, languages: new Set(), users: new Set() };
 
 
 
240
 
241
  document.getElementById("total-annotations").innerHTML = `
242
  <div class="loading"></div>
243
+ <p>Loading data...</p>
244
  `;
245
 
246
+ const response = await fetch(
247
+ "https://huggingface.co/datasets/data-is-better-together/fineweb-c-progress/resolve/main/argilla_progress.ndjson"
248
+ );
249
+ if (!response.ok)
250
+ throw new Error(`HTTP error! status: ${response.status}`);
251
+
252
+ const reader = response.body.getReader();
253
+ const decoder = new TextDecoder();
254
+ let buffer = "";
255
+
256
+ while (true) {
257
+ const { value, done } = await reader.read();
258
+ if (done) break;
259
+
260
+ buffer += decoder.decode(value, { stream: true });
261
+ const lines = buffer.split("\n");
262
+ buffer = lines.pop() || ""; // Keep the last incomplete line in buffer
263
+
264
+ for (const line of lines) {
265
+ if (!line.trim()) continue;
266
+ try {
267
+ const row = JSON.parse(line);
268
+ const submissions = parseInt(row.submitted) || 0;
269
+ stats.total += submissions;
270
+ if (submissions > 0) {
271
+ stats.languages.add(row.language_dataset_name);
272
+ if (row.username) {
273
+ stats.users.add(row.username);
274
+ }
275
  }
276
+ } catch (e) {
277
+ console.error("Error parsing line:", e);
278
  }
279
+ }
280
 
281
+ // Update progress periodically
282
  document.getElementById("total-annotations").innerHTML = `
283
  <div class="loading"></div>
284
  <p>Processing... Current count: <strong>${stats.total.toLocaleString()}</strong></p>
 
285
  `;
286
+ }
287
 
288
+ // Process any remaining data in buffer
289
+ if (buffer.trim()) {
290
+ try {
291
+ const row = JSON.parse(buffer);
292
+ const submissions = parseInt(row.submitted) || 0;
293
+ stats.total += submissions;
294
+ if (submissions > 0) {
295
+ stats.languages.add(row.language_dataset_name);
296
+ if (row.username) {
297
+ stats.users.add(row.username);
298
+ }
299
+ }
300
+ } catch (e) {
301
+ console.error("Error parsing final line:", e);
302
  }
303
  }
304