Commit
·
f487e4f
1
Parent(s):
d599135
directly use file
Browse files- index.html +49 -33
index.html
CHANGED
@@ -237,52 +237,68 @@
|
|
237 |
async function fetchTotalAnnotations() {
|
238 |
try {
|
239 |
const stats = { total: 0, languages: new Set(), users: new Set() };
|
240 |
-
let offset = 0;
|
241 |
-
const limit = 100; // API default limit
|
242 |
-
let hasMore = true;
|
243 |
|
244 |
document.getElementById("total-annotations").innerHTML = `
|
245 |
<div class="loading"></div>
|
246 |
-
<p>
|
247 |
`;
|
248 |
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
}
|
|
|
|
|
269 |
}
|
270 |
-
}
|
271 |
|
272 |
-
// Update progress
|
273 |
document.getElementById("total-annotations").innerHTML = `
|
274 |
<div class="loading"></div>
|
275 |
<p>Processing... Current count: <strong>${stats.total.toLocaleString()}</strong></p>
|
276 |
-
<p>Processed ${offset + data.rows.length} rows</p>
|
277 |
`;
|
|
|
278 |
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
}
|
287 |
}
|
288 |
|
|
|
237 |
async function fetchTotalAnnotations() {
|
238 |
try {
|
239 |
const stats = { total: 0, languages: new Set(), users: new Set() };
|
|
|
|
|
|
|
240 |
|
241 |
document.getElementById("total-annotations").innerHTML = `
|
242 |
<div class="loading"></div>
|
243 |
+
<p>Loading data...</p>
|
244 |
`;
|
245 |
|
246 |
+
const response = await fetch(
|
247 |
+
"https://huggingface.co/datasets/data-is-better-together/fineweb-c-progress/resolve/main/argilla_progress.ndjson"
|
248 |
+
);
|
249 |
+
if (!response.ok)
|
250 |
+
throw new Error(`HTTP error! status: ${response.status}`);
|
251 |
+
|
252 |
+
const reader = response.body.getReader();
|
253 |
+
const decoder = new TextDecoder();
|
254 |
+
let buffer = "";
|
255 |
+
|
256 |
+
while (true) {
|
257 |
+
const { value, done } = await reader.read();
|
258 |
+
if (done) break;
|
259 |
+
|
260 |
+
buffer += decoder.decode(value, { stream: true });
|
261 |
+
const lines = buffer.split("\n");
|
262 |
+
buffer = lines.pop() || ""; // Keep the last incomplete line in buffer
|
263 |
+
|
264 |
+
for (const line of lines) {
|
265 |
+
if (!line.trim()) continue;
|
266 |
+
try {
|
267 |
+
const row = JSON.parse(line);
|
268 |
+
const submissions = parseInt(row.submitted) || 0;
|
269 |
+
stats.total += submissions;
|
270 |
+
if (submissions > 0) {
|
271 |
+
stats.languages.add(row.language_dataset_name);
|
272 |
+
if (row.username) {
|
273 |
+
stats.users.add(row.username);
|
274 |
+
}
|
275 |
}
|
276 |
+
} catch (e) {
|
277 |
+
console.error("Error parsing line:", e);
|
278 |
}
|
279 |
+
}
|
280 |
|
281 |
+
// Update the progress display after each streamed chunk has been processed
|
282 |
document.getElementById("total-annotations").innerHTML = `
|
283 |
<div class="loading"></div>
|
284 |
<p>Processing... Current count: <strong>${stats.total.toLocaleString()}</strong></p>
|
|
|
285 |
`;
|
286 |
+
}
|
287 |
|
288 |
+
// Process any remaining data in buffer
|
289 |
+
if (buffer.trim()) {
|
290 |
+
try {
|
291 |
+
const row = JSON.parse(buffer);
|
292 |
+
const submissions = parseInt(row.submitted) || 0;
|
293 |
+
stats.total += submissions;
|
294 |
+
if (submissions > 0) {
|
295 |
+
stats.languages.add(row.language_dataset_name);
|
296 |
+
if (row.username) {
|
297 |
+
stats.users.add(row.username);
|
298 |
+
}
|
299 |
+
}
|
300 |
+
} catch (e) {
|
301 |
+
console.error("Error parsing final line:", e);
|
302 |
}
|
303 |
}
|
304 |
|