dvilasuero HF staff committed on
Commit
c5c6da9
·
verified ·
1 Parent(s): 208151a

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +44 -20
index.html CHANGED
@@ -164,7 +164,7 @@
164
  <body>
165
  <div class="container">
166
  <header class="header">
167
- <h1>🏆 🌐 FineWeb 2 Sprint Leaderboard</h1>
168
  </header>
169
 
170
  <div class="summary" id="total-annotations">
@@ -202,33 +202,57 @@
202
  <script>
203
  async function fetchTotalAnnotations() {
204
  try {
205
- const response = await fetch(
206
- "https://datasets-server.huggingface.co/rows?dataset=data-is-better-together/fineweb-c-progress&config=default&split=train&offset=0&length=100"
207
- );
 
 
 
 
 
 
208
 
209
- if (!response.ok) {
210
- throw new Error(`HTTP error! status: ${response.status}`);
211
- }
 
212
 
213
- const data = await response.json();
 
 
214
 
215
- // Calculate total annotations and count languages with submissions
216
- const stats = data.rows.reduce(
217
- (acc, row) => {
218
- const submissions = row.row.submitted || 0;
 
 
219
  if (submissions > 0) {
220
- acc.languages.add(row.row.language_dataset_name);
221
- // Add username to the Set if it exists
222
  if (row.row.username) {
223
- acc.users.add(row.row.username);
224
  }
225
  }
226
- acc.total += submissions;
227
- return acc;
228
- },
229
- { total: 0, languages: new Set(), users: new Set() }
230
- );
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
 
232
  document.getElementById("total-annotations").innerHTML = `
233
  <p>Total annotations submitted: <strong>${stats.total.toLocaleString()}</strong></p>
234
  <p>Languages with annotations: <strong>${stats.languages.size}</strong></p>
 
164
  <body>
165
  <div class="container">
166
  <header class="header">
167
+ <h1>🏆 FineWeb 2 Sprint Leaderboard</h1>
168
  </header>
169
 
170
  <div class="summary" id="total-annotations">
 
202
  <script>
203
  async function fetchTotalAnnotations() {
204
  try {
205
+ const stats = { total: 0, languages: new Set(), users: new Set() };
206
+ let offset = 0;
207
+ const limit = 100; // API default limit
208
+ let hasMore = true;
209
+
210
+ document.getElementById("total-annotations").innerHTML = `
211
+ <div class="loading"></div>
212
+ <p>Processing rows...</p>
213
+ `;
214
 
215
+ while (hasMore) {
216
+ const response = await fetch(
217
+ `https://datasets-server.huggingface.co/rows?dataset=data-is-better-together%2Ffineweb-c-progress&config=default&split=train&offset=${offset}`
218
+ );
219
 
220
+ if (!response.ok) {
221
+ throw new Error(`HTTP error! status: ${response.status}`);
222
+ }
223
 
224
+ const data = await response.json();
225
+
226
+ // Process this batch of rows
227
+ data.rows.forEach(row => {
228
+ const submissions = parseInt(row.row.submitted) || 0;
229
+ stats.total += submissions;
230
  if (submissions > 0) {
231
+ stats.languages.add(row.row.language_dataset_name);
 
232
  if (row.row.username) {
233
+ stats.users.add(row.row.username);
234
  }
235
  }
236
+ });
237
+
238
+ // Update progress
239
+ document.getElementById("total-annotations").innerHTML = `
240
+ <div class="loading"></div>
241
+ <p>Processing... Current count: <strong>${stats.total.toLocaleString()}</strong></p>
242
+ <p>Processed ${offset + data.rows.length} rows</p>
243
+ `;
244
+
245
+ // Check if we should continue
246
+ if (data.rows.length < limit) {
247
+ hasMore = false;
248
+ } else {
249
+ offset += limit;
250
+ // Add a small delay to avoid overwhelming the API
251
+ await new Promise(resolve => setTimeout(resolve, 100));
252
+ }
253
+ }
254
 
255
+ // Final update
256
  document.getElementById("total-annotations").innerHTML = `
257
  <p>Total annotations submitted: <strong>${stats.total.toLocaleString()}</strong></p>
258
  <p>Languages with annotations: <strong>${stats.languages.size}</strong></p>