<!--
fineweb-c-dashboard / index.html
davanstrien's picture
davanstrien HF staff
directly use file
f487e4f
-->
<!DOCTYPE html>
<!-- lang="en" tells screen readers, spell checkers and translation tools that the page copy is English. -->
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Leaderboards</title>
<!-- Source Sans Pro (weights 400 and 600) is the first family in the body font stack. -->
<link
href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600&display=swap"
rel="stylesheet"
/>
<style>
/* Page shell: font stack, outer padding, and a light diagonal gradient that fills at least the viewport height. */
body {
font-family: "Source Sans Pro", system-ui, -apple-system, sans-serif;
margin: 0;
padding: 20px;
background: linear-gradient(
135deg,
rgb(245, 245, 245) 0%,
#ffffff 100%
);
min-height: 100vh;
}
/* Horizontally centered content column, capped at 768px by default. */
.container {
max-width: 768px;
margin: 0 auto;
padding: 0 1rem;
}
/* From 768px up the column may grow to 1200px and gets wider side padding. */
@media (min-width: 768px) {
.container {
max-width: 1200px;
padding: 0 2rem;
}
}
/* Centered page header, separated from the content below by a bottom rule. */
.header {
text-align: center;
margin-bottom: 2rem;
color: #111827;
padding-bottom: 1rem;
border-bottom: 2px solid rgb(228, 228, 228);
}
/* White rounded panel; the #total-annotations element carries this class. */
.summary {
text-align: center;
margin-bottom: 2rem;
padding: 1rem;
background: white;
border-radius: 12px;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
border: 1px solid rgb(228, 228, 228);
}
/* Muted grey text for each line inside the summary panel. */
.summary p {
font-size: 1.1rem;
color: rgb(107, 114, 128);
margin: 0;
}
/* Cards stack in one column by default; a later min-width query switches this to a two-column grid. */
.grid {
display: flex;
flex-direction: column;
gap: 2rem;
}
/* Card chrome; the transition animates the hover lift defined in .card:hover. */
.card {
background: white;
border-radius: 12px;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
padding: 1.5rem;
border: 1px solid rgb(228, 228, 228);
transition: transform 0.2s ease, box-shadow 0.2s ease;
}
/* On hover the card moves up 2px and its shadow deepens. */
.card:hover {
transform: translateY(-2px);
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
/* Blue, semibold card heading with an underline rule. */
.card-title {
font-size: 1.25rem;
font-weight: 600;
margin-bottom: 1rem;
color: #007bff;
border-bottom: 1px solid rgb(228, 228, 228);
padding-bottom: 0.5rem;
}
/* Aspect-ratio box: zero height plus padding-bottom of 56.25% of the width yields a 16:9 area. */
.iframe-container {
border-radius: 8px;
overflow: hidden;
background: #fff;
position: relative;
padding-bottom: 56.25%;
height: 0;
}
/* The iframe is absolutely positioned to fill the aspect-ratio box above. */
.iframe-container iframe {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
border: none;
}
/* Spinner: a 20px circle whose top and right borders are blue, rotated by the spin keyframes. */
.loading {
display: inline-block;
width: 20px;
height: 20px;
border: 2px solid rgb(228, 228, 228);
border-radius: 50%;
border-top-color: #007bff;
border-right-color: #007bff;
animation: spin 1s ease-in-out infinite;
margin-right: 10px;
vertical-align: middle;
opacity: 0.8;
}
/* One full clockwise turn per animation cycle. */
@keyframes spin {
to {
transform: rotate(360deg);
}
}
/* Red-on-pink box; the page script renders its fetch-failure message inside an element with this class. */
.error {
color: #dc2626;
padding: 1rem;
background: #fee2e2;
border-radius: 8px;
margin: 1rem 0;
}
/* Narrow viewports: tighter paddings and a taller 4:3 embed area (75%). */
/* NOTE(review): this query and the min-width: 768px queries both match at exactly 768px wide; confirm the overlap is intended. */
@media (max-width: 768px) {
body {
padding: 1rem;
}
.card {
padding: 1rem;
}
.iframe-container {
padding-bottom: 75%;
}
}
/* Two equal columns from 768px up; declared after the base .grid rule so it wins at equal specificity. */
@media (min-width: 768px) {
.grid {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 2rem;
}
}
/* Highlighted figures inside the summary lines. */
.summary p strong {
color: #007bff;
font-size: 1.2em;
}
</style>
</head>
<body>
<!-- Page content: intro header, live annotation totals, and the two embedded leaderboards. -->
<div class="container">
  <header class="header">
    <h1>🏆 FineWeb2-C Leaderboard</h1>
    <p>
      <em
        >Helping build better language models by rating educational quality
        of texts across different languages!</em
      >
    </p>
    <!-- Each new-tab link sets rel="noopener" explicitly so the opened page gets no window.opener handle. -->
    <p>
      Contribute to the dataset:
      <a
        href="https://huggingface.co/spaces/data-is-better-together/fineweb-c"
        target="_blank"
        rel="noopener"
      >
        here
      </a>
    </p>
    <p>
      Check out the current version of the dataset:
      <a
        href="https://huggingface.co/datasets/data-is-better-together/fineweb-c"
        target="_blank"
        rel="noopener"
      >
        here
      </a>
    </p>
    <p>
      See this
      <a
        href="https://huggingface.co/blog/davanstrien/fineweb2-community"
        target="_blank"
        rel="noopener"
        >blog post</a
      >
      for more information.
    </p>
  </header>
  <!--
    The page script rewrites this element's contents while it loads data.
    role="status" makes it a polite live region so assistive technology
    announces the totals when they arrive; the spinner is decorative and hidden.
  -->
  <div class="summary" id="total-annotations" role="status">
    <div class="loading" aria-hidden="true"></div>
    <span>Loading annotation data...</span>
  </div>
  <div class="grid">
    <div class="card">
      <div class="card-title">Language Leaderboard</div>
      <div class="iframe-container">
        <!-- title gives the embedded frame an accessible name. -->
        <iframe
          src="https://huggingface.co/datasets/data-is-better-together/fineweb-c-progress/embed/sql-console/dhn8hw-"
          title="Language leaderboard"
          frameborder="0"
          width="100%"
          height="560px"
        ></iframe>
      </div>
    </div>
    <div class="card">
      <div class="card-title">User Leaderboard</div>
      <div class="iframe-container">
        <iframe
          src="https://huggingface.co/datasets/data-is-better-together/fineweb-c-progress/embed/sql-console/DJ2n1Z0"
          title="User leaderboard"
          frameborder="0"
          width="100%"
          height="560px"
        ></iframe>
      </div>
    </div>
  </div>
</div>
<script>
/**
 * Downloads the annotation-progress NDJSON file, tallies it, and renders the
 * totals into the element with id "total-annotations".
 *
 * Each non-blank line is parsed as one JSON object. Its `submitted` value is
 * added to the running total; when that value is positive, the row's
 * `language_dataset_name` and `username` (if present) are recorded as distinct
 * languages and contributors. Lines that fail to parse are logged and skipped.
 *
 * While the body streams in, the element shows a running count; on success it
 * shows the three totals; on any failure it shows an error box instead.
 *
 * @returns {Promise<void>} Resolves once the element has been updated. Fetch
 *   and parse failures are caught and rendered rather than rethrown.
 */
async function fetchTotalAnnotations() {
  const DATA_URL =
    "https://huggingface.co/datasets/data-is-better-together/fineweb-c-progress/resolve/main/argilla_progress.ndjson";

  const summaryEl = document.getElementById("total-annotations");
  if (!summaryEl) {
    // Nothing to render into; bail out instead of throwing inside the error handler too.
    console.error("Error fetching total annotations: #total-annotations element not found");
    return;
  }

  const stats = { total: 0, languages: new Set(), users: new Set() };

  // Folds one NDJSON line into `stats`; a malformed line is logged under `label` and skipped.
  const addLine = (line, label) => {
    if (!line.trim()) return;
    try {
      const row = JSON.parse(line);
      const submissions = parseInt(row.submitted, 10) || 0;
      stats.total += submissions;
      if (submissions > 0) {
        // Skip missing names so `undefined` is never counted as a language.
        if (row.language_dataset_name) {
          stats.languages.add(row.language_dataset_name);
        }
        if (row.username) {
          stats.users.add(row.username);
        }
      }
    } catch (e) {
      console.error(label, e);
    }
  };

  try {
    summaryEl.innerHTML = `
      <div class="loading"></div>
      <p>Loading data...</p>
    `;

    const response = await fetch(DATA_URL);
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    if (!response.body) {
      // No readable stream available: read the whole body at once instead.
      const text = await response.text();
      for (const line of text.split("\n")) {
        addLine(line, "Error parsing line:");
      }
    } else {
      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let buffer = "";

      while (true) {
        const { value, done } = await reader.read();
        if (done) break;

        // stream: true keeps a multi-byte character split across chunks intact.
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        buffer = lines.pop() || ""; // Keep the last incomplete line in buffer

        for (const line of lines) {
          addLine(line, "Error parsing line:");
        }

        // Update progress after each chunk (the value is a locale-formatted number).
        summaryEl.innerHTML = `
          <div class="loading"></div>
          <p>Processing... Current count: <strong>${stats.total.toLocaleString()}</strong></p>
        `;
      }

      // Flush any bytes the decoder is still holding, then process the trailing line.
      buffer += decoder.decode();
      addLine(buffer, "Error parsing final line:");
    }

    // Final update: only numbers are interpolated, so innerHTML is safe here.
    summaryEl.innerHTML = `
      <p>Total annotations submitted: <strong>${stats.total.toLocaleString()}</strong></p>
      <p>Languages with annotations: <strong>${stats.languages.size}</strong></p>
      <p>Total contributors: <strong>${stats.users.size}</strong></p>
    `;
  } catch (error) {
    console.error("Error fetching total annotations:", error);

    // Build the error box with textContent so the message is never parsed as HTML.
    const errorBox = document.createElement("div");
    errorBox.className = "error";
    errorBox.textContent = `Error loading annotations: ${error.message}`;
    summaryEl.innerHTML = "";
    summaryEl.appendChild(errorBox);
  }
}
// Kick off the totals fetch as soon as the DOM is usable: run immediately when
// parsing has already finished, otherwise wait for DOMContentLoaded.
if (document.readyState !== "loading") {
  fetchTotalAnnotations();
} else {
  document.addEventListener("DOMContentLoaded", fetchTotalAnnotations);
}
</script>
</body>
</html>