Upload 5 files
Browse files- Dockerfile +29 -0
- README.md +5 -5
- app.py +167 -0
- requirements.in +4 -0
- requirements.txt +173 -0
Dockerfile
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.11-slim

WORKDIR /code

# Flush stdout/stderr immediately so the app's print() diagnostics
# appear in the container logs as they happen
ENV PYTHONUNBUFFERED=1

# Install system dependencies (skip "recommended" extras to keep the image small)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Create the cache and data directories and make them writable for any user,
# in a single layer
RUN mkdir -p /.cache bluesky_data \
    && chmod 777 /.cache bluesky_data

# Install Python dependencies first so this layer is cached across code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Expose the port the Panel server listens on
EXPOSE 7860

# Default configuration; override at runtime. Supply the real token as a
# runtime secret rather than baking it into the image.
ENV HF_REPO_ID="davanstrien/bluesky-counts"
ENV HF_REPO_TYPE="dataset"
ENV HF_TOKEN=""

CMD ["python", "app.py"]
|
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
-
title: Bluesky
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
---
|
9 |
|
10 |
-
|
|
|
1 |
---
|
2 |
+
title: Bluesky Post Counter
|
3 |
+
emoji: 📊
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: indigo
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
---
|
9 |
|
10 |
+
Real-time visualization of posts per second on the Bluesky social network.
|
app.py
ADDED
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import hvplot.streamz
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from streamz import Stream
|
5 |
+
from streamz.dataframe import DataFrame
|
6 |
+
from atproto import FirehoseSubscribeReposClient, parse_subscribe_repos_message
|
7 |
+
import datetime
|
8 |
+
import queue
|
9 |
+
import threading
|
10 |
+
import time
|
11 |
+
import os
|
12 |
+
import json
|
13 |
+
from huggingface_hub import CommitScheduler, HfApi, hf_hub_download
|
14 |
+
import uuid
|
15 |
+
from pathlib import Path
|
16 |
+
import panel as pn
|
17 |
+
|
18 |
+
|
19 |
+
pn.extension(design="material")

# Queue created for passing data between threads (nothing in this script
# puts to or gets from it yet)
post_queue = queue.Queue()

# Number of posts seen since the last emit; incremented by the firehose
# handler and reset by the emitter
post_count = 0

# Streaming dataframe: `example` only describes the column layout
# (timestamp, post_count) of the frames that will be emitted on `stream`.
# No data collection has started at this point, so there is nothing to wait for.
stream = Stream()
example = pd.DataFrame(
    {"timestamp": [pd.Timestamp.now()], "post_count": [post_count]}, index=[0]
)
df = DataFrame(stream, example=example)
|
34 |
+
|
35 |
+
# Number of one-second samples in one month (31 days); used as the backlog size
MONTH_IN_SECONDS = 31 * 24 * 60 * 60  # 31 days * 24 hours * 60 minutes * 60 seconds

# Configuration, overridable through environment variables
REPO_ID = os.getenv("HF_REPO_ID", "davanstrien/bluesky-counts")
REPO_TYPE = os.getenv("HF_REPO_TYPE", "dataset")
# Token for Hugging Face API access. HUGGINGFACE_TOKEN is the name this script
# has always read; HF_TOKEN is the name the Dockerfile declares. Accept either,
# and treat an empty value (the Dockerfile default) as "no token".
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN") or None
# Local folder for count files, and a per-process unique file name inside it
DATA_FOLDER = Path("bluesky_data")
DATA_FILE = f"bluesky_counts_{uuid.uuid4()}.json"
|
44 |
+
|
45 |
+
|
46 |
+
def load_hub_data():
    """Load previously stored counts from the Hub.

    Lists the files of ``REPO_ID``, keeps those under ``data/bluesky_counts_``,
    downloads the one that sorts last by name and parses it as JSON lines
    (one JSON document per line; blank lines are skipped).

    NOTE(review): "last by name" equals "most recent" only if file names sort
    chronologically; ``DATA_FILE`` uses a random UUID, so this may pick an
    arbitrary file -- confirm the intended selection rule.

    Returns:
        A list with at most ``MONTH_IN_SECONDS`` parsed records (the tail of
        the file), or an empty list when no data file exists or any error
        occurs. Errors are printed, never raised.
    """
    try:
        api = HfApi(token=HF_TOKEN)
        repo_files = api.list_repo_files(REPO_ID, repo_type=REPO_TYPE)
        data_files = sorted(
            name for name in repo_files if name.startswith("data/bluesky_counts_")
        )
        if not data_files:
            return []

        local_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=data_files[-1],
            repo_type=REPO_TYPE,
            token=HF_TOKEN,
        )

        # Skip blank lines so a stray or trailing empty line does not discard
        # the whole history via the except branch below.
        with open(local_path, "r", encoding="utf-8") as f:
            data = [json.loads(line) for line in f if line.strip()]

        # Keep only last month of data
        return data[-MONTH_IN_SECONDS:]
    except Exception as e:
        # Best effort: the dashboard still works without history.
        print(f"Error loading data from Hub: {e}")
        return []
|
73 |
+
|
74 |
+
|
75 |
+
# Initialize storage and Hub connection
DATA_FOLDER.mkdir(exist_ok=True)
# The scheduler periodically commits the contents of DATA_FOLDER to the
# `data/` directory of the Hub repository.
scheduler = CommitScheduler(
    repo_id=REPO_ID,
    repo_type=REPO_TYPE,
    folder_path=DATA_FOLDER,
    path_in_repo="data",
    every=10,  # `every` is expressed in minutes: upload every 10 minutes
    token=HF_TOKEN,  # Token for authentication
)
|
85 |
+
|
86 |
+
|
87 |
+
def on_message_handler(message):
    """Count newly created posts in one firehose message.

    Parses ``message`` with ``parse_subscribe_repos_message`` and adds the
    number of "create" operations on post records to the global
    ``post_count``. Messages without operations are ignored.

    Args:
        message: Raw firehose message as delivered to the client callback.
    """
    global post_count

    commit = parse_subscribe_repos_message(message)
    ops = getattr(commit, "ops", None)
    if not ops:
        return

    # Only count new posts (not likes, reposts, etc). Record paths have the
    # form "<collection>/<record key>", so match the collection exactly: a
    # substring test would also count other collections whose name merely
    # begins with "app.bsky.feed.post" (e.g. "app.bsky.feed.postgate").
    post_prefix = "app.bsky.feed.post/"
    post_count += sum(
        1 for op in ops if op.action == "create" and op.path.startswith(post_prefix)
    )
|
95 |
+
|
96 |
+
|
97 |
+
def emit_counts():
    """Replay recent history, then emit and persist one post count per second.

    On start-up the last 100 points returned by ``load_hub_data`` are emitted
    on ``stream`` to seed the plot. The function then loops forever: once per
    second it takes the current value of the global ``post_count``, resets it
    to zero, emits a one-row DataFrame on ``stream`` and appends the same
    sample as a JSON line (``timestamp``, ``post_count``) to
    ``DATA_FOLDER / DATA_FILE``. That file is what the commit scheduler
    uploads and what ``load_hub_data`` parses on a later start.

    Never returns; meant to be run in a background thread.
    """
    global post_count

    if saved_data := load_hub_data():
        print(f"Loaded {len(saved_data)} historical data points from Hub")
        # Emit last 100 points to initialize plot
        for point in saved_data[-100:]:
            history_frame = pd.DataFrame(
                {
                    "timestamp": [pd.Timestamp(point["timestamp"])],
                    "post_count": [point["post_count"]],
                }
            )
            stream.emit(history_frame)

    # Wait for first second to collect initial data
    time.sleep(1)

    data_path = DATA_FOLDER / DATA_FILE
    while True:
        now = pd.Timestamp.now()
        # Read and reset the counter in one statement, before the slower
        # emit/write work, so posts counted meanwhile go to the next sample
        # instead of being wiped by a late reset.
        count, post_count = post_count, 0

        stream.emit(pd.DataFrame({"timestamp": [now], "post_count": [count]}))

        # Persist the sample as one JSON line; hold the scheduler's lock so an
        # upload never sees a half-written line.
        record = json.dumps({"timestamp": now.isoformat(), "post_count": count})
        try:
            with scheduler.lock, data_path.open("a", encoding="utf-8") as f:
                f.write(record + "\n")
        except OSError as e:
            # Persistence is best effort; keep the live plot running.
            print(f"Error writing count to {data_path}: {e}")

        # Wait 1 second
        time.sleep(1)
|
126 |
+
|
127 |
+
|
128 |
+
# Streaming line chart of post_count over timestamp; `backlog` sets how many
# points are retained (one month of one-second samples).
plot = df.hvplot.line(
    x="timestamp",
    y="post_count",
    title="Bluesky Posts per Second",
    width=800,
    height=400,
    backlog=MONTH_IN_SECONDS,
)
|
137 |
+
|
138 |
+
|
139 |
+
# Start Firehose client in a separate thread
|
140 |
+
def run_firehose():
    """Create a firehose client and start it with ``on_message_handler`` as callback."""
    FirehoseSubscribeReposClient().start(on_message_handler)
|
143 |
+
|
144 |
+
|
145 |
+
# Daemon thread that consumes the firehose
firehose_thread = threading.Thread(target=run_firehose, daemon=True)
firehose_thread.start()

# Daemon thread that emits the per-second counts
emit_thread = threading.Thread(target=emit_counts, daemon=True)
emit_thread.start()
|
153 |
+
|
154 |
+
# Script entry point: serve the dashboard with Panel's built-in server
if __name__ == "__main__":
    pn.extension()
    dashboard = pn.Column(pn.pane.HoloViews(plot))

    # Server configuration for Docker: listen on all interfaces on the port
    # the Dockerfile exposes, accept websocket connections from any origin,
    # and do not try to open a browser.
    server_options = {
        "address": "0.0.0.0",
        "port": 7860,
        "allow_websocket_origin": ["*"],
        "show": False,
    }
    pn.serve(dashboard, **server_options)
|
requirements.in
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
panel
hvplot
streamz
atproto
huggingface-hub
|
requirements.txt
ADDED
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file was autogenerated by uv via the following command:
|
2 |
+
# uv pip compile requirements.in -o requirements.txt
|
3 |
+
annotated-types==0.7.0
|
4 |
+
# via pydantic
|
5 |
+
anyio==4.6.2.post1
|
6 |
+
# via httpx
|
7 |
+
atproto==0.0.55
|
8 |
+
# via -r requirements.in
|
9 |
+
bleach==6.2.0
|
10 |
+
# via panel
|
11 |
+
bokeh==3.6.1
|
12 |
+
# via
|
13 |
+
# holoviews
|
14 |
+
# hvplot
|
15 |
+
# panel
|
16 |
+
certifi==2024.8.30
|
17 |
+
# via
|
18 |
+
# httpcore
|
19 |
+
# httpx
|
20 |
+
# requests
|
21 |
+
cffi==1.17.1
|
22 |
+
# via cryptography
|
23 |
+
charset-normalizer==3.4.0
|
24 |
+
# via requests
|
25 |
+
click==8.1.7
|
26 |
+
# via atproto
|
27 |
+
colorcet==3.1.0
|
28 |
+
# via
|
29 |
+
# holoviews
|
30 |
+
# hvplot
|
31 |
+
contourpy==1.3.1
|
32 |
+
# via bokeh
|
33 |
+
cryptography==43.0.3
|
34 |
+
# via atproto
|
35 |
+
dnspython==2.7.0
|
36 |
+
# via atproto
|
37 |
+
filelock==3.16.1
|
38 |
+
# via huggingface-hub
|
39 |
+
fsspec==2024.10.0
|
40 |
+
# via huggingface-hub
|
41 |
+
h11==0.14.0
|
42 |
+
# via httpcore
|
43 |
+
holoviews==1.20.0
|
44 |
+
# via hvplot
|
45 |
+
httpcore==1.0.7
|
46 |
+
# via httpx
|
47 |
+
httpx==0.27.2
|
48 |
+
# via atproto
|
49 |
+
huggingface-hub==0.26.2
|
50 |
+
# via -r requirements.in
|
51 |
+
hvplot==0.11.1
|
52 |
+
# via -r requirements.in
|
53 |
+
idna==3.10
|
54 |
+
# via
|
55 |
+
# anyio
|
56 |
+
# httpx
|
57 |
+
# requests
|
58 |
+
jinja2==3.1.4
|
59 |
+
# via bokeh
|
60 |
+
libipld==3.0.0
|
61 |
+
# via atproto
|
62 |
+
linkify-it-py==2.0.3
|
63 |
+
# via panel
|
64 |
+
markdown==3.7
|
65 |
+
# via panel
|
66 |
+
markdown-it-py==3.0.0
|
67 |
+
# via
|
68 |
+
# mdit-py-plugins
|
69 |
+
# panel
|
70 |
+
markupsafe==3.0.2
|
71 |
+
# via jinja2
|
72 |
+
mdit-py-plugins==0.4.2
|
73 |
+
# via panel
|
74 |
+
mdurl==0.1.2
|
75 |
+
# via markdown-it-py
|
76 |
+
numpy==2.1.3
|
77 |
+
# via
|
78 |
+
# bokeh
|
79 |
+
# contourpy
|
80 |
+
# holoviews
|
81 |
+
# hvplot
|
82 |
+
# pandas
|
83 |
+
packaging==24.2
|
84 |
+
# via
|
85 |
+
# bokeh
|
86 |
+
# holoviews
|
87 |
+
# huggingface-hub
|
88 |
+
# hvplot
|
89 |
+
# panel
|
90 |
+
pandas==2.2.3
|
91 |
+
# via
|
92 |
+
# bokeh
|
93 |
+
# holoviews
|
94 |
+
# hvplot
|
95 |
+
# panel
|
96 |
+
panel==1.5.4
|
97 |
+
# via
|
98 |
+
# -r requirements.in
|
99 |
+
# holoviews
|
100 |
+
# hvplot
|
101 |
+
param==2.1.1
|
102 |
+
# via
|
103 |
+
# holoviews
|
104 |
+
# hvplot
|
105 |
+
# panel
|
106 |
+
# pyviz-comms
|
107 |
+
pillow==11.0.0
|
108 |
+
# via bokeh
|
109 |
+
pycparser==2.22
|
110 |
+
# via cffi
|
111 |
+
pydantic==2.10.1
|
112 |
+
# via atproto
|
113 |
+
pydantic-core==2.27.1
|
114 |
+
# via pydantic
|
115 |
+
python-dateutil==2.9.0.post0
|
116 |
+
# via pandas
|
117 |
+
pytz==2024.2
|
118 |
+
# via pandas
|
119 |
+
pyviz-comms==3.0.3
|
120 |
+
# via
|
121 |
+
# holoviews
|
122 |
+
# panel
|
123 |
+
pyyaml==6.0.2
|
124 |
+
# via
|
125 |
+
# bokeh
|
126 |
+
# huggingface-hub
|
127 |
+
requests==2.32.3
|
128 |
+
# via
|
129 |
+
# huggingface-hub
|
130 |
+
# panel
|
131 |
+
setuptools==75.6.0
|
132 |
+
# via streamz
|
133 |
+
six==1.16.0
|
134 |
+
# via
|
135 |
+
# python-dateutil
|
136 |
+
# streamz
|
137 |
+
sniffio==1.3.1
|
138 |
+
# via
|
139 |
+
# anyio
|
140 |
+
# httpx
|
141 |
+
streamz==0.6.4
|
142 |
+
# via -r requirements.in
|
143 |
+
toolz==1.0.0
|
144 |
+
# via streamz
|
145 |
+
tornado==6.4.2
|
146 |
+
# via
|
147 |
+
# bokeh
|
148 |
+
# streamz
|
149 |
+
tqdm==4.67.1
|
150 |
+
# via
|
151 |
+
# huggingface-hub
|
152 |
+
# panel
|
153 |
+
typing-extensions==4.12.2
|
154 |
+
# via
|
155 |
+
# atproto
|
156 |
+
# huggingface-hub
|
157 |
+
# panel
|
158 |
+
# pydantic
|
159 |
+
# pydantic-core
|
160 |
+
tzdata==2024.2
|
161 |
+
# via pandas
|
162 |
+
uc-micro-py==1.0.3
|
163 |
+
# via linkify-it-py
|
164 |
+
urllib3==2.2.3
|
165 |
+
# via requests
|
166 |
+
webencodings==0.5.1
|
167 |
+
# via bleach
|
168 |
+
websockets==13.1
|
169 |
+
# via atproto
|
170 |
+
xyzservices==2024.9.0
|
171 |
+
# via bokeh
|
172 |
+
zict==3.0.0
|
173 |
+
# via streamz
|