davanstrien HF staff committed on
Commit
10e329b
·
verified ·
1 Parent(s): 7aa87a3

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +29 -0
  2. README.md +5 -5
  3. app.py +167 -0
  4. requirements.in +4 -0
  5. requirements.txt +173 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image that runs app.py (a Panel server listening on port 7860).
FROM python:3.11-slim

WORKDIR /code

# Install system dependencies. --no-install-recommends keeps optional
# packages out of the image, and the apt lists are removed in the same
# layer so they never end up in the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Create the cache and data directories and open up their permissions in a
# single layer. bluesky_data is relative to WORKDIR, i.e. /code/bluesky_data.
# NOTE(review): mode 777 presumably exists because the container runs as a
# non-root user at runtime - confirm.
RUN mkdir -p /.cache bluesky_data \
    && chmod 777 /.cache bluesky_data

# Install Python dependencies before copying the application so this layer
# is reused when only the application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Port passed to pn.serve() in app.py
EXPOSE 7860

# Default configuration (these should be overridden at runtime).
ENV HF_REPO_ID="davanstrien/bluesky-counts"
ENV HF_REPO_TYPE="dataset"
# NOTE(review): app.py looks up HUGGINGFACE_TOKEN for the Hub token; confirm
# that the variable supplied at runtime matches what the app reads. Never
# bake a real token into the image.
ENV HF_TOKEN=""

CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: Bluesky Posts Over Time
3
- emoji: 💻
4
- colorFrom: pink
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Bluesky Post Counter
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
+ Real-time visualization of posts per second on the Bluesky social network.
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hvplot.streamz
2
+ import pandas as pd
3
+ import numpy as np
4
+ from streamz import Stream
5
+ from streamz.dataframe import DataFrame
6
+ from atproto import FirehoseSubscribeReposClient, parse_subscribe_repos_message
7
+ import datetime
8
+ import queue
9
+ import threading
10
+ import time
11
+ import os
12
+ import json
13
+ from huggingface_hub import CommitScheduler, HfApi, hf_hub_download
14
+ import uuid
15
+ from pathlib import Path
16
+ import panel as pn
17
+
18
+
19
# Load the Panel extension with the "material" design.
pn.extension(design="material")

# Queue meant for passing data between threads.
# NOTE(review): not referenced anywhere else in the visible code.
post_queue = queue.Queue()

# Number of posts seen since the last emitted sample; the emitter thread
# resets it to zero after each sample.
post_count = 0

# Source node that the per-second samples are emitted into.
stream = Stream()

# NOTE(review): no producer thread has been started at this point, so this
# pause does not actually collect any data; kept to preserve start-up timing.
time.sleep(1)

# One-row frame that tells streamz the column layout of the emitted frames.
example = pd.DataFrame(
    {
        "timestamp": [pd.Timestamp.now()],
        "post_count": [post_count],
    },
    index=[0],
)

# Streaming dataframe fed by `stream`; the plot is built from it.
df = DataFrame(stream, example=example)
34
+
35
# Number of seconds in a 31-day month. Used both as the plot backlog and as
# the maximum number of historical records kept when loading from the Hub
# (one record is produced per second).
MONTH_IN_SECONDS = 31 * 24 * 60 * 60  # 31 days * 24 hours * 60 minutes * 60 seconds

# Configuration, overridable through environment variables.
REPO_ID = os.getenv("HF_REPO_ID", "davanstrien/bluesky-counts")
REPO_TYPE = os.getenv("HF_REPO_TYPE", "dataset")

# Token for Hugging Face API access. HUGGINGFACE_TOKEN keeps priority for
# backward compatibility; HF_TOKEN (the name the Dockerfile defines) is
# accepted as a fallback. An empty value is normalised to None so that a
# blank variable is treated the same as an unset one.
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN") or None

# Local folder holding the data files, and a per-process unique file name so
# that separate runs never append to the same file.
DATA_FOLDER = Path("bluesky_data")
DATA_FILE = f"bluesky_counts_{uuid.uuid4()}.json"
44
+
45
+
46
def load_hub_data():
    """Load previously uploaded count records from the Hub.

    Lists the files of ``REPO_ID``, selects the ``data/bluesky_counts_*``
    entry that sorts last by name, downloads it and parses it as JSON Lines
    (one JSON object per line). Blank lines are skipped.

    Returns:
        list: The decoded records, truncated to the last ``MONTH_IN_SECONDS``
        entries. An empty list is returned when the repository has no data
        file or when any step fails (the error is printed, not raised).
    """
    try:
        api = HfApi(token=HF_TOKEN)
        repo_files = api.list_repo_files(REPO_ID, repo_type=REPO_TYPE)
        data_files = [
            name for name in repo_files if name.startswith("data/bluesky_counts_")
        ]
        if not data_files:
            return []

        # NOTE(review): the file names embed a random UUID, so the name that
        # sorts last is not necessarily the most recently uploaded file.
        # Selecting by commit date would need extra Hub metadata.
        latest_file = max(data_files)
        local_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=latest_file,
            repo_type=REPO_TYPE,
            token=HF_TOKEN,
        )

        records = []
        with open(local_path, "r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                # A blank (e.g. trailing) line is not a record; parsing it
                # would raise and throw away the whole history.
                if line:
                    records.append(json.loads(line))

        # Keep only last month of data (one record per second).
        return records[-MONTH_IN_SECONDS:]
    except Exception as e:
        # Best effort: the app must still start when the Hub is unreachable
        # or the file is corrupt.
        print(f"Error loading data from Hub: {e}")
        return []
73
+
74
+
75
# Make sure the local data folder exists before the scheduler starts
# watching it.
DATA_FOLDER.mkdir(exist_ok=True)

# Background uploader that periodically commits the content of DATA_FOLDER
# to the "data" directory of the Hub repository.
scheduler = CommitScheduler(
    repo_id=REPO_ID,
    repo_type=REPO_TYPE,
    folder_path=DATA_FOLDER,
    path_in_repo="data",
    # `every` is expressed in MINUTES by huggingface_hub, so 10 gives the
    # intended 10-minute upload interval (600 would mean every 10 hours).
    every=10,
    token=HF_TOKEN,  # Token used to authenticate the uploads
)
85
+
86
+
87
def on_message_handler(message):
    """Count the posts created in one firehose message.

    Args:
        message: Raw firehose message; it is decoded with
            ``parse_subscribe_repos_message``.

    Side effects:
        Increments the module-level ``post_count`` once for every operation
        that creates a record in the ``app.bsky.feed.post`` collection.
    """
    global post_count

    # Operation paths have the form "<collection>/<record key>". Matching the
    # collection followed by "/" avoids counting other collections whose name
    # merely starts with the same text (e.g. "app.bsky.feed.postgate").
    post_prefix = "app.bsky.feed.post/"

    commit = parse_subscribe_repos_message(message)
    # Only count new posts (not likes, reposts, etc). Messages without an
    # `ops` attribute, or with `ops` set to None, carry nothing to count.
    for op in getattr(commit, "ops", None) or ():
        if op.action == "create" and op.path.startswith(post_prefix):
            post_count += 1
95
+
96
+
97
def emit_counts():
    """Replay recent history, then emit and persist one sample per second.

    On start-up the records returned by ``load_hub_data`` (if any) are used
    to pre-fill the plot: the last 100 of them are emitted on ``stream``.
    Afterwards the function loops forever; every second it

    * snapshots and resets the module-level ``post_count``,
    * emits a one-row DataFrame (``timestamp``, ``post_count``) on ``stream``,
    * appends the same sample as one JSON line to ``DATA_FOLDER / DATA_FILE``
      so that the commit scheduler has data to upload and a later run can
      reload it through ``load_hub_data``.

    The function never returns and is used as a thread target.
    """
    global post_count

    if saved_data := load_hub_data():
        print(f"Loaded {len(saved_data)} historical data points from Hub")
        # Emit last 100 points to initialize plot
        for point in saved_data[-100:]:
            history_frame = pd.DataFrame(
                {
                    "timestamp": [pd.Timestamp(point["timestamp"])],
                    "post_count": [point["post_count"]],
                }
            )
            stream.emit(history_frame)

    # Wait for first second to collect initial data
    time.sleep(1)

    data_path = DATA_FOLDER / DATA_FILE

    while True:
        now = pd.Timestamp.now()
        # Snapshot and reset in one statement so that as few increments as
        # possible from the firehose thread fall between the read and the
        # reset. This narrows the window; it is not a lock.
        count, post_count = post_count, 0

        stream.emit(pd.DataFrame({"timestamp": [now], "post_count": [count]}))

        # Persist the sample as JSON Lines, using the same keys that
        # load_hub_data consumers read. The scheduler lock prevents an
        # upload from running while the file is half-written.
        record = {"timestamp": now.isoformat(), "post_count": count}
        try:
            with scheduler.lock:
                with data_path.open("a", encoding="utf-8") as f:
                    f.write(json.dumps(record) + "\n")
        except OSError as e:
            # Best effort: a disk problem must not stop the live plot.
            print(f"Error writing data point to {data_path}: {e}")

        # Wait 1 second
        time.sleep(1)
126
+
127
+
128
# Display options for the line chart. `backlog` is set to one month's worth
# of per-second samples.
_PLOT_OPTIONS = {
    "title": "Bluesky Posts per Second",
    "width": 800,
    "height": 400,
    "backlog": MONTH_IN_SECONDS,
}

# Line chart of post_count against timestamp, built from the streaming
# dataframe.
plot = df.hvplot.line("timestamp", "post_count", **_PLOT_OPTIONS)
137
+
138
+
139
def run_firehose():
    """Create a firehose client and route its messages to ``on_message_handler``.

    Used as the target of the firehose background thread.
    """
    FirehoseSubscribeReposClient().start(on_message_handler)
143
+
144
+
145
# Both workers are daemon threads, so they do not keep the process alive
# once the main thread exits.

# Worker that receives firehose messages and counts posts.
firehose_thread = threading.Thread(target=run_firehose, daemon=True)
firehose_thread.start()

# Worker that emits the per-second counts to the stream.
emit_thread = threading.Thread(target=emit_counts, daemon=True)
emit_thread.start()
153
+
154
# Script entry point: serve the plot as a Panel dashboard.
if __name__ == "__main__":
    # `pn` is the module-level panel import; no local re-import is needed.
    pn.extension()
    dashboard = pn.Column(pn.pane.HoloViews(plot))

    # Server configuration for Docker: listen on all interfaces, on the port
    # the Dockerfile exposes, accept websocket connections from any origin
    # (given as a list), and do not try to open a browser.
    serve_options = {
        "address": "0.0.0.0",
        "port": 7860,
        "allow_websocket_origin": ["*"],
        "show": False,
    }
    pn.serve(dashboard, **serve_options)
requirements.in ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
panel
hvplot
streamz
atproto
# Imported directly by app.py, and listed in requirements.txt as coming from
# this file.
huggingface-hub
requirements.txt ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile requirements.in -o requirements.txt
3
+ annotated-types==0.7.0
4
+ # via pydantic
5
+ anyio==4.6.2.post1
6
+ # via httpx
7
+ atproto==0.0.55
8
+ # via -r requirements.in
9
+ bleach==6.2.0
10
+ # via panel
11
+ bokeh==3.6.1
12
+ # via
13
+ # holoviews
14
+ # hvplot
15
+ # panel
16
+ certifi==2024.8.30
17
+ # via
18
+ # httpcore
19
+ # httpx
20
+ # requests
21
+ cffi==1.17.1
22
+ # via cryptography
23
+ charset-normalizer==3.4.0
24
+ # via requests
25
+ click==8.1.7
26
+ # via atproto
27
+ colorcet==3.1.0
28
+ # via
29
+ # holoviews
30
+ # hvplot
31
+ contourpy==1.3.1
32
+ # via bokeh
33
+ cryptography==43.0.3
34
+ # via atproto
35
+ dnspython==2.7.0
36
+ # via atproto
37
+ filelock==3.16.1
38
+ # via huggingface-hub
39
+ fsspec==2024.10.0
40
+ # via huggingface-hub
41
+ h11==0.14.0
42
+ # via httpcore
43
+ holoviews==1.20.0
44
+ # via hvplot
45
+ httpcore==1.0.7
46
+ # via httpx
47
+ httpx==0.27.2
48
+ # via atproto
49
+ huggingface-hub==0.26.2
50
+ # via -r requirements.in
51
+ hvplot==0.11.1
52
+ # via -r requirements.in
53
+ idna==3.10
54
+ # via
55
+ # anyio
56
+ # httpx
57
+ # requests
58
+ jinja2==3.1.4
59
+ # via bokeh
60
+ libipld==3.0.0
61
+ # via atproto
62
+ linkify-it-py==2.0.3
63
+ # via panel
64
+ markdown==3.7
65
+ # via panel
66
+ markdown-it-py==3.0.0
67
+ # via
68
+ # mdit-py-plugins
69
+ # panel
70
+ markupsafe==3.0.2
71
+ # via jinja2
72
+ mdit-py-plugins==0.4.2
73
+ # via panel
74
+ mdurl==0.1.2
75
+ # via markdown-it-py
76
+ numpy==2.1.3
77
+ # via
78
+ # bokeh
79
+ # contourpy
80
+ # holoviews
81
+ # hvplot
82
+ # pandas
83
+ packaging==24.2
84
+ # via
85
+ # bokeh
86
+ # holoviews
87
+ # huggingface-hub
88
+ # hvplot
89
+ # panel
90
+ pandas==2.2.3
91
+ # via
92
+ # bokeh
93
+ # holoviews
94
+ # hvplot
95
+ # panel
96
+ panel==1.5.4
97
+ # via
98
+ # -r requirements.in
99
+ # holoviews
100
+ # hvplot
101
+ param==2.1.1
102
+ # via
103
+ # holoviews
104
+ # hvplot
105
+ # panel
106
+ # pyviz-comms
107
+ pillow==11.0.0
108
+ # via bokeh
109
+ pycparser==2.22
110
+ # via cffi
111
+ pydantic==2.10.1
112
+ # via atproto
113
+ pydantic-core==2.27.1
114
+ # via pydantic
115
+ python-dateutil==2.9.0.post0
116
+ # via pandas
117
+ pytz==2024.2
118
+ # via pandas
119
+ pyviz-comms==3.0.3
120
+ # via
121
+ # holoviews
122
+ # panel
123
+ pyyaml==6.0.2
124
+ # via
125
+ # bokeh
126
+ # huggingface-hub
127
+ requests==2.32.3
128
+ # via
129
+ # huggingface-hub
130
+ # panel
131
+ setuptools==75.6.0
132
+ # via streamz
133
+ six==1.16.0
134
+ # via
135
+ # python-dateutil
136
+ # streamz
137
+ sniffio==1.3.1
138
+ # via
139
+ # anyio
140
+ # httpx
141
+ streamz==0.6.4
142
+ # via -r requirements.in
143
+ toolz==1.0.0
144
+ # via streamz
145
+ tornado==6.4.2
146
+ # via
147
+ # bokeh
148
+ # streamz
149
+ tqdm==4.67.1
150
+ # via
151
+ # huggingface-hub
152
+ # panel
153
+ typing-extensions==4.12.2
154
+ # via
155
+ # atproto
156
+ # huggingface-hub
157
+ # panel
158
+ # pydantic
159
+ # pydantic-core
160
+ tzdata==2024.2
161
+ # via pandas
162
+ uc-micro-py==1.0.3
163
+ # via linkify-it-py
164
+ urllib3==2.2.3
165
+ # via requests
166
+ webencodings==0.5.1
167
+ # via bleach
168
+ websockets==13.1
169
+ # via atproto
170
+ xyzservices==2024.9.0
171
+ # via bokeh
172
+ zict==3.0.0
173
+ # via streamz