amaye15
commited on
Commit
·
10656cf
1
Parent(s):
10c2fec
webhook
Browse files
app.py
CHANGED
@@ -1,8 +1,11 @@
|
|
1 |
import os
|
2 |
import shutil
|
3 |
import logging
|
4 |
-
from
|
|
|
5 |
from datasets import Dataset, load_dataset, disable_caching
|
|
|
|
|
6 |
|
7 |
disable_caching()
|
8 |
|
@@ -17,9 +20,11 @@ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(messag
|
|
17 |
console_handler.setFormatter(formatter)
|
18 |
logger.addHandler(console_handler)
|
19 |
|
|
|
20 |
DS_NAME = "amaye15/object-segmentation"
|
21 |
DATA_DIR = "data"
|
22 |
TARGET_REPO = "amaye15/tmp"
|
|
|
23 |
|
24 |
|
25 |
def get_data():
|
@@ -52,23 +57,70 @@ def process_and_push_data():
|
|
52 |
logger.info("Data processed and pushed to the hub.")
|
53 |
|
54 |
|
55 |
-
# Initialize the WebhooksServer
|
56 |
-
app = WebhooksServer(webhook_secret=
|
57 |
|
58 |
|
59 |
-
@
|
60 |
-
async def
|
|
|
|
|
61 |
"""
|
62 |
Webhook endpoint that triggers data processing when the dataset is updated.
|
63 |
"""
|
64 |
-
if
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
|
73 |
-
|
74 |
-
app.launch()
|
|
|
1 |
import os
|
2 |
import shutil
|
3 |
import logging
|
4 |
+
from pathlib import Path
|
5 |
+
from huggingface_hub import WebhooksServer, WebhookPayload
|
6 |
from datasets import Dataset, load_dataset, disable_caching
|
7 |
+
from fastapi import BackgroundTasks, Response, status
|
8 |
+
from huggingface_hub.utils import build_hf_headers, get_session
|
9 |
|
10 |
disable_caching()
|
11 |
|
|
|
20 |
console_handler.setFormatter(formatter)
|
21 |
logger.addHandler(console_handler)
|
22 |
|
23 |
+
# Environment variables
|
24 |
DS_NAME = "amaye15/object-segmentation"
|
25 |
DATA_DIR = "data"
|
26 |
TARGET_REPO = "amaye15/tmp"
|
27 |
+
WEBHOOK_SECRET = os.getenv("HF_WEBHOOK_SECRET", "my_secret_key")
|
28 |
|
29 |
|
30 |
def get_data():
|
|
|
57 |
logger.info("Data processed and pushed to the hub.")
|
58 |
|
59 |
|
60 |
+
# Initialize the WebhooksServer with Gradio interface (if needed)
|
61 |
+
app = WebhooksServer(webhook_secret=WEBHOOK_SECRET)
|
62 |
|
63 |
|
64 |
+
@app.add_webhook("/dataset_repo")
|
65 |
+
async def handle_repository_changes(
|
66 |
+
payload: WebhookPayload, task_queue: BackgroundTasks
|
67 |
+
):
|
68 |
"""
|
69 |
Webhook endpoint that triggers data processing when the dataset is updated.
|
70 |
"""
|
71 |
+
if not payload.event.scope.startswith("repo"):
|
72 |
+
return Response("No task scheduled", status_code=status.HTTP_200_OK)
|
73 |
+
|
74 |
+
# Only run if change is on main branch
|
75 |
+
try:
|
76 |
+
if payload.updatedRefs[0].ref != "refs/heads/main":
|
77 |
+
response_content = "No task scheduled: Change not on main branch"
|
78 |
+
logger.info(response_content)
|
79 |
+
return Response(response_content, status_code=status.HTTP_200_OK)
|
80 |
+
except Exception as e:
|
81 |
+
logger.error(f"Error checking branch: {str(e)}")
|
82 |
+
return Response("No task scheduled", status_code=status.HTTP_200_OK)
|
83 |
+
|
84 |
+
# No need to run for README only updates
|
85 |
+
try:
|
86 |
+
commit_files_url = f"{payload.repo.url.api}/compare/{payload.updatedRefs[0].oldSha}..{payload.updatedRefs[0].newSha}?raw=true"
|
87 |
+
response_text = (
|
88 |
+
get_session().get(commit_files_url, headers=build_hf_headers()).text
|
89 |
+
)
|
90 |
+
logger.info(f"Git Compare URL: {commit_files_url}")
|
91 |
+
|
92 |
+
# Splitting the output into lines
|
93 |
+
file_lines = response_text.split("\n")
|
94 |
+
|
95 |
+
# Filtering the lines to find file changes
|
96 |
+
changed_files = [line.split("\t")[-1] for line in file_lines if line.strip()]
|
97 |
+
logger.info(f"Changed files: {changed_files}")
|
98 |
+
|
99 |
+
# Checking if only README.md has been changed
|
100 |
+
if all("README.md" in file for file in changed_files):
|
101 |
+
response_content = "No task scheduled: It's a README only update."
|
102 |
+
logger.info(response_content)
|
103 |
+
return Response(response_content, status_code=status.HTTP_200_OK)
|
104 |
+
except Exception as e:
|
105 |
+
logger.error(f"Error checking files: {str(e)}")
|
106 |
+
return Response("Unexpected issue", status_code=status.HTTP_501_NOT_IMPLEMENTED)
|
107 |
+
|
108 |
+
logger.info(
|
109 |
+
f"Webhook received from {payload.repo.name} indicating a repo {payload.event.action}"
|
110 |
+
)
|
111 |
+
task_queue.add_task(_process_webhook)
|
112 |
+
return Response("Task scheduled.", status_code=status.HTTP_202_ACCEPTED)
|
113 |
+
|
114 |
+
|
115 |
+
def _process_webhook():
|
116 |
+
logger.info(f"Loading new dataset...")
|
117 |
+
dataset = load_dataset(DS_NAME)
|
118 |
+
logger.info(f"Loaded new dataset")
|
119 |
+
|
120 |
+
logger.info(f"Processing and updating dataset...")
|
121 |
+
process_and_push_data()
|
122 |
+
logger.info(f"Processing and updating dataset completed!")
|
123 |
|
124 |
|
125 |
+
if __name__ == "__main__":
|
126 |
+
app.launch(server_name="0.0.0.0", show_error=True, server_port=7860)
|