amaye15 committed on
Commit
c30b770
·
1 Parent(s): ebeac92

webhook test

Browse files
Files changed (4) hide show
  1. app.py +70 -0
  2. dev.ipynb +336 -0
  3. old-app.py +59 -0
  4. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import logging
4
+ import pretty_errors
5
+ from datasets import Dataset, load_dataset
6
+ from huggingface_hub import WebhooksServer, WebhookPayload, webhook_endpoint
7
+
8
# Configure a dedicated console logger for this service.
logger = logging.getLogger("basic_logger")
logger.setLevel(logging.INFO)

# Records at INFO and above are written by a stream handler, formatted as
# "<time> - <logger name> - <level> - <message>".
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
console_handler.setLevel(logging.INFO)
logger.addHandler(console_handler)

# Hub dataset that gets streamed, and the cache folder name (resolved against
# the current working directory by the functions below).
DS_NAME = "amaye15/object-segmentation"
DATA_DIR = "data"
20
+
21
+
22
def get_data():
    """
    Yield the rows of the "train" split of DS_NAME one at a time.

    The dataset is opened in streaming mode, with DATA_DIR under the current
    working directory passed as the cache directory and
    ``download_mode="force_redownload"`` requested.
    """
    cache_path = os.path.join(os.getcwd(), DATA_DIR)
    streamed = load_dataset(
        DS_NAME,
        streaming=True,
        cache_dir=cache_path,
        download_mode="force_redownload",
    )
    yield from streamed["train"]
34
+
35
+
36
def process_and_push_data():
    """
    Rebuild the processed dataset from a fresh stream and push it to the Hub.

    The DATA_DIR folder under the current working directory is wiped and
    recreated, the rows yielded by get_data() are materialized into a
    Dataset, and the result is pushed to the "amaye15/tmp" repository.
    """
    p = os.path.join(os.getcwd(), DATA_DIR)

    # Start from an empty directory so nothing from a previous run is reused.
    if os.path.exists(p):
        shutil.rmtree(p)

    os.mkdir(p)

    # Dataset.from_generator caches its output on disk, keyed by the generator
    # function. With the default cache location a later call could reload the
    # rows produced by an earlier run instead of re-streaming the updated
    # dataset. Keeping that cache inside the directory wiped above forces the
    # rows to be regenerated on every call.
    ds_processed = Dataset.from_generator(get_data, cache_dir=p)
    ds_processed.push_to_hub("amaye15/tmp")
49
+
50
+
51
# Initialize the WebhooksServer.
# NOTE(review): the secret can be supplied through the WEBHOOK_SECRET
# environment variable. The literal fallback keeps existing deployments
# working, but it lives in source control and should be rotated.
app = WebhooksServer(
    webhook_secret=os.environ.get("WEBHOOK_SECRET", "my_secret_key"),
)


# Register the handler on `app` itself. The module-level `webhook_endpoint`
# decorator attaches handlers to huggingface_hub's default server rather than
# to this instance, which would leave `app.launch()` serving no webhook route.
@app.add_webhook
async def trigger_processing(payload: WebhookPayload):
    """
    Webhook endpoint that triggers data processing when a dataset is updated.

    Args:
        payload: The webhook payload; only ``payload.repo.type``,
            ``payload.repo.name`` and ``payload.event.action`` are read.

    Returns:
        A dict with a single "message" key saying whether processing ran or
        the event was ignored.
    """
    if payload.repo.type == "dataset" and payload.event.action == "update":
        logger.info("Dataset %s updated. Triggering processing.", payload.repo.name)
        # NOTE(review): this call is synchronous, so the response is only sent
        # once the rebuild and push have finished.
        process_and_push_data()
        return {"message": "Data processing triggered successfully."}

    logger.info("Ignored event: %s on %s", payload.event.action, payload.repo.name)
    return {"message": "Event ignored."}


# Start the server
app.launch()
dev.ipynb ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "\n",
10
+ "import os\n",
11
+ "import shutil\n",
12
+ "import logging\n",
13
+ "import pretty_errors\n",
14
+ "\n",
15
+ "import huggingface_hub\n",
16
+ "from datasets import Dataset\n",
17
+ "from datasets import load_dataset\n",
18
+ "\n",
19
+ "# Set up the logger\n",
20
+ "logger = logging.getLogger('basic_logger')\n",
21
+ "logger.setLevel(logging.INFO)\n",
22
+ "\n",
23
+ "# Set up the console handler with a simple format\n",
24
+ "console_handler = logging.StreamHandler()\n",
25
+ "console_handler.setLevel(logging.INFO)\n",
26
+ "formatter = logging.Formatter(\n",
27
+ " '%Y-%m-%d %H:%M:%S - %(name)s - %(levelname)s - %(message)s'\n",
28
+ ")\n",
29
+ "console_handler.setFormatter(formatter)\n",
30
+ "logger.addHandler(console_handler)\n",
31
+ "\n",
32
+ "DS_NAME = \"amaye15/object-segmentation\"\n",
33
+ "\n",
34
+ "DATA_DIR = \"data\"\n",
35
+ "p = os.path.join(os.getcwd(), DATA_DIR)\n",
36
+ "\n",
37
+ "if os.path.exists(p):\n",
38
+ " shutil.rmtree(p)\n",
39
+ "\n",
40
+ "\n",
41
+ "os.mkdir(p)\n",
42
+ "\n",
43
+ "def get_data():\n",
44
+ " ds = load_dataset(DS_NAME, cache_dir=p, streaming=True)\n",
45
+ " for row in ds[\"train\"]:\n",
46
+ " yield row\n",
47
+ "\n",
48
+ "#ds_processed = Dataset.from_generator(get_data)\n",
49
+ "# ds_processed.push_to_hub(\"amaye15/tmp\")"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 16,
55
+ "metadata": {},
56
+ "outputs": [],
57
+ "source": [
58
+ "from huggingface_hub import scan_cache_dir\n",
59
+ "\n",
60
+ "repo_info = scan_cache_dir().repos\n",
61
+ "\n"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": null,
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "from "
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": 19,
76
+ "metadata": {},
77
+ "outputs": [],
78
+ "source": [
79
+ "from huggingface_hub import HfApi\n",
80
+ "\n",
81
+ "api = HfApi()\n",
82
+ "\n",
83
+ "# Get the list of revisions for the dataset\n",
84
+ "revisions = api.list_repo_refs(repo_id=DS_NAME, repo_type=\"dataset\")\n",
85
+ "\n",
86
+ "# Check the latest commit\n",
87
+ "# latest_commit = revisions[-1].commit_id\n",
88
+ "# print(f\"Latest commit ID: {latest_commit}\")"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": 20,
94
+ "metadata": {},
95
+ "outputs": [
96
+ {
97
+ "data": {
98
+ "text/plain": [
99
+ "GitRefs(branches=[GitRefInfo(name='main', ref='refs/heads/main', target_commit='962a9a67307296a7abc7e94c2811c450970b80df')], converts=[GitRefInfo(name='duckdb', ref='refs/convert/duckdb', target_commit='72baa589701a6cbea2b7497931c7adf1daf42121'), GitRefInfo(name='parquet', ref='refs/convert/parquet', target_commit='c209a987d23de50a04ec9766e04dde2e4db7f5fb')], tags=[], pull_requests=None)"
100
+ ]
101
+ },
102
+ "execution_count": 20,
103
+ "metadata": {},
104
+ "output_type": "execute_result"
105
+ }
106
+ ],
107
+ "source": [
108
+ "revisions"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 15,
114
+ "metadata": {},
115
+ "outputs": [
116
+ {
117
+ "data": {
118
+ "text/plain": [
119
+ "frozenset({CachedRepoInfo(repo_id='amaye15/DaViT', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT'), size_on_disk=1677, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='a96d58f5ca3d0b138d8efe7618a860b10f8d986b', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/snapshots/a96d58f5ca3d0b138d8efe7618a860b10f8d986b'), size_on_disk=1677, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/snapshots/a96d58f5ca3d0b138d8efe7618a860b10f8d986b/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/blobs/c928ad7cd8a9f9e48fc14780b84e5dd2ad6b1606'), size_on_disk=1677, blob_last_accessed=1722324257.4824574, blob_last_modified=1722324257.396636)}), refs=frozenset({'main'}), last_modified=1722324257.396636)}), last_accessed=1722324257.4824574, last_modified=1722324257.396636),\n",
120
+ " CachedRepoInfo(repo_id='amaye15/DaViT-Florence-2-large-ft', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft'), size_on_disk=1834, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='4cc7068026aaeb388ba2b0826abae30d670de3fc', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/snapshots/4cc7068026aaeb388ba2b0826abae30d670de3fc'), size_on_disk=1834, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/snapshots/4cc7068026aaeb388ba2b0826abae30d670de3fc/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/blobs/ab9f4c4537bc89f3a4cb187db5d771be9242f09f'), size_on_disk=1834, blob_last_accessed=1722405977.6422648, blob_last_modified=1722405977.6310754)}), refs=frozenset({'main'}), last_modified=1722405977.6310754)}), last_accessed=1722405977.6422648, last_modified=1722405977.6310754),\n",
121
+ " CachedRepoInfo(repo_id='amaye15/NSFW', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW'), size_on_disk=1240, nb_files=2, revisions=frozenset({CachedRevisionInfo(commit_hash='c76b1c300fb672189feb59f8faa1027b2d6956b3', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/c76b1c300fb672189feb59f8faa1027b2d6956b3'), size_on_disk=619, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/c76b1c300fb672189feb59f8faa1027b2d6956b3/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/blobs/dc8bcda261a57d5275af975f1411afdadc094009'), size_on_disk=619, blob_last_accessed=1722723952.58199, blob_last_modified=1722723952.5701885)}), refs=frozenset(), last_modified=1722723952.5701885), CachedRevisionInfo(commit_hash='b5cfb52e5a260983c6e6f70c7b21574efce998b1', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/b5cfb52e5a260983c6e6f70c7b21574efce998b1'), size_on_disk=621, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/b5cfb52e5a260983c6e6f70c7b21574efce998b1/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/blobs/5924f8b7af709a9f080759cac11ea6f1c976df5d'), size_on_disk=621, blob_last_accessed=1722768256.9392703, blob_last_modified=1722768256.9274719)}), refs=frozenset({'main'}), last_modified=1722768256.9274719)}), last_accessed=1722768256.9392703, last_modified=1722768256.9274719),\n",
122
+ " CachedRepoInfo(repo_id='amaye15/Products-10k', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k'), size_on_disk=620, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/snapshots/05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c'), size_on_disk=620, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/snapshots/05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/blobs/a71affed5a9687aeabd33f9aa94c9cde049eb533'), size_on_disk=620, blob_last_accessed=1723091983.5595, blob_last_modified=1723091983.547405)}), refs=frozenset({'main'}), last_modified=1723091983.547405)}), last_accessed=1723091983.5595, last_modified=1723091983.547405),\n",
123
+ " CachedRepoInfo(repo_id='amaye15/SwinV2-Base-Document-Classifier', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier'), size_on_disk=590, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='b0968577b56aec082d7cde1d2b04f68173b8e674', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/snapshots/b0968577b56aec082d7cde1d2b04f68173b8e674'), size_on_disk=590, files=frozenset({CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/snapshots/b0968577b56aec082d7cde1d2b04f68173b8e674/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/blobs/86614921b04ad5b6e3d4ee5448f11efe6cc67917'), size_on_disk=590, blob_last_accessed=1722750225.5857947, blob_last_modified=1722750225.574735)}), refs=frozenset({'main'}), last_modified=1722750225.574735)}), last_accessed=1722750225.5857947, last_modified=1722750225.574735),\n",
124
+ " CachedRepoInfo(repo_id='amaye15/invoices', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices'), size_on_disk=618, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='f4e8d7dda1472da87125237182dc9f4d5fd860dc', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/snapshots/f4e8d7dda1472da87125237182dc9f4d5fd860dc'), size_on_disk=618, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/snapshots/f4e8d7dda1472da87125237182dc9f4d5fd860dc/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/blobs/10d6a19135e958a4856ebfbd82b130f571667b26'), size_on_disk=618, blob_last_accessed=1723087468.3128088, blob_last_modified=1723087468.3009398)}), refs=frozenset({'main'}), last_modified=1723087468.3009398)}), last_accessed=1723087468.3128088, last_modified=1723087468.3009398),\n",
125
+ " CachedRepoInfo(repo_id='amaye15/receipts', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts'), size_on_disk=617, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='7eaf60e64883eee2a744c1e00658967e0b61aab3', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/snapshots/7eaf60e64883eee2a744c1e00658967e0b61aab3'), size_on_disk=617, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/snapshots/7eaf60e64883eee2a744c1e00658967e0b61aab3/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/blobs/22cf712cf7551f2d2df0e6d87358a104fa485122'), size_on_disk=617, blob_last_accessed=1723085450.105201, blob_last_modified=1723085450.0932333)}), refs=frozenset({'main'}), last_modified=1723085450.0932333)}), last_accessed=1723085450.105201, last_modified=1723085450.0932333),\n",
126
+ " CachedRepoInfo(repo_id='amaye15/tmp', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp'), size_on_disk=372, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='b593656ae71cef84e90be18cf6bb29cdc74fd7ff', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/snapshots/b593656ae71cef84e90be18cf6bb29cdc74fd7ff'), size_on_disk=372, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/snapshots/b593656ae71cef84e90be18cf6bb29cdc74fd7ff/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/blobs/c274c17b952e2eba2a83b9255b334db02fd75125'), size_on_disk=372, blob_last_accessed=1724597074.5835145, blob_last_modified=1724597074.5719097)}), refs=frozenset({'main'}), last_modified=1724597074.5719097)}), last_accessed=1724597074.5835145, last_modified=1724597074.5719097),\n",
127
+ " CachedRepoInfo(repo_id='caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr'), size_on_disk=48461065, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='bb13f02e45e88d00b6c202b3fbe6a181af144606', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606'), size_on_disk=48461065, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/0a15b8aeffe63d67948215a81d191fd8190f16be'), size_on_disk=772, blob_last_accessed=1722954840.0557656, blob_last_modified=1722954840.043787), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/539dbfb6265f0ece81a881579565e88b90668fc4'), size_on_disk=152, blob_last_accessed=1722954839.8127632, blob_last_modified=1722954839.8014247), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/pytorch_model.bin'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/4a5f52a20932085557ed115f87c0ee8385e12f2719108c0dfd38c64aedea4710'), 
size_on_disk=48460141, blob_last_accessed=1722954848.1445184, blob_last_modified=1722954848.0298514)}), refs=frozenset({'main'}), last_modified=1722954848.0298514)}), last_accessed=1722954848.1445184, last_modified=1722954848.0298514),\n",
128
+ " CachedRepoInfo(repo_id='facebook/bart-large', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large'), size_on_disk=1628, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='cb48c1365bd826bd521f650dc2e0940aee54720c', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/snapshots/cb48c1365bd826bd521f650dc2e0940aee54720c'), size_on_disk=1628, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/snapshots/cb48c1365bd826bd521f650dc2e0940aee54720c/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/blobs/79568cb2491a1a4da49f32fb723018158c222712'), size_on_disk=1628, blob_last_accessed=1722754758.8173473, blob_last_modified=1722754758.8058388)}), refs=frozenset({'main'}), last_modified=1722754758.8058388)}), last_accessed=1722754758.8173473, last_modified=1722754758.8058388),\n",
129
+ " CachedRepoInfo(repo_id='facebook/sam2-hiera-base-plus', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus'), size_on_disk=323493298, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/snapshots/9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9'), size_on_disk=323493298, files=frozenset({CachedFileInfo(file_name='sam2_hiera_base_plus.pt', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/snapshots/9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9/sam2_hiera_base_plus.pt'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/blobs/d0bb7f236400a49669ffdd1be617959a8b1d1065081789d7bbff88eded3a8071'), size_on_disk=323493298, blob_last_accessed=1723985664.6263692, blob_last_modified=1723985638.2220697)}), refs=frozenset({'main'}), last_modified=1723985638.2220697)}), last_accessed=1723985664.6263692, last_modified=1723985638.2220697),\n",
130
+ " CachedRepoInfo(repo_id='facebook/sam2-hiera-large', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large'), size_on_disk=897952466, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='eba9be237c463eb950e64b65c223ad55c878c2ac', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/snapshots/eba9be237c463eb950e64b65c223ad55c878c2ac'), size_on_disk=897952466, files=frozenset({CachedFileInfo(file_name='sam2_hiera_large.pt', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/snapshots/eba9be237c463eb950e64b65c223ad55c878c2ac/sam2_hiera_large.pt'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/blobs/7442e4e9b732a508f80e141e7c2913437a3610ee0c77381a66658c3a445df87b'), size_on_disk=897952466, blob_last_accessed=1723985746.4751956, blob_last_modified=1723985745.5689125)}), refs=frozenset({'main'}), last_modified=1723985745.5689125)}), last_accessed=1723985746.4751956, last_modified=1723985745.5689125),\n",
131
+ " CachedRepoInfo(repo_id='microsoft/Florence-2-large-ft', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft'), size_on_disk=2647748, nb_files=8, revisions=frozenset({CachedRevisionInfo(commit_hash='bb44b80c15e943b1bf7cec6e076359cec6e40178', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178'), size_on_disk=2647748, files=frozenset({CachedFileInfo(file_name='processing_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/processing_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/538110e8fd421258847d317cb62c40b9671d07a9'), size_on_disk=46372, blob_last_accessed=1722187335.8618798, blob_last_modified=1722187335.854595), CachedFileInfo(file_name='tokenizer_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/tokenizer_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/44784bc58d4cb18d3549ad71e062efcf032d9ef5'), size_on_disk=34, blob_last_accessed=1722187335.5466971, blob_last_modified=1722187334.7324762), CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/bff93d862796105c8cf1a0b3331ad3bec68aee91'), size_on_disk=2445, blob_last_accessed=1722186181.1469133, blob_last_modified=1722186180.799109), CachedFileInfo(file_name='vocab.json', 
file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/vocab.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/94a2f4fd50e976bda926c700291522ea1a79323f'), size_on_disk=1099884, blob_last_accessed=1722187336.2418828, blob_last_modified=1722187336.7108266), CachedFileInfo(file_name='modeling_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/modeling_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/e5ee65134d1a5e98357f8d500c9b9af5f8c00a08'), size_on_disk=127219, blob_last_accessed=1722225017.2661808, blob_last_modified=1722225017.1880703), CachedFileInfo(file_name='configuration_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/configuration_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/622f74997c5612ff68d0e55063714f291d159166'), size_on_disk=15125, blob_last_accessed=1722187334.9981484, blob_last_modified=1722187334.9932766), CachedFileInfo(file_name='tokenizer.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/tokenizer.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/ad0bcbeb288f0d1373d88e0762e66357f55b8311'), size_on_disk=1355863, blob_last_accessed=1722187337.8523662, blob_last_modified=1722187337.4607415), CachedFileInfo(file_name='preprocessor_config.json', 
file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/85cd7be3568df661ad536b6ab20d59b08ba079ae'), size_on_disk=806, blob_last_accessed=1722187335.9961612, blob_last_modified=1722187335.4969347)}), refs=frozenset({'main'}), last_modified=1722225017.1880703)}), last_accessed=1722225017.2661808, last_modified=1722225017.1880703),\n",
132
+ " CachedRepoInfo(repo_id='microsoft/swinv2-base-patch4-window16-256', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256'), size_on_disk=351904021, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78'), size_on_disk=351904021, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/9f6070ac05bf6f561f789e8699a4dc387df58724'), size_on_disk=69910, blob_last_accessed=1722848474.6562126, blob_last_modified=1722848474.6428308), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/fb816e3190d8ed24279c9975f45efeb660493c61'), size_on_disk=240, blob_last_accessed=1722847982.5025482, blob_last_modified=1722847982.4988532), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/pytorch_model.bin'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/c9307c9aa168a730c370d472783ae8274408a059e95245e0d7fcf1a1d91cf9aa'), size_on_disk=351833871, 
blob_last_accessed=1723624967.1287704, blob_last_modified=1722848484.9202104)}), refs=frozenset({'main'}), last_modified=1722848484.9202104)}), last_accessed=1723624967.1287704, last_modified=1722848484.9202104),\n",
133
+ " CachedRepoInfo(repo_id='thanhhau097/swin2SR-realworld-sr-x4-64-bsrgan-psnr', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr'), size_on_disk=48456429, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='e345b33f8e7e14b0dce731505234a8425412e343', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343'), size_on_disk=48456429, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/0a15b8aeffe63d67948215a81d191fd8190f16be'), size_on_disk=772, blob_last_accessed=1722954764.2667823, blob_last_modified=1722954764.2559414), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/539dbfb6265f0ece81a881579565e88b90668fc4'), size_on_disk=152, blob_last_accessed=1722954763.868174, blob_last_modified=1722954763.8569045), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/pytorch_model.bin'), 
blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/91b0a2ca989b9c4e5a91124f67f552741594fd1bd41e3114d65a316d36f45e60'), size_on_disk=48455505, blob_last_accessed=1722954768.0409808, blob_last_modified=1722954767.9221504)}), refs=frozenset({'main'}), last_modified=1722954767.9221504)}), last_accessed=1722954768.0409808, last_modified=1722954767.9221504)})"
134
+ ]
135
+ },
136
+ "execution_count": 15,
137
+ "metadata": {},
138
+ "output_type": "execute_result"
139
+ }
140
+ ],
141
+ "source": [
142
+ "scan_cache_dir().repos"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": 17,
148
+ "metadata": {},
149
+ "outputs": [
150
+ {
151
+ "name": "stdout",
152
+ "output_type": "stream",
153
+ "text": [
154
+ "facebook/sam2-hiera-base-plus\n",
155
+ "caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr\n",
156
+ "amaye15/receipts\n",
157
+ "amaye15/DaViT-Florence-2-large-ft\n",
158
+ "amaye15/tmp\n",
159
+ "amaye15/Products-10k\n",
160
+ "amaye15/invoices\n",
161
+ "microsoft/Florence-2-large-ft\n",
162
+ "microsoft/swinv2-base-patch4-window16-256\n",
163
+ "thanhhau097/swin2SR-realworld-sr-x4-64-bsrgan-psnr\n",
164
+ "amaye15/SwinV2-Base-Document-Classifier\n",
165
+ "amaye15/DaViT\n",
166
+ "facebook/sam2-hiera-large\n",
167
+ "facebook/bart-large\n",
168
+ "amaye15/NSFW\n"
169
+ ]
170
+ }
171
+ ],
172
+ "source": [
173
+ "for r in repo_info:\n",
174
+ " #if r.repo_n == DS_NAME:\n",
175
+ "\n",
176
+ " print(r.repo_id)"
177
+ ]
178
+ },
179
+ {
180
+ "cell_type": "code",
181
+ "execution_count": 2,
182
+ "metadata": {},
183
+ "outputs": [
184
+ {
185
+ "data": {
186
+ "application/vnd.jupyter.widget-view+json": {
187
+ "model_id": "825736a8246f4fb593f4847c5c2268b6",
188
+ "version_major": 2,
189
+ "version_minor": 0
190
+ },
191
+ "text/plain": [
192
+ "Downloading readme: 0%| | 0.00/5.24k [00:00<?, ?B/s]"
193
+ ]
194
+ },
195
+ "metadata": {},
196
+ "output_type": "display_data"
197
+ },
198
+ {
199
+ "data": {
200
+ "application/vnd.jupyter.widget-view+json": {
201
+ "model_id": "699d58320ad6465697460490bfffaf65",
202
+ "version_major": 2,
203
+ "version_minor": 0
204
+ },
205
+ "text/plain": [
206
+ "Resolving data files: 0%| | 0/38 [00:00<?, ?it/s]"
207
+ ]
208
+ },
209
+ "metadata": {},
210
+ "output_type": "display_data"
211
+ }
212
+ ],
213
+ "source": [
214
+ "ds = load_dataset(DS_NAME, cache_dir=p, streaming=True)"
215
+ ]
216
+ },
217
+ {
218
+ "cell_type": "code",
219
+ "execution_count": 4,
220
+ "metadata": {},
221
+ "outputs": [
222
+ {
223
+ "ename": "AttributeError",
224
+ "evalue": "'IterableDataset' object has no attribute 'cleanup_cache_files'",
225
+ "output_type": "error",
226
+ "traceback": [
227
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
228
+ "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
229
+ "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mds\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtrain\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcleanup_cache_files\u001b[49m()\n",
230
+ "\u001b[0;31mAttributeError\u001b[0m: 'IterableDataset' object has no attribute 'cleanup_cache_files'"
231
+ ]
232
+ }
233
+ ],
234
+ "source": [
235
+ "ds[\"train\"].cleanup_cache_files()"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": 3,
241
+ "metadata": {},
242
+ "outputs": [
243
+ {
244
+ "data": {
245
+ "text/plain": [
246
+ "False"
247
+ ]
248
+ },
249
+ "execution_count": 3,
250
+ "metadata": {},
251
+ "output_type": "execute_result"
252
+ }
253
+ ],
254
+ "source": [
255
+ "os.path.exists(os.path.join(os.getcwd(), \"data\"))"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": 2,
261
+ "metadata": {},
262
+ "outputs": [
263
+ {
264
+ "data": {
265
+ "text/plain": [
266
+ "Dataset({\n",
267
+ " features: ['image', 'masked_image', 'mask'],\n",
268
+ " num_rows: 37\n",
269
+ "})"
270
+ ]
271
+ },
272
+ "execution_count": 2,
273
+ "metadata": {},
274
+ "output_type": "execute_result"
275
+ }
276
+ ],
277
+ "source": [
278
+ "ds_processed"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": 2,
284
+ "metadata": {},
285
+ "outputs": [
286
+ {
287
+ "ename": "AttributeError",
288
+ "evalue": "type object 'DatasetDict' has no attribute 'get_cache_files_size'",
289
+ "output_type": "error",
290
+ "traceback": [
291
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
292
+ "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
293
+ "Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DatasetDict\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Get the cache size\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m cache_size \u001b[38;5;241m=\u001b[39m \u001b[43mDatasetDict\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_cache_files_size\u001b[49m()\n\u001b[1;32m 6\u001b[0m cache_size\n",
294
+ "\u001b[0;31mAttributeError\u001b[0m: type object 'DatasetDict' has no attribute 'get_cache_files_size'"
295
+ ]
296
+ }
297
+ ],
298
+ "source": [
299
+ "from datasets import DatasetDict\n",
300
+ "\n",
301
+ "# Get the cache size\n",
302
+ "cache_size = DatasetDict.get_cache_files_size()\n",
303
+ "\n",
304
+ "cache_size"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type": "code",
309
+ "execution_count": null,
310
+ "metadata": {},
311
+ "outputs": [],
312
+ "source": []
313
+ }
314
+ ],
315
+ "metadata": {
316
+ "kernelspec": {
317
+ "display_name": "env",
318
+ "language": "python",
319
+ "name": "python3"
320
+ },
321
+ "language_info": {
322
+ "codemirror_mode": {
323
+ "name": "ipython",
324
+ "version": 3
325
+ },
326
+ "file_extension": ".py",
327
+ "mimetype": "text/x-python",
328
+ "name": "python",
329
+ "nbconvert_exporter": "python",
330
+ "pygments_lexer": "ipython3",
331
+ "version": "3.12.4"
332
+ }
333
+ },
334
+ "nbformat": 4,
335
+ "nbformat_minor": 2
336
+ }
old-app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import logging
4
+ import pretty_errors
5
+ import huggingface_hub
6
+ from datasets import Dataset, load_dataset, disable_caching
7
+ import schedule
8
+ import time
9
+
10
# Switch off the `datasets` caching layer before any dataset work happens
# in this script.
disable_caching()

# Hub repository that is read from, and the scratch directory (resolved
# against the current working directory by the functions below).
DS_NAME = "amaye15/object-segmentation"
DATA_DIR = "data"

# Console output: INFO and above, one timestamped record per line.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
console_handler.setLevel(logging.INFO)

# Script-wide logger that writes through the console handler above.
logger = logging.getLogger("basic_logger")
logger.addHandler(console_handler)
logger.setLevel(logging.INFO)
27
+
28
+
29
def get_data():
    """Yield the rows of the source dataset's ``train`` split one at a time.

    The dataset named by ``DS_NAME`` is opened in streaming mode with a
    forced re-download, using ``<current working directory>/DATA_DIR`` as
    the cache directory.
    """
    cache_root = os.path.join(os.getcwd(), DATA_DIR)
    streamed = load_dataset(
        DS_NAME,
        cache_dir=cache_root,
        streaming=True,
        download_mode="force_redownload",
    )
    # Delegate straight to the split's iterator instead of re-yielding by hand.
    yield from streamed["train"]
38
+
39
+
40
def process_and_push_data():
    """Rebuild the dataset from the streamed rows and push it to the Hub.

    The scratch directory ``<current working directory>/DATA_DIR`` is wiped
    and recreated first; the rows produced by ``get_data`` are then
    materialised into a ``Dataset`` and pushed to ``amaye15/tmp``.
    """
    scratch_dir = os.path.join(os.getcwd(), DATA_DIR)

    # Start every run from an empty scratch directory.
    if os.path.exists(scratch_dir):
        shutil.rmtree(scratch_dir)
    os.mkdir(scratch_dir)

    Dataset.from_generator(get_data).push_to_hub("amaye15/tmp")
51
+
52
+
53
# Register the export job with the default scheduler: once per minute.
# The function object itself is passed; the scheduler invokes it later.
schedule.every().minute.do(process_and_push_data)

# Poll forever, waking once a second to run whichever jobs have come due.
while True:
    schedule.run_pending()
    time.sleep(1)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ huggingface_hub
3
+ datasets
4
+ pretty_errors