amaye15
committed on
Commit
·
c30b770
1
Parent(s):
ebeac92
webhook test
Browse files
- app.py +70 -0
- dev.ipynb +336 -0
- old-app.py +59 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
import logging
|
4 |
+
import pretty_errors
|
5 |
+
from datasets import Dataset, load_dataset
|
6 |
+
from huggingface_hub import WebhooksServer, WebhookPayload, webhook_endpoint
|
7 |
+
|
8 |
+
# Set up the logger
|
9 |
+
logger = logging.getLogger("basic_logger")
logger.setLevel(logging.INFO)

# Console handler: INFO and above, timestamped, with logger name and level.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
console_handler.setFormatter(formatter)

# logging.getLogger() returns the same object for the same name, so attaching
# unconditionally would stack a new handler every time this module is
# re-executed (reload, notebook re-run) and duplicate every log line.
if not logger.handlers:
    logger.addHandler(console_handler)
|
17 |
+
|
18 |
+
DS_NAME = "amaye15/object-segmentation"
|
19 |
+
DATA_DIR = "data"
|
20 |
+
|
21 |
+
|
22 |
+
def get_data():
    """
    Yield the rows of the ``train`` split of ``DS_NAME`` one at a time.

    The dataset is opened in streaming mode with ``DATA_DIR`` (resolved
    against the current working directory) passed as the cache directory
    and a forced re-download.
    """
    cache_path = os.path.join(os.getcwd(), DATA_DIR)
    streamed = load_dataset(
        DS_NAME,
        cache_dir=cache_path,
        streaming=True,
        download_mode="force_redownload",
    )
    yield from streamed["train"]
|
34 |
+
|
35 |
+
|
36 |
+
def process_and_push_data():
    """
    Rebuild the dataset from the source repo and push it to the Hub.

    Empties the local ``DATA_DIR`` working directory, materializes every row
    yielded by ``get_data`` into a ``Dataset``, and uploads the result to
    ``amaye15/tmp``.
    """
    p = os.path.join(os.getcwd(), DATA_DIR)

    # Start from an empty working directory so nothing from a previous run
    # is left behind.
    if os.path.exists(p):
        shutil.rmtree(p)

    os.makedirs(p, exist_ok=True)

    # Dataset.from_generator caches the generated Arrow data keyed on the
    # generator function. With the default cache location that cache lives
    # outside DATA_DIR, survives the wipe above, and a later trigger could
    # push stale rows. Pointing the cache at the freshly emptied directory
    # forces the generator to run again on every call.
    ds_processed = Dataset.from_generator(get_data, cache_dir=p)
    ds_processed.push_to_hub("amaye15/tmp")
|
49 |
+
|
50 |
+
|
51 |
+
# Initialize the WebhooksServer
|
52 |
+
# Prefer a secret supplied through the WEBHOOK_SECRET environment variable so
# the real value does not have to live in source control; the literal is kept
# only as a fallback so existing local setups keep working.
app = WebhooksServer(
    webhook_secret=os.environ.get("WEBHOOK_SECRET") or "my_secret_key"
)
|
53 |
+
|
54 |
+
|
55 |
+
# Register on `app` itself: the bare `webhook_endpoint` decorator attaches the
# handler to huggingface_hub's implicit global server, so the `app` instance
# launched by this module (and its webhook secret) would serve no endpoint.
@app.add_webhook
async def trigger_processing(payload: WebhookPayload):
    """
    Webhook endpoint that re-processes the data when the source dataset changes.

    Processing runs only for an ``update`` event on the dataset repo named
    ``DS_NAME``; every other event is logged and ignored.

    Args:
        payload: The webhook payload; ``repo.type``, ``repo.name`` and
            ``event.action`` are inspected.

    Returns:
        A dict with a single ``"message"`` key describing what was done.
    """
    is_source_update = (
        payload.repo.type == "dataset"
        and payload.event.action == "update"
        # Only react to the dataset this app actually processes. Without this
        # check the push to the output dataset made by process_and_push_data
        # would itself count as a dataset update and could retrigger
        # processing in a loop.
        and payload.repo.name == DS_NAME
    )

    if not is_source_update:
        logger.info(f"Ignored event: {payload.event.action} on {payload.repo.name}")
        return {"message": "Event ignored."}

    logger.info(f"Dataset {payload.repo.name} updated. Triggering processing.")
    # NOTE: this call is synchronous, so the response below is only sent once
    # the whole download-and-push cycle has finished.
    process_and_push_data()
    return {"message": "Data processing triggered successfully."}
|
67 |
+
|
68 |
+
|
69 |
+
# Start the server
|
70 |
+
app.launch()
|
dev.ipynb
ADDED
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"\n",
|
10 |
+
"import os\n",
|
11 |
+
"import shutil\n",
|
12 |
+
"import logging\n",
|
13 |
+
"import pretty_errors\n",
|
14 |
+
"\n",
|
15 |
+
"import huggingface_hub\n",
|
16 |
+
"from datasets import Dataset\n",
|
17 |
+
"from datasets import load_dataset\n",
|
18 |
+
"\n",
|
19 |
+
"# Set up the logger\n",
|
20 |
+
"logger = logging.getLogger('basic_logger')\n",
|
21 |
+
"logger.setLevel(logging.INFO)\n",
|
22 |
+
"\n",
|
23 |
+
"# Set up the console handler with a simple format\n",
|
24 |
+
"console_handler = logging.StreamHandler()\n",
|
25 |
+
"console_handler.setLevel(logging.INFO)\n",
|
26 |
+
"formatter = logging.Formatter(\n",
|
27 |
+
" '%Y-%m-%d %H:%M:%S - %(name)s - %(levelname)s - %(message)s'\n",
|
28 |
+
")\n",
|
29 |
+
"console_handler.setFormatter(formatter)\n",
|
30 |
+
"logger.addHandler(console_handler)\n",
|
31 |
+
"\n",
|
32 |
+
"DS_NAME = \"amaye15/object-segmentation\"\n",
|
33 |
+
"\n",
|
34 |
+
"DATA_DIR = \"data\"\n",
|
35 |
+
"p = os.path.join(os.getcwd(), DATA_DIR)\n",
|
36 |
+
"\n",
|
37 |
+
"if os.path.exists(p):\n",
|
38 |
+
" shutil.rmtree(p)\n",
|
39 |
+
"\n",
|
40 |
+
"\n",
|
41 |
+
"os.mkdir(p)\n",
|
42 |
+
"\n",
|
43 |
+
"def get_data():\n",
|
44 |
+
" ds = load_dataset(DS_NAME, cache_dir=p, streaming=True)\n",
|
45 |
+
" for row in ds[\"train\"]:\n",
|
46 |
+
" yield row\n",
|
47 |
+
"\n",
|
48 |
+
"#ds_processed = Dataset.from_generator(get_data)\n",
|
49 |
+
"# ds_processed.push_to_hub(\"amaye15/tmp\")"
|
50 |
+
]
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"cell_type": "code",
|
54 |
+
"execution_count": 16,
|
55 |
+
"metadata": {},
|
56 |
+
"outputs": [],
|
57 |
+
"source": [
|
58 |
+
"from huggingface_hub import scan_cache_dir\n",
|
59 |
+
"\n",
|
60 |
+
"repo_info = scan_cache_dir().repos\n",
|
61 |
+
"\n"
|
62 |
+
]
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"cell_type": "code",
|
66 |
+
"execution_count": null,
|
67 |
+
"metadata": {},
|
68 |
+
"outputs": [],
|
69 |
+
"source": [
|
70 |
+
"from "
|
71 |
+
]
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"cell_type": "code",
|
75 |
+
"execution_count": 19,
|
76 |
+
"metadata": {},
|
77 |
+
"outputs": [],
|
78 |
+
"source": [
|
79 |
+
"from huggingface_hub import HfApi\n",
|
80 |
+
"\n",
|
81 |
+
"api = HfApi()\n",
|
82 |
+
"\n",
|
83 |
+
"# Get the list of revisions for the dataset\n",
|
84 |
+
"revisions = api.list_repo_refs(repo_id=DS_NAME, repo_type=\"dataset\")\n",
|
85 |
+
"\n",
|
86 |
+
"# Check the latest commit\n",
|
87 |
+
"# latest_commit = revisions[-1].commit_id\n",
|
88 |
+
"# print(f\"Latest commit ID: {latest_commit}\")"
|
89 |
+
]
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"cell_type": "code",
|
93 |
+
"execution_count": 20,
|
94 |
+
"metadata": {},
|
95 |
+
"outputs": [
|
96 |
+
{
|
97 |
+
"data": {
|
98 |
+
"text/plain": [
|
99 |
+
"GitRefs(branches=[GitRefInfo(name='main', ref='refs/heads/main', target_commit='962a9a67307296a7abc7e94c2811c450970b80df')], converts=[GitRefInfo(name='duckdb', ref='refs/convert/duckdb', target_commit='72baa589701a6cbea2b7497931c7adf1daf42121'), GitRefInfo(name='parquet', ref='refs/convert/parquet', target_commit='c209a987d23de50a04ec9766e04dde2e4db7f5fb')], tags=[], pull_requests=None)"
|
100 |
+
]
|
101 |
+
},
|
102 |
+
"execution_count": 20,
|
103 |
+
"metadata": {},
|
104 |
+
"output_type": "execute_result"
|
105 |
+
}
|
106 |
+
],
|
107 |
+
"source": [
|
108 |
+
"revisions"
|
109 |
+
]
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"cell_type": "code",
|
113 |
+
"execution_count": 15,
|
114 |
+
"metadata": {},
|
115 |
+
"outputs": [
|
116 |
+
{
|
117 |
+
"data": {
|
118 |
+
"text/plain": [
|
119 |
+
"frozenset({CachedRepoInfo(repo_id='amaye15/DaViT', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT'), size_on_disk=1677, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='a96d58f5ca3d0b138d8efe7618a860b10f8d986b', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/snapshots/a96d58f5ca3d0b138d8efe7618a860b10f8d986b'), size_on_disk=1677, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/snapshots/a96d58f5ca3d0b138d8efe7618a860b10f8d986b/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/blobs/c928ad7cd8a9f9e48fc14780b84e5dd2ad6b1606'), size_on_disk=1677, blob_last_accessed=1722324257.4824574, blob_last_modified=1722324257.396636)}), refs=frozenset({'main'}), last_modified=1722324257.396636)}), last_accessed=1722324257.4824574, last_modified=1722324257.396636),\n",
|
120 |
+
" CachedRepoInfo(repo_id='amaye15/DaViT-Florence-2-large-ft', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft'), size_on_disk=1834, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='4cc7068026aaeb388ba2b0826abae30d670de3fc', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/snapshots/4cc7068026aaeb388ba2b0826abae30d670de3fc'), size_on_disk=1834, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/snapshots/4cc7068026aaeb388ba2b0826abae30d670de3fc/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/blobs/ab9f4c4537bc89f3a4cb187db5d771be9242f09f'), size_on_disk=1834, blob_last_accessed=1722405977.6422648, blob_last_modified=1722405977.6310754)}), refs=frozenset({'main'}), last_modified=1722405977.6310754)}), last_accessed=1722405977.6422648, last_modified=1722405977.6310754),\n",
|
121 |
+
" CachedRepoInfo(repo_id='amaye15/NSFW', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW'), size_on_disk=1240, nb_files=2, revisions=frozenset({CachedRevisionInfo(commit_hash='c76b1c300fb672189feb59f8faa1027b2d6956b3', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/c76b1c300fb672189feb59f8faa1027b2d6956b3'), size_on_disk=619, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/c76b1c300fb672189feb59f8faa1027b2d6956b3/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/blobs/dc8bcda261a57d5275af975f1411afdadc094009'), size_on_disk=619, blob_last_accessed=1722723952.58199, blob_last_modified=1722723952.5701885)}), refs=frozenset(), last_modified=1722723952.5701885), CachedRevisionInfo(commit_hash='b5cfb52e5a260983c6e6f70c7b21574efce998b1', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/b5cfb52e5a260983c6e6f70c7b21574efce998b1'), size_on_disk=621, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/b5cfb52e5a260983c6e6f70c7b21574efce998b1/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/blobs/5924f8b7af709a9f080759cac11ea6f1c976df5d'), size_on_disk=621, blob_last_accessed=1722768256.9392703, blob_last_modified=1722768256.9274719)}), refs=frozenset({'main'}), last_modified=1722768256.9274719)}), last_accessed=1722768256.9392703, last_modified=1722768256.9274719),\n",
|
122 |
+
" CachedRepoInfo(repo_id='amaye15/Products-10k', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k'), size_on_disk=620, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/snapshots/05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c'), size_on_disk=620, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/snapshots/05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/blobs/a71affed5a9687aeabd33f9aa94c9cde049eb533'), size_on_disk=620, blob_last_accessed=1723091983.5595, blob_last_modified=1723091983.547405)}), refs=frozenset({'main'}), last_modified=1723091983.547405)}), last_accessed=1723091983.5595, last_modified=1723091983.547405),\n",
|
123 |
+
" CachedRepoInfo(repo_id='amaye15/SwinV2-Base-Document-Classifier', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier'), size_on_disk=590, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='b0968577b56aec082d7cde1d2b04f68173b8e674', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/snapshots/b0968577b56aec082d7cde1d2b04f68173b8e674'), size_on_disk=590, files=frozenset({CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/snapshots/b0968577b56aec082d7cde1d2b04f68173b8e674/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/blobs/86614921b04ad5b6e3d4ee5448f11efe6cc67917'), size_on_disk=590, blob_last_accessed=1722750225.5857947, blob_last_modified=1722750225.574735)}), refs=frozenset({'main'}), last_modified=1722750225.574735)}), last_accessed=1722750225.5857947, last_modified=1722750225.574735),\n",
|
124 |
+
" CachedRepoInfo(repo_id='amaye15/invoices', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices'), size_on_disk=618, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='f4e8d7dda1472da87125237182dc9f4d5fd860dc', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/snapshots/f4e8d7dda1472da87125237182dc9f4d5fd860dc'), size_on_disk=618, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/snapshots/f4e8d7dda1472da87125237182dc9f4d5fd860dc/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/blobs/10d6a19135e958a4856ebfbd82b130f571667b26'), size_on_disk=618, blob_last_accessed=1723087468.3128088, blob_last_modified=1723087468.3009398)}), refs=frozenset({'main'}), last_modified=1723087468.3009398)}), last_accessed=1723087468.3128088, last_modified=1723087468.3009398),\n",
|
125 |
+
" CachedRepoInfo(repo_id='amaye15/receipts', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts'), size_on_disk=617, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='7eaf60e64883eee2a744c1e00658967e0b61aab3', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/snapshots/7eaf60e64883eee2a744c1e00658967e0b61aab3'), size_on_disk=617, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/snapshots/7eaf60e64883eee2a744c1e00658967e0b61aab3/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/blobs/22cf712cf7551f2d2df0e6d87358a104fa485122'), size_on_disk=617, blob_last_accessed=1723085450.105201, blob_last_modified=1723085450.0932333)}), refs=frozenset({'main'}), last_modified=1723085450.0932333)}), last_accessed=1723085450.105201, last_modified=1723085450.0932333),\n",
|
126 |
+
" CachedRepoInfo(repo_id='amaye15/tmp', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp'), size_on_disk=372, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='b593656ae71cef84e90be18cf6bb29cdc74fd7ff', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/snapshots/b593656ae71cef84e90be18cf6bb29cdc74fd7ff'), size_on_disk=372, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/snapshots/b593656ae71cef84e90be18cf6bb29cdc74fd7ff/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/blobs/c274c17b952e2eba2a83b9255b334db02fd75125'), size_on_disk=372, blob_last_accessed=1724597074.5835145, blob_last_modified=1724597074.5719097)}), refs=frozenset({'main'}), last_modified=1724597074.5719097)}), last_accessed=1724597074.5835145, last_modified=1724597074.5719097),\n",
|
127 |
+
" CachedRepoInfo(repo_id='caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr'), size_on_disk=48461065, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='bb13f02e45e88d00b6c202b3fbe6a181af144606', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606'), size_on_disk=48461065, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/0a15b8aeffe63d67948215a81d191fd8190f16be'), size_on_disk=772, blob_last_accessed=1722954840.0557656, blob_last_modified=1722954840.043787), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/539dbfb6265f0ece81a881579565e88b90668fc4'), size_on_disk=152, blob_last_accessed=1722954839.8127632, blob_last_modified=1722954839.8014247), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/pytorch_model.bin'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/4a5f52a20932085557ed115f87c0ee8385e12f2719108c0dfd38c64aedea4710'), 
size_on_disk=48460141, blob_last_accessed=1722954848.1445184, blob_last_modified=1722954848.0298514)}), refs=frozenset({'main'}), last_modified=1722954848.0298514)}), last_accessed=1722954848.1445184, last_modified=1722954848.0298514),\n",
|
128 |
+
" CachedRepoInfo(repo_id='facebook/bart-large', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large'), size_on_disk=1628, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='cb48c1365bd826bd521f650dc2e0940aee54720c', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/snapshots/cb48c1365bd826bd521f650dc2e0940aee54720c'), size_on_disk=1628, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/snapshots/cb48c1365bd826bd521f650dc2e0940aee54720c/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/blobs/79568cb2491a1a4da49f32fb723018158c222712'), size_on_disk=1628, blob_last_accessed=1722754758.8173473, blob_last_modified=1722754758.8058388)}), refs=frozenset({'main'}), last_modified=1722754758.8058388)}), last_accessed=1722754758.8173473, last_modified=1722754758.8058388),\n",
|
129 |
+
" CachedRepoInfo(repo_id='facebook/sam2-hiera-base-plus', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus'), size_on_disk=323493298, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/snapshots/9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9'), size_on_disk=323493298, files=frozenset({CachedFileInfo(file_name='sam2_hiera_base_plus.pt', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/snapshots/9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9/sam2_hiera_base_plus.pt'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/blobs/d0bb7f236400a49669ffdd1be617959a8b1d1065081789d7bbff88eded3a8071'), size_on_disk=323493298, blob_last_accessed=1723985664.6263692, blob_last_modified=1723985638.2220697)}), refs=frozenset({'main'}), last_modified=1723985638.2220697)}), last_accessed=1723985664.6263692, last_modified=1723985638.2220697),\n",
|
130 |
+
" CachedRepoInfo(repo_id='facebook/sam2-hiera-large', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large'), size_on_disk=897952466, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='eba9be237c463eb950e64b65c223ad55c878c2ac', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/snapshots/eba9be237c463eb950e64b65c223ad55c878c2ac'), size_on_disk=897952466, files=frozenset({CachedFileInfo(file_name='sam2_hiera_large.pt', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/snapshots/eba9be237c463eb950e64b65c223ad55c878c2ac/sam2_hiera_large.pt'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/blobs/7442e4e9b732a508f80e141e7c2913437a3610ee0c77381a66658c3a445df87b'), size_on_disk=897952466, blob_last_accessed=1723985746.4751956, blob_last_modified=1723985745.5689125)}), refs=frozenset({'main'}), last_modified=1723985745.5689125)}), last_accessed=1723985746.4751956, last_modified=1723985745.5689125),\n",
|
131 |
+
" CachedRepoInfo(repo_id='microsoft/Florence-2-large-ft', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft'), size_on_disk=2647748, nb_files=8, revisions=frozenset({CachedRevisionInfo(commit_hash='bb44b80c15e943b1bf7cec6e076359cec6e40178', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178'), size_on_disk=2647748, files=frozenset({CachedFileInfo(file_name='processing_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/processing_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/538110e8fd421258847d317cb62c40b9671d07a9'), size_on_disk=46372, blob_last_accessed=1722187335.8618798, blob_last_modified=1722187335.854595), CachedFileInfo(file_name='tokenizer_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/tokenizer_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/44784bc58d4cb18d3549ad71e062efcf032d9ef5'), size_on_disk=34, blob_last_accessed=1722187335.5466971, blob_last_modified=1722187334.7324762), CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/bff93d862796105c8cf1a0b3331ad3bec68aee91'), size_on_disk=2445, blob_last_accessed=1722186181.1469133, blob_last_modified=1722186180.799109), CachedFileInfo(file_name='vocab.json', 
file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/vocab.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/94a2f4fd50e976bda926c700291522ea1a79323f'), size_on_disk=1099884, blob_last_accessed=1722187336.2418828, blob_last_modified=1722187336.7108266), CachedFileInfo(file_name='modeling_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/modeling_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/e5ee65134d1a5e98357f8d500c9b9af5f8c00a08'), size_on_disk=127219, blob_last_accessed=1722225017.2661808, blob_last_modified=1722225017.1880703), CachedFileInfo(file_name='configuration_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/configuration_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/622f74997c5612ff68d0e55063714f291d159166'), size_on_disk=15125, blob_last_accessed=1722187334.9981484, blob_last_modified=1722187334.9932766), CachedFileInfo(file_name='tokenizer.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/tokenizer.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/ad0bcbeb288f0d1373d88e0762e66357f55b8311'), size_on_disk=1355863, blob_last_accessed=1722187337.8523662, blob_last_modified=1722187337.4607415), CachedFileInfo(file_name='preprocessor_config.json', 
file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/85cd7be3568df661ad536b6ab20d59b08ba079ae'), size_on_disk=806, blob_last_accessed=1722187335.9961612, blob_last_modified=1722187335.4969347)}), refs=frozenset({'main'}), last_modified=1722225017.1880703)}), last_accessed=1722225017.2661808, last_modified=1722225017.1880703),\n",
|
132 |
+
" CachedRepoInfo(repo_id='microsoft/swinv2-base-patch4-window16-256', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256'), size_on_disk=351904021, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78'), size_on_disk=351904021, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/9f6070ac05bf6f561f789e8699a4dc387df58724'), size_on_disk=69910, blob_last_accessed=1722848474.6562126, blob_last_modified=1722848474.6428308), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/fb816e3190d8ed24279c9975f45efeb660493c61'), size_on_disk=240, blob_last_accessed=1722847982.5025482, blob_last_modified=1722847982.4988532), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/pytorch_model.bin'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/c9307c9aa168a730c370d472783ae8274408a059e95245e0d7fcf1a1d91cf9aa'), size_on_disk=351833871, 
blob_last_accessed=1723624967.1287704, blob_last_modified=1722848484.9202104)}), refs=frozenset({'main'}), last_modified=1722848484.9202104)}), last_accessed=1723624967.1287704, last_modified=1722848484.9202104),\n",
|
133 |
+
" CachedRepoInfo(repo_id='thanhhau097/swin2SR-realworld-sr-x4-64-bsrgan-psnr', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr'), size_on_disk=48456429, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='e345b33f8e7e14b0dce731505234a8425412e343', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343'), size_on_disk=48456429, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/0a15b8aeffe63d67948215a81d191fd8190f16be'), size_on_disk=772, blob_last_accessed=1722954764.2667823, blob_last_modified=1722954764.2559414), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/539dbfb6265f0ece81a881579565e88b90668fc4'), size_on_disk=152, blob_last_accessed=1722954763.868174, blob_last_modified=1722954763.8569045), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/pytorch_model.bin'), 
blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/91b0a2ca989b9c4e5a91124f67f552741594fd1bd41e3114d65a316d36f45e60'), size_on_disk=48455505, blob_last_accessed=1722954768.0409808, blob_last_modified=1722954767.9221504)}), refs=frozenset({'main'}), last_modified=1722954767.9221504)}), last_accessed=1722954768.0409808, last_modified=1722954767.9221504)})"
|
134 |
+
]
|
135 |
+
},
|
136 |
+
"execution_count": 15,
|
137 |
+
"metadata": {},
|
138 |
+
"output_type": "execute_result"
|
139 |
+
}
|
140 |
+
],
|
141 |
+
"source": [
|
142 |
+
"scan_cache_dir().repos"
|
143 |
+
]
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"cell_type": "code",
|
147 |
+
"execution_count": 17,
|
148 |
+
"metadata": {},
|
149 |
+
"outputs": [
|
150 |
+
{
|
151 |
+
"name": "stdout",
|
152 |
+
"output_type": "stream",
|
153 |
+
"text": [
|
154 |
+
"facebook/sam2-hiera-base-plus\n",
|
155 |
+
"caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr\n",
|
156 |
+
"amaye15/receipts\n",
|
157 |
+
"amaye15/DaViT-Florence-2-large-ft\n",
|
158 |
+
"amaye15/tmp\n",
|
159 |
+
"amaye15/Products-10k\n",
|
160 |
+
"amaye15/invoices\n",
|
161 |
+
"microsoft/Florence-2-large-ft\n",
|
162 |
+
"microsoft/swinv2-base-patch4-window16-256\n",
|
163 |
+
"thanhhau097/swin2SR-realworld-sr-x4-64-bsrgan-psnr\n",
|
164 |
+
"amaye15/SwinV2-Base-Document-Classifier\n",
|
165 |
+
"amaye15/DaViT\n",
|
166 |
+
"facebook/sam2-hiera-large\n",
|
167 |
+
"facebook/bart-large\n",
|
168 |
+
"amaye15/NSFW\n"
|
169 |
+
]
|
170 |
+
}
|
171 |
+
],
|
172 |
+
"source": [
|
173 |
+
"for r in repo_info:\n",
|
174 |
+
" #if r.repo_n == DS_NAME:\n",
|
175 |
+
"\n",
|
176 |
+
" print(r.repo_id)"
|
177 |
+
]
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"cell_type": "code",
|
181 |
+
"execution_count": 2,
|
182 |
+
"metadata": {},
|
183 |
+
"outputs": [
|
184 |
+
{
|
185 |
+
"data": {
|
186 |
+
"application/vnd.jupyter.widget-view+json": {
|
187 |
+
"model_id": "825736a8246f4fb593f4847c5c2268b6",
|
188 |
+
"version_major": 2,
|
189 |
+
"version_minor": 0
|
190 |
+
},
|
191 |
+
"text/plain": [
|
192 |
+
"Downloading readme: 0%| | 0.00/5.24k [00:00<?, ?B/s]"
|
193 |
+
]
|
194 |
+
},
|
195 |
+
"metadata": {},
|
196 |
+
"output_type": "display_data"
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"data": {
|
200 |
+
"application/vnd.jupyter.widget-view+json": {
|
201 |
+
"model_id": "699d58320ad6465697460490bfffaf65",
|
202 |
+
"version_major": 2,
|
203 |
+
"version_minor": 0
|
204 |
+
},
|
205 |
+
"text/plain": [
|
206 |
+
"Resolving data files: 0%| | 0/38 [00:00<?, ?it/s]"
|
207 |
+
]
|
208 |
+
},
|
209 |
+
"metadata": {},
|
210 |
+
"output_type": "display_data"
|
211 |
+
}
|
212 |
+
],
|
213 |
+
"source": [
|
214 |
+
"ds = load_dataset(DS_NAME, cache_dir=p, streaming=True)"
|
215 |
+
]
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"cell_type": "code",
|
219 |
+
"execution_count": 4,
|
220 |
+
"metadata": {},
|
221 |
+
"outputs": [
|
222 |
+
{
|
223 |
+
"ename": "AttributeError",
|
224 |
+
"evalue": "'IterableDataset' object has no attribute 'cleanup_cache_files'",
|
225 |
+
"output_type": "error",
|
226 |
+
"traceback": [
|
227 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
228 |
+
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
229 |
+
"Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mds\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtrain\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcleanup_cache_files\u001b[49m()\n",
|
230 |
+
"\u001b[0;31mAttributeError\u001b[0m: 'IterableDataset' object has no attribute 'cleanup_cache_files'"
|
231 |
+
]
|
232 |
+
}
|
233 |
+
],
|
234 |
+
"source": [
|
235 |
+
"ds[\"train\"].cleanup_cache_files()"
|
236 |
+
]
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"cell_type": "code",
|
240 |
+
"execution_count": 3,
|
241 |
+
"metadata": {},
|
242 |
+
"outputs": [
|
243 |
+
{
|
244 |
+
"data": {
|
245 |
+
"text/plain": [
|
246 |
+
"False"
|
247 |
+
]
|
248 |
+
},
|
249 |
+
"execution_count": 3,
|
250 |
+
"metadata": {},
|
251 |
+
"output_type": "execute_result"
|
252 |
+
}
|
253 |
+
],
|
254 |
+
"source": [
|
255 |
+
"os.path.exists(os.path.join(os.getcwd(), \"data\"))"
|
256 |
+
]
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"cell_type": "code",
|
260 |
+
"execution_count": 2,
|
261 |
+
"metadata": {},
|
262 |
+
"outputs": [
|
263 |
+
{
|
264 |
+
"data": {
|
265 |
+
"text/plain": [
|
266 |
+
"Dataset({\n",
|
267 |
+
" features: ['image', 'masked_image', 'mask'],\n",
|
268 |
+
" num_rows: 37\n",
|
269 |
+
"})"
|
270 |
+
]
|
271 |
+
},
|
272 |
+
"execution_count": 2,
|
273 |
+
"metadata": {},
|
274 |
+
"output_type": "execute_result"
|
275 |
+
}
|
276 |
+
],
|
277 |
+
"source": [
|
278 |
+
"ds_processed"
|
279 |
+
]
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"cell_type": "code",
|
283 |
+
"execution_count": 2,
|
284 |
+
"metadata": {},
|
285 |
+
"outputs": [
|
286 |
+
{
|
287 |
+
"ename": "AttributeError",
|
288 |
+
"evalue": "type object 'DatasetDict' has no attribute 'get_cache_files_size'",
|
289 |
+
"output_type": "error",
|
290 |
+
"traceback": [
|
291 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
292 |
+
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
293 |
+
"Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DatasetDict\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Get the cache size\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m cache_size \u001b[38;5;241m=\u001b[39m \u001b[43mDatasetDict\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_cache_files_size\u001b[49m()\n\u001b[1;32m 6\u001b[0m cache_size\n",
|
294 |
+
"\u001b[0;31mAttributeError\u001b[0m: type object 'DatasetDict' has no attribute 'get_cache_files_size'"
|
295 |
+
]
|
296 |
+
}
|
297 |
+
],
|
298 |
+
"source": [
|
299 |
+
"from datasets import DatasetDict\n",
|
300 |
+
"\n",
|
301 |
+
"# Get the cache size\n",
|
302 |
+
"cache_size = DatasetDict.get_cache_files_size()\n",
|
303 |
+
"\n",
|
304 |
+
"cache_size"
|
305 |
+
]
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"cell_type": "code",
|
309 |
+
"execution_count": null,
|
310 |
+
"metadata": {},
|
311 |
+
"outputs": [],
|
312 |
+
"source": []
|
313 |
+
}
|
314 |
+
],
|
315 |
+
"metadata": {
|
316 |
+
"kernelspec": {
|
317 |
+
"display_name": "env",
|
318 |
+
"language": "python",
|
319 |
+
"name": "python3"
|
320 |
+
},
|
321 |
+
"language_info": {
|
322 |
+
"codemirror_mode": {
|
323 |
+
"name": "ipython",
|
324 |
+
"version": 3
|
325 |
+
},
|
326 |
+
"file_extension": ".py",
|
327 |
+
"mimetype": "text/x-python",
|
328 |
+
"name": "python",
|
329 |
+
"nbconvert_exporter": "python",
|
330 |
+
"pygments_lexer": "ipython3",
|
331 |
+
"version": "3.12.4"
|
332 |
+
}
|
333 |
+
},
|
334 |
+
"nbformat": 4,
|
335 |
+
"nbformat_minor": 2
|
336 |
+
}
|
old-app.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
import logging
|
4 |
+
import pretty_errors
|
5 |
+
import huggingface_hub
|
6 |
+
from datasets import Dataset, load_dataset, disable_caching
|
7 |
+
import schedule
|
8 |
+
import time
|
9 |
+
|
10 |
+
# Turn off the `datasets` library caching before any dataset is loaded.
disable_caching()

# Module logger: INFO level, emitting to the console through a stream handler.
logger = logging.getLogger("basic_logger")
logger.setLevel(logging.INFO)

# Timestamp - logger name - level - message.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

# Hub dataset that is read, and the local directory (relative to the current
# working directory) that is passed to `load_dataset` as its cache location.
DS_NAME = "amaye15/object-segmentation"
DATA_DIR = "data"
|
27 |
+
|
28 |
+
|
29 |
+
def get_data():
    """
    Generator function to stream data from the dataset.

    Loads ``DS_NAME`` in streaming mode, using ``<cwd>/DATA_DIR`` as the cache
    directory and forcing a redownload, then yields every row of the
    ``"train"`` split in order.
    """
    cache_path = os.path.join(os.getcwd(), DATA_DIR)
    streamed = load_dataset(
        DS_NAME,
        cache_dir=cache_path,
        streaming=True,
        download_mode="force_redownload",
    )
    yield from streamed["train"]
|
38 |
+
|
39 |
+
|
40 |
+
def process_and_push_data():
    """
    Function to process and push new data.

    Recreates the local ``<cwd>/DATA_DIR`` directory from scratch, materializes
    the rows produced by ``get_data`` into a ``Dataset``, and pushes the result
    to the ``amaye15/tmp`` repository on the Hub.
    """
    data_path = os.path.join(os.getcwd(), DATA_DIR)

    # Start from an empty directory: remove whatever a previous run left behind.
    already_there = os.path.exists(data_path)
    if already_there:
        shutil.rmtree(data_path)
    os.mkdir(data_path)

    snapshot = Dataset.from_generator(get_data)
    snapshot.push_to_hub("amaye15/tmp")
|
50 |
+
# logger.info("Data processed and pushed to the hub.")
|
51 |
+
|
52 |
+
|
53 |
+
# Register the job: the function object itself is handed to the scheduler,
# which calls it once per minute.
every_minute = schedule.every(1).minute
every_minute.do(process_and_push_data)

# Poll the scheduler forever, checking for due jobs once per second.
while True:
    schedule.run_pending()
    time.sleep(1)
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
huggingface_hub
|
3 |
+
datasets
|
4 |
+
pretty_errors
|