davidberenstein1957 HF staff committed on
Commit
2b4b309
·
1 Parent(s): e4b6cc5

feat: add dataset upload

Browse files
.DS_Store ADDED
Binary file (8.2 kB). View file
 
app.py CHANGED
@@ -5,8 +5,9 @@ from distilabel_dataset_generator.sft import demo
5
  demo = gr.TabbedInterface(
6
  [demo],
7
  ["Supervised Fine-Tuning"],
8
- title="⚗️ Distilabel Dataset Generator",
9
  head="⚗️ Distilabel Dataset Generator",
10
  )
11
 
12
- demo.launch()
 
 
5
  demo = gr.TabbedInterface(
6
  [demo],
7
  ["Supervised Fine-Tuning"],
8
+ title="Distilabel Dataset Generator",
9
  head="⚗️ Distilabel Dataset Generator",
10
  )
11
 
12
+ if __name__ == "__main__":
13
+ demo.launch()
assets/logo.png ADDED
assets/logo.svg ADDED
pdm.lock CHANGED
@@ -5,7 +5,7 @@
5
  groups = ["default"]
6
  strategy = ["inherit_metadata"]
7
  lock_version = "4.5.0"
8
- content_hash = "sha256:ce806528890aa0c2a97087541df0aca8ccc9f07f2df4c66877e0ad764dcc69d1"
9
 
10
  [[metadata.targets]]
11
  requires_python = ">=3.10"
@@ -185,6 +185,20 @@ files = [
185
  {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"},
186
  ]
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  [[package]]
189
  name = "certifi"
190
  version = "2024.8.30"
@@ -196,6 +210,66 @@ files = [
196
  {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"},
197
  ]
198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  [[package]]
200
  name = "charset-normalizer"
201
  version = "3.3.2"
@@ -343,6 +417,41 @@ files = [
343
  {file = "contourpy-1.3.0.tar.gz", hash = "sha256:7ffa0db17717a8ffb127efd0c95a4362d996b892c2904db72428d5b52e1938a4"},
344
  ]
345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  [[package]]
347
  name = "cycler"
348
  version = "0.12.1"
@@ -663,6 +772,23 @@ files = [
663
  {file = "gradio_client-1.3.0.tar.gz", hash = "sha256:d904afeae4f5682add0a6a263542c10e7669ff6c9de0a53a5c2fc9b719a24bb8"},
664
  ]
665
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
666
  [[package]]
667
  name = "h11"
668
  version = "0.14.0"
@@ -755,6 +881,17 @@ files = [
755
  {file = "importlib_resources-6.4.4.tar.gz", hash = "sha256:20600c8b7361938dc0bb2d5ec0297802e575df486f5a544fa414da65e13721f7"},
756
  ]
757
 
 
 
 
 
 
 
 
 
 
 
 
758
  [[package]]
759
  name = "jinja2"
760
  version = "3.1.4"
@@ -1328,6 +1465,18 @@ files = [
1328
  {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"},
1329
  ]
1330
 
 
 
 
 
 
 
 
 
 
 
 
 
1331
  [[package]]
1332
  name = "pydantic"
1333
  version = "2.9.1"
 
5
  groups = ["default"]
6
  strategy = ["inherit_metadata"]
7
  lock_version = "4.5.0"
8
+ content_hash = "sha256:957e0276f679a9f7e65c68e3dbd1a1565f4b515468943faf6bdeb36b65a271a0"
9
 
10
  [[metadata.targets]]
11
  requires_python = ">=3.10"
 
185
  {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"},
186
  ]
187
 
188
+ [[package]]
189
+ name = "authlib"
190
+ version = "1.3.2"
191
+ requires_python = ">=3.8"
192
+ summary = "The ultimate Python library in building OAuth and OpenID Connect servers and clients."
193
+ groups = ["default"]
194
+ dependencies = [
195
+ "cryptography",
196
+ ]
197
+ files = [
198
+ {file = "Authlib-1.3.2-py2.py3-none-any.whl", hash = "sha256:ede026a95e9f5cdc2d4364a52103f5405e75aa156357e831ef2bfd0bc5094dfc"},
199
+ {file = "authlib-1.3.2.tar.gz", hash = "sha256:4b16130117f9eb82aa6eec97f6dd4673c3f960ac0283ccdae2897ee4bc030ba2"},
200
+ ]
201
+
202
  [[package]]
203
  name = "certifi"
204
  version = "2024.8.30"
 
210
  {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"},
211
  ]
212
 
213
+ [[package]]
214
+ name = "cffi"
215
+ version = "1.17.1"
216
+ requires_python = ">=3.8"
217
+ summary = "Foreign Function Interface for Python calling C code."
218
+ groups = ["default"]
219
+ marker = "platform_python_implementation != \"PyPy\""
220
+ dependencies = [
221
+ "pycparser",
222
+ ]
223
+ files = [
224
+ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"},
225
+ {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"},
226
+ {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"},
227
+ {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"},
228
+ {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"},
229
+ {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"},
230
+ {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"},
231
+ {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"},
232
+ {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"},
233
+ {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"},
234
+ {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"},
235
+ {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"},
236
+ {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"},
237
+ {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"},
238
+ {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"},
239
+ {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"},
240
+ {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"},
241
+ {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"},
242
+ {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"},
243
+ {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"},
244
+ {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"},
245
+ {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"},
246
+ {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"},
247
+ {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"},
248
+ {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"},
249
+ {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"},
250
+ {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"},
251
+ {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"},
252
+ {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"},
253
+ {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"},
254
+ {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"},
255
+ {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"},
256
+ {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"},
257
+ {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"},
258
+ {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"},
259
+ {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"},
260
+ {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"},
261
+ {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"},
262
+ {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"},
263
+ {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"},
264
+ {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"},
265
+ {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"},
266
+ {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"},
267
+ {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"},
268
+ {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"},
269
+ {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"},
270
+ {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"},
271
+ ]
272
+
273
  [[package]]
274
  name = "charset-normalizer"
275
  version = "3.3.2"
 
417
  {file = "contourpy-1.3.0.tar.gz", hash = "sha256:7ffa0db17717a8ffb127efd0c95a4362d996b892c2904db72428d5b52e1938a4"},
418
  ]
419
 
420
+ [[package]]
421
+ name = "cryptography"
422
+ version = "43.0.1"
423
+ requires_python = ">=3.7"
424
+ summary = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
425
+ groups = ["default"]
426
+ dependencies = [
427
+ "cffi>=1.12; platform_python_implementation != \"PyPy\"",
428
+ ]
429
+ files = [
430
+ {file = "cryptography-43.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d"},
431
+ {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062"},
432
+ {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962"},
433
+ {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277"},
434
+ {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a"},
435
+ {file = "cryptography-43.0.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042"},
436
+ {file = "cryptography-43.0.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494"},
437
+ {file = "cryptography-43.0.1-cp37-abi3-win32.whl", hash = "sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2"},
438
+ {file = "cryptography-43.0.1-cp37-abi3-win_amd64.whl", hash = "sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d"},
439
+ {file = "cryptography-43.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d"},
440
+ {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806"},
441
+ {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85"},
442
+ {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c"},
443
+ {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1"},
444
+ {file = "cryptography-43.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa"},
445
+ {file = "cryptography-43.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4"},
446
+ {file = "cryptography-43.0.1-cp39-abi3-win32.whl", hash = "sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47"},
447
+ {file = "cryptography-43.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb"},
448
+ {file = "cryptography-43.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034"},
449
+ {file = "cryptography-43.0.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d"},
450
+ {file = "cryptography-43.0.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289"},
451
+ {file = "cryptography-43.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84"},
452
+ {file = "cryptography-43.0.1.tar.gz", hash = "sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d"},
453
+ ]
454
+
455
  [[package]]
456
  name = "cycler"
457
  version = "0.12.1"
 
772
  {file = "gradio_client-1.3.0.tar.gz", hash = "sha256:d904afeae4f5682add0a6a263542c10e7669ff6c9de0a53a5c2fc9b719a24bb8"},
773
  ]
774
 
775
+ [[package]]
776
+ name = "gradio"
777
+ version = "4.43.0"
778
+ extras = ["oauth"]
779
+ requires_python = ">=3.8"
780
+ summary = "Python library for easily interacting with trained machine learning models"
781
+ groups = ["default"]
782
+ dependencies = [
783
+ "authlib",
784
+ "gradio==4.43.0",
785
+ "itsdangerous",
786
+ ]
787
+ files = [
788
+ {file = "gradio-4.43.0-py3-none-any.whl", hash = "sha256:a8a785af95b7985d1d17287b1f79add3c913ed79180c4b89ef30cea375329bb9"},
789
+ {file = "gradio-4.43.0.tar.gz", hash = "sha256:0d510e98b7fcb5d829fdc9452dcfef9f060c86130c9e85a539b9e709ba460823"},
790
+ ]
791
+
792
  [[package]]
793
  name = "h11"
794
  version = "0.14.0"
 
881
  {file = "importlib_resources-6.4.4.tar.gz", hash = "sha256:20600c8b7361938dc0bb2d5ec0297802e575df486f5a544fa414da65e13721f7"},
882
  ]
883
 
884
+ [[package]]
885
+ name = "itsdangerous"
886
+ version = "2.2.0"
887
+ requires_python = ">=3.8"
888
+ summary = "Safely pass data to untrusted environments and back."
889
+ groups = ["default"]
890
+ files = [
891
+ {file = "itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef"},
892
+ {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"},
893
+ ]
894
+
895
  [[package]]
896
  name = "jinja2"
897
  version = "3.1.4"
 
1465
  {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"},
1466
  ]
1467
 
1468
+ [[package]]
1469
+ name = "pycparser"
1470
+ version = "2.22"
1471
+ requires_python = ">=3.8"
1472
+ summary = "C parser in Python"
1473
+ groups = ["default"]
1474
+ marker = "platform_python_implementation != \"PyPy\""
1475
+ files = [
1476
+ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
1477
+ {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
1478
+ ]
1479
+
1480
  [[package]]
1481
  name = "pydantic"
1482
  version = "2.9.1"
pyproject.toml CHANGED
@@ -7,7 +7,7 @@ authors = [
7
  ]
8
  dependencies = [
9
  "distilabel[hf-inference-endpoints]>=1.3.2",
10
- "gradio",
11
  "transformers>=4.44.2",
12
  ]
13
  requires-python = ">=3.10"
 
7
  ]
8
  dependencies = [
9
  "distilabel[hf-inference-endpoints]>=1.3.2",
10
+ "gradio[oauth]<5,>=4.38",
11
  "transformers>=4.44.2",
12
  ]
13
  requires-python = ">=3.10"
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  distilabel[hf-inference-endpoints]>=1.3.2
2
  transformers
3
- gradio
 
1
  distilabel[hf-inference-endpoints]>=1.3.2
2
  transformers
3
+ gradio[oauth]
src/distilabel_dataset_generator/sft.py CHANGED
@@ -1,9 +1,13 @@
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  from distilabel.llms import InferenceEndpointsLLM
4
  from distilabel.pipeline import Pipeline
5
  from distilabel.steps.tasks import MagpieGenerator, TextGeneration
6
 
 
 
7
  INFORMATION_SEEKING_PROMPT = (
8
  "You are an AI assistant designed to provide accurate and concise information on a wide"
9
  " range of topics. Your purpose is to assist users in finding specific facts,"
@@ -112,7 +116,7 @@ The prompt you write should follow the same style and structure as the following
112
  User dataset description:
113
  """
114
 
115
- MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
116
 
117
  generate_description = TextGeneration(
118
  llm=InferenceEndpointsLLM(
@@ -129,20 +133,7 @@ generate_description = TextGeneration(
129
  generate_description.load()
130
 
131
 
132
- def _generate_system_prompt(_dataset_description):
133
- return next(
134
- generate_description.process(
135
- [
136
- {
137
- "system_prompt": PROMPT_CREATION_PROMPT,
138
- "instruction": _dataset_description,
139
- }
140
- ]
141
- )
142
- )[0]["generation"]
143
-
144
-
145
- def _generate_dataset(_system_prompt, _num_turns=1, _num_rows=5):
146
  with Pipeline(name="sft") as pipeline:
147
  magpie_step = MagpieGenerator(
148
  llm=InferenceEndpointsLLM(
@@ -157,25 +148,69 @@ def _generate_dataset(_system_prompt, _num_turns=1, _num_rows=5):
157
  num_rows=_num_rows,
158
  system_prompt=_system_prompt,
159
  )
160
- magpie_step.load()
161
- if _num_turns == 1:
162
- outputs = {"instruction": [], "response": []}
163
- for _ in range(_num_rows):
164
- entry = next(magpie_step.process())[0][0]
165
- outputs["instruction"].append(entry["instruction"])
166
- outputs["response"].append(entry["response"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  else:
168
- outputs = {"conversation": []}
169
- for _ in range(_num_rows):
170
- entry = next(magpie_step.process())[0][0]
171
- outputs["conversation"].append(entry["conversation"])
172
- return pd.DataFrame(outputs)
 
 
 
 
 
 
 
 
 
 
173
 
174
 
175
  with gr.Blocks(
176
  title="⚗️ Distilabel Dataset Generator",
177
  head="⚗️ Distilabel Dataset Generator",
178
  ) as demo:
 
 
179
  dataset_description = gr.Textbox(
180
  label="Provide a description of the dataset",
181
  value="A chemistry dataset for an assistant that explains chemical reactions and formulas",
@@ -212,7 +247,7 @@ with gr.Blocks(
212
  )
213
  with gr.Column():
214
  num_rows = gr.Number(
215
- value=1, label="Number of rows in the dataset", minimum=1
216
  )
217
 
218
  dataset_name_push_to_hub = gr.Textbox(label="Dataset Name to push to Hub")
@@ -223,8 +258,7 @@ with gr.Blocks(
223
 
224
  btn_generate_full_dataset.click(
225
  fn=_generate_dataset,
226
- inputs=[system_prompt, num_turns, num_rows],
227
- outputs=[table],
228
  )
229
 
230
  demo
 
1
+ import multiprocessing
2
+
3
  import gradio as gr
4
  import pandas as pd
5
  from distilabel.llms import InferenceEndpointsLLM
6
  from distilabel.pipeline import Pipeline
7
  from distilabel.steps.tasks import MagpieGenerator, TextGeneration
8
 
9
+ from distilabel_dataset_generator.utils import OAuthToken, get_login_button
10
+
11
  INFORMATION_SEEKING_PROMPT = (
12
  "You are an AI assistant designed to provide accurate and concise information on a wide"
13
  " range of topics. Your purpose is to assist users in finding specific facts,"
 
116
  User dataset description:
117
  """
118
 
119
+ MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"
120
 
121
  generate_description = TextGeneration(
122
  llm=InferenceEndpointsLLM(
 
133
  generate_description.load()
134
 
135
 
136
+ def _run_pipeline(result_queue, _num_turns, _num_rows, _system_prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  with Pipeline(name="sft") as pipeline:
138
  magpie_step = MagpieGenerator(
139
  llm=InferenceEndpointsLLM(
 
148
  num_rows=_num_rows,
149
  system_prompt=_system_prompt,
150
  )
151
+ distiset = pipeline.run()
152
+ result_queue.put(distiset)
153
+
154
+
155
def _generate_system_prompt(_dataset_description):
    """Turn a free-text dataset description into a system prompt.

    The description is sent as the ``instruction`` of a single request to the
    module-level ``generate_description`` task, with ``PROMPT_CREATION_PROMPT``
    as the ``system_prompt``.

    Args:
        _dataset_description: The user's description of the desired dataset.

    Returns:
        The ``"generation"`` field of the first item in the first batch
        yielded by ``generate_description.process``.
    """
    request = {
        "system_prompt": PROMPT_CREATION_PROMPT,
        "instruction": _dataset_description,
    }
    # ``process`` is consumed with ``next``; only its first yielded batch is used.
    first_batch = next(generate_description.process([request]))
    return first_batch[0]["generation"]
166
+
167
+
168
def _generate_dataset(
    _system_prompt,
    _num_turns=1,
    _num_rows=5,
    _dataset_name=None,
    _token: OAuthToken = None,
):
    """Run the SFT pipeline in a child process and return a preview table.

    The pipeline is executed by ``_run_pipeline`` in a separate
    ``multiprocessing.Process``; its result (the object returned by
    ``pipeline.run()``) is handed back through a ``multiprocessing.Queue``.
    When a dataset name is supplied the result is additionally pushed to the
    Hugging Face Hub.

    Args:
        _system_prompt: System prompt forwarded to the pipeline.
        _num_turns: Number of conversation turns; ``1`` yields
            instruction/response pairs, anything else yields conversations.
        _num_rows: Number of rows to generate.
        _dataset_name: Hub repository id to push to. ``None``, an empty
            string or whitespace (what an empty textbox produces) skips the push.
        _token: Credential used for the push. May be ``None``, a plain token
            string, or an object exposing the token string as ``.token``.

    Returns:
        A ``pandas.DataFrame`` with columns ``instruction``/``response`` for
        single-turn data, or ``conversation_id``/``role``/``content`` (one row
        per message) for multi-turn data.

    Raises:
        gr.Error: If the worker process exits without producing a result.
    """
    import queue  # local import: only needed for the ``queue.Empty`` signal

    gr.Info("Started pipeline execution.")
    result_queue = multiprocessing.Queue()
    p = multiprocessing.Process(
        target=_run_pipeline, args=(result_queue, _num_turns, _num_rows, _system_prompt)
    )
    p.start()

    # Drain the queue BEFORE joining: a child that has put data on a queue
    # does not terminate until that data is flushed to the pipe, so joining
    # first can deadlock. Polling with a timeout also lets us notice a worker
    # that crashed without ever producing a result.
    while True:
        try:
            distiset = result_queue.get(timeout=1)
            break
        except queue.Empty:
            if p.is_alive():
                continue
            # The worker may have flushed its result right before exiting.
            try:
                distiset = result_queue.get(timeout=1)
                break
            except queue.Empty:
                p.join()
                raise gr.Error(
                    "Pipeline execution failed before producing a dataset."
                ) from None
    p.join()

    # Build the preview from the train split regardless of whether we push,
    # so both paths return the same kind of table.
    train_split = distiset["default"]["train"]
    if _num_turns == 1:
        preview = train_split.to_pandas()[["instruction", "response"]]
    else:
        flattened = {"conversation_id": [], "role": [], "content": []}
        # NOTE(review): assumes each entry of the "conversation" column is a
        # list of {"role", "content"} messages -- confirm against the
        # MagpieGenerator output schema.
        for conversation_id, conversation in enumerate(
            train_split["conversation"], start=1
        ):
            for message in conversation:
                flattened["conversation_id"].append(conversation_id)
                flattened["role"].append(message["role"])
                flattened["content"].append(message["content"])
        preview = pd.DataFrame(flattened)

    # An empty textbox yields "" rather than None, so test for real content.
    repo_id = (_dataset_name or "").strip()
    if repo_id:
        gr.Info("Pushing dataset to Hugging Face Hub...")
        distiset.push_to_hub(
            repo_id=repo_id,
            private=False,
            include_script=True,
            # Unwrap OAuth token objects; plain strings and None pass through.
            token=getattr(_token, "token", _token),
        )
        gr.Info("Dataset pushed to Hugging Face Hub: https://huggingface.co")

    return preview
206
 
207
 
208
  with gr.Blocks(
209
  title="⚗️ Distilabel Dataset Generator",
210
  head="⚗️ Distilabel Dataset Generator",
211
  ) as demo:
212
+ get_login_button()
213
+
214
  dataset_description = gr.Textbox(
215
  label="Provide a description of the dataset",
216
  value="A chemistry dataset for an assistant that explains chemical reactions and formulas",
 
247
  )
248
  with gr.Column():
249
  num_rows = gr.Number(
250
+ value=100, label="Number of rows in the dataset", minimum=1
251
  )
252
 
253
  dataset_name_push_to_hub = gr.Textbox(label="Dataset Name to push to Hub")
 
258
 
259
  btn_generate_full_dataset.click(
260
  fn=_generate_dataset,
261
+ inputs=[system_prompt, num_turns, num_rows, dataset_name_push_to_hub],
 
262
  )
263
 
264
  demo
src/distilabel_dataset_generator/utils.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio.oauth import (
3
+ OAUTH_CLIENT_ID,
4
+ OAUTH_CLIENT_SECRET,
5
+ OAUTH_SCOPES,
6
+ OPENID_PROVIDER_URL,
7
+ get_space,
8
+ )
9
+
10
# Choose what ``OAuthToken`` means for the rest of the package: the real
# gradio class when all four OAuth settings are truthy or ``get_space()``
# returns None, otherwise a plain ``str`` stand-in.
if (
    not all(
        (
            OAUTH_CLIENT_ID,
            OAUTH_CLIENT_SECRET,
            OAUTH_SCOPES,
            OPENID_PROVIDER_URL,
        )
    )
    and get_space() is not None
):
    OAuthToken = str
else:
    from gradio.oauth import OAuthToken
24
+
25
+
26
def get_login_button():
    """Create the Hugging Face sign-in button when OAuth can be used.

    Returns:
        A ``gr.LoginButton`` when all four OAuth settings are truthy or
        ``get_space()`` returns None; otherwise ``None``.
    """
    oauth_settings = (
        OAUTH_CLIENT_ID,
        OAUTH_CLIENT_SECRET,
        OAUTH_SCOPES,
        OPENID_PROVIDER_URL,
    )
    # Guard clause: some OAuth setting is missing and get_space() reports a value.
    if not all(oauth_settings) and get_space() is not None:
        return None
    return gr.LoginButton(
        value="Sign in with Hugging Face - a login will reset the data!",
        size="lg",
    )