neindochoh committed on
Commit
bfb1446
·
1 Parent(s): e478321

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -1
  2. README.md +6 -5
  3. run.py +13 -3
Dockerfile CHANGED
@@ -1,6 +1,6 @@
1
  FROM python:3.10
2
 
3
- ARG SPOTLIGHT_VERSION=1.5.0rc1
4
 
5
  RUN useradd -m -u 1000 user
6
 
 
1
  FROM python:3.10
2
 
3
+ ARG SPOTLIGHT_VERSION=1.5.0
4
 
5
  RUN useradd -m -u 1000 user
6
 
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
- title:
3
  emoji: 🔬
4
  colorFrom: indigo
5
  colorTo: green
6
  sdk: docker
7
  app_port: 7860
8
- # models: []
9
- # datasets: []
10
- # tags: []
11
  pinned: false
12
  license: mit
13
- ---
 
 
 
1
  ---
2
+ title: Spotlight beans
3
  emoji: 🔬
4
  colorFrom: indigo
5
  colorTo: green
6
  sdk: docker
7
  app_port: 7860
8
+ datasets: [beans, renumics/spotlight-beans-enrichment]
9
+ tags: [renumics, spotlight, EDA, enriched, data-centric-ai, viewer]
 
10
  pinned: false
11
  license: mit
12
+ ---
13
+
14
+ # Explore beans with [Renumics Spotlight](https://github.com/renumics/spotlight)!
run.py CHANGED
@@ -9,7 +9,7 @@ from typing import Optional
9
 
10
  import datasets
11
  import huggingface_hub
12
- from renumics import spotlight
13
 
14
 
15
  def login() -> None:
@@ -32,6 +32,7 @@ class HFSettings:
32
  revision: Optional[str] = None
33
 
34
  enrichment: Optional[str] = None
 
35
 
36
  @classmethod
37
  def from_environ(cls) -> "HFSettings":
@@ -50,6 +51,7 @@ class HFSettings:
50
  os.environ.get("HF_SPLIT") or None,
51
  os.environ.get("HF_REVISION") or None,
52
  os.environ.get("HF_ENRICHMENT") or None,
 
53
  )
54
 
55
  def __str__(self) -> str:
@@ -75,7 +77,7 @@ if __name__ == "__main__":
75
  hf_settings.enrichment,
76
  hf_settings.subset,
77
  split=hf_settings.split,
78
- revision=hf_settings.revision,
79
  )
80
  if len(ds_enrichment) != len(ds):
81
  raise RuntimeError(
@@ -83,6 +85,12 @@ if __name__ == "__main__":
83
  f"mismatches length of the original dataset ({len(ds)})"
84
  )
85
  ds = datasets.concatenate_datasets([ds, ds_enrichment], split=ds.split, axis=1)
 
 
 
 
 
 
86
  if not isinstance(ds, datasets.Dataset):
87
  raise TypeError(
88
  f"Loaded Hugging Face dataset is of type {type(ds)} instead of "
@@ -90,4 +98,6 @@ if __name__ == "__main__":
90
  "(use environment variables `HF_SUBSET` and `HF_SPLIT` respective)?"
91
  )
92
  print(f"Serving Hugging Face dataset {hf_settings}.")
93
- spotlight.show(ds, host="0.0.0.0", port=7860, wait="forever")
 
 
 
9
 
10
  import datasets
11
  import huggingface_hub
12
+ from renumics import spotlight # type: ignore
13
 
14
 
15
  def login() -> None:
 
32
  revision: Optional[str] = None
33
 
34
  enrichment: Optional[str] = None
35
+ enrichment_revision: Optional[str] = None
36
 
37
  @classmethod
38
  def from_environ(cls) -> "HFSettings":
 
51
  os.environ.get("HF_SPLIT") or None,
52
  os.environ.get("HF_REVISION") or None,
53
  os.environ.get("HF_ENRICHMENT") or None,
54
+ os.environ.get("HF_ENRICHMENT_REVISION") or None,
55
  )
56
 
57
  def __str__(self) -> str:
 
77
  hf_settings.enrichment,
78
  hf_settings.subset,
79
  split=hf_settings.split,
80
+ revision=hf_settings.enrichment_revision,
81
  )
82
  if len(ds_enrichment) != len(ds):
83
  raise RuntimeError(
 
85
  f"mismatches length of the original dataset ({len(ds)})"
86
  )
87
  ds = datasets.concatenate_datasets([ds, ds_enrichment], split=ds.split, axis=1)
88
+
89
+ dtypes = {}
90
+ for col in ds.column_names:
91
+ if "embedding" in col and isinstance(ds.features[col], datasets.Sequence):
92
+ dtypes[col] = spotlight.dtypes.embedding_dtype
93
+
94
  if not isinstance(ds, datasets.Dataset):
95
  raise TypeError(
96
  f"Loaded Hugging Face dataset is of type {type(ds)} instead of "
 
98
  "(use environment variables `HF_SUBSET` and `HF_SPLIT` respective)?"
99
  )
100
  print(f"Serving Hugging Face dataset {hf_settings}.")
101
+ spotlight.show(
102
+ ds, host="0.0.0.0", port=7860, wait="forever", dtype=dtypes, analyze=True
103
+ )