SaulLu committed on
Commit
c0ec2ff
·
1 Parent(s): 2e0cc12

speed get_profiles

Browse files
dashboard_utils/bubbles.py CHANGED
@@ -1,13 +1,16 @@
1
  import datetime
2
  from urllib import parse
3
 
 
 
4
  import requests
5
  import wandb
 
6
 
7
  URL_QUICKSEARCH = "https://huggingface.co/api/quicksearch?"
8
  WANDB_REPO = "learning-at-home/Worker_logs"
9
 
10
-
11
  def get_new_bubble_data():
12
  serialized_data_points, latest_timestamp = get_serialized_data_points()
13
  serialized_data = get_serialized_data(serialized_data_points, latest_timestamp)
@@ -15,26 +18,38 @@ def get_new_bubble_data():
15
 
16
  return serialized_data, profiles
17
 
18
-
19
  def get_profiles(serialized_data_points):
20
  profiles = []
21
- for username in serialized_data_points.keys():
22
- params = {"type": "user", "q": username}
23
- new_url = URL_QUICKSEARCH + parse.urlencode(params)
24
- r = requests.get(new_url)
25
- response = r.json()
26
- try:
27
- avatarUrl = response["users"][0]["avatarUrl"]
28
- except:
29
- avatarUrl = "/avatars/57584cb934354663ac65baa04e6829bf.svg"
30
- if avatarUrl.startswith("/avatars/"):
31
- avatarUrl = f"https://huggingface.co{avatarUrl}"
32
- profiles.append(
33
- {"id": username, "name": username, "src": avatarUrl, "url": f"https://huggingface.co/{username}"}
34
- )
35
- return profiles
 
 
 
 
36
 
 
 
37
 
 
 
 
 
 
 
38
  def get_serialized_data_points():
39
 
40
  api = wandb.Api()
@@ -42,7 +57,6 @@ def get_serialized_data_points():
42
 
43
  serialized_data_points = {}
44
  latest_timestamp = None
45
- print("**start api call")
46
  for run in runs:
47
  run_summary = run.summary._json_dict
48
  run_name = run.name
@@ -87,10 +101,9 @@ def get_serialized_data_points():
87
  # print(e)
88
  # print([key for key in list(run_summary.keys()) if "gradients" not in key])
89
  latest_timestamp = datetime.datetime.utcfromtimestamp(latest_timestamp)
90
- print("**finish api call")
91
  return serialized_data_points, latest_timestamp
92
 
93
-
94
  def get_serialized_data(serialized_data_points, latest_timestamp):
95
  serialized_data_points_v2 = []
96
  max_velocity = 1
 
1
  import datetime
2
  from urllib import parse
3
 
4
+ from concurrent.futures import as_completed
5
+ from requests_futures.sessions import FuturesSession
6
  import requests
7
  import wandb
8
+ from dashboard_utils.time_tracker import simple_time_tracker, _log
9
 
10
  URL_QUICKSEARCH = "https://huggingface.co/api/quicksearch?"
11
  WANDB_REPO = "learning-at-home/Worker_logs"
12
 
13
+ @simple_time_tracker(_log)
14
  def get_new_bubble_data():
15
  serialized_data_points, latest_timestamp = get_serialized_data_points()
16
  serialized_data = get_serialized_data(serialized_data_points, latest_timestamp)
 
18
 
19
  return serialized_data, profiles
20
 
21
+ @simple_time_tracker(_log)
22
  def get_profiles(serialized_data_points):
23
  profiles = []
24
+ anonymous_taken = False
25
+ with FuturesSession() as session:
26
+ futures=[]
27
+ for username in serialized_data_points.keys():
28
+ future = session.get(URL_QUICKSEARCH + parse.urlencode({"type": "user", "q": username}))
29
+ future.username = username
30
+ futures.append(future)
31
+ for future in as_completed(futures):
32
+ resp = future.result()
33
+ username = future.username
34
+ response = resp.json()
35
+ avatarUrl = None
36
+ if response["users"]:
37
+ for user_candidate in response["users"]:
38
+ if user_candidate['user'] == username:
39
+ avatarUrl = response["users"][0]["avatarUrl"]
40
+ break
41
+ if not avatarUrl:
42
+ avatarUrl = "/avatars/57584cb934354663ac65baa04e6829bf.svg"
43
 
44
+ if avatarUrl.startswith("/avatars/"):
45
+ avatarUrl = f"https://huggingface.co{avatarUrl}"
46
 
47
+ profiles.append(
48
+ {"id": username, "name": username, "src": avatarUrl, "url": f"https://huggingface.co/{username}"}
49
+ )
50
+ return profiles
51
+
52
+ @simple_time_tracker(_log)
53
  def get_serialized_data_points():
54
 
55
  api = wandb.Api()
 
57
 
58
  serialized_data_points = {}
59
  latest_timestamp = None
 
60
  for run in runs:
61
  run_summary = run.summary._json_dict
62
  run_name = run.name
 
101
  # print(e)
102
  # print([key for key in list(run_summary.keys()) if "gradients" not in key])
103
  latest_timestamp = datetime.datetime.utcfromtimestamp(latest_timestamp)
 
104
  return serialized_data_points, latest_timestamp
105
 
106
+ @simple_time_tracker(_log)
107
  def get_serialized_data(serialized_data_points, latest_timestamp):
108
  serialized_data_points_v2 = []
109
  max_velocity = 1
dashboard_utils/main_metrics.py CHANGED
@@ -1,7 +1,9 @@
 
1
  import wandb
2
 
3
  WANDB_REPO = "learning-at-home/Main_metrics"
4
 
 
5
  def get_main_metrics():
6
  api = wandb.Api()
7
  runs = api.runs(WANDB_REPO)
 
1
+ from dashboard_utils.time_tracker import simple_time_tracker, _log
2
  import wandb
3
 
4
  WANDB_REPO = "learning-at-home/Main_metrics"
5
 
6
+ @simple_time_tracker(_log)
7
  def get_main_metrics():
8
  api = wandb.Api()
9
  runs = api.runs(WANDB_REPO)
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  streamlit
2
- wandb
 
 
1
  streamlit
2
+ wandb
3
+ requests_futures