ffreemt committed on
Commit
c5a9402
·
1 Parent(s): af413b3

Update config.yaml litellm_settings

Browse files
Files changed (1) hide show
  1. config.yaml +13 -5
config.yaml CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  model_list:
2
  # northflank
3
  - model_name: gpt-4o-mini-northflank
@@ -31,7 +35,7 @@ model_list:
31
  api_key: os.environ/GEMINI_API_KEY1
32
  rpm: 4 # 2 * # of keys
33
  tpm: 6400 # 32,000 * # of keys
34
-
35
  - model_name: gemini-1.5-flash-k1
36
  litellm_params:
37
  model: gemini/gemini-1.5-flash
@@ -42,7 +46,7 @@ model_list:
42
  api_key: os.environ/GEMINI_API_KEY1
43
  rpm: 30 # 15 * # of keys
44
  tpm: 2000000 # 1,000,000 * # of keys
45
-
46
  - model_name: gemini-1.5-pro-k2
47
  litellm_params:
48
  model: gemini/gemini-1.5-pro
@@ -53,7 +57,7 @@ model_list:
53
  api_key: os.environ/GEMINI_API_KEY
54
  rpm: 4 # 2 * # of keys
55
  tpm: 6400 # 32,000 * # of keys
56
-
57
  - model_name: gemini-1.5-flash-k2
58
  litellm_params:
59
  model: gemini/gemini-1.5-flash
@@ -68,7 +72,11 @@ model_list:
68
  litellm_settings:
69
  # Networking settings
70
  request_timeout: 20 # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 20s. Sets litellm.request_timeout
71
-
72
- general_settings:
 
 
 
 
73
  master_key: os.environ/MASTER_KEY # sk-1234 # [OPTIONAL] Only use this if you require all calls to contain this key (Authorization: Bearer sk-1234)
74
  routing_strategy: simple-shuffle
 
1
+ # aistudio gemini free, Rate Limits https://ai.google.dev/pricing#1_5pro
2
+ # gemini-1.5-pro: 2 rpm, 32,000 tpm, 50 RPD
3
+ # gemini-1.5-flash: 15 rpm, 1,000,000 tpm, 1,500 RPD
4
+
5
  model_list:
6
  # northflank
7
  - model_name: gpt-4o-mini-northflank
 
35
  api_key: os.environ/GEMINI_API_KEY1
36
  rpm: 4 # 2 * # of keys
37
  tpm: 6400 # 32,000 * # of keys
38
+
39
  - model_name: gemini-1.5-flash-k1
40
  litellm_params:
41
  model: gemini/gemini-1.5-flash
 
46
  api_key: os.environ/GEMINI_API_KEY1
47
  rpm: 30 # 15 * # of keys
48
  tpm: 2000000 # 1,000,000 * # of keys
49
+
50
  - model_name: gemini-1.5-pro-k2
51
  litellm_params:
52
  model: gemini/gemini-1.5-pro
 
57
  api_key: os.environ/GEMINI_API_KEY
58
  rpm: 4 # 2 * # of keys
59
  tpm: 6400 # 32,000 * # of keys
60
+
61
  - model_name: gemini-1.5-flash-k2
62
  litellm_params:
63
  model: gemini/gemini-1.5-flash
 
72
  litellm_settings:
73
  # Networking settings
74
  request_timeout: 20 # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 20s. Sets litellm.request_timeout
75
+ num_retries: 3
76
+ fallbacks: [{"gemini-1.5-pro": ["gpt-4o-mini"]}]
77
+ allowed_fails: 3 # cooldown model if it fails more than 3 calls in a minute.
78
+ cooldown_time: 30 # seconds to cool down a model once its fails/min exceed allowed_fails
79
+
80
+ general_settings:
81
  master_key: os.environ/MASTER_KEY # sk-1234 # [OPTIONAL] Only use this if you require all calls to contain this key (Authorization: Bearer sk-1234)
82
  routing_strategy: simple-shuffle