tricktreat committed on
Commit
5471e91
·
1 Parent(s): f3e41d6

text to video

Browse files
Files changed (4) hide show
  1. app.py +4 -2
  2. awesome_chat.py +9 -4
  3. config.gradio.yaml +1 -1
  4. models_server.py +24 -24
app.py CHANGED
@@ -115,7 +115,8 @@ with gr.Blocks() as demo:
115
  openai_api_key = gr.Textbox(
116
  show_label=False,
117
  placeholder="Set your OpenAI API key here and press Enter",
118
- lines=1
 
119
  ).style(container=False)
120
  with gr.Column(scale=0.15, min_width=0):
121
  btn1 = gr.Button("Submit").style(full_height=True)
@@ -125,7 +126,8 @@ with gr.Blocks() as demo:
125
  hugging_face_token = gr.Textbox(
126
  show_label=False,
127
  placeholder="Set your Hugging Face Token here and press Enter",
128
- lines=1
 
129
  ).style(container=False)
130
  with gr.Column(scale=0.15, min_width=0):
131
  btn3 = gr.Button("Submit").style(full_height=True)
 
115
  openai_api_key = gr.Textbox(
116
  show_label=False,
117
  placeholder="Set your OpenAI API key here and press Enter",
118
+ lines=1,
119
+ type="password"
120
  ).style(container=False)
121
  with gr.Column(scale=0.15, min_width=0):
122
  btn1 = gr.Button("Submit").style(full_height=True)
 
126
  hugging_face_token = gr.Textbox(
127
  show_label=False,
128
  placeholder="Set your Hugging Face Token here and press Enter",
129
+ lines=1,
130
+ type="password"
131
  ).style(container=False)
132
  with gr.Column(scale=0.15, min_width=0):
133
  btn3 = gr.Button("Submit").style(full_height=True)
awesome_chat.py CHANGED
@@ -152,6 +152,8 @@ def send_request(data):
152
 
153
  response = requests.post(endpoint, json=data, headers=HEADER, proxies=PROXY)
154
  logger.debug(response.text.strip())
 
 
155
  if use_completion:
156
  return response.json()["choices"][0]["text"].strip()
157
  else:
@@ -576,14 +578,14 @@ def model_inference(model_id, data, hosted_on, task, huggingfacetoken=None):
576
  HUGGINGFACE_HEADERS = None
577
  if hosted_on == "unknown":
578
  r = status(model_id)
579
- logger.debug("Local Server Status: " + str(r.json()))
580
- if r.status_code == 200 and "loaded" in r.json() and r.json()["loaded"]:
581
  hosted_on = "local"
582
  else:
583
  huggingfaceStatusUrl = f"https://api-inference.huggingface.co/status/{model_id}"
584
  r = requests.get(huggingfaceStatusUrl, headers=HUGGINGFACE_HEADERS, proxies=PROXY)
585
  logger.debug("Huggingface Status: " + str(r.json()))
586
- if r.status_code == 200 and "loaded" in r.json() and r.json()["loaded"]:
587
  hosted_on = "huggingface"
588
  try:
589
  if hosted_on == "local":
@@ -603,7 +605,7 @@ def get_model_status(model_id, url, headers, queue = None):
603
  r = requests.get(url, headers=headers, proxies=PROXY)
604
  else:
605
  r = status(model_id)
606
- if r.status_code == 200 and "loaded" in r.json() and r.json()["loaded"]:
607
  if queue:
608
  queue.put((model_id, True, endpoint_type))
609
  return True
@@ -836,6 +838,9 @@ def chat_huggingface(messages, openaikey = None, huggingfacetoken = None, return
836
  task_str = parse_task(context, input, openaikey).strip()
837
  logger.info(task_str)
838
 
 
 
 
839
  if task_str == "[]": # using LLM response for empty task
840
  record_case(success=False, **{"input": input, "task": [], "reason": "task parsing fail: empty", "op": "chitchat"})
841
  response = chitchat(messages, openaikey)
 
152
 
153
  response = requests.post(endpoint, json=data, headers=HEADER, proxies=PROXY)
154
  logger.debug(response.text.strip())
155
+ if "choices" not in response.json():
156
+ return response.json()
157
  if use_completion:
158
  return response.json()["choices"][0]["text"].strip()
159
  else:
 
578
  HUGGINGFACE_HEADERS = None
579
  if hosted_on == "unknown":
580
  r = status(model_id)
581
+ logger.debug("Local Server Status: " + str(r))
582
+ if "loaded" in r and r["loaded"]:
583
  hosted_on = "local"
584
  else:
585
  huggingfaceStatusUrl = f"https://api-inference.huggingface.co/status/{model_id}"
586
  r = requests.get(huggingfaceStatusUrl, headers=HUGGINGFACE_HEADERS, proxies=PROXY)
587
  logger.debug("Huggingface Status: " + str(r.json()))
588
+ if "loaded" in r and r["loaded"]:
589
  hosted_on = "huggingface"
590
  try:
591
  if hosted_on == "local":
 
605
  r = requests.get(url, headers=headers, proxies=PROXY)
606
  else:
607
  r = status(model_id)
608
+ if "loaded" in r and r["loaded"]:
609
  if queue:
610
  queue.put((model_id, True, endpoint_type))
611
  return True
 
838
  task_str = parse_task(context, input, openaikey).strip()
839
  logger.info(task_str)
840
 
841
+ if "error" in task_str:
842
+ return {"message": "You exceeded your current quota, please check your plan and billing details."}
843
+
844
  if task_str == "[]": # using LLM response for empty task
845
  record_case(success=False, **{"input": input, "task": [], "reason": "task parsing fail: empty", "op": "chitchat"})
846
  response = chitchat(messages, openaikey)
config.gradio.yaml CHANGED
@@ -8,7 +8,7 @@ log_file: logs/debug.log
8
  model: text-davinci-003 # text-davinci-003
9
  use_completion: true
10
  inference_mode: hybrid # local, huggingface or hybrid
11
- local_deployment: minimal # minimal, standard or full
12
  num_candidate_models: 5
13
  max_description_length: 100
14
  proxy:
 
8
  model: text-davinci-003 # text-davinci-003
9
  use_completion: true
10
  inference_mode: hybrid # local, huggingface or hybrid
11
+ local_deployment: full # minimal, standard or full
12
  num_candidate_models: 5
13
  max_description_length: 100
14
  proxy:
models_server.py CHANGED
@@ -78,9 +78,9 @@ def load_pipes(local_deployment):
78
  if local_deployment in ["full"]:
79
  other_pipes = {
80
  "nlpconnect/vit-gpt2-image-captioning":{
81
- "model": VisionEncoderDecoderModel.from_pretrained(f"nlpconnect/vit-gpt2-image-captioning"),
82
- "feature_extractor": ViTImageProcessor.from_pretrained(f"nlpconnect/vit-gpt2-image-captioning"),
83
- "tokenizer": AutoTokenizer.from_pretrained(f"nlpconnect/vit-gpt2-image-captioning"),
84
  "device": "cuda:0"
85
  },
86
  # "Salesforce/blip-image-captioning-large": {
@@ -89,7 +89,7 @@ def load_pipes(local_deployment):
89
  # "device": "cuda:0"
90
  # },
91
  "damo-vilab/text-to-video-ms-1.7b": {
92
- "model": DiffusionPipeline.from_pretrained(f"damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"),
93
  "device": "cuda:0"
94
  },
95
  # "facebook/maskformer-swin-large-ade": {
@@ -112,11 +112,11 @@ def load_pipes(local_deployment):
112
  "device": "cuda:0"
113
  },
114
  "espnet/kan-bayashi_ljspeech_vits": {
115
- "model": Text2Speech.from_pretrained(f"espnet/kan-bayashi_ljspeech_vits"),
116
  "device": "cuda:0"
117
  },
118
  "lambdalabs/sd-image-variations-diffusers": {
119
- "model": DiffusionPipeline.from_pretrained(f"lambdalabs/sd-image-variations-diffusers"), #torch_dtype=torch.float16
120
  "device": "cuda:0"
121
  },
122
  # "CompVis/stable-diffusion-v1-4": {
@@ -128,7 +128,7 @@ def load_pipes(local_deployment):
128
  # "device": "cuda:0"
129
  # },
130
  "runwayml/stable-diffusion-v1-5": {
131
- "model": DiffusionPipeline.from_pretrained(f"runwayml/stable-diffusion-v1-5"),
132
  "device": "cuda:0"
133
  },
134
  # "microsoft/speecht5_tts":{
@@ -143,10 +143,10 @@ def load_pipes(local_deployment):
143
  # "device": "cuda:0"
144
  # },
145
  "microsoft/speecht5_vc":{
146
- "processor": SpeechT5Processor.from_pretrained(f"microsoft/speecht5_vc"),
147
- "model": SpeechT5ForSpeechToSpeech.from_pretrained(f"microsoft/speecht5_vc"),
148
- "vocoder": SpeechT5HifiGan.from_pretrained(f"microsoft/speecht5_hifigan"),
149
- "embeddings_dataset": load_dataset(f"Matthijs/cmu-arctic-xvectors", split="validation"),
150
  "device": "cuda:0"
151
  },
152
  # "julien-c/wine-quality": {
@@ -158,13 +158,13 @@ def load_pipes(local_deployment):
158
  # "device": "cuda:0"
159
  # },
160
  "facebook/maskformer-swin-base-coco": {
161
- "feature_extractor": MaskFormerFeatureExtractor.from_pretrained(f"facebook/maskformer-swin-base-coco"),
162
- "model": MaskFormerForInstanceSegmentation.from_pretrained(f"facebook/maskformer-swin-base-coco"),
163
  "device": "cuda:0"
164
  },
165
  "Intel/dpt-hybrid-midas": {
166
- "model": DPTForDepthEstimation.from_pretrained(f"Intel/dpt-hybrid-midas", low_cpu_mem_usage=True),
167
- "feature_extractor": DPTFeatureExtractor.from_pretrained(f"Intel/dpt-hybrid-midas"),
168
  "device": "cuda:0"
169
  }
170
  }
@@ -176,15 +176,15 @@ def load_pipes(local_deployment):
176
  # "device": "cuda:0"
177
  # },
178
  "openai/whisper-base": {
179
- "model": pipeline(task="automatic-speech-recognition", model=f"openai/whisper-base"),
180
  "device": "cuda:0"
181
  },
182
  "microsoft/speecht5_asr": {
183
- "model": pipeline(task="automatic-speech-recognition", model=f"microsoft/speecht5_asr"),
184
  "device": "cuda:0"
185
  },
186
  "Intel/dpt-large": {
187
- "model": pipeline(task="depth-estimation", model=f"Intel/dpt-large"),
188
  "device": "cuda:0"
189
  },
190
  # "microsoft/beit-base-patch16-224-pt22k-ft22k": {
@@ -192,11 +192,11 @@ def load_pipes(local_deployment):
192
  # "device": "cuda:0"
193
  # },
194
  "facebook/detr-resnet-50-panoptic": {
195
- "model": pipeline(task="image-segmentation", model=f"facebook/detr-resnet-50-panoptic"),
196
  "device": "cuda:0"
197
  },
198
  "facebook/detr-resnet-101": {
199
- "model": pipeline(task="object-detection", model=f"facebook/detr-resnet-101"),
200
  "device": "cuda:0"
201
  },
202
  # "openai/clip-vit-large-patch14": {
@@ -204,7 +204,7 @@ def load_pipes(local_deployment):
204
  # "device": "cuda:0"
205
  # },
206
  "google/owlvit-base-patch32": {
207
- "model": pipeline(task="zero-shot-object-detection", model=f"google/owlvit-base-patch32"),
208
  "device": "cuda:0"
209
  },
210
  # "microsoft/DialoGPT-medium": {
@@ -248,15 +248,15 @@ def load_pipes(local_deployment):
248
  # "device": "cuda:0"
249
  # },
250
  "impira/layoutlm-document-qa": {
251
- "model": pipeline(task="document-question-answering", model=f"impira/layoutlm-document-qa"),
252
  "device": "cuda:0"
253
  },
254
  "ydshieh/vit-gpt2-coco-en": {
255
- "model": pipeline(task="image-to-text", model=f"ydshieh/vit-gpt2-coco-en"),
256
  "device": "cuda:0"
257
  },
258
  "dandelin/vilt-b32-finetuned-vqa": {
259
- "model": pipeline(task="visual-question-answering", model=f"dandelin/vilt-b32-finetuned-vqa"),
260
  "device": "cuda:0"
261
  }
262
  }
 
78
  if local_deployment in ["full"]:
79
  other_pipes = {
80
  "nlpconnect/vit-gpt2-image-captioning":{
81
+ "model": VisionEncoderDecoderModel.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
82
+ "feature_extractor": ViTImageProcessor.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
83
+ "tokenizer": AutoTokenizer.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
84
  "device": "cuda:0"
85
  },
86
  # "Salesforce/blip-image-captioning-large": {
 
89
  # "device": "cuda:0"
90
  # },
91
  "damo-vilab/text-to-video-ms-1.7b": {
92
+ "model": DiffusionPipeline.from_pretrained(f"{local_models}damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"),
93
  "device": "cuda:0"
94
  },
95
  # "facebook/maskformer-swin-large-ade": {
 
112
  "device": "cuda:0"
113
  },
114
  "espnet/kan-bayashi_ljspeech_vits": {
115
+ "model": Text2Speech.from_pretrained("espnet/kan-bayashi_ljspeech_vits"),
116
  "device": "cuda:0"
117
  },
118
  "lambdalabs/sd-image-variations-diffusers": {
119
+ "model": DiffusionPipeline.from_pretrained(f"{local_models}lambdalabs/sd-image-variations-diffusers"), #torch_dtype=torch.float16
120
  "device": "cuda:0"
121
  },
122
  # "CompVis/stable-diffusion-v1-4": {
 
128
  # "device": "cuda:0"
129
  # },
130
  "runwayml/stable-diffusion-v1-5": {
131
+ "model": DiffusionPipeline.from_pretrained(f"{local_models}runwayml/stable-diffusion-v1-5"),
132
  "device": "cuda:0"
133
  },
134
  # "microsoft/speecht5_tts":{
 
143
  # "device": "cuda:0"
144
  # },
145
  "microsoft/speecht5_vc":{
146
+ "processor": SpeechT5Processor.from_pretrained(f"{local_models}microsoft/speecht5_vc"),
147
+ "model": SpeechT5ForSpeechToSpeech.from_pretrained(f"{local_models}microsoft/speecht5_vc"),
148
+ "vocoder": SpeechT5HifiGan.from_pretrained(f"{local_models}microsoft/speecht5_hifigan"),
149
+ "embeddings_dataset": load_dataset(f"{local_models}Matthijs/cmu-arctic-xvectors", split="validation"),
150
  "device": "cuda:0"
151
  },
152
  # "julien-c/wine-quality": {
 
158
  # "device": "cuda:0"
159
  # },
160
  "facebook/maskformer-swin-base-coco": {
161
+ "feature_extractor": MaskFormerFeatureExtractor.from_pretrained(f"{local_models}facebook/maskformer-swin-base-coco"),
162
+ "model": MaskFormerForInstanceSegmentation.from_pretrained(f"{local_models}facebook/maskformer-swin-base-coco"),
163
  "device": "cuda:0"
164
  },
165
  "Intel/dpt-hybrid-midas": {
166
+ "model": DPTForDepthEstimation.from_pretrained(f"{local_models}Intel/dpt-hybrid-midas", low_cpu_mem_usage=True),
167
+ "feature_extractor": DPTFeatureExtractor.from_pretrained(f"{local_models}Intel/dpt-hybrid-midas"),
168
  "device": "cuda:0"
169
  }
170
  }
 
176
  # "device": "cuda:0"
177
  # },
178
  "openai/whisper-base": {
179
+ "model": pipeline(task="automatic-speech-recognition", model=f"{local_models}openai/whisper-base"),
180
  "device": "cuda:0"
181
  },
182
  "microsoft/speecht5_asr": {
183
+ "model": pipeline(task="automatic-speech-recognition", model=f"{local_models}microsoft/speecht5_asr"),
184
  "device": "cuda:0"
185
  },
186
  "Intel/dpt-large": {
187
+ "model": pipeline(task="depth-estimation", model=f"{local_models}Intel/dpt-large"),
188
  "device": "cuda:0"
189
  },
190
  # "microsoft/beit-base-patch16-224-pt22k-ft22k": {
 
192
  # "device": "cuda:0"
193
  # },
194
  "facebook/detr-resnet-50-panoptic": {
195
+ "model": pipeline(task="image-segmentation", model=f"{local_models}facebook/detr-resnet-50-panoptic"),
196
  "device": "cuda:0"
197
  },
198
  "facebook/detr-resnet-101": {
199
+ "model": pipeline(task="object-detection", model=f"{local_models}facebook/detr-resnet-101"),
200
  "device": "cuda:0"
201
  },
202
  # "openai/clip-vit-large-patch14": {
 
204
  # "device": "cuda:0"
205
  # },
206
  "google/owlvit-base-patch32": {
207
+ "model": pipeline(task="zero-shot-object-detection", model=f"{local_models}google/owlvit-base-patch32"),
208
  "device": "cuda:0"
209
  },
210
  # "microsoft/DialoGPT-medium": {
 
248
  # "device": "cuda:0"
249
  # },
250
  "impira/layoutlm-document-qa": {
251
+ "model": pipeline(task="document-question-answering", model=f"{local_models}impira/layoutlm-document-qa"),
252
  "device": "cuda:0"
253
  },
254
  "ydshieh/vit-gpt2-coco-en": {
255
+ "model": pipeline(task="image-to-text", model=f"{local_models}ydshieh/vit-gpt2-coco-en"),
256
  "device": "cuda:0"
257
  },
258
  "dandelin/vilt-b32-finetuned-vqa": {
259
+ "model": pipeline(task="visual-question-answering", model=f"{local_models}dandelin/vilt-b32-finetuned-vqa"),
260
  "device": "cuda:0"
261
  }
262
  }