lamhieu committed on
Commit
6518cfa
·
1 Parent(s): 05c1b80

chore: update something

Browse files
Files changed (3) hide show
  1. README.md +16 -9
  2. app.py +431 -244
  3. requirements.txt +6 -5
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Ghost 8B Beta (β, 8k, Online)
3
- emoji: 👻 / 🥸
4
- colorFrom: indigo
5
- colorTo: pink
6
  sdk: gradio
7
  sdk_version: 4.36.1
8
  app_file: app.py
@@ -10,15 +10,22 @@ pinned: true
10
  header: mini
11
  suggested_hardware: a10g-small
12
  language:
13
- - en
14
  - vi
 
15
  - es
16
  - pt
17
- - de
18
- - it
19
- - fr
20
- - ko
21
  - zh
 
 
 
 
 
 
 
 
 
 
 
22
  license: other
23
  license_name: ghost-open-llms
24
  license_link: https://ghost-x.org/ghost-open-llms-license
 
1
  ---
2
+ title: Ghost 8B Beta (β, 8k)
3
+ emoji: 👻 / 💬
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 4.36.1
8
  app_file: app.py
 
10
  header: mini
11
  suggested_hardware: a10g-small
12
  language:
 
13
  - vi
14
+ - ko
15
  - es
16
  - pt
 
 
 
 
17
  - zh
18
+ - fr
19
+ - it
20
+ - de
21
+ - ja
22
+ - ru
23
+ - pl
24
+ - nl
25
+ - hi
26
+ - tr
27
+ - id
28
+ - en
29
  license: other
30
  license_name: ghost-open-llms
31
  license_link: https://ghost-x.org/ghost-open-llms-license
app.py CHANGED
@@ -3,6 +3,8 @@
3
  import subprocess
4
  import json
5
  import requests
 
 
6
 
7
  subprocess.run(
8
  f"pip install flash-attn --no-build-isolation",
@@ -17,33 +19,77 @@ from typing import Iterator
17
  import gradio as gr
18
  import spaces
19
  import torch
 
20
  import wikipedia
21
  import time
22
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 
 
 
 
 
23
  from bs4 import BeautifulSoup
24
  from functools import lru_cache
25
 
 
 
 
26
 
27
  MAX_MAX_NEW_TOKENS = 4096
28
  DEFAULT_MAX_NEW_TOKENS = 1536
29
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  DESCRIPTION = """\
32
- # Playground with Ghost 8B Beta (β, 8k, Online)
33
 
34
- **Ghost 8B Beta** model outperforms prominent models such as Llama 3 8B Instruct, GPT 3.5 Turbo in the lc_winrate score. In addition, it also outperforms Claude 3 Opus, Claude 3 Sonnet, GPT-4, and Mistral Large when comparing the winrate score of AlpacaEval 2.0, [*](https://ghost-x.org/docs/models/ghost-8b-beta/). The model comes in two context length versions, [8k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-8k) and [128k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-128k), along with multilingual function tools support by default.
35
 
36
- The languages supported are 🇺🇸 English, 🇫🇷 French, 🇮🇹 Italian, 🇪🇸 Spanish, 🇵🇹 Portuguese, 🇩🇪 German, 🇻🇳 Vietnamese, 🇰🇷 Korean and 🇨🇳 Chinese.
 
37
 
38
  🗞️ **Updates**
39
- * Jul 23, 2024: added support for tools, now available to search for information on the internet.
 
40
  """
41
 
42
 
43
  PLACEHOLDER = """
44
  <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
45
- <h1 style="font-size: 26px; margin-bottom: 2px; opacity: 0.20;">👻 Ghost 8B Beta</h1>
46
- <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.10;">Ask and share whatever you want ~</p>
47
  </div>
48
  """
49
 
@@ -54,222 +100,85 @@ LICENSE = """
54
  Ghost 8B Beta may give inaccurate information, including information about people, so please verify Ghost 8B Beta's answers. [Ghost 8B Beta](https://ghost-x.org/docs/models/ghost-8b-beta/) by [Ghost X](https://ghost-x.org).
55
  """
56
 
57
- EXAMPLES = [
58
- [
59
- "What is the significance of the Higgs boson in the Standard Model of particle physics?"
60
- ],
61
- [
62
- "Qu'est-ce que l'effet fondateur et comment influence-t-il la diversité génétique d'une population?"
63
- ],
64
- ["Qual è il principio di Le Chatelier e come si applica agli equilibri chimici?"],
65
- [
66
- "¿Qué es una supernova y cuál es su importancia en la formación de elementos pesados en el universo?"
67
- ],
68
- [
69
- "Qual é a definição formal de uma integral de linha e como é utilizada em física?"
70
- ],
71
- [
72
- "Was versteht man unter dem Moho-Diskontinuität und welche Bedeutung hat sie für das Verständnis der Erdkruste?"
73
- ],
74
- [
75
- "Hiện tượng nhà kính là gì và nó ảnh hưởng như thế nào đến biến đổi khí hậu toàn cầu?"
76
- ],
77
- [
78
- "알고리즘의 시간 복잡도가 중요한 이유는 무엇이며, 시간 복잡도를 어떻게 분석하나요?"
79
- ],
80
- ["什么是CRISPR-Cas9基因编辑技术,它在现代生物学研究中的作用是什么?"],
81
- [
82
- "Create a Python function that takes a list of integers and returns the list sorted in ascending order without using the built-in sort or sorted functions."
83
- ],
84
- [
85
- "Écrivez une fonction en C++ qui trouve le plus long sous-tableau contigu avec une somme égale à zéro."
86
- ],
87
- [
88
- "Scrivi una funzione in Java che calcola il fattoriale di un numero utilizzando la ricorsione."
89
- ],
90
- [
91
- "Desarrolla una función en JavaScript que determine si una cadena de texto es un palíndromo, ignorando espacios y signos de puntuación."
92
- ],
93
- ["Implemente uma função em C# que verifique se uma matriz quadrada é simétrica."],
94
- [
95
- "Schreiben Sie eine Funktion in Swift, die eine gegebene Zeichenfolge in umgekehrter Reihenfolge zurückgibt, ohne integrierte Funktionen zu verwenden."
96
- ],
97
- [
98
- "Viết một hàm trong PHP để tìm tất cả các số nguyên tố trong một khoảng cho trước."
99
- ],
100
- [
101
- "파이썬을 사용하여 주어진 이진 트리가 이진 탐색 트리인지 확인하는 함수를 작성하십시오."
102
- ],
103
- [
104
- "用 Go 语言编写一个函数,计算给定字符串中每个字符出现的次数,并返回一个包含字符及其出现次数的映射。"
105
- ],
106
- [
107
- "Can you help me design a detailed project plan for developing a machine learning model for predicting stock prices?"
108
- ],
109
- [
110
- "Pouvez-vous m'aider à organiser un emploi du temps hebdomadaire pour maximiser la productivité de mon équipe de développement logiciel?"
111
- ],
112
- [
113
- "Puoi aiutarmi a creare un piano di sviluppo per un'applicazione mobile che gestisce le prenotazioni di ristoranti?"
114
- ],
115
- [
116
- "¿Podrías ayudarme a elaborar un plan detallado para la implementación de un sistema de gestión de contenido (CMS) en una empresa mediana?"
117
- ],
118
- [
119
- "Você pode me ajudar a planejar uma estratégia de desenvolvimento para um sistema de comércio eletrônico escalável?"
120
- ],
121
- [
122
- "Können Sie mir helfen, einen detaillierten Zeitplan für die Implementierung eines neuen ERP-Systems in unserem Unternehmen zu erstellen?"
123
- ],
124
- [
125
- "Bạn có thể giúp tôi xây dựng một kế hoạch phát triển chi tiết cho dự án xây dựng hệ thống quản lý chuỗi cung ứng không?"
126
- ],
127
- [
128
- "신경망 기반 이미지 인식 모델 개발을 위한 세부 프로젝트 계획을 세우는 데 도움을 줄 수 있나요?"
129
- ],
130
- ["你能帮我制定一个详细的开发计划,用于创建一个基于区块链的分布式账本系统吗?"],
131
- [
132
- "Prove that the sum of the squares of any two sides of a right triangle is equal to the square of the hypotenuse."
133
- ],
134
- [
135
- "Calculez la force gravitationnelle entre deux masses de 10 kg chacune séparées par une distance de 1 mètre."
136
- ],
137
- [
138
- "Determina la formula molecolare di un composto che contiene il 40% di carbonio, il 6.67% di idrogeno e il 53.33% di ossigeno in massa."
139
- ],
140
- [
141
- "Explica la teoría del ciclo económico de Schumpeter y cómo se aplica a la economía moderna."
142
- ],
143
- [
144
- "Calcule a energia potencial gravitacional de um objeto de 5 kg a uma altura de 10 metros acima do solo (g = 9,8 m/s²)."
145
- ],
146
- [
147
- "Beweisen Sie, dass jede Primzahl der Form 4k+1 als Summe zweier Quadrate geschrieben werden kann."
148
- ],
149
- [
150
- "Tính nồng độ mol của dung dịch H₂SO₄ khi hoà tan 98 gam H₂SO₄ vào nước để được 1 lít dung dịch."
151
- ],
152
- ["케인스 경제학의 핵심 개념과 그것이 현대 경제 정책에 미치는 영향을 설명하십시오."],
153
- ["计算一个质量为2 kg的物体在3米高处的重力势能(g = 9.8 m/s²)。"],
154
- [
155
- 'Identify the author of a novel that features a dystopian society where "Big Brother" watches over its citizens and the protagonist works for the Ministry of Truth.'
156
- ],
157
- [
158
- "Quel est le seul mammifère capable de voler activement, souvent associé à la nuit et capable d'écholocalisation?"
159
- ],
160
- [
161
- "Qual è l'opera letteraria italiana che narra il viaggio immaginario di un poeta attraverso Inferno, Purgatorio e Paradiso, guidato da Virgilio e Beatrice?"
162
- ],
163
- [
164
- "¿Qué insecto es conocido por su organización social compleja, su capacidad para producir miel y su comunicación mediante la danza?"
165
- ],
166
- [
167
- "Qual é o fenômeno atmosférico que ocorre quando uma massa de ar quente se encontra com uma massa de ar frio, resultando em uma violenta tempestade giratória?"
168
- ],
169
- [
170
- "Welches literarische Werk beschreibt die Geschichte eines jungen Mädchens, das durch einen Kaninchenbau in eine fantastische Welt voller skurriler Charaktere fällt?"
171
- ],
172
- [
173
- "Động vật nào có thể tái sinh toàn bộ cơ thể từ một mảnh nhỏ của chính nó, thường sống dưới nước và có thể có nhiều xúc tu?"
174
- ],
175
- [
176
- "어떤 자연 현상은 태양빛이 대기 중의 물방울에 반사되고 굴절되어 발생하며, 하늘에 나타나는 여러 색깔의 아치 형태를 띠나요?"
177
- ],
178
- ["这部文学作品讲述了一位绅士和他的侍从的冒险故事,他们在"],
179
- [
180
- "Can you derive the Euler-Lagrange equation from the principle of stationary action in classical mechanics?"
181
- ],
182
- [
183
- "Expliquez la notion de « différence ontologique » chez Martin Heidegger et son importance pour la phénoménologie."
184
- ],
185
- [
186
- "Qual è il significato simbolico del colore blu nei dipinti di Giotto di Bondone durante il Rinascimento?"
187
- ],
188
- [
189
- "¿Cómo afecta el cambio de código a la estructura gramatical en comunidades bilingües de habla español-inglés?"
190
- ],
191
- [
192
- "Qual é o impacto da política monetária não convencional no controle da inflação durante uma crise econômica?"
193
- ],
194
- [
195
- "Erklären Sie den Unterschied zwischen deterministischen und nicht-deterministischen endlichen Automaten und ihre Anwendungsbereiche."
196
- ],
197
- [
198
- "Giải thích cơ chế của quá trình phiên mã ngược (reverse transcription) và tầm quan trọng của nó trong nghiên cứu HIV/AIDS."
199
- ],
200
- ["조선시대 성리학이 한국 사회와 문화에 미친 영향을 설명하세요."],
201
- ["如何解释量子纠缠现象,以及它在量子计算中的潜在应用?"],
202
- [
203
- "How can you design a daily schedule that maximizes productivity for a remote worker who has multiple meetings and project deadlines?"
204
- ],
205
- [
206
- "Quels sont les meilleures stratégies pour gérer les conflits au sein d'une équipe multiculturelle travaillant sur un projet commun?"
207
- ],
208
- [
209
- "Quali sono i migliori consigli per mantenere un equilibrio tra vita professionale e vita privata in un ambiente lavorativo stressante?"
210
- ],
211
- [
212
- "¿Cómo se puede elaborar un plan financiero personal efectivo que incluya ahorro para la jubilación, inversión y manejo de deudas?"
213
- ],
214
- [
215
- "Quais são as melhores práticas para implementar metodologias ágeis em uma equipe de desenvolvimento de software?"
216
- ],
217
- [
218
- "Welche Strategien können verwendet werden, um ein starkes berufliches Netzwerk aufzubauen und zu pflegen, insbesondere in der Tech-Branche?"
219
- ],
220
- [
221
- "Những bước nào cần thiết để xây dựng một lộ trình phát triển sự nghiệp bền vững trong lĩnh vực công nghệ thông tin?"
222
- ],
223
- ["프로젝트의 범위 변동을 효과적으로 관리하기 위한 최고의 방법은 무엇인가요?"],
224
- ["在快速变化的职场环境中,如何有效地实现工作与生活的平衡?"],
225
- [
226
- "Write an argumentative essay discussing the pros and cons of artificial intelligence in the workplace, including potential ethical concerns."
227
- ],
228
- [
229
- "Analysez les impacts sociaux et économiques de la digitalisation sur les petites entreprises en France."
230
- ],
231
- [
232
- "Scrivi un'email formale al direttore di una rivista per proporre un articolo sulla sostenibilità ambientale nelle città italiane."
233
- ],
234
- [
235
- "Elabora un informe detallado sobre los efectos del cambio climático en la biodiversidad de la región amazónica."
236
- ],
237
- [
238
- "Analise criticamente os principais pontos abordados no relatório anual do Banco Mundial sobre a pobreza global."
239
- ],
240
- [
241
- "Erstellen Sie eine technische Dokumentation für die Implementierung eines neuen Software-Features in einer bestehenden Anwendung."
242
- ],
243
- [
244
- "Viết một bài luận phân tích về tác động của cuộc cách mạng công nghiệp 4.0 đối với thị trường lao động Việt Nam."
245
- ],
246
- [
247
- "인공지능의 윤리적 문제에 대한 연구 논문을 작성하고, 다양한 사례를 통해 그 영향을 분석하세요."
248
- ],
249
- ["分析鲁迅的小说《阿Q正传》中反映的中国社会问题和作者的批判态度。"],
250
- ]
251
-
252
  if not torch.cuda.is_available():
253
  DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
254
 
255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  if torch.cuda.is_available():
257
- model_id = "ghost-x/ghost-8b-beta"
258
  hf_serect = os.getenv("HF_TOKEN", None)
259
- model = AutoModelForCausalLM.from_pretrained(
260
- model_id,
 
 
 
 
261
  device_map="auto",
262
  torch_dtype=torch.bfloat16,
263
- attn_implementation="flash_attention_2",
 
 
 
 
 
264
  trust_remote_code=True,
265
  token=hf_serect,
266
  )
267
- tokenizer = AutoTokenizer.from_pretrained(
268
- model_id,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  trust_remote_code=True,
270
  token=hf_serect,
271
  )
272
 
 
273
  waiting_tools_timeout = 5
274
  supported_tools = json.dumps(
275
  [
@@ -310,6 +219,22 @@ supported_tools = json.dumps(
310
 
311
  @lru_cache(maxsize=128)
312
  def extract_text_from_webpage(html_content):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  soup = BeautifulSoup(html_content, "html.parser")
314
  for tag in soup(["script", "style", "header", "footer", "nav", "form", "svg"]):
315
  tag.extract()
@@ -321,6 +246,23 @@ def search_with_wikipedia(
321
  query: str,
322
  language: str = "en",
323
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  all_results = []
325
  try:
326
  wikipedia.set_lang(language)
@@ -337,9 +279,39 @@ def search_with_google(
337
  language: str = "en",
338
  ssl_verify: bool = None,
339
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  all_results = []
 
 
341
  max_chars_per_page = 4096
 
 
342
  with requests.Session() as session:
 
343
  resp = session.get(
344
  url="https://www.google.com/search",
345
  headers={
@@ -354,36 +326,118 @@ def search_with_google(
354
  timeout=timeout,
355
  verify=ssl_verify,
356
  )
 
 
357
  resp.raise_for_status()
 
 
358
  soup = BeautifulSoup(resp.text, "html.parser")
 
 
359
  result_block = soup.find_all("div", attrs={"class": "g"})
 
 
360
  for result in result_block:
 
361
  link = result.find("a", href=True)
 
 
362
  if link:
363
  link = link["href"]
364
  try:
 
365
  webpage = session.get(
366
  link,
367
  headers={
368
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"
369
  },
370
  )
 
 
371
  webpage.raise_for_status()
 
 
372
  visible_text = extract_text_from_webpage(webpage.text)
 
 
373
  if len(visible_text) > max_chars_per_page:
374
  visible_text = visible_text[:max_chars_per_page]
 
 
375
  all_results.append({"link": link, "text": visible_text})
376
  except requests.exceptions.RequestException as e:
 
377
  print(f"Error fetching or processing {link}: {e}")
378
  pass
379
  else:
380
  pass
 
 
381
  return all_results
382
 
383
 
384
- @spaces.GPU(duration=120)
385
- def generate(
386
- message: str,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
  chat_history: list[tuple[str, str]],
388
  allow_used_tools: bool = True,
389
  system_prompt: str = "",
@@ -392,48 +446,44 @@ def generate(
392
  top_p: float = 0.95,
393
  top_k: int = 50,
394
  repetition_penalty: float = 1.0,
395
- other_client_info: str = None,
396
  ) -> Iterator[str]:
397
- # print()
398
- # print("allow_used_tools:\n", allow_used_tools)
399
- # print("system_prompt:\n", system_prompt)
400
- # print("max_new_tokens:\n", max_new_tokens)
401
- # print("temperature:\n", temperature)
402
-
403
  def build_input_ids(
404
  apply_tools: bool = None,
405
  references=None,
406
  ):
407
  conversation = []
 
 
408
  if system_prompt:
409
  conversation.append({"role": "system", "content": system_prompt})
 
 
410
  if apply_tools is True:
411
  conversation.append({"role": "tools", "content": supported_tools})
412
 
 
413
  if references is None:
414
- references = [other_client_info]
415
  else:
416
- references.insert(0, other_client_info)
417
 
418
  if (
419
  references is not None
420
  and isinstance(references, list)
421
  and len(references) > 0
422
  ):
 
 
423
  conversation.append(
424
  {
425
  "role": "refs",
426
- "content": json.dumps(
427
- {
428
- "instructions": "These are only general documents used for reference to give the most accurate and honest answers possible. Ignore it if it's irrelevant and don't overuse it.",
429
- "documents": references,
430
- },
431
- indent=2,
432
- ensure_ascii=False,
433
- ),
434
  }
435
  )
436
 
 
437
  for user, assistant in chat_history:
438
  conversation.extend(
439
  [
@@ -441,12 +491,28 @@ def generate(
441
  {"role": "assistant", "content": assistant},
442
  ]
443
  )
444
- conversation.append({"role": "user", "content": message})
445
 
446
- input_ids = tokenizer.apply_chat_template(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
  conversation, add_generation_prompt=True, return_tensors="pt"
448
  )
449
- input_ids = input_ids.to(model.device)
 
 
450
  if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
451
  input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
452
  gr.Warning(
@@ -454,10 +520,13 @@ def generate(
454
  )
455
  return input_ids
456
 
 
457
  def generate_chat_responses(
458
  previous_response: str = None,
459
  ):
460
  document_references = []
 
 
461
  if previous_response is not None:
462
  scheduled_tools_runs = None
463
  try:
@@ -472,6 +541,7 @@ def generate(
472
  print(e)
473
  pass
474
 
 
475
  if (
476
  scheduled_tools_runs is not None
477
  and scheduled_tools_runs["name"] == "search_on_internet"
@@ -479,7 +549,8 @@ def generate(
479
  keyword = scheduled_tools_runs["arguments"]["keyword"]
480
  search_type = scheduled_tools_runs["arguments"]["type"]
481
  language = scheduled_tools_runs["arguments"]["language"]
482
- print("scheduled_tools_runs:", scheduled_tools_runs)
 
483
  if search_type == "wikipedia":
484
  gr.Info(
485
  "Searching for information on the Wikipedia.",
@@ -490,27 +561,34 @@ def generate(
490
  search_with_wikipedia(query=keyword, language=language)
491
  )
492
 
 
493
  gr.Info("Searching for information on the Google.")
494
  document_references.extend(
495
  search_with_google(
496
  query=keyword,
497
  language=language,
498
  num_results=3,
499
- # num_results=2 if search_type == "wikipedia" else 3,
500
  )
501
  )
502
  print("document_references:", document_references)
503
 
 
504
  apply_tools = (
505
  True if allow_used_tools is True and previous_response is None else False
506
  )
 
 
507
  input_ids = build_input_ids(
508
  apply_tools=apply_tools,
509
  references=document_references,
510
  )
 
 
511
  streamer = TextIteratorStreamer(
512
- tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
513
  )
 
 
514
  generate_kwargs = dict(
515
  input_ids=input_ids,
516
  streamer=streamer,
@@ -525,9 +603,14 @@ def generate(
525
  generate_kwargs["top_p"] = top_p
526
  generate_kwargs["top_k"] = top_k
527
 
528
- t = Thread(target=model.generate, kwargs=generate_kwargs)
 
529
  t.start()
530
 
 
 
 
 
531
  state = {
532
  "mark": None,
533
  "respond": False,
@@ -544,6 +627,7 @@ def generate(
544
  state["respond"] = True
545
  yield "".join(outputs)
546
 
 
547
  if (
548
  apply_tools is True
549
  and state["respond"] is False
@@ -552,9 +636,104 @@ def generate(
552
  previous_response = "".join(outputs)
553
  yield from generate_chat_responses(previous_response=previous_response)
554
 
 
555
  yield from generate_chat_responses(previous_response=None)
556
 
557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  chatbot = gr.Chatbot(
559
  height=500, placeholder=PLACEHOLDER, label="Ghost 8B Beta", show_copy_button=True
560
  )
@@ -563,11 +742,17 @@ chat_interface = gr.ChatInterface(
563
  fn=generate,
564
  chatbot=chatbot,
565
  fill_height=True,
 
 
 
 
 
566
  additional_inputs=[
567
  gr.Checkbox(
568
- label="Allow used tools (available: search on internet)", value=False
 
569
  ),
570
- gr.Textbox(label="System prompt", lines=6),
571
  gr.Slider(
572
  label="Max new tokens",
573
  minimum=1,
@@ -604,24 +789,26 @@ chat_interface = gr.ChatInterface(
604
  value=1.0,
605
  ),
606
  gr.Textbox(
607
- label="Other client information",
 
608
  lines=1,
609
- value="This user's current time: {}".format(time.strftime("%Y-%m-%d")),
 
 
610
  visible=False,
611
  ),
612
  ],
613
  stop_btn="Stop",
614
  cache_examples=False,
615
- examples=EXAMPLES,
616
- examples_per_page=9,
617
  concurrency_limit=100,
618
  )
619
 
620
- with gr.Blocks(fill_height=True, css="style.css") as demo:
621
  gr.Markdown(DESCRIPTION)
622
  chat_interface.render()
623
  gr.Markdown(LICENSE)
624
 
625
  if __name__ == "__main__":
626
- demo.queue(max_size=20).launch(share=True)
627
-
 
3
  import subprocess
4
  import json
5
  import requests
6
+ import zlib
7
+ from PIL import Image
8
 
9
  subprocess.run(
10
  f"pip install flash-attn --no-build-isolation",
 
19
  import gradio as gr
20
  import spaces
21
  import torch
22
+ import logging
23
  import wikipedia
24
  import time
25
+ from transformers import (
26
+ AutoModelForCausalLM,
27
+ AutoTokenizer,
28
+ AutoProcessor,
29
+ TextIteratorStreamer,
30
+ )
31
+ from transformers.dynamic_module_utils import get_imports
32
  from bs4 import BeautifulSoup
33
  from functools import lru_cache
34
 
35
+ logging.basicConfig(level=logging.INFO)
36
+ logger = logging.getLogger(__name__)
37
+
38
 
39
  MAX_MAX_NEW_TOKENS = 4096
40
  DEFAULT_MAX_NEW_TOKENS = 1536
41
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))
42
 
43
+ DEFAULT_SYSTEM_PROMPT = """\
44
+ You are a helpful and intelligent AI, trained by Ghost X and named Ghost 8B Beta (often referred to as Ghost Beta).
45
+ You're known for your honesty, spreading positivity, and always striving to assist users. Your expertise lies in understanding their needs and providing insightful suggestions, drawing upon your knowledge and interests. If a query exceeds your understanding, you'll be upfront and state you're unsure, avoiding fabricated responses. You enjoy incorporating emojis to enhance interactions, but maintain a balanced approach for a natural flow. Let's engage in a meaningful conversation, keeping in mind the user's language.
46
+ """
47
+
48
+ # DEFAULT_SYSTEM_PROMPT = """\
49
+ # You are a helpful and intelligent AI, trained by Ghost X and named Ghost 8B Beta (often referred to as 8B Beta).
50
+ # You're known for your honesty, spreading positivity, and always striving to assist users. Your expertise lies in understanding their needs and providing insightful suggestions, drawing upon your knowledge and interests. If a query exceeds your understanding, you'll be upfront and state you're unsure, avoiding fabricated responses. You enjoy incorporating emojis to enhance interactions, but maintain a balanced approach for a natural flow. Let's engage in a meaningful conversation, keeping in mind the user's language.
51
+
52
+ # A guide to dealing with extremely complex questions or challenges. Follow these steps to solve them:
53
+ # 1. Deconstructing Complexity
54
+ # Imagine a puzzle with intricate pieces. I'll present a challenging question. Your task: Break down this question into smaller, distinct parts. Label each part with a specific theme or aspect related to the problem. This will help us understand the multifaceted nature of the query and prepare for a structured solution.
55
+ # 2. Reconstructing Insights
56
+ # Once we've successfully dissected the problem into manageable components, assemble these parts like a puzzle. Focus on identifying connections, potential overlaps, and key information from each theme. The goal is to reconstruct a cohesive, well-rounded answer that addresses the original complexity of the question.
57
+ # """
58
+
59
+ HEAD = """
60
+ <script>
61
+ function schedule_updates() {
62
+ const client_info_element = document.querySelector("#client_info textarea");
63
+ client_info_element.value = "The current time is now: " + new Date().toLocaleString('en-US', {weekday: 'short'});
64
+ client_info_element.dispatchEvent(new Event('input'));
65
+ }
66
+
67
+ function bootstrap() {
68
+ setInterval(schedule_updates, 1000);
69
+ };
70
+
71
+ bootstrap();
72
+ </script>
73
+ """
74
+
75
  DESCRIPTION = """\
76
+ # Ghost 8B Beta (β, 8k)
77
 
78
+ **Ghost 8B Beta** outperforms leading models like Llama 3.1 8B Instruct and GPT-3.5 Turbo in lc_winrate scores. It also surpasses Claude 3 Opus, Claude 3 Sonnet, GPT-4, and Mistral Large in AlpacaEval 2.0 winrate scores. The model offers two context length versions: [8k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-8k) and [128k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-128k), both with built-in multilingual function support.
79
 
80
+ Supported languages: 🇬🇧 English, 🇻🇳 Vietnamese, 🇰🇷 Korean, 🇪🇸 Spanish, 🇵🇹 Portuguese, 🇨🇳 Chinese, 🇫🇷 French, 🇮🇹 Italian, 🇩🇪 German, 🇯🇵 Japanese, 🇷🇺 Russian, 🇵🇱 Polish, 🇳🇱 Dutch, 🇮🇳 Hindi, 🇹🇷 Turkish, 🇮🇩 Indonesian.
81
+ Note: with the image will be used another model to explain rather than using directly the Ghost 8B Beta model.
82
 
83
  🗞️ **Updates**
84
+ * Aug 16, 2024: Released version 160824, expanding language support from 9 to 16 languages and improving math, reasoning, and instruction-following capabilities.
85
+ * Jul 23, 2024: Added internet search tools.
86
  """
87
 
88
 
89
  PLACEHOLDER = """
90
  <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
91
+ <h1 style="font-size: 26px; margin-bottom: 2px; opacity: 0.20;">👋 Welcome to the Ghost 8B Beta Playground! 🎉</h1>
92
+ <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.10;">Ask me anything and let's have some fun! 🤔💡</p>
93
  </div>
94
  """
95
 
 
100
  Ghost 8B Beta may give inaccurate information, including information about people, so please verify Ghost 8B Beta's answers. [Ghost 8B Beta](https://ghost-x.org/docs/models/ghost-8b-beta/) by [Ghost X](https://ghost-x.org).
101
  """
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  if not torch.cuda.is_available():
104
  DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
105
 
106
 
107
def workaround_fixed_get_imports(filename: str | os.PathLike) -> list[str]:
    """
    Return the imports of a dynamic module, without "flash_attn" for Florence-2.

    @args:
        filename (str | os.PathLike): The filename or path to the file.

    @returns:
        list[str]: The list of imports reported by `get_imports`, with the
        "flash_attn" entry removed when the file is "modeling_florence2.py".

    @remarks:
        - Every file is passed to the original `get_imports` function.
        - Only when the filename ends with "/modeling_florence2.py" is the
          "flash_attn" entry dropped from the result.
        - If that file does not list "flash_attn" at all, the imports are
          returned unchanged instead of raising ValueError.
        - Presumably this lets the Florence-2 remote code load on machines
          without flash-attn installed -- TODO confirm against the caller.

    @usage:
    ```python
    from unittest.mock import patch
    with patch(
        "transformers.dynamic_module_utils.get_imports", workaround_fixed_get_imports
    ):
        image_model = AutoModelForCausalLM.from_pretrained(
            image_model_id, trust_remote_code=True
        )
    ```
    """

    imports = get_imports(filename)
    if not str(filename).endswith("/modeling_florence2.py"):
        return imports
    # list.remove raises ValueError on a missing element; newer revisions of the
    # modeling file may no longer import flash_attn, so check before removing.
    if "flash_attn" in imports:
        imports.remove("flash_attn")
    return imports
138
+
139
+
140
  if torch.cuda.is_available():
 
141
  hf_serect = os.getenv("HF_TOKEN", None)
142
+ attn_implementation = "flash_attention_2"
143
+
144
+ chat_model_id = "ghost-x/ghost-8b-beta-1608"
145
+ chat_device = torch.device("cuda")
146
+ chat_model = AutoModelForCausalLM.from_pretrained(
147
+ chat_model_id,
148
  device_map="auto",
149
  torch_dtype=torch.bfloat16,
150
+ attn_implementation=attn_implementation,
151
+ trust_remote_code=True,
152
+ token=hf_serect,
153
+ )
154
+ chat_tokenizer = AutoTokenizer.from_pretrained(
155
+ chat_model_id,
156
  trust_remote_code=True,
157
  token=hf_serect,
158
  )
159
+
160
+ image_model_id = "microsoft/Florence-2-large"
161
+ # image_device = "cuda" if torch.cuda.is_available() else "cpu"
162
+ # image_torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
163
+ image_device = "cpu"
164
+ image_torch_dtype = torch.float32
165
+ image_model = (
166
+ AutoModelForCausalLM.from_pretrained(
167
+ image_model_id,
168
+ torch_dtype=image_torch_dtype,
169
+ trust_remote_code=True,
170
+ token=hf_serect,
171
+ )
172
+ .to(image_device)
173
+ .eval()
174
+ )
175
+ image_processor = AutoProcessor.from_pretrained(
176
+ image_model_id,
177
  trust_remote_code=True,
178
  token=hf_serect,
179
  )
180
 
181
+
182
  waiting_tools_timeout = 5
183
  supported_tools = json.dumps(
184
  [
 
219
 
220
  @lru_cache(maxsize=128)
221
  def extract_text_from_webpage(html_content):
222
+ """
223
+ Extracts visible text from an HTML webpage.
224
+
225
+ @args:
226
+ html_content (str): The HTML content of the webpage.
227
+
228
+ @returns:
229
+ str: The visible text extracted from the webpage.
230
+
231
+ @remarks:
232
+ - This function uses the BeautifulSoup library to parse the HTML content.
233
+ - It removes certain tags (script, style, header, footer, nav, form, svg) from the parsed HTML.
234
+ - The remaining visible text is then extracted using the `get_text` method of BeautifulSoup.
235
+ - The extracted text is stripped of leading/trailing whitespace and separated by a single space.
236
+ """
237
+
238
  soup = BeautifulSoup(html_content, "html.parser")
239
  for tag in soup(["script", "style", "header", "footer", "nav", "form", "svg"]):
240
  tag.extract()
 
246
  query: str,
247
  language: str = "en",
248
  ):
249
+ """
250
+ Search for a given query on Wikipedia and return the summary.
251
+
252
+ @args:
253
+ query (str): The search query.
254
+ language (str, optional): The language code for the Wikipedia page. Defaults to "en".
255
+
256
+ @returns:
257
+ list: A list containing the summary of the Wikipedia page.
258
+
259
+ @remarks:
260
+ - This function uses the Wikipedia API to search for the given query.
261
+ - The language parameter determines the language of the Wikipedia page to search.
262
+ - If the search is successful, the function returns a list containing the summary of the page.
263
+ - If an exception occurs during the search, an empty list is returned.
264
+ """
265
+
266
  all_results = []
267
  try:
268
  wikipedia.set_lang(language)
 
279
  language: str = "en",
280
  ssl_verify: bool = None,
281
  ):
282
+ """
283
+ Searches Google for the given query and returns a list of search results.
284
+
285
+ @args:
286
+ query (str): The search query.
287
+ num_results (int, optional): The number of search results to retrieve. Defaults to 3.
288
+ timeout (int, optional): The timeout value for the HTTP requests. Defaults to 5.
289
+ language (str, optional): The language for the search results. Defaults to "en".
290
+ ssl_verify (bool, optional): Whether to verify SSL certificates. Defaults to None.
291
+
292
+ @returns:
293
+ list: A list of dictionaries containing the link and visible text of each search result.
294
+
295
+ @remarks:
296
+ - This function uses the requests library to send HTTP requests to Google.
297
+ - It sets the User-Agent header to mimic a Firefox browser.
298
+ - The search results are retrieved from the HTML response using BeautifulSoup.
299
+ - Each search result is represented as a dictionary with "link" and "text" keys.
300
+ - The "link" key contains the URL of the search result.
301
+ - The "text" key contains the visible text extracted from the search result webpage.
302
+ - If the visible text exceeds 4096 characters, it is truncated to that length.
303
+ - If an error occurs while fetching or processing a search result, it is printed and ignored.
304
+ """
305
+
306
+ # Initialize an empty list to store the search results
307
  all_results = []
308
+
309
+ # Define the maximum number of characters per page
310
  max_chars_per_page = 4096
311
+
312
+ # Create a session object to send HTTP requests
313
  with requests.Session() as session:
314
+ # Send a GET request to Google search with the specified query parameters
315
  resp = session.get(
316
  url="https://www.google.com/search",
317
  headers={
 
326
  timeout=timeout,
327
  verify=ssl_verify,
328
  )
329
+
330
+ # Raise an exception if the response status code is not successful
331
  resp.raise_for_status()
332
+
333
+ # Parse the HTML response using BeautifulSoup
334
  soup = BeautifulSoup(resp.text, "html.parser")
335
+
336
+ # Find all the result blocks in the HTML
337
  result_block = soup.find_all("div", attrs={"class": "g"})
338
+
339
+ # Iterate over each result block
340
  for result in result_block:
341
+ # Find the link element within the result block
342
  link = result.find("a", href=True)
343
+
344
+ # If a link is found, extract the URL and process the webpage
345
  if link:
346
  link = link["href"]
347
  try:
348
+ # Send a GET request to the link URL
349
  webpage = session.get(
350
  link,
351
  headers={
352
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"
353
  },
354
  )
355
+
356
+ # Raise an exception if the response status code is not successful
357
  webpage.raise_for_status()
358
+
359
+ # Extract the visible text from the webpage
360
  visible_text = extract_text_from_webpage(webpage.text)
361
+
362
+ # Truncate the visible text if it exceeds the maximum number of characters per page
363
  if len(visible_text) > max_chars_per_page:
364
  visible_text = visible_text[:max_chars_per_page]
365
+
366
+ # Append the link and visible text to the search results list
367
  all_results.append({"link": link, "text": visible_text})
368
  except requests.exceptions.RequestException as e:
369
+ # Print an error message if there is an error fetching or processing the link
370
  print(f"Error fetching or processing {link}: {e}")
371
  pass
372
  else:
373
  pass
374
+
375
+ # Return the search results
376
  return all_results
377
 
378
 
379
@lru_cache(maxsize=128)
def extract_text_from_image(file: str) -> str:
    """
    Generate a detailed caption for an image with the image-captioning model.

    @args:
        file (str): Local path or HTTP(S) URL of the image.

    @returns:
        str: The entry stored under the task key in the processor's
            post-processed output (presumably the caption text; confirm
            against the processor's documentation).

    @raises:
        requests.exceptions.RequestException: If a remote image cannot be
            downloaded (connection error, timeout or non-2xx status).
        OSError: If the image cannot be opened or decoded by PIL.

    @remarks:
        - Results are memoised per `file` string by an LRU cache of 128 entries,
          so a path or URL whose content changes later keeps returning the
          first result while it stays in the cache.
        - Anything starting with "http" is downloaded; everything else is
          treated as a local file path.
        - The image is always converted to RGB before being processed.
        - Inference inputs are moved to the CPU with `image_torch_dtype`.
    """
    # Task prompt understood by the captioning model; also the key of the result.
    task = "<MORE_DETAILED_CAPTION>"

    # Load the image and release the underlying stream/file as soon as the
    # pixel data has been decoded (`convert` forces the lazy load).
    if file.startswith("http"):
        # A timeout keeps a stalled host from blocking the worker forever, and
        # the status check avoids handing an HTML error page to PIL.
        with requests.get(file, stream=True, timeout=30) as response:
            response.raise_for_status()
            image = Image.open(response.raw).convert("RGB")
    else:
        with Image.open(file) as local_image:
            image = local_image.convert("RGB")

    # Preprocess the prompt and image into model inputs
    inputs = image_processor(text=task, images=image, return_tensors="pt").to(
        "cpu", image_torch_dtype
    )

    # Generate the caption token ids (deterministic beam search)
    generated_ids = image_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
        do_sample=False,
    )

    # Special tokens are kept because the post-processing step parses them
    generated_text = image_processor.batch_decode(
        generated_ids, skip_special_tokens=False
    )[0]
    parsed_answer = image_processor.post_process_generation(
        generated_text,
        task=task,
        image_size=(image.width, image.height),
    )

    # Return the parsed answer for the specified task
    return parsed_answer[task]
435
+
436
+
437
+ @spaces.GPU(duration=90)
438
+ def generate_chat(
439
+ uuid: str,
440
+ message: dict,
441
  chat_history: list[tuple[str, str]],
442
  allow_used_tools: bool = True,
443
  system_prompt: str = "",
 
446
  top_p: float = 0.95,
447
  top_k: int = 50,
448
  repetition_penalty: float = 1.0,
449
+ client_info: str = None,
450
  ) -> Iterator[str]:
451
+ # Build the input_ids for the chat conversation
 
 
 
 
 
452
  def build_input_ids(
453
  apply_tools: bool = None,
454
  references=None,
455
  ):
456
  conversation = []
457
+
458
+ # Add the system prompt to the conversation
459
  if system_prompt:
460
  conversation.append({"role": "system", "content": system_prompt})
461
+
462
+ # Add the tools role to the conversation if apply_tools is True
463
  if apply_tools is True:
464
  conversation.append({"role": "tools", "content": supported_tools})
465
 
466
+ # Add the references role to the conversation
467
  if references is None:
468
+ references = [client_info]
469
  else:
470
+ references.insert(0, client_info)
471
 
472
  if (
473
  references is not None
474
  and isinstance(references, list)
475
  and len(references) > 0
476
  ):
477
+ formatted_references = f"Analyze the provided references, extract relevant information to provide accurate and objective feedback. This reference information may include: conversation context, assistant or user memories, reasoning guides, problem-solving suggestions, assistant rules, etc.\nIf the reference is not relevant, ignore it. Try to have a balanced approach, avoiding over-reliance on the documentation."
478
+ formatted_references += "\n\n" + ("\n\n".join(references))
479
  conversation.append(
480
  {
481
  "role": "refs",
482
+ "content": formatted_references,
 
 
 
 
 
 
 
483
  }
484
  )
485
 
486
+ # Add the chat history to the conversation
487
  for user, assistant in chat_history:
488
  conversation.extend(
489
  [
 
491
  {"role": "assistant", "content": assistant},
492
  ]
493
  )
 
494
 
495
+ # Add the user message with image attachments to the conversation
496
+ conversation.append(
497
+ {
498
+ "role": "user",
499
+ "content": (
500
+ f"{' & '.join(message['attachments'])}\n\n{message['text']}"
501
+ if "attachments" in message and len(message["attachments"]) > 0
502
+ else f"{message['text']}"
503
+ ),
504
+ }
505
+ )
506
+
507
+ logger.debug(f"UUID: {uuid} - Conversation: {conversation}")
508
+
509
+ # Apply the chat template to convert the conversation into input_ids
510
+ input_ids = chat_tokenizer.apply_chat_template(
511
  conversation, add_generation_prompt=True, return_tensors="pt"
512
  )
513
+ input_ids = input_ids.to(chat_model.device)
514
+
515
+ # Trim the input_ids if it exceeds the maximum token length
516
  if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
517
  input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
518
  gr.Warning(
 
520
  )
521
  return input_ids
522
 
523
+ # Generate chat responses based on the input_ids
524
  def generate_chat_responses(
525
  previous_response: str = None,
526
  ):
527
  document_references = []
528
+
529
+ # Check if the previous response contains scheduled tool runs
530
  if previous_response is not None:
531
  scheduled_tools_runs = None
532
  try:
 
541
  print(e)
542
  pass
543
 
544
+ # If scheduled tool runs exist, perform the corresponding searches
545
  if (
546
  scheduled_tools_runs is not None
547
  and scheduled_tools_runs["name"] == "search_on_internet"
 
549
  keyword = scheduled_tools_runs["arguments"]["keyword"]
550
  search_type = scheduled_tools_runs["arguments"]["type"]
551
  language = scheduled_tools_runs["arguments"]["language"]
552
+
553
+ # Search on Wikipedia if the search type is "wikipedia"
554
  if search_type == "wikipedia":
555
  gr.Info(
556
  "Searching for information on the Wikipedia.",
 
561
  search_with_wikipedia(query=keyword, language=language)
562
  )
563
 
564
+ # Search on Google
565
  gr.Info("Searching for information on the Google.")
566
  document_references.extend(
567
  search_with_google(
568
  query=keyword,
569
  language=language,
570
  num_results=3,
 
571
  )
572
  )
573
  print("document_references:", document_references)
574
 
575
+ # Determine if tools should be applied based on the allow_used_tools flag
576
  apply_tools = (
577
  True if allow_used_tools is True and previous_response is None else False
578
  )
579
+
580
+ # Build the input_ids for the chat conversation
581
  input_ids = build_input_ids(
582
  apply_tools=apply_tools,
583
  references=document_references,
584
  )
585
+
586
+ # Create a TextIteratorStreamer to generate chat responses
587
  streamer = TextIteratorStreamer(
588
+ chat_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
589
  )
590
+
591
+ # Set the generation parameters
592
  generate_kwargs = dict(
593
  input_ids=input_ids,
594
  streamer=streamer,
 
603
  generate_kwargs["top_p"] = top_p
604
  generate_kwargs["top_k"] = top_k
605
 
606
+ # Start the generation process in a separate thread
607
+ t = Thread(target=chat_model.generate, kwargs=generate_kwargs)
608
  t.start()
609
 
610
+ logger.debug(
611
+ f"UUID: {uuid} - Is apply tools: {apply_tools} - Is apply documents: {len(document_references) > 0} - Is previous response: {previous_response is not None} - Start generating chat responses"
612
+ )
613
+
614
  state = {
615
  "mark": None,
616
  "respond": False,
 
627
  state["respond"] = True
628
  yield "".join(outputs)
629
 
630
+ # If tools are applied and no response is generated within the timeout, continue generating chat responses
631
  if (
632
  apply_tools is True
633
  and state["respond"] is False
 
636
  previous_response = "".join(outputs)
637
  yield from generate_chat_responses(previous_response=previous_response)
638
 
639
+ # Yield the generated chat responses
640
  yield from generate_chat_responses(previous_response=None)
641
 
642
 
643
def generate(
    message: dict,
    chat_history: list[tuple[str, str]],
    allow_used_tools: bool = True,
    system_prompt: str = "",
    max_new_tokens: int = 1536,
    temperature: float = 0.4,
    top_p: float = 0.95,
    top_k: int = 50,
    repetition_penalty: float = 1.0,
    client_info: str = None,
) -> Iterator[str]:
    """
    Entry point of the chat interface: describe attached images, normalise the
    chat history and stream the model's answer.

    @args:
        message (dict): The incoming multimodal message; its "text" and "files"
            entries are read, and "files"/"attachments" are (re)written.
        chat_history (list): Previous turns as (user, assistant) pairs. Pairs
            whose user part is a collection of file paths and whose assistant
            part is None are treated as image uploads.
        allow_used_tools (bool, optional): Forwarded to `generate_chat`.
        system_prompt (str, optional): Forwarded to `generate_chat`.
        max_new_tokens (int, optional): Forwarded to `generate_chat`.
        temperature (float, optional): Forwarded to `generate_chat`.
        top_p (float, optional): Forwarded to `generate_chat`.
        top_k (int, optional): Forwarded to `generate_chat`.
        repetition_penalty (float, optional): Forwarded to `generate_chat`.
        client_info (str, optional): Forwarded to `generate_chat`.

    @returns:
        Iterator[str]: Whatever `generate_chat` yields.

    @remarks:
        - Only the first two files of the incoming message are processed.
        - Image-only history pairs are replaced by attachment tags prepended to
          the next text turn; consecutive image pairs are merged into that same
          turn, and image pairs with no following turn are dropped.
        - Turns lacking either a user or an assistant part are removed.
    """
    # Request identifier for log correlation: CRC32 of the current timestamp
    uuid = zlib.crc32(str.encode(str(time.time())))
    logger.info(f"UUID: {uuid} - Starting image text extraction process")

    # Limit the number of files to process to 2
    files = list(message.get("files") or [])
    if len(files) > 2:
        gr.Warning("Only the first 2 images will be processed.")
    message["files"] = files[:2]

    # Describe each image and keep the descriptions as attachment tags
    message["attachments"] = handle_file_extraction(
        files=list(message["files"]), uuid=uuid
    )
    logger.debug(f"UUID: {uuid} - Image text extraction process completed")

    logger.debug(f"UUID: {uuid} - Previous chat history: {chat_history}")

    # Rebuild the history instead of patching it in place: this avoids indexing
    # past the end when an image pair is last, and keeps descriptions from being
    # written onto another image pair when several files were uploaded at once.
    merged_history = []
    pending_files = []
    for user_message, assistant_message in chat_history:
        is_file_only = (
            user_message is not None
            and not isinstance(user_message, str)
            and assistant_message is None
        )
        if is_file_only:
            pending_files.extend(user_message)
            continue

        # Files uploaded before this turn belong to it, whether it is kept or not
        attached_files, pending_files = pending_files, []

        # Turns without an assistant answer are not part of the conversation
        if assistant_message is None:
            continue

        if attached_files:
            text_descriptions = handle_file_extraction(
                files=attached_files, uuid=uuid
            )
            # A turn that carried only images has no text; do not print "None"
            user_text = "" if user_message is None else user_message
            chat_input = f"{' & '.join(text_descriptions)}\n\n{user_text}"
            user_message = chat_input
            logger.debug(
                f"UUID: {uuid} - Updated chat history: {merged_history} - Updated chat input: {chat_input}"
            )
        elif user_message is None:
            continue

        merged_history.append([user_message, assistant_message])

    chat_history = merged_history
    logger.debug(f"UUID: {uuid} - Filtered chat history: {chat_history}")

    yield from generate_chat(
        uuid=uuid,
        message=message,
        chat_history=chat_history,
        allow_used_tools=allow_used_tools,
        system_prompt=system_prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        client_info=client_info,
    )
705
+
706
+
707
def handle_file_extraction(files: list[str], uuid: str):
    """
    Describe every image in `files` and wrap each description in an attachment tag.

    @args:
        files (list[str]): Image locations, each handed to `extract_text_from_image`.
        uuid (str): Request identifier; only used to prefix the log line.

    @returns:
        list: One `<attachment ... />` string per input file, in input order.

    @remarks:
        - The `index` attribute is the file's position within `files`, starting at 0.
        - Each description is logged at info level together with the identifier and file.
        - The description is interpolated into the tag verbatim (no escaping).
    """

    def describe(position: int, path: str) -> str:
        # Caption one image, log it, and render it as an attachment tag
        description = extract_text_from_image(file=path)
        logger.info(
            f"UUID: {uuid} - File: {path} - Extracted text: {description}"
        )
        return f'<attachment index="{position}" type="image" description="{description}" />'

    return [describe(position, path) for position, path in enumerate(files)]
735
+
736
+
737
  chatbot = gr.Chatbot(
738
  height=500, placeholder=PLACEHOLDER, label="Ghost 8B Beta", show_copy_button=True
739
  )
 
742
  fn=generate,
743
  chatbot=chatbot,
744
  fill_height=True,
745
+ multimodal=True,
746
+ textbox=gr.MultimodalTextbox(
747
+ file_types=["image"],
748
+ placeholder="Type a message...",
749
+ ),
750
  additional_inputs=[
751
  gr.Checkbox(
752
+ label="Allow used tools (available: search on internet)",
753
+ value=False,
754
  ),
755
+ gr.Textbox(label="System prompt", lines=6, value=DEFAULT_SYSTEM_PROMPT),
756
  gr.Slider(
757
  label="Max new tokens",
758
  minimum=1,
 
789
  value=1.0,
790
  ),
791
  gr.Textbox(
792
+ elem_id="client_info",
793
+ label="Client info",
794
  lines=1,
795
+ value="The current time is now: {}".format(
796
+ time.strftime("%A, %D %B %Y %H:%M:%S")
797
+ ),
798
  visible=False,
799
  ),
800
  ],
801
  stop_btn="Stop",
802
  cache_examples=False,
803
+ examples=[],
804
+ examples_per_page=10,
805
  concurrency_limit=100,
806
  )
807
 
808
+ with gr.Blocks(fill_height=True, css="style.css", head=HEAD) as demo:
809
  gr.Markdown(DESCRIPTION)
810
  chat_interface.render()
811
  gr.Markdown(LICENSE)
812
 
813
  if __name__ == "__main__":
814
+ demo.queue().launch(share=True)
 
requirements.txt CHANGED
@@ -1,10 +1,11 @@
1
- accelerate==0.30.1
2
- bitsandbytes==0.43.1
3
- gradio==4.39.0
 
 
 
4
  scipy==1.13.0
5
  sentencepiece==0.2.0
6
- spaces==0.28.3
7
  torch==2.0.0
8
- transformers==4.41.0
9
  beautifulsoup4>=4.9
10
  wikipedia==1.4.0
 
1
+ accelerate
2
+ bitsandbytes
3
+ gradio
4
+ spaces
5
+ transformers
6
+ timm
7
  scipy==1.13.0
8
  sentencepiece==0.2.0
 
9
  torch==2.0.0
 
10
  beautifulsoup4>=4.9
11
  wikipedia==1.4.0