MadsGalsgaard committed
Commit a971cfb · verified · 1 Parent(s): 4dbd8e9

Set up the environment for the Llama 3.2 model

Files changed (1)
1. app.py  +233 -129
app.py CHANGED
@@ -439,144 +439,248 @@
-###########new clientkey
 
-import os
-import time
-import spaces
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-import gradio as gr
 from threading import Thread
 
-MODEL = "THUDM/LongWriter-llama3.1-8b"
-
-TITLE = "<h1><center>AreaX LLC-llama3.1-8b</center></h1>"
-
-PLACEHOLDER = """
-<center>
-<p>Hi! I'm AreaX AI Agent, capable of generating 10,000+ words. How can I assist you today?</p>
-</center>
-"""
-
-CSS = """
-.duplicate-button {
-    margin: auto !important;
-    color: white !important;
-    background: black !important;
-    border-radius: 100vh !important;
-}
-h3 {
-    text-align: center;
-}
-"""
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
-model = model.eval()
-
-@spaces.GPU()
-def stream_chat(
-    message: str,
-    history: list,
-    system_prompt: str,
-    temperature: float = 0.5,
-    max_new_tokens: int = 32768,
-    top_p: float = 1.0,
-    top_k: int = 50,
-):
-    print(f'message: {message}')
-    print(f'history: {history}')
-
-    full_prompt = f"<<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
-    for prompt, answer in history:
-        full_prompt += f"[INST]{prompt}[/INST]{answer}"
-    full_prompt += f"[INST]{message}[/INST]"
-
-    inputs = tokenizer(full_prompt, truncation=False, return_tensors="pt").to(device)
-    context_length = inputs.input_ids.shape[-1]
-
-    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-
-    generate_kwargs = dict(
-        inputs=inputs.input_ids,
-        max_new_tokens=max_new_tokens,
-        do_sample=True,
-        top_p=top_p,
-        top_k=top_k,
-        temperature=temperature,
-        num_beams=1,
-        streamer=streamer,
-    )
-
-    thread = Thread(target=model.generate, kwargs=generate_kwargs)
-    thread.start()
 
     buffer = ""
     for new_text in streamer:
         buffer += new_text
         yield buffer
 
-chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
-
-with gr.Blocks(css=CSS, theme="soft") as demo:
-    gr.HTML(TITLE)
-    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
-    gr.ChatInterface(
-        fn=stream_chat,
-        chatbot=chatbot,
-        fill_height=True,
-        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
-        additional_inputs=[
-            gr.Textbox(
-                value="You are a helpful assistant capable of generating long-form content.",
-                label="System Prompt",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0,
-                maximum=1,
-                step=0.1,
-                value=0.5,
-                label="Temperature",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=1024,
-                maximum=32768,
-                step=1024,
-                value=32768,
-                label="Max new tokens",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=1.0,
-                step=0.1,
-                value=1.0,
-                label="Top p",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=1,
-                maximum=100,
-                step=1,
-                value=50,
-                label="Top k",
-                render=False,
-            ),
 ],
-        # examples=[
-        #     ["Write a 5000-word comprehensive guide on machine learning for beginners."],
-        #     ["Create a detailed 3000-word business plan for a sustainable energy startup."],
-        #     ["Compose a 2000-word short story set in a futuristic underwater city."],
-        #     ["Develop a 4000-word research proposal on the potential effects of climate change on global food security."],
-        # ],
-        # cache_examples=False,
-    )
-
-if __name__ == "__main__":
-    demo.launch()
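For reference, the deleted stream_chat serialized the conversation into a Llama-2-style prompt string by hand rather than using a chat template. A minimal standalone sketch of that assembly (build_prompt is a hypothetical helper, not part of the commit; history is Gradio's list of (user, assistant) pairs):

    def build_prompt(system_prompt: str, history: list, message: str) -> str:
        # The <<SYS>> block carries the system prompt; each turn is wrapped in [INST] tags.
        prompt = f"<<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
        for user, assistant in history:
            prompt += f"[INST]{user}[/INST]{assistant}"
        # The new user message is left open for the model to answer.
        return prompt + f"[INST]{message}[/INST]"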
 
+###########new clientkey 04 (running now)
 
 
+# import os
+# import time
+# import spaces
+# import torch
+# from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+# import gradio as gr
+# from threading import Thread
+
+# MODEL = "THUDM/LongWriter-llama3.1-8b"
+
+# TITLE = "<h1><center>AreaX LLC-llama3.1-8b</center></h1>"
+
+# PLACEHOLDER = """
+# <center>
+# <p>Hi! I'm AreaX AI Agent, capable of generating 10,000+ words. How can I assist you today?</p>
+# </center>
+# """
+
+# CSS = """
+# .duplicate-button {
+#     margin: auto !important;
+#     color: white !important;
+#     background: black !important;
+#     border-radius: 100vh !important;
+# }
+# h3 {
+#     text-align: center;
+# }
+# """
+
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
+# model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
+# model = model.eval()
+
+# @spaces.GPU()
+# def stream_chat(
+#     message: str,
+#     history: list,
+#     system_prompt: str,
+#     temperature: float = 0.5,
+#     max_new_tokens: int = 32768,
+#     top_p: float = 1.0,
+#     top_k: int = 50,
+# ):
+#     print(f'message: {message}')
+#     print(f'history: {history}')
+
+#     full_prompt = f"<<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
+#     for prompt, answer in history:
+#         full_prompt += f"[INST]{prompt}[/INST]{answer}"
+#     full_prompt += f"[INST]{message}[/INST]"
+
+#     inputs = tokenizer(full_prompt, truncation=False, return_tensors="pt").to(device)
+#     context_length = inputs.input_ids.shape[-1]
+
+#     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
+
+#     generate_kwargs = dict(
+#         inputs=inputs.input_ids,
+#         max_new_tokens=max_new_tokens,
+#         do_sample=True,
+#         top_p=top_p,
+#         top_k=top_k,
+#         temperature=temperature,
+#         num_beams=1,
+#         streamer=streamer,
+#     )
+
+#     thread = Thread(target=model.generate, kwargs=generate_kwargs)
+#     thread.start()
+
+#     buffer = ""
+#     for new_text in streamer:
+#         buffer += new_text
+#         yield buffer
+
+# chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
+
+# with gr.Blocks(css=CSS, theme="soft") as demo:
+#     gr.HTML(TITLE)
+#     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
+#     gr.ChatInterface(
+#         fn=stream_chat,
+#         chatbot=chatbot,
+#         fill_height=True,
+#         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+#         additional_inputs=[
+#             gr.Textbox(
+#                 value="You are a helpful assistant capable of generating long-form content.",
+#                 label="System Prompt",
+#                 render=False,
+#             ),
+#             gr.Slider(
+#                 minimum=0,
+#                 maximum=1,
+#                 step=0.1,
+#                 value=0.5,
+#                 label="Temperature",
+#                 render=False,
+#             ),
+#             gr.Slider(
+#                 minimum=1024,
+#                 maximum=32768,
+#                 step=1024,
+#                 value=32768,
+#                 label="Max new tokens",
+#                 render=False,
+#             ),
+#             gr.Slider(
+#                 minimum=0.0,
+#                 maximum=1.0,
+#                 step=0.1,
+#                 value=1.0,
+#                 label="Top p",
+#                 render=False,
+#             ),
+#             gr.Slider(
+#                 minimum=1,
+#                 maximum=100,
+#                 step=1,
+#                 value=50,
+#                 label="Top k",
+#                 render=False,
+#             ),
+#         ],
+#         # examples=[
+#         #     ["Write a 5000-word comprehensive guide on machine learning for beginners."],
+#         #     ["Create a detailed 3000-word business plan for a sustainable energy startup."],
+#         #     ["Compose a 2000-word short story set in a futuristic underwater city."],
+#         #     ["Develop a 4000-word research proposal on the potential effects of climate change on global food security."],
+#         # ],
+#         # cache_examples=False,
+#     )
+
+# if __name__ == "__main__":
+#     demo.launch()
+
+
+
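Both the retired text app above and its replacement below stream tokens off the main thread, since model.generate() blocks until completion. A minimal sketch of that TextIteratorStreamer pattern, assuming any small causal LM (the gpt2 checkpoint here is a stand-in, not the Space's model):

    from threading import Thread
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    tok = AutoTokenizer.from_pretrained("gpt2")  # stand-in checkpoint
    lm = AutoModelForCausalLM.from_pretrained("gpt2")
    inputs = tok("Streaming demo:", return_tensors="pt")
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    # generate() runs in a worker thread while the main thread consumes tokens.
    Thread(target=lm.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=40)).start()
    for piece in streamer:
        print(piece, end="", flush=True)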
+###04
+from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer
+from PIL import Image
+import requests
 import torch
 from threading import Thread
+import gradio as gr
+from gradio import FileData
+import time
+import spaces
+ckpt = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+model = MllamaForConditionalGeneration.from_pretrained(ckpt,
+                                                       torch_dtype=torch.bfloat16).to("cuda")
+processor = AutoProcessor.from_pretrained(ckpt)
 
+@spaces.GPU
+def bot_streaming(message, history, max_new_tokens=250):
+    txt = message["text"]
+    ext_buffer = f"{txt}"
+
+    messages = []
+    images = []
+
+    # Rebuild the conversation from Gradio's history: tuple entries are image
+    # uploads, string entries are plain text turns (an image turn is assumed
+    # to be followed by its text turn at history[i+1]).
+    for i, msg in enumerate(history):
+        if isinstance(msg[0], tuple):
+            messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image"}]})
+            messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
+            images.append(Image.open(msg[0][0]).convert("RGB"))
+        elif isinstance(history[i-1], tuple) and isinstance(msg[0], str):
+            # messages are already handled
+            pass
+        elif isinstance(history[i-1][0], str) and isinstance(msg[0], str):  # text-only turn
+            messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
+            messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
+
+    # add current message
+    if len(message["files"]) == 1:
+        if isinstance(message["files"][0], str):  # examples
+            image = Image.open(message["files"][0]).convert("RGB")
+        else:  # regular input
+            image = Image.open(message["files"][0]["path"]).convert("RGB")
+        images.append(image)
+        messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image"}]})
+    else:
+        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
+
+    texts = processor.apply_chat_template(messages, add_generation_prompt=True)
+
+    if images == []:
+        inputs = processor(text=texts, return_tensors="pt").to("cuda")
+    else:
+        inputs = processor(text=texts, images=images, return_tensors="pt").to("cuda")
+    streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
+
+    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
+    generated_text = ""
+
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
     buffer = ""
+
     for new_text in streamer:
         buffer += new_text
+        generated_text_without_prompt = buffer
+        time.sleep(0.01)
         yield buffer
 
+
+demo = gr.ChatInterface(fn=bot_streaming, title="Multimodal Llama", examples=[
+    [{"text": "Which era does this piece belong to? Give details about the era.", "files": ["./examples/rococo.jpg"]}, 200],
+    [{"text": "Where do the droughts happen according to this diagram?", "files": ["./examples/weather_events.png"]}, 250],
+    [{"text": "What happens when you take out white cat from this chain?", "files": ["./examples/ai2d_test.jpg"]}, 250],
+    [{"text": "How long does it take from invoice date to due date? Be short and concise.", "files": ["./examples/invoice.png"]}, 250],
+    [{"text": "Where to find this monument? Can you give me other recommendations around the area?", "files": ["./examples/wat_arun.jpg"]}, 250],
+    ],
+    textbox=gr.MultimodalTextbox(),
+    additional_inputs = [gr.Slider(
+        minimum=10,
+        maximum=500,
+        value=250,
+        step=10,
+        label="Maximum number of new tokens to generate",
+    )
 ],
+    cache_examples=False,
+    description="Try Multimodal Llama by Meta with transformers in this demo. Upload an image, and start chatting about it, or simply try one of the examples below. To learn more about Llama Vision, visit [our blog post](https://huggingface.co/blog/llama32).",
+    stop_btn="Stop Generation",
+    fill_height=True,
+    multimodal=True)
+
+demo.launch(debug=True)
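A minimal sketch of exercising the new checkpoint outside the Gradio UI, using the same Mllama APIs the Space relies on (example.jpg is a placeholder path, and a CUDA device is assumed):

    import torch
    from PIL import Image
    from transformers import MllamaForConditionalGeneration, AutoProcessor

    ckpt = "meta-llama/Llama-3.2-11B-Vision-Instruct"
    model = MllamaForConditionalGeneration.from_pretrained(ckpt, torch_dtype=torch.bfloat16, device_map="auto")
    processor = AutoProcessor.from_pretrained(ckpt)

    image = Image.open("example.jpg").convert("RGB")  # placeholder image
    messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Describe this image."}]}]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=250)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    print(processor.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))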