Update app.py
app.py CHANGED
@@ -1,91 +1,28 @@
-import
-from transformers import AutoProcessor, AutoModelForCausalLM
-import spaces
-import torch.nn.functional as F
-import copy
+import os
 import torch
-
-import
-import
+import torch.nn as nn
+import pandas as pd
+import torch.nn.functional as F
+from lavis.models.protein_models.protein_function_opt import Blip2ProteinMistral
+from lavis.models.base_model import FAPMConfig
+import spaces
+import gradio as gr
+from esm_scripts.extract import run_demo
 from esm import pretrained, FastaBatchedDataset
 
-
-def get_model(model_id):
-    a, b = pretrained.load_model_and_alphabet(model_id.split('/')[1])
-    a.to('cuda').eval()
-    return (a, b)
-
-models = {
-    'facebook/esm2_t36_3B_UR50D': get_model('facebook/esm2_t36_3B_UR50D'),
-}
+# from transformers import EsmTokenizer, EsmModel
 
 
-
-
-
-
-          'lime','indigo','violet','aqua','magenta','coral','gold','tan','skyblue']
+# Load the model
+model = Blip2ProteinMistral(config=FAPMConfig(), esm_size='3b')
+model.load_checkpoint("model/checkpoint_mf2.pth")
+model.to('cuda')
 
 
 @spaces.GPU
-def run_example(protein, model_id='facebook/esm2_t36_3B_UR50D'):
-    model_esm, alphabet = models[model_id]
-    protein_name = 'protein_name'
-    protein_seq = protein
-    include = 'per_tok'
-    repr_layers = [36]
-    truncation_seq_length = 1024
-    toks_per_batch = 4096
-    print("start")
-    dataset = FastaBatchedDataset([protein_name], [protein_seq])
-    print("dataset prepared")
-    batches = dataset.get_batch_indices(toks_per_batch, extra_toks_per_seq=1)
-    print("batches prepared")
-
-    data_loader = torch.utils.data.DataLoader(
-        dataset, collate_fn=alphabet.get_batch_converter(truncation_seq_length), batch_sampler=batches
-    )
-    print(f"Read sequences")
-    return_contacts = "contacts" in include
-
-    assert all(-(model_esm.num_layers + 1) <= i <= model_esm.num_layers for i in repr_layers)
-    repr_layers = [(i + model_esm.num_layers + 1) % (model_esm.num_layers + 1) for i in repr_layers]
+def generate_caption(protein, prompt):
 
-
-    for batch_idx, (labels, strs, toks) in enumerate(data_loader):
-        print(
-            f"Processing {batch_idx + 1} of {len(batches)} batches ({toks.size(0)} sequences)"
-        )
-        if torch.cuda.is_available():
-            toks = toks.to(device="cuda", non_blocking=True)
-        out = model_esm(toks, repr_layers=repr_layers, return_contacts=return_contacts)
-        representations = {
-            layer: t.to(device="cpu") for layer, t in out["representations"].items()
-        }
-        if return_contacts:
-            contacts = out["contacts"].to(device="cpu")
-        for i, label in enumerate(labels):
-            result = {"label": label}
-            truncate_len = min(truncation_seq_length, len(strs[i]))
-            # Call clone on tensors to ensure tensors are not views into a larger representation
-            # See https://github.com/pytorch/pytorch/issues/1995
-            if "per_tok" in include:
-                result["representations"] = {
-                    layer: t[i, 1: truncate_len + 1].clone()
-                    for layer, t in representations.items()
-                }
-            if "mean" in include:
-                result["mean_representations"] = {
-                    layer: t[i, 1: truncate_len + 1].mean(0).clone()
-                    for layer, t in representations.items()
-                }
-            if "bos" in include:
-                result["bos_representations"] = {
-                    layer: t[i, 0].clone() for layer, t in representations.items()
-                }
-            if return_contacts:
-                result["contacts"] = contacts[i, : truncate_len, : truncate_len].clone()
-    esm_emb = result['representations'][36]
+    esm_emb = torch.load('data/emb_esm2_3b/P18281.pt')['representations'][36]
     '''
     inputs = tokenizer([protein], return_tensors="pt", padding=True, truncation=True).to('cuda')
     with torch.no_grad():
@@ -93,40 +30,36 @@ def run_example(protein, model_id='facebook/esm2_t36_3B_UR50D'):
     esm_emb = outputs.last_hidden_state.detach()[0]
     '''
     print("esm embedding generated")
-    esm_emb = F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    button.click(run_example, [input_protein, model_selector], pt)
-
-demo.launch(debug=True)
+    esm_emb = F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t().to('cuda')
+    print("esm embedding processed")
+    samples = {'name': ['protein_name'],
+               'image': torch.unsqueeze(esm_emb, dim=0),
+               'text_input': ['none'],
+               'prompt': [prompt]}
+
+    # Generate the output
+    prediction = model.generate(samples, length_penalty=0., num_beams=15, num_captions=10, temperature=1.,
+                                repetition_penalty=1.0)
+
+    return prediction
+    # return "test"
+
+
+# Define the FAPM interface
+description = """Quick demonstration of the FAPM model for protein function prediction. Upload a protein sequence to generate a function description. Modify the prompt to provide taxonomy information.
+
+The model used in this app is available at [Hugging Face Model Hub](https://huggingface.co/wenkai/FAPM) and the source code can be found on [GitHub](https://github.com/xiangwenkai/FAPM/tree/main)."""
+
+iface = gr.Interface(
+    fn=generate_caption,
+    inputs=[gr.Textbox(type="text", label="Upload sequence"), gr.Textbox(type="text", label="Prompt")],
+    outputs=gr.Textbox(label="Generated description"),
+    description=description
+)
+
+# Launch the interface
+iface.launch()
+
+
+
+
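After this commit, generate_caption loads a precomputed layer-36 ESM-2 embedding for one fixed protein (data/emb_esm2_3b/P18281.pt) rather than embedding the submitted sequence. Below is a minimal sketch of how that embedding could be computed on the fly, built only from the esm calls that appear in the removed run_example (pretrained.load_model_and_alphabet, FastaBatchedDataset, get_batch_indices, get_batch_converter); the helper name embed_sequence is hypothetical, not part of the commit, and it stays on CPU for simplicity where the old code moved the model to CUDA.

# Sketch only: recompute a layer-36 ESM-2 embedding for one sequence,
# mirroring the extraction loop removed in this commit.
import torch
from esm import pretrained, FastaBatchedDataset

def embed_sequence(protein_seq, truncation_seq_length=1024, toks_per_batch=4096):
    # Same checkpoint the old get_model loaded ('facebook/esm2_t36_3B_UR50D').
    model_esm, alphabet = pretrained.load_model_and_alphabet('esm2_t36_3B_UR50D')
    model_esm.eval()
    dataset = FastaBatchedDataset(['protein_name'], [protein_seq])
    batches = dataset.get_batch_indices(toks_per_batch, extra_toks_per_seq=1)
    data_loader = torch.utils.data.DataLoader(
        dataset,
        collate_fn=alphabet.get_batch_converter(truncation_seq_length),
        batch_sampler=batches,
    )
    with torch.no_grad():
        for labels, strs, toks in data_loader:
            out = model_esm(toks, repr_layers=[36])
            truncate_len = min(truncation_seq_length, len(strs[0]))
            # Drop the BOS position and anything past the truncation length,
            # as the removed per_tok branch did.
            return out["representations"][36][0, 1:truncate_len + 1].clone()

The result would then be padded to the fixed 1024-token width exactly as the app does: esm_emb = F.pad(esm_emb.t(), (0, 1024 - len(esm_emb))).t().to('cuda').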