import uvicorn
import threading
from typing import Optional, List

from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification
import pandas as pd
#import datasets
from pprint import pprint
import gradio as gr
from fastapi import FastAPI
from pydantic import BaseModel

# Define the FastAPI app
app = FastAPI()

model_cache: Optional[object] = None


def load_model():
    tokenizer = AutoTokenizer.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
    model = AutoModelForTokenClassification.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
    # Print the label mapping the model was trained with
    id2label = model.config.id2label
    print(f"Can recognise the following labels: {id2label}")

    # Wrap the model and tokenizer in a Hugging Face NER pipeline
    #ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
    ner_model = pipeline("ner", model=model, tokenizer=tokenizer)
    return ner_model


def load_plod_cw_dataset():
    from datasets import load_dataset
    dataset = load_dataset("surrey-nlp/PLOD-CW")
    return dataset


def get_cached_model():
    global model_cache
    if model_cache is None:
        model_cache = load_model()
    return model_cache


# Cache the model when the server starts
model = get_cached_model()


class Entity(BaseModel):
    entity: str
    score: float
    start: int
    end: int
    word: str


class NERResponse(BaseModel):
    entities: List[Entity]


class NERRequest(BaseModel):
    text: str


@app.get("/hello")
def read_root():
    return {"message": "Hello, World!"}


@app.post("/ner", response_model=NERResponse)
def get_entities(request: NERRequest):
    print(request)
    model = get_cached_model()
    # Use the NER model to detect entities
    entities = model(request.text)
    if entities:
        print(entities[0].keys())
    # Convert the raw pipeline output into the response model
    response_entities = [Entity(**entity) for entity in entities]
    if response_entities:
        print(response_entities[0])
    return NERResponse(entities=response_entities)


def get_color_for_label(label: str) -> str:
    # Map each label to a display colour
    color_mapping = {
        "I-LF": "red",
        "B-AC": "blue",
        "LOC": "green",
        # Add more labels and colors as needed
    }
    return color_mapping.get(label, "black")  # Default to black if label not found


# Define the Gradio interface function
def ner_demo(text):
    model = get_cached_model()
    entities = model(text)
    #return {"entities": entities}

    # Colour-code the entities. Process them from the end of the string so that
    # earlier character offsets stay valid after each insertion.
    color_coded_text = text
    for entity in sorted(entities, key=lambda e: e["start"], reverse=True):
        start, end, label = entity["start"], entity["end"], entity["entity"]
        color = get_color_for_label(label)
        entity_text = text[start:end]
        colored_entity = f'<span style="color: {color}">{entity_text}</span>'
        color_coded_text = color_coded_text[:start] + colored_entity + color_coded_text[end:]
    return color_coded_text


PROJECT_INTRO = (
    "This is an HF Spaces hosted Gradio app built by NLP Group 27. "
    "The model has been trained on the surrey-nlp/PLOD-CW dataset."
)

# Create the Gradio interface
demo = gr.Interface(
    fn=ner_demo,
    inputs=gr.Textbox(lines=10, placeholder="Enter text here..."),
    outputs="html",
    #outputs=gr.JSON(),
    title="Named Entity Recognition on PLOD-CW",
    description=f"{PROJECT_INTRO}\n\nEnter text to extract named entities using an NER model.",
)


# Function to run FastAPI
def run_fastapi():
    uvicorn.run(app, host="0.0.0.0", port=8000)


# Function to run Gradio
def run_gradio():
    demo.launch(server_name="0.0.0.0", server_port=7860)


# Run both servers in separate threads
threading.Thread(target=run_fastapi).start()
threading.Thread(target=run_gradio).start()
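
# Example client call (a minimal sketch, not part of the app itself): once both
# servers are up, the /ner endpoint can be exercised with the `requests` package
# (assumed to be installed separately); the URL mirrors the uvicorn.run() call above.
#
#   import requests
#   payload = {"text": "The receiver operating characteristic (ROC) curve was plotted."}
#   resp = requests.post("http://localhost:8000/ner", json=payload)
#   print(resp.json()["entities"])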