Spaces:

LampOfSocrates
/

hf_gradio_plodcw_group27

Sleeping

Lamp Socrates

Latest commit

4ad296d 9 months ago

3.9 kB

	import uvicorn
	import threading
	from typing import Optional
	from transformers import pipeline
	from transformers import AutoTokenizer, AutoModelForTokenClassification
	import pandas as pd
	#import datasets
	from pprint import pprint

	import gradio as gr
	from transformers import pipeline
	from fastapi import FastAPI
	from pydantic import BaseModel
	from typing import List, Dict

	# FastAPI application object; the @app.get / @app.post decorators further
	# down register their handlers on it.
	app = FastAPI()
	# Process-wide slot for the loaded NER pipeline. Stays None until
	# get_cached_model() fills it on first use.
	model_cache: Optional[object] = None

	def load_model(model_name: str = "LampOfSocrates/bert-cased-plodcw-sourav"):
	    """Build the "ner" pipeline for the fine-tuned token-classification model.

	    Args:
	        model_name: Identifier handed to both ``from_pretrained`` calls, so
	            the tokenizer and the model always come from the same
	            checkpoint. Defaults to the checkpoint this app was written for.

	    Returns:
	        The object returned by ``pipeline("ner", ...)``; the rest of this
	        file calls it with a string to obtain the detected entities.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    token_classifier = AutoModelForTokenClassification.from_pretrained(model_name)

	    # Show which labels the checkpoint can emit (startup diagnostics only).
	    id2label = token_classifier.config.id2label
	    print(f"Can recognise the following labels {id2label}")

	    # Distinct names for the raw network and the pipeline wrapping it, so
	    # `model` is not silently rebound to a different kind of object.
	    return pipeline("ner", model=token_classifier, tokenizer=tokenizer)

	def load_plod_cw_dataset():
	    """Fetch the ``surrey-nlp/PLOD-CW`` dataset via the `datasets` library."""
	    # Imported inside the function, so `datasets` is only required when
	    # this helper is actually called.
	    from datasets import load_dataset

	    return load_dataset("surrey-nlp/PLOD-CW")

	def get_cached_model():
	    """Return the shared NER pipeline, loading it on the first call only."""
	    global model_cache
	    if model_cache is not None:
	        return model_cache

	    # First use: build the pipeline once and keep it for later callers.
	    model_cache = load_model()
	    return model_cache

	# Load the pipeline eagerly at import time so the cache is already filled
	# before the server threads at the bottom of the file are started.
	# NOTE(review): the handlers visible in this file call get_cached_model()
	# themselves and bind their own local `model`; this module-level name does
	# not appear to be read anywhere here.
	model = get_cached_model()



	class Entity(BaseModel):
	    # One detected span. Instances are built in get_entities() by unpacking
	    # a single dict from the NER pipeline output: ``Entity(**entity)``.
	    # '#' comments are used instead of a docstring so the model's published
	    # schema is left exactly as it was.
	    entity: str   # predicted label; ner_demo() reads the same key to pick a colour
	    score: float  # presumably the model's confidence for the label - confirm
	    start: int    # index into the input text where the span begins
	    end: int      # slice end for the span (text[start:end] is the matched text)
	    word: str     # presumably the token text as reported by the pipeline - confirm

	class NERResponse(BaseModel):
	    # Response body of POST /ner (declared there as response_model).
	    entities: List[Entity]  # one item per dict returned by the pipeline; may be empty

	class NERRequest(BaseModel):
	    # Request body of POST /ner.
	    text: str  # raw text that is passed to the NER pipeline as-is

	@app.get("/hello")
	def read_root():
	    # Fixed greeting for GET /hello. Documented with a comment rather than a
	    # docstring so the generated API description stays unchanged.
	    greeting = {"message": "Hello, World!"}
	    return greeting


	@app.post("/ner", response_model=NERResponse)
	def get_entities(request: NERRequest):
	    # POST /ner: run the cached NER pipeline over ``request.text`` and return
	    # every hit as an Entity inside an NERResponse. The list is empty when
	    # nothing is recognised.
	    # (Comment instead of docstring so the generated API description stays
	    # unchanged.)
	    print(request)
	    ner_pipeline = get_cached_model()
	    entities = ner_pipeline(request.text)

	    # The debug prints only look at the first hit. They are guarded because
	    # text with no recognised entity yields an empty list, and indexing [0]
	    # would raise IndexError and fail an otherwise valid request.
	    if entities:
	        print(entities[0].keys())

	    # Convert the pipeline dicts into the response schema.
	    response_entities = [Entity(**entity) for entity in entities]
	    if response_entities:
	        print(response_entities[0])

	    return NERResponse(entities=response_entities)

	def get_color_for_label(label: str) -> str:
	    """Pick the CSS colour name used to highlight an entity label.

	    Labels without an entry in the table fall back to ``"black"``.
	    """
	    palette = {
	        "I-LF": "red",
	        "B-AC": "blue",
	        "LOC": "green",
	        # Add more labels and colors as needed
	    }
	    try:
	        return palette[label]
	    except KeyError:
	        # Unknown label: use the default text colour.
	        return "black"


	# Define the Gradio interface function
	def ner_demo(text):
	    """Return `text` as HTML with every detected entity colour-highlighted.

	    Args:
	        text: Raw text from the Gradio textbox.

	    Returns:
	        An HTML string in which each entity span is wrapped in a bold,
	        coloured ``<span>`` (colour from ``get_color_for_label``) and all
	        text taken from the input is HTML-escaped.
	    """
	    import html  # local import: keeps this fix self-contained

	    ner_pipeline = get_cached_model()
	    entities = ner_pipeline(text)

	    # Build the output left to right from slices of the ORIGINAL text.
	    # Splicing markup into a growing copy at original offsets misplaces
	    # every span after the first, because each insertion shifts what follows.
	    pieces = []
	    cursor = 0
	    for entity in sorted(entities, key=lambda item: item["start"]):
	        start, end = entity["start"], entity["end"]
	        if start < cursor:
	            # Overlaps a span that was already emitted; skip it so no part
	            # of the text is duplicated in the output.
	            continue
	        color = get_color_for_label(entity["entity"])
	        # Escape user-supplied text: the result is rendered as HTML.
	        pieces.append(html.escape(text[cursor:start]))
	        entity_text = html.escape(text[start:end])
	        pieces.append(
	            f'<span style="color: {color}; font-weight: bold;">{entity_text}</span>'
	        )
	        cursor = end
	    pieces.append(html.escape(text[cursor:]))

	    return "".join(pieces)

	# Blurb placed at the start of the interface description.
	PROJECT_INTRO = "This is a HF Spaces hosted Gradio App built by NLP Group 27 . The model has been trained on surrey-nlp/PLOD-CW dataset"
	# Gradio UI: a 10-line textbox feeds ner_demo(), whose return value is
	# rendered as HTML.
	demo = gr.Interface(
	    title="Named Entity Recognition on PLOD-CW ",
	    description=f"{PROJECT_INTRO}\n\nEnter text to extract named entities using a NER model.",
	    fn=ner_demo,
	    inputs=gr.Textbox(placeholder="Enter text here...", lines=10),
	    outputs="html",
	)

	# Function to run FastAPI
	def run_fastapi(host: str = "0.0.0.0", port: int = 8000):
	    """Serve the FastAPI `app` with uvicorn.

	    Args:
	        host: Interface to bind; the default listens on all interfaces.
	        port: TCP port to listen on.

	    The defaults reproduce the previously hard-coded values, so using this
	    function as a ``threading.Thread`` target with no arguments behaves as
	    before.
	    """
	    uvicorn.run(app, host=host, port=port)

	# Function to run Gradio
	def run_gradio(server_name: str = "0.0.0.0", server_port: int = 7860):
	    """Launch the Gradio interface `demo`.

	    Args:
	        server_name: Interface to bind; the default listens on all interfaces.
	        server_port: TCP port for the Gradio server.

	    The defaults reproduce the previously hard-coded values, so using this
	    function as a ``threading.Thread`` target with no arguments behaves as
	    before.
	    """
	    demo.launch(server_name=server_name, server_port=server_port)

	# Start the FastAPI server and the Gradio UI side by side, each in its own
	# thread. These statements sit at module level with no __main__ guard, so
	# they run whenever this file is executed or imported.
	threading.Thread(target=run_fastapi).start()
	threading.Thread(target=run_gradio).start()