flexibility for cpu or cuda ep
handler.py CHANGED (+13 -2)
@@ -11,15 +11,26 @@ if torch.backends.cudnn.is_available():
 
 class EndpointHandler():
     def __init__(self, path=""):
+
+        on_cuda = torch.cuda.is_available()
         # load the optimized model
+
+        provider = "CPUExecutionProvider"
+        if on_cuda:
+            provider = "CUDAExecutionProvider"
+
         model = ORTModelForSequenceClassification.from_pretrained(
             path,
             export=False,
-            provider=
+            provider=provider,
         )
         tokenizer = AutoTokenizer.from_pretrained(path)
+
+        device = -1
+        if on_cuda:
+            device = 0
         # create inference pipeline
-        self.pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)
 
 
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
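For context, the handler after this change looks roughly like the sketch below. Only the diff hunk above is authoritative: the imports and the `__call__` body are not shown in the diff, so they are assumptions here (a typical Inference Endpoints custom handler reads the text from `data["inputs"]`); the provider and device selection mirrors the added lines.

```python
from typing import Any, Dict, List

import torch
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer, pipeline


class EndpointHandler():
    def __init__(self, path=""):
        # Detect a GPU once and derive both the ONNX Runtime execution
        # provider and the pipeline device index from it.
        on_cuda = torch.cuda.is_available()

        provider = "CPUExecutionProvider"
        if on_cuda:
            provider = "CUDAExecutionProvider"

        # load the optimized model (already exported to ONNX, hence export=False)
        model = ORTModelForSequenceClassification.from_pretrained(
            path,
            export=False,
            provider=provider,
        )
        tokenizer = AutoTokenizer.from_pretrained(path)

        # transformers pipelines take -1 for CPU and 0 for the first GPU
        device = -1
        if on_cuda:
            device = 0

        # create inference pipeline
        self.pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)

    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
        # Body not shown in the diff; a minimal pass-through for illustration,
        # assuming the endpoint payload carries the text under "inputs".
        inputs = data["inputs"] if isinstance(data, dict) else data
        return self.pipeline(inputs)
```

With the provider and device both derived from `torch.cuda.is_available()`, the same handler runs unchanged on CPU-only and GPU endpoints.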