Spaces:

MJobe
/

document-vqa-v2

Sleeping

App Files Files Community

MJobe committed on Nov 14, 2024

Commit

694da95

verified ·

1 Parent(s): f0e6e2e

Update main.py

Browse files

Files changed (1) hide show

main.py +45 -35

main.py CHANGED Viewed

@@ -406,53 +406,63 @@ def get_sub_classification(statement: str) -> str:
             return sub_label
     return "None"  # Default to "None" if no keywords match
-@app.post("/classify_with_subcategory/", description="Quickly classify text into predefined categories.")
-async def fast_classify_text(statement: str = Form(...)):
     try:
-        # Check for empty or "N/A" statements
         if not statement or statement.strip().lower() == "n/a":
-            return {"classification": "Note not clear", "confidence": 1.0, "sub_classification": "None", "scores": {}}
-        # Determine main classification based on keywords
-        if any(keyword.lower() in statement.lower() for keyword in change_to_quote_keywords):
-            main_classification = "Change to Quote"
-            sub_classification = "None"
-        elif any(keyword.lower() in statement.lower() for keyword in copy_quote_requested_keywords):
-            main_classification = "Copy Quote Requested"
-            # Perform sub-classification for Copy Quote Requested
-            if "msrp" in statement.lower():
-                sub_classification = "MRSP"
-            elif "all pricing" in statement.lower():
-                sub_classification = "All"
-            elif "direct" in statement.lower():
-                sub_classification = "Direct"
-            else:
-                sub_classification = "None"  # No sub-classification when keywords don’t match
         else:
-            # Call the Hugging Face model for cases where keywords don’t match
             loop = asyncio.get_running_loop()
-            result = await loop.run_in_executor(
-                executor,
-                lambda: nlp_sequence_classification(statement, labels, multi_label=False)
             )
-            main_classification = result["labels"][0]
-            main_confidence = result["scores"][0]
-            scores = dict(zip(result["labels"], result["scores"]))
-            sub_classification = "None"  # Set sub-classification to None for non-matching keywords
-            return {
-                "classification": main_classification,
-                "confidence": main_confidence,
-                "sub_classification": sub_classification,
-                "scores": scores
-            }
     except asyncio.TimeoutError:
         return JSONResponse(content="Classification timed out. Try a shorter input or increase timeout.", status_code=504)
     except HTTPException as http_exc:
         return JSONResponse(content=f"HTTP error: {http_exc.detail}", status_code=http_exc.status_code)
     except Exception as e:
         return JSONResponse(content=f"Error in classification pipeline: {str(e)}", status_code=500)
 # Set up CORS middleware

             return sub_label
     return "None"  # Default to "None" if no keywords match
+@app.post("/classify_with_subcategory/", response_model=ClassificationResponse, description="Classify text into main categories with subcategories.")
+async def classify_with_subcategory(statement: str = Form(...)) -> ClassificationResponse:
     try:
+        # Check if the statement is empty or "N/A"
         if not statement or statement.strip().lower() == "n/a":
+            return ClassificationResponse(
+                classification="Notes not clear",
+                sub_classification="None",
+                confidence=1.0,
+                scores={"main": 1.0}
+            )
+        # Keyword-based classification override
+        if check_keywords(statement, change_to_quote_keywords):
+            main_best_label = "Change to quote"
+            main_best_score = 1.0  # High confidence since it's a direct match
+        elif check_keywords(statement, copy_quote_requested_keywords):
+            main_best_label = "Copy quote requested"
+            main_best_score = 1.0
         else:
+            # If no keywords matched, perform the main classification using the model
             loop = asyncio.get_running_loop()
+            main_classification_result = await loop.run_in_executor(
+                None,
+                lambda: nlp_sequence_classification(statement, main_labels, multi_label=False)
             )
+            # Extract the best main classification label and confidence score
+            main_best_label = main_classification_result["labels"][0]
+            main_best_score = main_classification_result["scores"][0]
+        # Perform sub-classification only if the main classification is "Copy quote requested"
+        if main_best_label == "Copy quote requested":
+            best_sub_label = get_sub_classification(statement)
+        else:
+            best_sub_label = "None"
+        # Gather the scores for response
+        scores = {"main": main_best_score}
+        if best_sub_label != "None":
+            scores[best_sub_label] = 1.0  # Assign full confidence to sub-classification matches
+        return ClassificationResponse(
+            classification=main_best_label,
+            sub_classification=best_sub_label,
+            confidence=main_best_score,
+            scores=scores
+        )
     except asyncio.TimeoutError:
+        # Handle timeout errors
         return JSONResponse(content="Classification timed out. Try a shorter input or increase timeout.", status_code=504)
     except HTTPException as http_exc:
+        # Handle HTTP errors
         return JSONResponse(content=f"HTTP error: {http_exc.detail}", status_code=http_exc.status_code)
     except Exception as e:
+        # Handle any other errors
         return JSONResponse(content=f"Error in classification pipeline: {str(e)}", status_code=500)
 # Set up CORS middleware