Spaces:

gabcares
/

sepsis-fastapi

Sleeping

App Files Files Community

gabcares committed on Jul 27, 2024

Commit

07eac76

verified ·

1 Parent(s): 2832c92

Source code sepsis FastAPI

Browse files

- RESTful API
- GraphQL

Files changed (8) hide show

Dockerfile +25 -0
assets/favicon.ico +0 -0
config.py +75 -0
graph_ql.py +151 -0
main.py +9 -0
requirements.txt +198 -0
rest.py +201 -0
utils/pipeline_helper.py +23 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+FROM python:3.11.9-slim
+# Copy the requirements file on its own, before the application source
+COPY requirements.txt .
+# Upgrade pip (the long timeout tolerates slow package downloads)
+RUN pip --timeout=3000 install --no-cache-dir --upgrade pip
+# Install dependencies
+RUN pip --timeout=3000 install --no-cache-dir -r requirements.txt
+# Make the project directory
+RUN mkdir -p /src/api/
+# Set the working directory; uvicorn is started from here
+WORKDIR /src/api
+# Copy the API source into the working directory
+COPY . .
+# Expose the app port (must match the --port passed to uvicorn below)
+EXPOSE 7860
+# Start the application
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

assets/favicon.ico ADDED Viewed

config.py ADDED Viewed

	@@ -0,0 +1,75 @@

+from pathlib import Path
+# ENV when using standalone uvicorn server running FastAPI in api directory
+ENV_PATH = Path('../../env/online.env')
+ONE_DAY_SEC = 24*60*60
+ONE_WEEK_SEC = ONE_DAY_SEC*7
+PIPELINE_FUNCTION_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/pipeline_func/pipeline_functions.joblib"
+RANDOM_FOREST_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/RandomForestClassifier.joblib"
+XGBOOST_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/XGBClassifier.joblib"
+ADABOOST_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/AdaBoostClassifier.joblib"
+CATBOOST_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/CatBoostClassifier.joblib"
+DECISION_TREE_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/DecisionTreeClassifier.joblib"
+KNN_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/KNeighborsClassifier.joblib"
+LGBM_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/LGBMClassifier.joblib"
+LOG_REG_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/LogisticRegression.joblib"
+SVC_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/SVC.joblib"
+ENCODER_URL = "https://raw.githubusercontent.com/D0nG4667/sepsis_prediction_full_stack/main/dev/models/enc/encoder.joblib"
+ALL_MODELS = {
+    "AdaBoostClassifier": ADABOOST_URL,
+    "CatBoostClassifier": CATBOOST_URL,
+    "DecisionTreeClassifier": DECISION_TREE_URL,
+    "KNeighborsClassifier": KNN_URL,
+    "LGBMClassifier": LGBM_URL,
+    "LogisticRegression": LOG_REG_URL,
+    "RandomForestClassifier": RANDOM_FOREST_URL,
+    "SupportVectorClassifier": SVC_URL,
+    "XGBoostClassifier": XGBOOST_URL
+}
+DESCRIPTION = """
+This API identifies ICU patients at risk of developing sepsis using `9 models` of which `Random Forest Classifier` and `XGBoost Classifier` are the best.\n
+The models were trained on [The John Hopkins University datasets at Kaggle](https://www.kaggle.com/datasets/chaunguynnghunh/sepsis?select=README.md).\n
+### Features
+`PRG:` Plasma glucose\n
+`PL:` Blood Work Result-1 (mu U/ml)\n
+`PR:` Blood Pressure (mm Hg)\n
+`SK:` Blood Work Result-2 (mm)\n
+`TS:` Blood Work Result-3 (mu U/ml)\n
+`M11:` Body mass index (weight in kg/(height in m)^2\n
+`BD2:` Blood Work Result-4 (mu U/ml)\n
+`Age:` patients age (years)\n
+`Insurance:` If a patient holds a valid insurance card\n
+### Results
+**Sepsis prediction:** *Positive* if a patient in ICU will develop a sepsis, and *Negative* otherwise\n
+**Sepsis probability:** In percentage\n
+### GraphQL API
+To explore the GraphQL sub-application (built-with strawberry) to this RESTFul API click the link below.\n
+🍓[GraphQL](/graphql)
+### Let's Connect
+👨‍⚕️ `Gabriel Okundaye`\n
+[<img src="https://upload.wikimedia.org/wikipedia/commons/c/ca/LinkedIn_logo_initials.png" alt="LinkedIn" width="20" height="20">  LinkendIn](https://www.linkedin.com/in/dr-gabriel-okundaye)
+[<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" height="20">  GitHub](https://github.com/D0nG4667/sepsis_prediction_full_stack)
+"""

graph_ql.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import strawberry
+from strawberry.asgi import GraphQL
+import pandas as pd
+import joblib
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing._label import LabelEncoder
+import httpx
+from io import BytesIO
+from typing import Tuple, List, Optional, Union
+from enum import Enum
+from config import RANDOM_FOREST_URL, XGBOOST_URL, ENCODER_URL
+import logging
+# API input features
+# Models selectable through the GraphQL API. Each member's value is the
+# download URL of that model's serialized pipeline; the resolver reads it
+# via `model.value`.
+@strawberry.enum
+class ModelChoice(Enum):
+    RandomForestClassifier = RANDOM_FOREST_URL
+    XGBoostClassifier = XGBOOST_URL
+# GraphQL input holding the patient measurements, one list per feature.
+# pipeline_classifier builds a DataFrame from these attributes, so the lists
+# presumably need equal lengths (one entry per patient) — TODO confirm.
+@strawberry.input
+class SepsisFeatures:
+    prg: List[int]  # Plasma glucose
+    pl: List[int]  # Blood Work Result-1 (mu U/ml)
+    pr: List[int]  # Blood Pressure (mm Hg)
+    sk: List[int]  # Blood Work Result-2 (mm)
+    ts: List[int]  # Blood Work Result-3 (mu U/ml)
+    m11: List[float]  # Body mass index
+    bd2: List[float]  # Blood Work Result-4 (mu U/ml)
+    age: List[int]  # Patient's age (years)
+    insurance: List[int]  # Whether the patient holds a valid insurance card
+# GraphQL type grouping three URL strings.
+# NOTE(review): functions in this module annotate their URL parameters with
+# this type, but the visible call sites pass plain URL strings.
+@strawberry.type
+class Url:
+    url: str
+    pipeline_url: str
+    encoder_url: str
+# Prediction payload: one decoded label and one probability per input row
+@strawberry.type
+class ResultData:
+    prediction: List[str]  # labels produced by the encoder's inverse_transform
+    probability: List[float]  # highest class probability per row, as a percentage
+# Response returned when classification succeeds
+@strawberry.type
+class PredictionResponse:
+    execution_msg: str  # human-readable status message
+    execution_code: int  # pipeline_classifier sets 1 on success
+    result: ResultData
+# Response returned when classification fails
+@strawberry.type
+class ErrorResponse:
+    execution_msg: str  # human-readable status message
+    execution_code: int  # pipeline_classifier sets 0 on failure
+    error: Optional[str]  # failure details, when available
+# Configure root logging at import time: only ERROR and above are emitted,
+# prefixed with a timestamp and the level name
+logging.basicConfig(level=logging.ERROR,
+                    format='%(asctime)s - %(levelname)s - %(message)s')
+async def url_to_data(url: Url) -> BytesIO:
+    async with httpx.AsyncClient() as client:
+        response = await client.get(url)
+        response.raise_for_status()  # Ensure we catch any HTTP errors
+        # Convert response content to BytesIO object
+        data = BytesIO(response.content)
+        return data
+# Load the model pipelines and encoder
+async def load_pipeline(pipeline_url: Url, encoder_url: Url) -> Tuple[Pipeline, LabelEncoder]:
+    pipeline, encoder = None, None
+    try:
+        pipeline: Pipeline = joblib.load(await url_to_data(pipeline_url))
+        encoder: LabelEncoder = joblib.load(await url_to_data(encoder_url))
+    except Exception as e:
+        logging.error(
+            "Omg, an error occurred in loading the pipeline resources: %s", e)
+    finally:
+        return pipeline, encoder
+async def pipeline_classifier(pipeline: Pipeline, encoder: LabelEncoder, data: SepsisFeatures) -> Union[ErrorResponse, PredictionResponse]:
+    msg = 'Execution failed'
+    code = 0
+    output = ErrorResponse(**{'execution_msg': msg,
+                              'execution_code': code, 'error': None})
+    try:
+        # Create dataframe
+        df = pd.DataFrame.from_dict(data.__dict__)
+        # Make prediction
+        preds = pipeline.predict(df)
+        preds_int = [int(pred) for pred in preds]
+        predictions = encoder.inverse_transform(preds_int)
+        probabilities_np = pipeline.predict_proba(df)
+        probabilities = [round(float(max(prob)*100), 2)
+                         for prob in probabilities_np]
+        result = ResultData(**{"prediction": predictions,
+                               "probability": probabilities}
+                            )
+        msg = 'Execution was successful'
+        code = 1
+        output = PredictionResponse(
+            **{'execution_msg': msg,
+               'execution_code': code, 'result': result}
+        )
+    except Exception as e:
+        error = f"Omg, pipeline classifier and/or encoder failure. {e}"
+        output = ErrorResponse(**{'execution_msg': msg,
+                                  'execution_code': code, 'error': error})
+    finally:
+        return output
+@strawberry.type
+class Query:
+    @strawberry.field
+    async def predict_sepsis(self, model: ModelChoice, data: SepsisFeatures) -> Union[ErrorResponse, PredictionResponse]:
+        pipeline_url: Url = model.value
+        pipeline, encoder = await load_pipeline(pipeline_url, ENCODER_URL)
+        output = await pipeline_classifier(pipeline, encoder, data)
+        return output
+# Create the GraphQL schema; Query is its only root type
+schema = strawberry.Schema(query=Query)
+# Wrap the schema in an ASGI application (imported by main.py)
+graphql_app = GraphQL(schema)

main.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from fastapi.responses import RedirectResponse
+from graph_ql import graphql_app
+from rest import app
+# Attach the GraphQL application to the FastAPI RESTful application,
+# over both HTTP and WebSocket at the same path
+app.add_route("/graphql", graphql_app)
+app.add_websocket_route("/graphql", graphql_app)

requirements.txt ADDED Viewed

	@@ -0,0 +1,198 @@

+# aiocache==0.12.2
+aiohttp==3.9.5
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.7.0
+anyio==4.2.0
+argon2-cffi==21.3.0
+argon2-cffi-bindings==21.2.0
+asttokens==2.4.1
+async-lru==2.0.4
+attrs==23.1.0
+Babel==2.11.0
+beautifulsoup4==4.12.3
+bleach==4.1.0
+blinker==1.8.2
+Brotli==1.0.9
+cachetools==5.4.0
+catboost==1.2.3
+certifi==2024.7.4
+cffi==1.16.0
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+comm==0.2.2
+contourpy==1.2.1
+cycler==0.12.1
+debugpy==1.8.2
+decorator==5.1.1
+defusedxml==0.7.1
+dnspython==2.6.1
+email_validator==2.2.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+executing==2.0.1
+extra-streamlit-components==0.1.71
+Faker==26.0.0
+fastapi==0.111.0
+fastapi-cache2==0.2.1
+fastapi-cli==0.0.4
+fastjsonschema==2.16.2
+favicon==0.7.0
+filelock==3.15.4
+fonttools==4.53.1
+frozenlist==1.4.1
+fsspec==2024.6.1
+gitdb==4.0.11
+GitPython==3.1.43
+graphql-core==3.2.3
+graphviz==0.20.3
+h11==0.14.0
+htbuilder==0.6.2
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.24.1
+idna==3.7
+imbalanced-learn==0.12.3
+importlib_metadata==8.0.0
+inquirerpy==0.3.4
+# ipykernel==6.29.5
+# ipython==8.26.0
+# ipywidgets==8.1.3
+jedi==0.19.1
+Jinja2==3.1.4
+joblib==1.4.2
+json5==0.9.6
+jsonschema==4.19.2
+jsonschema-specifications==2023.12.1
+# jupyter_client==8.6.2
+# jupyter_core==5.7.2
+# jupyter-events==0.10.0
+# jupyter-lsp==2.2.0
+# jupyter_server==2.14.1
+# jupyter_server_terminals==0.4.4
+# jupyterlab==4.0.11
+# jupyterlab-pygments==0.1.2
+# jupyterlab_server==2.25.1
+# jupyterlab_widgets==3.0.11
+# kaleido==0.1.0.post1
+kiwisolver==1.4.5
+libcst==1.4.0
+lightgbm==4.4.0
+lxml==5.2.2
+Markdown==3.6
+markdown-it-py==3.0.0
+markdownlit==0.0.7
+MarkupSafe==2.1.3
+# matplotlib==3.9.1
+# matplotlib-inline==0.1.7
+mdurl==0.1.2
+mistune==2.0.4
+more-itertools==10.3.0
+multidict==6.0.5
+# nbclient==0.8.0
+# nbconvert==7.10.0
+# nbformat==5.9.2
+nest_asyncio==1.6.0
+notebook_shim==0.2.3
+numpy==1.26.4
+orjson==3.10.6
+overrides==7.4.0
+packaging==24.1
+pandas==2.2.2
+pandocfilters==1.5.0
+parso==0.8.4
+pendulum==3.0.0
+pfzy==0.3.4
+pickleshare==0.7.5
+pillow==10.4.0
+pip==24.0
+platformdirs==4.2.2
+# plotly==5.22.0
+prometheus-client==0.14.1
+prompt_toolkit==3.0.47
+protobuf==5.27.2
+psutil==6.0.0
+pure_eval==0.2.3
+pyarrow==17.0.0
+pycparser==2.21
+pydantic==2.8.2
+pydantic_core==2.20.1
+pydeck==0.9.1
+Pygments==2.18.0
+pymdown-extensions==10.8.1
+pyparsing==3.1.2
+PySocks==1.7.1
+python-dateutil==2.9.0
+python-dotenv==1.0.1
+python-json-logger==2.0.7
+python-multipart==0.0.9
+pytz==2024.1
+# pywin32==306
+# pywinpty==2.0.10
+PyYAML==6.0.1
+pyzmq==26.0.3
+redis==5.0.7
+referencing==0.35.1
+requests==2.32.3
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.1
+rpds-py==0.10.6
+scikit-learn==1.5.0
+scipy==1.14.0
+Send2Trash==1.8.2
+setuptools==69.5.1
+shellingham==1.5.4
+six==1.16.0
+# skops==0.10.0
+smmap==5.0.1
+sniffio==1.3.0
+soupsieve==2.5
+st-annotated-text==4.0.1
+st-theme==1.2.3
+stack-data==0.6.2
+starlette==0.37.2
+strawberry-graphql==0.236.2
+# streamlit==1.36.0
+# streamlit-camera-input-live==0.2.0
+# streamlit-card==1.0.2
+# streamlit-embedcode==0.1.2
+# streamlit-extras==0.4.3
+# streamlit-faker==0.0.3
+# streamlit-image-coordinates==0.1.9
+# streamlit-keyup==0.2.4
+# streamlit-toggle-switch==1.0.2
+# streamlit-vertical-slider==2.5.5
+tabulate==0.9.0
+tenacity==8.5.0
+terminado==0.17.1
+threadpoolctl==3.5.0
+time-machine==2.14.2
+tinycss2==1.2.1
+toml==0.10.2
+toolz==0.12.1
+tornado==6.4.1
+tqdm==4.66.4
+traitlets==5.14.3
+typer==0.12.3
+typing_extensions==4.12.2
+tzdata==2024.1
+ujson==5.10.0
+urllib3==2.2.2
+uvicorn==0.30.1
+validators==0.33.0
+watchdog==4.0.1
+watchfiles==0.22.0
+wcwidth==0.2.13
+webencodings==0.5.1
+webp==0.4.0
+websocket-client==1.8.0
+websockets==12.0
+wheel==0.43.0
+widgetsnbextension==4.0.11
+# win-inet-pton==1.1.0
+xgboost==2.0.3
+yarl==1.9.4
+zipp==3.19.2

rest.py ADDED Viewed

	@@ -0,0 +1,201 @@

+import os
+from dotenv import load_dotenv
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, Query
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi_cache import FastAPICache
+from fastapi_cache.backends.redis import RedisBackend
+from fastapi_cache.coder import PickleCoder
+from fastapi_cache.decorator import cache
+import logging
+from redis import asyncio as aioredis
+from pydantic import BaseModel, Field
+from typing import Tuple, List, Union, Optional
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing._label import LabelEncoder
+import joblib
+import pandas as pd
+import httpx
+from io import BytesIO
+from config import ONE_DAY_SEC, ONE_WEEK_SEC, XGBOOST_URL, RANDOM_FOREST_URL, ENCODER_URL, ENV_PATH, DESCRIPTION, ALL_MODELS
+load_dotenv(ENV_PATH)
+@asynccontextmanager
+async def lifespan(_: FastAPI) -> AsyncIterator[None]:
+    url = os.getenv("REDIS_URL")
+    username = os.getenv("REDIS_USERNAME")
+    password = os.getenv("REDIS_PASSWORD")
+    redis = aioredis.from_url(url=url, username=username,
+                              password=password, encoding="utf8", decode_responses=True)
+    FastAPICache.init(RedisBackend(redis), prefix="fastapi-cache")
+    yield
+# FastAPI application object; `lifespan` initialises the cache backend and
+# DESCRIPTION supplies the text for the generated documentation
+app = FastAPI(
+    title='Sepsis classification',
+    version='1.0.0',
+    description=DESCRIPTION,
+    lifespan=lifespan,
+)
+# Serve the files of the local "assets" directory under /assets
+app.mount("/assets", StaticFiles(directory="assets"), name="assets")
+@app.get('/favicon.ico', include_in_schema=False)
+async def favicon():
+    file_name = "favicon.ico"
+    file_path = os.path.join(app.root_path, "assets", file_name)
+    return FileResponse(path=file_path, headers={"Content-Disposition": "attachment; filename=" + file_name})
+# API input features: one list per measurement. pipeline_classifier builds a
+# DataFrame from these fields, so the lists presumably need equal lengths
+# (one entry per patient) — TODO confirm.
+class SepsisFeatures(BaseModel):
+    prg: List[int] = Field(description="PRG: Plasma glucose")
+    pl: List[int] = Field(description="PL: Blood Work Result-1 (mu U/ml)")
+    pr: List[int] = Field(description="PR: Blood Pressure (mm Hg)")
+    sk: List[int] = Field(description="SK: Blood Work Result-2 (mm)")
+    ts: List[int] = Field(description="TS: Blood Work Result-3 (mu U/ml)")
+    m11: List[float] = Field(
+        description="M11: Body mass index (weight in kg/(height in m)^2")
+    bd2: List[float] = Field(description="BD2: Blood Work Result-4 (mu U/ml)")
+    age: List[int] = Field(description="Age: patients age (years)")
+    insurance: List[int] = Field(
+        description="Insurance: If a patient holds a valid insurance card")
+# Model grouping three URL strings.
+# NOTE(review): functions in this module annotate their URL parameters with
+# this type, but the visible call sites pass plain URL strings.
+class Url(BaseModel):
+    url: str
+    pipeline_url: str
+    encoder_url: str
+# Prediction payload: one decoded label and one probability per input row
+class ResultData(BaseModel):
+    prediction: List[str]  # labels produced by the encoder's inverse_transform
+    probability: List[float]  # highest class probability per row, as a percentage
+# Response body returned when classification succeeds
+class PredictionResponse(BaseModel):
+    execution_msg: str  # human-readable status message
+    execution_code: int  # pipeline_classifier sets 1 on success
+    result: ResultData
+# Response body returned when classification fails
+class ErrorResponse(BaseModel):
+    execution_msg: str  # human-readable status message
+    execution_code: int  # pipeline_classifier sets 0 on failure
+    error: Optional[str]  # failure details; no default, so it must be passed explicitly
+# Configure root logging at import time: only ERROR and above are emitted,
+# prefixed with a timestamp and the level name
+logging.basicConfig(level=logging.ERROR,
+                    format='%(asctime)s - %(levelname)s - %(message)s')
+# Load the model pipelines and encoder
+# Cache for 1 day
+@cache(expire=ONE_DAY_SEC, namespace='pipeline_resource', coder=PickleCoder)
+async def load_pipeline(pipeline_url: Url, encoder_url: Url) -> Tuple[Pipeline, LabelEncoder]:
+    async def url_to_data(url: Url):
+        async with httpx.AsyncClient() as client:
+            response = await client.get(url)
+            response.raise_for_status()  # Ensure we catch any HTTP errors
+            # Convert response content to BytesIO object
+            data = BytesIO(response.content)
+            return data
+    pipeline, encoder = None, None
+    try:
+        pipeline: Pipeline = joblib.load(await url_to_data(pipeline_url))
+        encoder: LabelEncoder = joblib.load(await url_to_data(encoder_url))
+    except Exception as e:
+        logging.error(
+            "Omg, an error occurred in loading the pipeline resources: %s", e)
+    finally:
+        return pipeline, encoder
+# Endpoints
+# Status endpoint: check if the API is online. The response is a constant,
+# so it is cached for the longest configured period.
+@app.get('/')
+@cache(expire=ONE_WEEK_SEC, namespace='status_check')  # Cache for 1 week
+async def status_check():
+    return {"Status": "API is online..."}
+@cache(expire=ONE_DAY_SEC, namespace='pipeline_classifier')  # Cache for 1 day
+async def pipeline_classifier(pipeline: Pipeline, encoder: LabelEncoder, data: SepsisFeatures) -> Union[ErrorResponse, PredictionResponse]:
+    msg = 'Execution failed'
+    code = 0
+    output = ErrorResponse(**{'execution_msg': msg,
+                              'execution_code': code, 'error': None})
+    try:
+        # Create dataframe
+        df = pd.DataFrame.from_dict(data.__dict__)
+        # Make prediction
+        preds = pipeline.predict(df)
+        preds_int = [int(pred) for pred in preds]
+        predictions = encoder.inverse_transform(preds_int)
+        probabilities_np = pipeline.predict_proba(df)
+        probabilities = [round(float(max(prob)*100), 2)
+                         for prob in probabilities_np]
+        result = ResultData(**{"prediction": predictions,
+                            "probability": probabilities})
+        msg = 'Execution was successful'
+        code = 1
+        output = PredictionResponse(
+            **{'execution_msg': msg,
+               'execution_code': code, 'result': result}
+        )
+    except Exception as e:
+        error = f"Omg, pipeline classifier and/or encoder failure. {e}"
+        output = ErrorResponse(**{'execution_msg': msg,
+                                  'execution_code': code, 'error': error})
+    finally:
+        return output
+# Random forest endpoint: classify sepsis with random forest
+@app.post('/api/v1/random_forest/prediction', tags=['Random Forest'])
+async def random_forest_classifier(data: SepsisFeatures) -> Union[ErrorResponse, PredictionResponse]:
+    random_forest_pipeline, encoder = await load_pipeline(RANDOM_FOREST_URL, ENCODER_URL)
+    output = await pipeline_classifier(random_forest_pipeline, encoder, data)
+    return output
+# Xgboost endpoint: classify sepsis with xgboost
+@app.post('/api/v1/xgboost/prediction', tags=['XGBoost'])
+async def xgboost_classifier(data: SepsisFeatures) -> Union[ErrorResponse, PredictionResponse]:
+    xgboost_pipeline, encoder = await load_pipeline(XGBOOST_URL, ENCODER_URL)
+    output = await pipeline_classifier(xgboost_pipeline, encoder, data)
+    return output
+@app.post('/api/v1/prediction', tags=['All Models'])
+async def query_sepsis_prediction(data: SepsisFeatures, model: str = Query('RandomForestClassifier', enum=list(ALL_MODELS.keys()))) -> Union[ErrorResponse, PredictionResponse]:
+    pipeline_url: Url = ALL_MODELS[model]
+    pipeline, encoder = await load_pipeline(pipeline_url, ENCODER_URL)
+    output = await pipeline_classifier(pipeline, encoder, data)
+    return output

utils/pipeline_helper.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import pandas as pd
+from typing import Union
+# Column-name groups for the sepsis dataset. They are not referenced in the
+# lines shown here; presumably the preprocessing pipeline consumes them —
+# TODO confirm.
+numerical_features = ['prg', 'pl', 'pr', 'sk', 'ts', 'm11', 'bd2', 'age']
+categorical_features = ['insurance']
+# 'age_group' is the column that feature_creation adds
+new_features = ['age_group']
+def as_category(data: Union[pd.DataFrame | pd.Series]) -> Union[pd.DataFrame | pd.Series]:
+    return data.astype('category')
+def feature_creation(df: pd.DataFrame) -> pd.DataFrame:
+    df_copy = df.copy()
+    if 'age_group' not in df_copy.columns and 'age' in df_copy.columns:
+        df_copy['age_group'] = df_copy['age'].apply(
+            lambda x: '60 and above' if x >= 60 else 'below 60')
+        df_copy['age_group'] = as_category(df_copy['age_group'])
+        df_copy.drop(columns='age', inplace=True)
+    return df_copy