Skip to content

Commit

Permalink
Merge pull request #147 from CDU-data-science-team/138_final_API_updates
Browse files Browse the repository at this point in the history
138 final api updates
  • Loading branch information
yiwen-h authored Oct 6, 2023
2 parents 7eefe7c + 562312d commit 535665f
Show file tree
Hide file tree
Showing 31 changed files with 1,489 additions and 3,365 deletions.
2 changes: 1 addition & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
omit = tests\*
*\__init__.py
*\params.py
api\test_api.py
api\test_api_locally.py
setup.py
test_rules.py

Expand Down
6 changes: 2 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
*__pycache__*
notebooks/*
my.conf
test_results_label/*
test_results_criticality/*
site/
dist/
.vscode/
Expand All @@ -12,6 +10,6 @@ test_multilabel/*
.env
api/rsconnect-python/*
.coverage
api/bert*
*_labels.xlsx
current_best_model/final_bert/bert
current_best_model/final_bert/bert_multilabel
current_best_model/sentiment/bert_sentiment
14 changes: 9 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
FROM python:3.10.12-slim-bookworm
FROM python:3.10.13-slim
VOLUME /data

COPY docker-requirements.txt requirements.txt
COPY pxtextmining /pxtextmining
COPY pyproject.toml /pyproject.toml
COPY docker_README.md /README.md
RUN pip install --upgrade pip setuptools \
&& pip install -r requirements.txt \
&& pip install . \
&& rm -rf /root/.cache

COPY api/bert_sentiment bert_sentiment
COPY current_best_model/sentiment/bert_sentiment bert_sentiment
COPY current_best_model/final_bert/bert_multilabel bert_multilabel
COPY current_best_model/final_svc/final_svc.sav /final_svc.sav
COPY current_best_model/final_xgb/final_xgb.sav /final_xgb.sav
COPY --chmod=755 docker_run.py docker_run.py

LABEL org.opencontainers.image.source=https://github.com/cdu-data-science-team/pxtextmining
Expand Down
233 changes: 24 additions & 209 deletions api/api.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,15 @@
import os
import pickle
from typing import List, Union
from typing import List

import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel, validator
from tensorflow.keras.saving import load_model

from api import schemas
from pxtextmining.factories.factory_predict_unlabelled_text import (
predict_multilabel_sklearn,
predict_sentiment_bert,
)

minor_cats_v5 = [
"Gratitude/ good experience",
"Negative experience",
"Not assigned",
"Organisation & efficiency",
"Funding & use of financial resources",
"Non-specific praise for staff",
"Non-specific dissatisfaction with staff",
"Staff manner & personal attributes",
"Number & deployment of staff",
"Staff responsiveness",
"Staff continuity",
"Competence & training",
"Unspecified communication",
"Staff listening, understanding & involving patients",
"Information directly from staff during care",
"Information provision & guidance",
"Being kept informed, clarity & consistency of information",
"Service involvement with family/ carers",
"Patient contact with family/ carers",
"Contacting services",
"Appointment arrangements",
"Appointment method",
"Timeliness of care",
"Pain management",
"Diagnosis & triage",
"Referals & continuity of care",
"Length of stay/ duration of care",
"Discharge",
"Care plans",
"Patient records",
"Links with non-NHS organisations",
"Cleanliness, tidiness & infection control",
"Safety & security",
"Provision of medical equipment",
"Service location",
"Transport to/ from services",
"Parking",
"Electronic entertainment",
"Feeling safe",
"Patient appearance & grooming",
"Mental Health Act",
"Equality, Diversity & Inclusion",
"Admission",
"Collecting patients feedback",
"Labelling not possible",
"Environment & Facilities",
"Supplying & understanding medication",
"Activities & access to fresh air",
"Food & drink provision & facilities",
"Sensory experience",
"Impact of treatment/ care",
]
from pxtextmining.params import minor_cats

description = """
This API is for classifying patient experience qualitative data,
Expand All @@ -77,101 +22,17 @@
"name": "multilabel",
"description": "Generate multilabel predictions for given text.",
},
{
"name": "sentiment",
"description": "Generate predicted sentiment for given text.",
},
]


async def load_sentiment_model():
model_path = "bert_sentiment"
if not os.path.exists(model_path):
model_path = os.path.join("api", model_path)
loaded_model = load_model(model_path)
return loaded_model


async def get_sentiment_predictions(
text_to_predict, loaded_model, preprocess_text, additional_features
):
predictions = predict_sentiment_bert(
text_to_predict,
loaded_model,
preprocess_text=preprocess_text,
additional_features=additional_features,
)
return predictions


class Test(BaseModel):
test: str

class Config:
schema_extra = {"example": {"test": "Hello"}}


class ItemIn(BaseModel):
comment_id: str
comment_text: str
question_type: str

class Config:
schema_extra = {
"example": {
"comment_id": "01",
"comment_text": "Nurses were friendly. Parking was awful.",
"question_type": "nonspecific",
}
}

@validator("question_type")
def question_type_validation(cls, v):
if v not in ["what_good", "could_improve", "nonspecific"]:
raise ValueError(
"question_type must be one of what_good, could_improve, or nonspecific"
)
return v


class MultilabelOut(BaseModel):
comment_id: str
comment_text: str
labels: list

class Config:
schema_extra = {
"example": {
"comment_id": "01",
"comment_text": "Nurses were friendly. Parking was awful.",
"labels": ["Staff manner & personal attributes", "Parking"],
}
}


class SentimentOut(BaseModel):
comment_id: str
comment_text: str
sentiment: Union[int, str]

class Config:
schema_extra = {
"example": {
"comment_id": "01",
"comment_text": "Nurses were friendly. Parking was awful.",
"sentiment": 3,
}
}


app = FastAPI(
title="pxtextmining API",
description=description,
version="0.0.1",
version="1.0.0",
contact={
"name": "Patient Experience Qualitative Data Categorisation",
"url": "https://cdu-data-science-team.github.io/PatientExperience-QDC/",
"email": "CDUDataScience@nottshc.nhs.uk",
"email": "chris.beeley1@nhs.net",
},
license_info={
"name": "MIT License",
Expand All @@ -181,99 +42,53 @@ class Config:
)


@app.get("/", response_model=Test, tags=["index"])
@app.get("/", response_model=schemas.Test, tags=["index"])
def index():
return {"test": "Hello"}


@app.post(
"/predict_multilabel", response_model=List[MultilabelOut], tags=["multilabel"]
"/predict_multilabel",
response_model=List[schemas.MultilabelOut],
tags=["multilabel"],
)
async def predict_multilabel(items: List[ItemIn]):
"""Accepts comment ids, comment text and question type as JSON in a POST request. Makes predictions using trained SVC model.
async def predict_multilabel(items: List[schemas.ItemIn]):
"""Accepts comment ids and comment text as JSON in a POST request. Makes predictions using trained SVC model.
Args:
items (List[ItemIn]): JSON list of dictionaries with the following compulsory keys:
- `comment_id` (str)
- `comment_text` (str)
- `question_type` (str)
The 'question_type' must be one of three values: 'nonspecific', 'what_good', and 'could_improve'.
For example, `[{'comment_id': '1', 'comment_text': 'Thank you', 'question_type': 'what_good'},
{'comment_id': '2', 'comment_text': 'Food was cold', 'question_type': 'could_improve'}]`
Returns:
(dict): Keys are: `comment_id`, `comment_text`, and predicted `labels`.
(list[dict]): One record per submitted comment. Keys are: `comment_id` and predicted `labels`.
"""

# Process received data
df = pd.DataFrame([i.dict() for i in items], dtype=str)
df_newindex = df.set_index("comment_id")
if df_newindex.index.duplicated().sum() != 0:
raise ValueError("comment_id must all be unique values")
df_newindex.index.rename("Comment ID", inplace=True)
text_to_predict = df_newindex[["comment_text", "question_type"]]
text_to_predict = text_to_predict.rename(
columns={"comment_text": "FFT answer", "question_type": "FFT_q_standardised"}
df_for_preds = df.copy().rename(
columns={"comment_id": "Comment ID", "comment_text": "FFT answer"}
)
df_for_preds = df_for_preds.set_index("Comment ID")
if df_for_preds.index.duplicated().sum() != 0:
raise ValueError("comment_id must all be unique values")
text_to_predict = df_for_preds["FFT answer"]
# Make predictions
model_path = "svc_minorcats_v5.sav"
model_path = "final_svc.sav"
if not os.path.isfile(model_path):
model_path = os.path.join("api", model_path)
with open(model_path, "rb") as model:
loaded_model = pickle.load(model)
preds_df = predict_multilabel_sklearn(
text_to_predict, loaded_model, labels=minor_cats_v5, additional_features=True
text_to_predict, loaded_model, labels=minor_cats, additional_features=False
)
# Join predicted labels with received data
preds_df["comment_id"] = preds_df.index.astype(str)
merged = pd.merge(df, preds_df, how="left", on="comment_id")
merged["labels"] = merged["labels"].fillna("").apply(list)
for i in merged["labels"].index:
if len(merged["labels"].loc[i]) < 1:
merged["labels"].loc[i].append("Labelling not possible")
return_dict = merged[["comment_id", "comment_text", "labels"]].to_dict(
orient="records"
)
return return_dict


@app.post("/predict_sentiment", response_model=List[SentimentOut], tags=["sentiment"])
async def predict_sentiment(items: List[ItemIn]):
"""Accepts comment ids, comment text and question type as JSON in a POST request. Makes predictions using trained Tensorflow Keras model.
Args:
items (List[ItemIn]): JSON list of dictionaries with the following compulsory keys:
- `comment_id` (str)
- `comment_text` (str)
- `question_type` (str)
The 'question_type' must be one of three values: 'nonspecific', 'what_good', and 'could_improve'.
For example, `[{'comment_id': '1', 'comment_text': 'Thank you', 'question_type': 'what_good'},
{'comment_id': '2', 'comment_text': 'Food was cold', 'question_type': 'could_improve'}]`
Returns:
(list[dict]): One record per submitted comment. Keys are: `comment_id`, `comment_text`, and predicted `sentiment`.
"""

# Process received data
loaded_model = await load_sentiment_model()
df = pd.DataFrame([i.dict() for i in items], dtype=str)
df_newindex = df.set_index("comment_id")
if df_newindex.index.duplicated().sum() != 0:
raise ValueError("comment_id must all be unique values")
df_newindex.index.rename("Comment ID", inplace=True)
text_to_predict = df_newindex[["comment_text", "question_type"]]
text_to_predict = text_to_predict.rename(
columns={"comment_text": "FFT answer", "question_type": "FFT_q_standardised"}
)
# Make predictions
preds_df = await get_sentiment_predictions(
text_to_predict, loaded_model, preprocess_text=False, additional_features=True
)
# Join predicted labels with received data
preds_df["comment_id"] = preds_df.index.astype(str)
merged = pd.merge(df, preds_df, how="left", on="comment_id")
merged["sentiment"] = merged["sentiment"].fillna("Labelling not possible")
return_dict = merged[["comment_id", "comment_text", "sentiment"]].to_dict(
orient="records"
)
label_list = merged.loc[i, "labels"]
if len(label_list) < 1:
merged.loc[i, "labels"].append("Labelling not possible")
return_dict = merged[["comment_id", "labels"]].to_dict(orient="records")
return return_dict
File renamed without changes.
Loading

0 comments on commit 535665f

Please sign in to comment.