jeremiebasso
committed on
Commit
•
8fe5582
1
Parent(s):
fee204b
initial commit
Browse files- .gitignore +37 -0
- app.py +46 -0
- configuration.py +8 -0
- onnx_model.py +82 -0
- postprocess.py +76 -0
- requirements.txt +7 -0
- theme.py +36 -0
- utils.py +52 -0
.gitignore
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
|
30 |
+
.mypy_cache
|
31 |
+
.vscode
|
32 |
+
.pylintrc
|
33 |
+
.python-version
|
34 |
+
.prettierignore
|
35 |
+
*/.pytest_cache
|
36 |
+
.env
|
37 |
+
model/*
|
app.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import AutoTokenizer
|
3 |
+
from transformers.utils import logging as hf_logging
|
4 |
+
|
5 |
+
from configuration import Config
|
6 |
+
from onnx_model import ONNXModel
|
7 |
+
from postprocess import get_sentiment
|
8 |
+
from theme import theme
|
9 |
+
from utils import download_model
|
10 |
+
|
11 |
+
# Turn off the Hugging Face progress bars before anything is downloaded.
hf_logging.disable_progress_bar()

# Thresholds handed to get_sentiment() on every prediction.
config = Config()

# Fetch the registered model, wrap it for ONNX inference, and load the
# tokenizer named by the model's own metadata.
model_path = download_model(model_name="ml-sentiment-adapter", model_stage="production")
model = ONNXModel.from_dir(model_dir=model_path)
tokenizer = AutoTokenizer.from_pretrained(model.model_info.base_model)
|
17 |
+
|
18 |
+
|
19 |
+
def predict(sentence: str):
    """Classify one sentence and return ``{label: score}`` for the Gradio label output.

    The sentence is tokenized (with truncation) into NumPy tensors, run
    through the module-level ONNX ``model``, and the logits are converted by
    ``get_sentiment`` using the thresholds stored in the module-level
    ``config``.
    """
    tokens = tokenizer([sentence], truncation=True, return_tensors="np")
    score, sentiment = get_sentiment(
        model(**tokens),
        config.negative_threshold,
        config.positive_threshold,
        config.zero,
    )
    return {sentiment: score}
|
27 |
+
|
28 |
+
|
29 |
+
# Long UI texts are kept in private constants so the Interface call stays readable.
_DEFAULT_REVIEW = "Lettria truelly handled all the overhead of a NLP project !"

_DESCRIPTION = "Introducing our Sentiment Analysis API powered by Deep Learning! It provides an easy-to-use solution for analyzing and understanding the sentiment expressed in text. With this API, you can gain valuable insights from customer feedback, social media posts, and reviews by accurately classifying text into positive, negative, or neutral sentiment categories. Seamlessly integrate it into your applications to make data-driven decisions, monitor brand reputation, and enhance customer satisfaction in real-time. Uncover the true sentiment behind text and unlock the power of sentiment analysis today!"

_EXAMPLES = [
    "I absolutely loved the movie! The storyline was captivating, and the acting was superb.",
    "I'm extremely disappointed with the quality of the product. It broke within a week of use.",
    "Today has been an average day. Nothing particularly good or bad happened.",
    "This book is a masterpiece. The author's writing style is brilliant, and the characters are well-developed.",
    "I'm feeling neutral about the new restaurant. The ambiance was nice, but the food was mediocre.",
]

# One text box in, one label out; flagging is disabled.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Customer Review", value=_DEFAULT_REVIEW),
    outputs=gr.Label(label="Sentiment Level"),
    title="Lettria's Customer Sentiment Analysis",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    theme=theme,
    allow_flagging="never",
)

demo.launch()
|
configuration.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
2 |
+
|
3 |
+
|
4 |
+
class Config(BaseSettings):
    """Settings for the sentiment calibration, loaded via pydantic-settings.

    ``model_config`` sets the environment-variable prefix to ``lt_``.
    """

    model_config = SettingsConfigDict(env_prefix="lt_")

    # Calibration parameters; app.py forwards them to get_sentiment() as
    # thresh_neg, thresh_pos and zero respectively.
    negative_threshold: float = -0.65
    positive_threshold: float = 0.37
    zero: float = 0
|
onnx_model.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import json
|
4 |
+
from dataclasses import dataclass
|
5 |
+
from pathlib import Path
|
6 |
+
from typing import Any
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
import onnxruntime as ort
|
10 |
+
from loguru import logger
|
11 |
+
from onnxruntime.transformers.io_binding_helper import TypeHelper
|
12 |
+
|
13 |
+
|
14 |
+
@dataclass
class ModelInfo:
    """Metadata read from a model directory."""

    # Value of the "bert_type" entry in metadata.json.
    base_model: str

    @classmethod
    def from_dir(cls, model_dir: str | Path) -> ModelInfo:
        """Build an instance from ``<model_dir>/metadata.json``.

        Args:
            model_dir: Directory holding ``metadata.json``. Both ``str`` and
                ``Path`` are accepted; the value is normalised to ``Path``
                before joining, so a plain string no longer raises
                ``TypeError``.

        Returns:
            An instance whose ``base_model`` is the file's ``"bert_type"`` entry.

        Raises:
            FileNotFoundError: If ``metadata.json`` does not exist.
            KeyError: If the JSON object has no ``"bert_type"`` key.
        """
        metadata_file = Path(model_dir) / "metadata.json"
        with open(metadata_file, "r", encoding="utf-8") as file:
            data = json.load(file)
        # cls(...) rather than ModelInfo(...) so subclasses get their own type.
        return cls(base_model=data["bert_type"])
|
23 |
+
|
24 |
+
|
25 |
+
class ONNXModel:
    """Callable wrapper around an ``onnxruntime.InferenceSession``."""

    def __init__(self, model: ort.InferenceSession, model_info: ModelInfo) -> None:
        """Store the session and cache the details needed to call it.

        Args:
            model: A loaded inference session.
            model_info: Metadata associated with the model.
        """
        self.model = model
        self.model_info = model_info
        self.model_path = Path(model._model_path)  # type: ignore
        self.model_name = self.model_path.name

        self.providers = model.get_providers()

        # The first provider in the list decides the device label.
        if self.providers[0] in ["CUDAExecutionProvider", "TensorrtExecutionProvider"]:
            self.device = "cuda"
        else:
            self.device = "cpu"

        # Looked up by input name in __call__ to cast each tensor.
        self.io_types = TypeHelper.get_io_numpy_type_map(model)

        self.input_names = [el.name for el in model.get_inputs()]
        self.output_name = model.get_outputs()[0].name

    @staticmethod
    def load_session(
        path: str | Path,
        provider: str = "CPUExecutionProvider",
        session_options: ort.SessionOptions | None = None,
        provider_options: dict[str, Any] | None = None,
    ) -> ort.InferenceSession:
        """Create an inference session for the given model.

        Args:
            path: A ``str`` is used as the model file path as-is; any other
                value is treated as a directory and ``model.onnx`` is appended.
            provider: Preferred execution provider. TensorRT gets CUDA as a
                second provider, and CUDA gets CPU as a second provider.
            session_options: Passed through as ``sess_options``.
            provider_options: Options for the preferred provider only; the
                additional providers receive empty option dicts.

        Returns:
            The created ``InferenceSession``.
        """
        providers = [provider]
        if provider == "TensorrtExecutionProvider":
            providers.append("CUDAExecutionProvider")
        elif provider == "CUDAExecutionProvider":
            providers.append("CPUExecutionProvider")

        if not isinstance(path, str):
            path = Path(path) / "model.onnx"

        providers_options = None
        if provider_options is not None:
            # One options dict per provider, in the same order as `providers`.
            providers_options = [provider_options] + [{} for _ in providers[1:]]

        session = ort.InferenceSession(
            str(path),
            providers=providers,
            sess_options=session_options,
            provider_options=providers_options,
        )
        logger.info("Session loaded")
        return session

    @classmethod
    def from_dir(cls, model_dir: str | Path) -> ONNXModel:
        """Load the session and the metadata from one model directory.

        Args:
            model_dir: Directory containing ``model.onnx`` and
                ``metadata.json``; ``str`` and ``Path`` are both accepted.

        Returns:
            A ready-to-call ``ONNXModel``.
        """
        # load_session only appends "model.onnx" for non-str values, so a
        # string directory must be converted to Path before it is passed on.
        model_dir = Path(model_dir)
        return cls(cls.load_session(model_dir), ModelInfo.from_dir(model_dir))

    def __call__(self, **model_inputs: np.ndarray):
        """Run the model and return its first output.

        Each keyword argument is cast to the dtype recorded for that name in
        ``self.io_types`` (a name missing from the map raises ``KeyError``)
        before the session is run.
        """
        model_inputs = {
            input_name: tensor.astype(self.io_types[input_name]) for input_name, tensor in model_inputs.items()
        }

        return self.model.run([self.output_name], model_inputs)[0]
|
postprocess.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
|
4 |
+
def softmax(x: np.ndarray, axis=1) -> np.ndarray:
    """
    Computes softmax array along the specified axis.

    The per-axis maximum is subtracted before exponentiating. This leaves the
    mathematical result unchanged but keeps ``np.exp`` from overflowing on
    large inputs, which would otherwise produce ``inf / inf = nan``.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=axis, keepdims=True)
|
10 |
+
|
11 |
+
|
12 |
+
def calibrate_sentiment_score(
    sentiment: float,
    thresh_neg: float,
    thresh_pos: float,
    zero: float = 0,
) -> float:
    """Apply the piecewise-quadratic calibration to one raw sentiment value.

    Negative and positive inputs each go through their own quadratic
    ``quad * s**2 + lin * s + zero``; any other input returns ``zero``.

    Before evaluating, each threshold that differs from its midpoint
    (``(zero - 1) / 2`` or ``(zero + 1) / 2``) is validated: the value
    ``-lin / quad / 2`` must not lie strictly inside (-1, 0) for the negative
    side or (0, 1) for the positive side.

    Raises:
        ValueError: If either threshold fails that validation.
    """
    neg_quad = 2 * zero - 2 - 4 * thresh_neg
    neg_lin = 3 * zero - 1 - 4 * thresh_neg
    pos_quad = 2 + 2 * zero - 4 * thresh_pos
    pos_lin = 4 * thresh_pos - 1 - 3 * zero

    # The inequality also guarantees the quadratic coefficient is non-zero.
    if thresh_neg != (zero - 1) / 2:
        alpha_neg = -neg_lin / neg_quad / 2
        if -1 < alpha_neg < 0:
            raise ValueError(f"Incorrect value: {thresh_neg=} is too far from -0.5!")

    if thresh_pos != (zero + 1) / 2:
        alpha_pos = -pos_lin / pos_quad / 2
        if 0 < alpha_pos < 1:
            raise ValueError(f"Incorrect value: {thresh_pos=} is too far from 0.5!")

    if sentiment < 0:
        return neg_quad * sentiment**2 + neg_lin * sentiment + zero
    if sentiment > 0:
        return pos_quad * sentiment**2 + pos_lin * sentiment + zero
    return zero
|
31 |
+
|
32 |
+
|
33 |
+
def calibrate_sentiment(
    sentiments: np.ndarray[float],
    thresh_neg: float,
    thresh_pos: float,
    zero: float,
) -> np.ndarray[np.float64]:
    """Calibrate every value in ``sentiments`` with ``calibrate_sentiment_score``.

    Returns a ``float64`` array with one calibrated score per input element,
    in the same order.
    """
    calibrated = []
    for raw_sentiment in sentiments:
        calibrated.append(
            calibrate_sentiment_score(raw_sentiment, thresh_neg=thresh_neg, thresh_pos=thresh_pos, zero=zero)
        )
    return np.array(calibrated).astype(np.float64)
|
46 |
+
|
47 |
+
|
48 |
+
def scale_value(value, in_min, in_max, out_min, out_max):
    """Linearly map ``value`` from [in_min, in_max] onto [out_min, out_max].

    The result is rounded to 3 decimals.

    Raises:
        ValueError: If ``value`` lies outside [in_min, in_max].
    """
    if not in_min <= value <= in_max:
        raise ValueError(f"Input value must be in the range [{in_min}, {in_max}]")

    scaled_value = (value - in_min) / (in_max - in_min) * (out_max - out_min) + out_min
    # np.round also accepts plain Python numbers, which have no .round()
    # method; for NumPy scalars the result is the same as before.
    return np.round(scaled_value, 3)
|
54 |
+
|
55 |
+
|
56 |
+
|
57 |
+
def get_sentiment(
    logits: np.ndarray,
    thresh_neg: float,
    thresh_pos: float,
    zero: float,
):
    """Turn model logits into a ``(score, label)`` pair for the first row.

    Steps:
      1. Softmax over axis 1.
      2. Probability-weighted mean of the class indices 0..4, mapped to
         [-1, 1] via ``/ 2 - 1``.
      3. Calibration with ``calibrate_sentiment``; only element 0 is used.
      4. Bucketing at -0.33 / 0.33 into NEGATIVE / NEUTRAL / POSITIVE, with
         the score rescaled to [0, 1] inside its bucket.

    Returns:
        Tuple of the rescaled score and the label string.
    """
    probabilities = softmax(logits, axis=1)
    sentiments = np.matmul(probabilities, np.arange(5)) / 2 - 1
    score = calibrate_sentiment(
        sentiments=sentiments,
        thresh_neg=thresh_neg,
        thresh_pos=thresh_pos,
        zero=zero,
    )[0]
    # Rounding in the probabilities and in the quadratic can leave the score a
    # hair outside [-1, 1]; scale_value would then raise ValueError, so clamp.
    score = np.clip(score, -1.0, 1.0)

    if score < -0.33:
        return scale_value(score, -1, -0.33, 0, 1), "NEGATIVE"
    elif score < 0.33:
        return scale_value(score, -0.33, 0.33, 0, 1), "NEUTRAL"
    else:
        return scale_value(score, 0.33, 1, 0, 1), "POSITIVE"
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
pydantic
|
3 |
+
pydantic_settings
|
4 |
+
numpy
|
5 |
+
onnxruntime
|
6 |
+
loguru
|
7 |
+
mlflow
|
theme.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
# Primary palette: one colour for shades 50-500, another for 600-950.
_PRIMARY = gr.themes.colors.Color(
    name="lt_1",
    **dict.fromkeys(("c50", "c100", "c200", "c300", "c400", "c500"), "#7A48E7"),
    **dict.fromkeys(("c600", "c700", "c800", "c900", "c950"), "#F7F0FF"),
)

# Secondary palette: shades grouped by the colour they share.
_SECONDARY = gr.themes.colors.Color(
    name="lt_2",
    **dict.fromkeys(("c50", "c100", "c200"), "#F6F0FF"),
    **dict.fromkeys(("c300", "c400"), "#F5F0FF"),
    **dict.fromkeys(("c500", "c600"), "#D8CDF6"),
    **dict.fromkeys(("c700", "c800"), "#BCADEC"),
    **dict.fromkeys(("c900", "c950"), "#9580D8"),
)

# Default Gradio theme with the two custom hues applied.
theme = gr.themes.Default(primary_hue=_PRIMARY, secondary_hue=_SECONDARY)
|
utils.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Utils"""
|
2 |
+
from __future__ import annotations
|
3 |
+
|
4 |
+
import json
|
5 |
+
from pathlib import Path
|
6 |
+
from typing import Literal
|
7 |
+
|
8 |
+
from loguru import logger
|
9 |
+
|
10 |
+
|
11 |
+
def download_model(
    model_name: str,
    model_stage: Literal["staging", "production"],
    model_dir: str | Path = "model",
) -> Path:
    """Download model from mlflow

    Looks up the latest version of ``model_name`` in ``model_stage``,
    downloads its artifacts under ``model_dir`` and writes the model's
    metadata to ``metadata.json`` inside the downloaded directory. A previous
    download is reused only if its ``metadata.json`` is present.

    Args:
        model_name: Registered model name.
        model_stage: Stage to look up.
        model_dir: Local directory that receives the artifacts.

    Returns:
        Path of the local model directory.

    Raises:
        ValueError: If the lookup does not return exactly one model version.
    """
    # Imported lazily, as in the original, so mlflow is only needed on call.
    import mlflow.artifacts
    import mlflow.models
    from mlflow.client import MlflowClient

    logger.info(f"Looking for model {model_name}/{model_stage}")

    model_dir = Path(model_dir)

    client = MlflowClient()
    model_versions = client.get_latest_versions(model_name, stages=[model_stage])
    if len(model_versions) != 1:
        raise ValueError(f"No model version for {model_name}/{model_stage}")

    artifact_uri = model_versions[0].source
    model_version = model_versions[0].version

    logger.info(f"Found version {model_version} for {model_name}/{model_stage}")

    # Strip a trailing "/" first: otherwise the last path component is empty
    # and model_dir itself would be mistaken for the cached model.
    model_path = model_dir / artifact_uri.rstrip("/").split("/")[-1]  # type: ignore
    # metadata.json is written last, so its presence marks a complete
    # download; a directory without it is downloaded again.
    if (model_path / "metadata.json").exists():
        logger.info(f"Found model in {model_path}, skipping download")
        return model_path

    logger.info(f"Downloading artifacts {artifact_uri} to {model_dir}")
    model_path = mlflow.artifacts.download_artifacts(artifact_uri, dst_path=str(model_dir))
    logger.info(f"Succesfully downloaded {model_name}")

    model_info = mlflow.models.get_model_info(model_path)
    metadata = model_info.metadata
    metadata_path = Path(model_path) / "metadata.json"
    logger.info(f"Saving metadata to {metadata_path}")
    with open(metadata_path, "w", encoding="utf-8") as file:
        json.dump(metadata, file)

    return Path(model_path)
|