Chris Hoge committed on
Commit
bfa1717
1 Parent(s): 9dad835

Initial commit

Browse files
Files changed (8) hide show
  1. Dockerfile +13 -0
  2. README.md +80 -11
  3. _wsgi.py +126 -0
  4. data/cnn.pt +3 -0
  5. data/vocab_obj.pt +3 -0
  6. docker-compose.yml +29 -0
  7. requirements.txt +6 -0
  8. sentiment_api.py +52 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base image for the sentiment ML backend.
FROM python:3.8-slim

# PYTHONUNBUFFERED: stream logs straight to stdout/stderr (no buffering).
# PORT: the port gunicorn binds to in CMD below.
ENV PYTHONUNBUFFERED=True \
    PORT=7860

WORKDIR /app
# Copy requirements first so the pip layer is cached across code-only changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code.
COPY . ./

# Shell form so $PORT expands; `exec` replaces the shell with gunicorn (PID 1).
CMD exec gunicorn --preload --bind :$PORT --workers 1 --threads 8 --timeout 0 _wsgi:app
README.md CHANGED
@@ -1,11 +1,80 @@
1
- ---
2
- title: Pydata Sentiment
3
- emoji: 🐨
4
- colorFrom: red
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Quickstart
2
+
3
+ Build and start Machine Learning backend on `http://localhost:9090`
4
+
5
+ ```bash
6
+ docker-compose up
7
+ ```
8
+
9
+ Check if it works:
10
+
11
+ ```bash
12
+ $ curl http://localhost:9090/health
13
+ {"status":"UP"}
14
+ ```
15
+
16
+ Then connect running backend to Label Studio using Machine Learning settings.
17
+
18
+
19
+ ## Writing your own model
20
+ 1. Place your scripts for model training & inference inside root directory. Follow the [API guidelines](#api-guidelines) described below. You can put everything in a single file, or create 2 separate ones, say `my_training_module.py` and `my_inference_module.py`
21
+
22
+ 2. Write down your python dependencies in `requirements.txt`
23
+
24
+ 3. Open `_wsgi.py` and make your configurations under `init_model_server` arguments:
25
+ ```python
26
+ from my_training_module import training_script
27
+ from my_inference_module import InferenceModel
28
+
29
+ init_model_server(
30
+ create_model_func=InferenceModel,
31
+ train_script=training_script,
32
+ ...
33
+ ```
34
+
35
+ 4. Make sure you have docker & docker-compose installed on your system, then run
36
+ ```bash
37
+ docker-compose up --build
38
+ ```
39
+
40
+ ## API guidelines
41
+
42
+
43
+ #### Inference module
44
+ In order to create module for inference, you have to declare the following class:
45
+
46
+ ```python
47
+ from htx.base_model import BaseModel
48
+
49
+ # use BaseModel inheritance provided by pyheartex SDK
50
+ class MyModel(BaseModel):
51
+
52
+ # Describe input types (Label Studio object tags names)
53
+ INPUT_TYPES = ('Image',)
54
+
55
+ # Describe output types (Label Studio control tags names)
56
+ OUTPUT_TYPES = ('Choices',)
57
+
58
+ def load(self, resources, **kwargs):
59
+ """Here you load the model into the memory. resources is a dict returned by training script"""
60
+ self.model_path = resources["model_path"]
61
+ self.labels = resources["labels"]
62
+
63
+ def predict(self, tasks, **kwargs):
64
+ """Here you create list of model results with Label Studio's prediction format, task by task"""
65
+ predictions = []
66
+ for task in tasks:
67
+ # do inference...
68
+ predictions.append(task_prediction)
69
+ return predictions
70
+ ```
71
+
72
+ #### Training module
73
+ Training could be made in a separate environment. The only convention is that the data iterator and working directory are specified as input arguments for the training function, which outputs JSON-serializable resources consumed later by the `load()` function in the inference module.
74
+
75
+ ```python
76
+ def train(input_iterator, working_dir, **kwargs):
77
+ """Here you gather input examples and output labels and train your model"""
78
+ resources = {"model_path": "some/model/path", "labels": ["aaa", "bbb", "ccc"]}
79
+ return resources
80
+ ```
_wsgi.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import json  # used by get_kwargs_from_config(); was missing (NameError at runtime)
import logging
import logging.config
import os
5
+
6
# Configure root logging before importing label_studio_ml so the library's
# loggers inherit this console handler setup.
logging.config.dictConfig({
    "version": 1,
    "formatters": {
        "standard": {
            # timestamp, level, origin (logger::function::line), message
            "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
        }
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "level": "DEBUG",
            "stream": "ext://sys.stdout",
            "formatter": "standard"
        }
    },
    "root": {
        # Root level ERROR by default; lowered at runtime via --log-level.
        "level": "ERROR",
        "handlers": [
            "console"
        ],
        # NOTE(review): "propagate" has no effect on the root logger.
        "propagate": True
    }
})

from label_studio_ml.api import init_app
from sentiment_api import SentimentModel
32
+
33
+
34
# Default location of the optional JSON config next to this module.
_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json')


def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH):
    """Load model-initialization kwargs from a JSON config file.

    Args:
        config_path: Path to a JSON file holding a dict of kwargs.

    Returns:
        The parsed dict, or an empty dict when the file does not exist.

    Raises:
        AssertionError: if the file's top-level JSON value is not an object.
    """
    if not os.path.exists(config_path):
        return dict()
    with open(config_path) as f:
        # `json` is imported at the top of the file (previously missing,
        # which made this call raise NameError).
        config = json.load(f)
    assert isinstance(config, dict)
    return config
44
+
45
+
46
if __name__ == "__main__":
    # CLI entry point: parse options, optionally validate the model, then
    # serve the Label Studio ML backend with Flask's built-in server.
    parser = argparse.ArgumentParser(description='Label studio')
    parser.add_argument(
        '-p', '--port', dest='port', type=int, default=9090,
        help='Server port')
    parser.add_argument(
        '--host', dest='host', type=str, default='0.0.0.0',
        help='Server host')
    parser.add_argument(
        '--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('='),
        help='Additional LabelStudioMLBase model initialization kwargs')
    parser.add_argument(
        '-d', '--debug', dest='debug', action='store_true',
        help='Switch debug mode')
    parser.add_argument(
        '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None,
        help='Logging level')
    parser.add_argument(
        '--model-dir', dest='model_dir', default=os.path.dirname(__file__),
        help='Directory where models are stored (relative to the project directory)')
    parser.add_argument(
        '--check', dest='check', action='store_true',
        help='Validate model instance before launching server')

    args = parser.parse_args()

    # setup logging level
    if args.log_level:
        logging.root.setLevel(args.log_level)

    def isfloat(value):
        """Return True if *value* parses as a float, else False."""
        try:
            float(value)
            return True
        except ValueError:
            return False

    def parse_kwargs():
        """Coerce KEY=VAL CLI pairs into typed kwargs (int/bool/float/str)."""
        param = dict()
        for k, v in args.kwargs:
            if v.isdigit():
                param[k] = int(v)
            elif v == 'True' or v == 'true':
                param[k] = True
            # BUG FIX: the second comparison previously repeated 'False', so a
            # lowercase 'false' fell through and stayed a plain string.
            elif v == 'False' or v == 'false':
                param[k] = False
            elif isfloat(v):
                param[k] = float(v)
            else:
                param[k] = v
        return param

    # Kwargs from config.json, overridden by any --kwargs CLI pairs.
    kwargs = get_kwargs_from_config()

    if args.kwargs:
        kwargs.update(parse_kwargs())

    if args.check:
        # Fail fast if the model cannot even be constructed.
        print('Check "' + SentimentModel.__name__ + '" instance creation..')
        model = SentimentModel(**kwargs)

    app = init_app(
        model_class=SentimentModel,
        model_dir=os.environ.get('MODEL_DIR', args.model_dir),
        redis_queue=os.environ.get('RQ_QUEUE_NAME', 'default'),
        redis_host=os.environ.get('REDIS_HOST', 'localhost'),
        redis_port=os.environ.get('REDIS_PORT', 6379),
        **kwargs
    )

    app.run(host=args.host, port=args.port, debug=args.debug)

else:
    # for uWSGI use: module is imported by the WSGI server (see Dockerfile
    # CMD), so expose `app` at module level with environment-driven config.
    app = init_app(
        model_class=SentimentModel,
        model_dir=os.environ.get('MODEL_DIR', os.path.dirname(__file__)),
        redis_queue=os.environ.get('RQ_QUEUE_NAME', 'default'),
        redis_host=os.environ.get('REDIS_HOST', 'localhost'),
        redis_port=os.environ.get('REDIS_PORT', 6379)
    )
data/cnn.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c72c9a96e9a22f97d53dc3403397b5f7790cd83b0ebadcf0766378902c62713
3
+ size 27607519
data/vocab_obj.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b28acfc849558893989d00a107e6bb776e81e321f89c080b8734dd6e439558d8
3
+ size 367155
docker-compose.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Two services: a Redis instance (job queue / cache) and the ML backend server.
version: "3.8"

services:
  redis:
    image: redis:alpine
    container_name: redis
    hostname: redis
    volumes:
      # Persist Redis data across container restarts.
      - "./data/redis:/data"
    expose:
      - 6379
  server:
    container_name: server
    build: .
    environment:
      # These are read by _wsgi.py when building the app.
      - MODEL_DIR=/data/models
      - RQ_QUEUE_NAME=default
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      - LABEL_STUDIO_USE_REDIS=true
    ports:
      # Label Studio ML backend served on http://localhost:9090
      - 9090:9090
    depends_on:
      - redis
    links:
      - redis
    volumes:
      - "./data/server:/data"
      - "./logs:/tmp"
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Model inference dependencies
torch
torchtext
# Label Studio ML backend SDK and its Redis job-queue infrastructure
label-studio-ml
redis
rq
# WSGI server used by the Docker image CMD
gunicorn
sentiment_api.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from label_studio_ml.model import LabelStudioMLBase
2
+ from sentiment_cnn import SentimentCNN
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torchtext
7
+
8
class SentimentModel(LabelStudioMLBase):
    """Label Studio ML backend that wraps a CNN sentiment classifier."""

    def __init__(self, **kwargs):
        super(SentimentModel, self).__init__(**kwargs)

        # Restore the trained network and its vocabulary from disk.
        self.sentiment_model = SentimentCNN(
            state_dict='data/cnn.pt',
            vocab='data/vocab_obj.pt')

        # Map the classifier's integer classes onto Label Studio choice labels.
        self.label_map = {
            1: "Positive",
            0: "Negative"}

    def predict(self, tasks, **kwargs):
        """Return one "choices" pre-annotation per task, in task order."""
        # Read the first control tag from the parsed labeling config to learn
        # which from_name/to_name the results must attach to, and which data
        # key holds the input text.
        from_name, schema = list(self.parsed_label_config.items())[0]
        to_name = schema['to_name'][0]
        data_name = schema['inputs'][0]['value']

        predictions = []
        for task in tasks:
            # Run the model on this task's text.
            text = task['data'][data_name]
            predicted_class, predicted_prob = self.sentiment_model.predict_sentiment(text)
            print("%s\nprediction: %s probability: %s" % (text, predicted_class, predicted_prob))

            choice = self.label_map[predicted_class]

            # Package the classification as a Label Studio "choices" result.
            result_item = {
                'from_name': from_name,
                'to_name': to_name,
                'type': 'choices',
                'value': {
                    'choices': [
                        choice
                    ]
                },
            }
            predictions.append({
                'score': float(predicted_prob),
                'result': [result_item],
            })
        return predictions