Chris Hoge committed on
Commit
bfa1717
1 Parent(s): 9dad835

Initial commit

Browse files
Files changed (8) hide show
  1. Dockerfile +13 -0
  2. README.md +80 -11
  3. _wsgi.py +126 -0
  4. data/cnn.pt +3 -0
  5. data/vocab_obj.pt +3 -0
  6. docker-compose.yml +29 -0
  7. requirements.txt +6 -0
  8. sentiment_api.py +52 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base image for the sentiment ML backend.
FROM python:3.8-slim

# PYTHONUNBUFFERED: stream logs straight to stdout/stderr (no buffering).
# PORT: the port gunicorn binds to in CMD below.
ENV PYTHONUNBUFFERED=True \
    PORT=7860

WORKDIR /app
# Copy requirements first so the pip layer is cached across code-only changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code.
COPY . ./

# Shell form so $PORT expands; `exec` replaces the shell with gunicorn (PID 1).
CMD exec gunicorn --preload --bind :$PORT --workers 1 --threads 8 --timeout 0 _wsgi:app
README.md CHANGED
@@ -1,11 +1,80 @@
1
- ---
2
- title: Pydata Sentiment
3
- emoji: 🐨
4
- colorFrom: red
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Quickstart
2
+
3
+ Build and start Machine Learning backend on `http://localhost:9090`
4
+
5
+ ```bash
6
+ docker-compose up
7
+ ```
8
+
9
+ Check if it works:
10
+
11
+ ```bash
12
+ $ curl http://localhost:9090/health
13
+ {"status":"UP"}
14
+ ```
15
+
16
+ Then connect running backend to Label Studio using Machine Learning settings.
17
+
18
+
19
+ ## Writing your own model
20
+ 1. Place your scripts for model training & inference inside root directory. Follow the [API guidelines](#api-guidelines) described below. You can put everything in a single file, or create 2 separate ones, say `my_training_module.py` and `my_inference_module.py`
21
+
22
+ 2. Write down your python dependencies in `requirements.txt`
23
+
24
+ 3. Open `_wsgi.py` and make your configurations under `init_model_server` arguments:
25
+ ```python
26
+ from my_training_module import training_script
27
+ from my_inference_module import InferenceModel
28
+
29
+ init_model_server(
30
+ create_model_func=InferenceModel,
31
+ train_script=training_script,
32
+ ...
33
+ ```
34
+
35
+ 4. Make sure you have docker & docker-compose installed on your system, then run
36
+ ```bash
37
+ docker-compose up --build
38
+ ```
39
+
40
+ ## API guidelines
41
+
42
+
43
+ #### Inference module
44
+ In order to create module for inference, you have to declare the following class:
45
+
46
+ ```python
47
+ from htx.base_model import BaseModel
48
+
49
+ # use BaseModel inheritance provided by pyheartex SDK
50
+ class MyModel(BaseModel):
51
+
52
+ # Describe input types (Label Studio object tags names)
53
+ INPUT_TYPES = ('Image',)
54
+
55
+ # Describe output types (Label Studio control tags names)
56
+ OUTPUT_TYPES = ('Choices',)
57
+
58
+ def load(self, resources, **kwargs):
59
+ """Here you load the model into the memory. resources is a dict returned by training script"""
60
+ self.model_path = resources["model_path"]
61
+ self.labels = resources["labels"]
62
+
63
+ def predict(self, tasks, **kwargs):
64
+ """Here you create list of model results with Label Studio's prediction format, task by task"""
65
+ predictions = []
66
+ for task in tasks:
67
+ # do inference...
68
+ predictions.append(task_prediction)
69
+ return predictions
70
+ ```
71
+
72
+ #### Training module
73
+ Training could be made in a separate environment. The only convention is that the data iterator and working directory are specified as input arguments for the training function, which outputs JSON-serializable resources consumed later by the `load()` function in the inference module.
74
+
75
+ ```python
76
+ def train(input_iterator, working_dir, **kwargs):
77
+ """Here you gather input examples and output labels and train your model"""
78
+ resources = {"model_path": "some/model/path", "labels": ["aaa", "bbb", "ccc"]}
79
+ return resources
80
+ ```
_wsgi.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import json  # used by get_kwargs_from_config(); was missing (NameError at runtime)
import logging
import logging.config
import os
5
+
6
# Configure root logging before importing label_studio_ml so the library's
# loggers inherit this console handler setup.
logging.config.dictConfig({
    "version": 1,
    "formatters": {
        "standard": {
            # timestamp, level, origin (logger::function::line), message
            "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
        }
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "level": "DEBUG",
            "stream": "ext://sys.stdout",
            "formatter": "standard"
        }
    },
    "root": {
        # Root level ERROR by default; lowered at runtime via --log-level.
        "level": "ERROR",
        "handlers": [
            "console"
        ],
        # NOTE(review): "propagate" has no effect on the root logger.
        "propagate": True
    }
})

from label_studio_ml.api import init_app
from sentiment_api import SentimentModel
32
+
33
+
34
# Default location of the optional JSON config next to this module.
_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json')


def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH):
    """Load model-initialization kwargs from a JSON config file.

    Args:
        config_path: Path to a JSON file holding a dict of kwargs.

    Returns:
        The parsed dict, or an empty dict when the file does not exist.

    Raises:
        AssertionError: if the file's top-level JSON value is not an object.
    """
    if not os.path.exists(config_path):
        return dict()
    with open(config_path) as f:
        # `json` is imported at the top of the file (previously missing,
        # which made this call raise NameError).
        config = json.load(f)
    assert isinstance(config, dict)
    return config
44
+
45
+
46
if __name__ == "__main__":
    # CLI entry point: parse options, optionally validate the model, then
    # serve the Label Studio ML backend with Flask's built-in server.
    parser = argparse.ArgumentParser(description='Label studio')
    parser.add_argument(
        '-p', '--port', dest='port', type=int, default=9090,
        help='Server port')
    parser.add_argument(
        '--host', dest='host', type=str, default='0.0.0.0',
        help='Server host')
    parser.add_argument(
        '--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('='),
        help='Additional LabelStudioMLBase model initialization kwargs')
    parser.add_argument(
        '-d', '--debug', dest='debug', action='store_true',
        help='Switch debug mode')
    parser.add_argument(
        '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None,
        help='Logging level')
    parser.add_argument(
        '--model-dir', dest='model_dir', default=os.path.dirname(__file__),
        help='Directory where models are stored (relative to the project directory)')
    parser.add_argument(
        '--check', dest='check', action='store_true',
        help='Validate model instance before launching server')

    args = parser.parse_args()

    # setup logging level
    if args.log_level:
        logging.root.setLevel(args.log_level)

    def isfloat(value):
        """Return True if *value* parses as a float, else False."""
        try:
            float(value)
            return True
        except ValueError:
            return False

    def parse_kwargs():
        """Coerce KEY=VAL CLI pairs into typed kwargs (int/bool/float/str)."""
        param = dict()
        for k, v in args.kwargs:
            if v.isdigit():
                param[k] = int(v)
            elif v == 'True' or v == 'true':
                param[k] = True
            # BUG FIX: the second comparison previously repeated 'False', so a
            # lowercase 'false' fell through and stayed a plain string.
            elif v == 'False' or v == 'false':
                param[k] = False
            elif isfloat(v):
                param[k] = float(v)
            else:
                param[k] = v
        return param

    # Kwargs from config.json, overridden by any --kwargs CLI pairs.
    kwargs = get_kwargs_from_config()

    if args.kwargs:
        kwargs.update(parse_kwargs())

    if args.check:
        # Fail fast if the model cannot even be constructed.
        print('Check "' + SentimentModel.__name__ + '" instance creation..')
        model = SentimentModel(**kwargs)

    app = init_app(
        model_class=SentimentModel,
        model_dir=os.environ.get('MODEL_DIR', args.model_dir),
        redis_queue=os.environ.get('RQ_QUEUE_NAME', 'default'),
        redis_host=os.environ.get('REDIS_HOST', 'localhost'),
        redis_port=os.environ.get('REDIS_PORT', 6379),
        **kwargs
    )

    app.run(host=args.host, port=args.port, debug=args.debug)

else:
    # for uWSGI use: module is imported by the WSGI server (see Dockerfile
    # CMD), so expose `app` at module level with environment-driven config.
    app = init_app(
        model_class=SentimentModel,
        model_dir=os.environ.get('MODEL_DIR', os.path.dirname(__file__)),
        redis_queue=os.environ.get('RQ_QUEUE_NAME', 'default'),
        redis_host=os.environ.get('REDIS_HOST', 'localhost'),
        redis_port=os.environ.get('REDIS_PORT', 6379)
    )
data/cnn.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c72c9a96e9a22f97d53dc3403397b5f7790cd83b0ebadcf0766378902c62713
3
+ size 27607519
data/vocab_obj.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b28acfc849558893989d00a107e6bb776e81e321f89c080b8734dd6e439558d8
3
+ size 367155
docker-compose.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Two services: a Redis instance (job queue / cache) and the ML backend server.
version: "3.8"

services:
  redis:
    image: redis:alpine
    container_name: redis
    hostname: redis
    volumes:
      # Persist Redis data across container restarts.
      - "./data/redis:/data"
    expose:
      - 6379
  server:
    container_name: server
    build: .
    environment:
      # These are read by _wsgi.py when building the app.
      - MODEL_DIR=/data/models
      - RQ_QUEUE_NAME=default
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      - LABEL_STUDIO_USE_REDIS=true
    ports:
      # Label Studio ML backend served on http://localhost:9090
      - 9090:9090
    depends_on:
      - redis
    links:
      - redis
    volumes:
      - "./data/server:/data"
      - "./logs:/tmp"
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Model inference dependencies
torch
torchtext
# Label Studio ML backend SDK and its Redis job-queue infrastructure
label-studio-ml
redis
rq
# WSGI server used by the Docker image CMD
gunicorn
sentiment_api.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from label_studio_ml.model import LabelStudioMLBase
2
+ from sentiment_cnn import SentimentCNN
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torchtext
7
+
8
class SentimentModel(LabelStudioMLBase):
    """Label Studio ML backend that wraps a CNN sentiment classifier."""

    def __init__(self, **kwargs):
        super(SentimentModel, self).__init__(**kwargs)

        # Restore the trained network and its vocabulary from disk.
        self.sentiment_model = SentimentCNN(
            state_dict='data/cnn.pt',
            vocab='data/vocab_obj.pt')

        # Map the classifier's integer classes onto Label Studio choice labels.
        self.label_map = {
            1: "Positive",
            0: "Negative"}

    def predict(self, tasks, **kwargs):
        """Return one "choices" pre-annotation per task, in task order."""
        # Read the first control tag from the parsed labeling config to learn
        # which from_name/to_name the results must attach to, and which data
        # key holds the input text.
        from_name, schema = list(self.parsed_label_config.items())[0]
        to_name = schema['to_name'][0]
        data_name = schema['inputs'][0]['value']

        predictions = []
        for task in tasks:
            # Run the model on this task's text.
            text = task['data'][data_name]
            predicted_class, predicted_prob = self.sentiment_model.predict_sentiment(text)
            print("%s\nprediction: %s probability: %s" % (text, predicted_class, predicted_prob))

            choice = self.label_map[predicted_class]

            # Package the classification as a Label Studio "choices" result.
            result_item = {
                'from_name': from_name,
                'to_name': to_name,
                'type': 'choices',
                'value': {
                    'choices': [
                        choice
                    ]
                },
            }
            predictions.append({
                'score': float(predicted_prob),
                'result': [result_item],
            })
        return predictions