diff --git a/.github/free_disk_space.sh b/.github/free_disk_space.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1f206f424f89f730933bc09752acd8e8b7999b27
--- /dev/null
+++ b/.github/free_disk_space.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+#
+# The Azure provided machines typically have the following disk allocation:
+# Total space: 85GB
+# Allocated: 67 GB
+# Free: 17 GB
+# This script frees up 28 GB of disk space by deleting unneeded packages and
+# large directories.
+# The Flink end to end tests download and generate more than 17 GB of files,
+# causing unpredictable behavior and build failures.
+#
+echo "=============================================================================="
+echo "Freeing up disk space on CI system"
+echo "=============================================================================="
+
+echo "Listing 100 largest packages"
+dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100
+df -h
+echo "Removing large packages"
+sudo apt-get remove -y '^ghc-8.*'
+sudo apt-get remove -y '^dotnet-.*'
+sudo apt-get remove -y '^llvm-.*'
+sudo apt-get remove -y 'php.*'
+sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel
+sudo apt-get autoremove -y
+sudo apt-get clean
+df -h
+echo "Removing large directories"
+# deleting 15GB
+sudo rm -rf /usr/share/dotnet/
+sudo rm -rf /opt/hostedtoolcache
+df -h
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
deleted file mode 100644
index d16d1caee956187c824408cbf0a13a4640d12407..0000000000000000000000000000000000000000
--- a/.github/pull_request_template.md
+++ /dev/null
@@ -1,23 +0,0 @@
-Changelog
----------
-
-### Added
-
-### Changed
-
-### Deprecated
-
-### Removed
-
-### Fixed
-
-### Security
-
-
-Checklist
----------
-
-- [ ] Test
-- [ ] Self-review
-- [ ] Documentation
-- [ ] Version Bumped Manually*
diff --git a/.github/workflows/ci-cd.yaml b/.github/workflows/ci-cd.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2e09feb457a312d8e3f750498271a8834a5ed911
--- /dev/null
+++ b/.github/workflows/ci-cd.yaml
@@ -0,0 +1,108 @@
+# Copyright Jiaqi Liu
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+name: CI/CD
+
+on:
+ pull_request:
+ push:
+ branches: [master]
+
+jobs:
+ yml-md-style-and-link-checks:
+ uses: QubitPi/hashistack/.github/workflows/yml-md-style-and-link-checks.yml@master
+
+ unit-tests:
+ needs: yml-md-style-and-link-checks
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.10"]
+ test: [
+ {test-file: "mlflow/test_parser.py", requirements-file: "mlflow/requirements.txt"}
+ ]
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: pip3 install -r ${{ matrix.test.requirements-file }}
+ - name: Run all tests
+ run: python3 -m unittest ${{ matrix.test.test-file }}
+
+ mlflow-tests:
+ needs: unit-tests
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.10"]
+ steps:
+ - uses: actions/checkout@v3
+ - name: Remove unnecessary files
+ run: .github/free_disk_space.sh
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: pip3 install -r requirements.txt
+ working-directory: mlflow
+ - name: Build model
+ run: python3 HanLPner.py
+ working-directory: mlflow
+ - name: Build Docker image
+ run: mlflow models build-docker --name "entity-extraction"
+ working-directory: mlflow
+ - name: Run Container
+ run: |
+ cp parser.py models/HanLPner/
+ export ML_MODEL_PATH=${{ github.workspace }}/mlflow/models/HanLPner
+ docker run --rm \
+ --memory=4000m \
+ -p 8080:8080 \
+ -v $ML_MODEL_PATH:/opt/ml/model \
+ -e PYTHONPATH="/opt/ml/model:$PYTHONPATH" \
+ -e GUNICORN_CMD_ARGS="--timeout 60 -k gevent --workers=1" \
+ "entity-extraction" &
+ working-directory: mlflow
+ - name: Wait until container is up
+ run: |
+ npm install -g wait-on
+ wait-on http://127.0.0.1:8080/ping
+ - name: Get status code of a test request and verify it's 200
+ run: |
+ status_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H "Content-Type:application/json" --data '{"dataframe_split": {"columns":["text"], "data":[["我爱中国"], ["世界会变、科技会变,但「派昂」不会变,它不会向任何人低头,不会向任何困难低头,甚至不会向「时代」低头。「派昂」,永远引领对科技的热爱。只有那些不向梦想道路上的阻挠认输的人,才配得上与我们一起追逐梦想"]]}}' http://127.0.0.1:8080/invocations)
+ if [ "$status_code" == 200 ]; then
+ exit 0
+ else
+ echo "Integration test failed with a non-200 response from container"
+ exit 1
+ fi
+
+ sync-to-huggingface-space:
+ needs: unit-tests
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ with:
+ fetch-depth: 0
+ lfs: true
+ - name: Push to hub
+ run: git push https://QubitPi:$HF_TOKEN@huggingface.co/spaces/QubitPi/lamassu master:main -f
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml
deleted file mode 100644
index 8654f08d26088bd1efcb72ed5603c4adda835f9f..0000000000000000000000000000000000000000
--- a/.github/workflows/ci-cd.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright Jiaqi Liu
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
----
-name: CI/CD
-
-"on":
- pull_request:
- push:
- branches:
- - master
-
-jobs:
- yml-md-style-and-link-checks:
- uses: QubitPi/hashicorp-aws/.github/workflows/yml-md-style-and-link-checks.yml@master
-
- release:
- name: Publish Lamassu To PyPI
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python 3.10
- uses: actions/setup-python@v4
- with:
- python-version: "3.10"
- - name: Package up SDK
- run: python setup.py sdist
- - name: Publish a Python distribution to PyPI
- if: github.ref == 'refs/heads/master'
- uses: pypa/gh-action-pypi-publish@release/v1
- with:
- user: __token__
- password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 1ff05d328c43e214979e1e5732c3b03b37c7f06a..9ff0a7e18a77b2182da788357b6f8b3f6eea2942 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+.venv
.idea/
+mlruns/
+models/
.DS_Store
__pycache__
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
deleted file mode 100644
index fb070911acf72d9d0ec7b76f660c65507d8745c7..0000000000000000000000000000000000000000
--- a/.readthedocs.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright Jiaqi Liu
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-version: 2
-
-build:
- os: ubuntu-22.04
- tools:
- python: "3.11"
-
-sphinx:
- configuration: docs/source/conf.py
-
-python:
- install:
- - method: pip
- path: .
- - requirements: requirements.txt
- - requirements: docs/source/requirements.txt
diff --git a/LICENSE b/LICENSE
index 261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64..7d7429c3ce99a0651ccdb50a3bda4caa6d45b8ce 100644
--- a/LICENSE
+++ b/LICENSE
@@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.
- Copyright [yyyy] [name of copyright owner]
+ Copyright 2024 Jiaqi Liu
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
diff --git a/README.md b/README.md
index bfc1c77f522895f4c6f557f488ea12c746f292e6..77f8e0fb945655162e7781b255ec3e2e4571df03 100644
--- a/README.md
+++ b/README.md
@@ -1,50 +1,122 @@
-
+---
+title: Lamassu
+emoji: 🤗
+colorFrom: gray
+colorTo: red
+sdk: gradio
+sdk_version: 4.36.1
+app_file: app.py
+pinned: false
+license: apache-2.0
+---
-
+[![Hugging Face space badge]][Hugging Face space URL]
+[![Hugging Face sync status badge]][Hugging Face sync status URL]
+[![MLflow badge]][MLflow URL]
+[![MLflow build status badge]][MLflow build status URL]
+[![Apache License Badge]][Apache License, Version 2.0]
-
+Lamassu is a Named Entity Extraction service that is capable of running on [Hugging Face][Hugging Face space URL] and
+MLflow managed environment. It is the service backing the [Nexus Graph](https://paion-data.github.io/nexusgraph.com/).
-![Python Version][Python Version Badge]
-[![Read the Docs][Read the Docs badge]][Read the Docs URL]
-[![PyPI][PyPI project badge]][PyPI project url]
-[![GitHub Workflow Status][GitHub Workflow Status badge]][GitHub Workflow Status URL]
-![Last Commit][GitHub Last Commit Badge]
-[![Apache License badge]][Apache License URL]
+Hugging Face
+------------
-Lamassu
-=======
+Lamassu is directly available on [Hugging Face space][Hugging Face space URL]. Please check it out.
-Lamassu is a project that empowers individual to agnostically run machine learning algorithms to produce ad-hoc NLP
-features.
+MLflow
+------
-Documentation
--------------
+![Python Version Badge]
-[**Lamassu is in beta development phase for the moment**](https://lamassu.readthedocs.io/en/latest/)
+### Getting Source Code
+
+```console
+git clone git@github.com:QubitPi/lamassu.git
+```
+
+### Running Locally
+
+Create virtual environment and install dependencies:
+
+```console
+cd lamassu/mlflow
+python3 -m venv .venv
+. .venv/bin/activate
+pip3 install -r requirements.txt
+```
+
+Generate Model with
+
+```console
+python3 HanLPner.py
+```
+
+A model directory called "HanLPner" appears under `mlflow/models`. Then build Docker image
+
+```console
+mlflow models build-docker --name "entity-extraction"
+```
+
+and run container with
+
+```console
+cp parser.py models/HanLPner/
+export ML_MODEL_PATH=/absolute/path/to/models/HanLPner
+
+docker run --rm \
+ --memory=4000m \
+ -p 8080:8080 \
+ -v $ML_MODEL_PATH:/opt/ml/model \
+ -e PYTHONPATH="/opt/ml/model:$PYTHONPATH" \
+ -e GUNICORN_CMD_ARGS="--timeout 60 -k gevent --workers=1" \
+ "entity-extraction"
+```
+
+> [!TIP]
+> If `docker.errors.DockerException: Error while fetching server API version: ('Connection aborted.', FileNotFoundError(2, 'No such file or directory'))`
+> error is seen, refer to
+> https://forums.docker.com/t/docker-errors-dockerexception-error-while-fetching-server-api-version-connection-aborted-filenotfounderror-2-no-such-file-or-directory-error-in-python/135637/5
+
+The container runs Gunicorn server inside to serve incoming requests
+
+> [!WARNING]
+> The number of gunicorn worker processes MUST be **1** (`--workers=1`) to prevent multiple workers from downloading a
+> HanLP pre-trained model to the same location, which results in a runtime error in the Docker container. In a **native**
+> environment, this error can be
+>
+> ```console
+> OSError: [Errno 39] Directory not empty: '/root/.hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_small_20210304_135840'
+> -> '/root/.hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_small_20210111_124159'
+> ```
+
+Example query:
+
+```bash
+curl -X POST -H "Content-Type:application/json" \
+ --data '{"dataframe_split": {"columns":["text"], "data":[["我爱中国"], ["世界会变、科技会变,但「派昂」不会变,它不会向任何人低头,不会向任何困难低头,甚至不会向「时代」低头。「派昂」,永远引领对科技的热爱。只有那些不向梦想道路上的阻挠认输的人,才配得上与我们一起追逐梦想"]]}}' \
+ http://127.0.0.1:8080/invocations
+```
+
+[Note the JSON schema of the `--data` value](https://stackoverflow.com/a/75104855)
License
-------
-The use and distribution terms for Lamassu are covered by the
-[Apache License, Version 2.0][Apache License, Version 2.0].
+The use and distribution terms for [lamassu](https://github.com/QubitPi/lamassu) are covered by the [Apache License, Version 2.0].
-
+[Apache License Badge]: https://img.shields.io/badge/Apache%202.0-F25910.svg?style=for-the-badge&logo=Apache&logoColor=white
+[Apache License, Version 2.0]: https://www.apache.org/licenses/LICENSE-2.0
-[Apache License badge]: https://img.shields.io/badge/Apache%202.0-F25910.svg?style=for-the-badge&logo=Apache&logoColor=white
-[Apache License URL]: https://www.apache.org/licenses/LICENSE-2.0
-[Apache License, Version 2.0]: http://www.apache.org/licenses/LICENSE-2.0.html
+[Hugging Face space badge]: https://img.shields.io/badge/Hugging%20Face%20Space-lamassu-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white
+[Hugging Face space URL]: https://huggingface.co/spaces/QubitPi/lamassu
-[GitHub Last Commit Badge]: https://img.shields.io/github/last-commit/QubitPi/lamassu/master?logo=github&style=for-the-badge
-[GitHub Workflow Status badge]: https://img.shields.io/github/actions/workflow/status/QubitPi/lamassu/ci-cd.yml?logo=github&style=for-the-badge
-[GitHub Workflow Status URL]: https://github.com/QubitPi/lamassu/actions/workflows/ci-cd.yml
+[Hugging Face sync status badge]: https://img.shields.io/github/actions/workflow/status/QubitPi/lamassu/ci-cd.yaml?branch=master&style=for-the-badge&logo=github&logoColor=white&label=Hugging%20Face%20Sync%20Up
+[Hugging Face sync status URL]: https://github.com/QubitPi/lamassu/actions/workflows/ci-cd.yaml
-[Python Version Badge]: https://img.shields.io/badge/Python-3.10-brightgreen?style=for-the-badge&logo=python&logoColor=white
-[PyPI project badge]: https://img.shields.io/pypi/v/lamassu?logo=pypi&logoColor=white&style=for-the-badge
-[PyPI project url]: https://pypi.org/project/lamassu/
+[MLflow badge]: https://img.shields.io/badge/MLflow%20Supported-0194E2?style=for-the-badge&logo=mlflow&logoColor=white
+[MLflow URL]: https://mlflow.qubitpi.org/
+[MLflow build status badge]: https://img.shields.io/github/actions/workflow/status/QubitPi/lamassu/ci-cd.yaml?branch=master&style=for-the-badge&logo=github&logoColor=white&label=MLflow%20Build
+[MLflow build status URL]: https://github.com/QubitPi/lamassu/actions/workflows/ci-cd.yaml
-[Read the Docs badge]: https://img.shields.io/readthedocs/lamassu?style=for-the-badge&logo=readthedocs&logoColor=white&label=Read%20the%20Docs&labelColor=8CA1AF
-[Read the Docs URL]: https://lamassu.readthedocs.io/en/latest/
+[Python Version Badge]: https://img.shields.io/badge/Python-3.10-brightgreen?style=for-the-badge&logo=python&logoColor=white
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..b42e0dda66119bc1c104264994b6ec36c890f948
--- /dev/null
+++ b/app.py
@@ -0,0 +1,22 @@
+import gradio as gr
+
+import hanlp
+from mlflow.parser import convert_to_knowledge_graph_spec
+
+HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH)
+
+def inference(input):
+ return convert_to_knowledge_graph_spec(HanLP([input])["srl"])
+
+app = gr.Interface(
+ fn=inference,
+ inputs="text",
+ outputs="json",
+ title="Named Entity Recognition",
+ description=("Turning text corpus into graph representation"),
+ examples=[
+ ["我爱中国"],
+ ["世界会变、科技会变,但「派昂」不会变,它不会向任何人低头,不会向任何困难低头,甚至不会向「时代」低头。「派昂」,永远引领对科技的热爱。只有那些不向梦想道路上的阻挠认输的人,才配得上与我们一起追逐梦想"]
+ ],
+)
+app.launch()
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index 7144694f8f8a85f9f96e49c75c23db4816458354..0000000000000000000000000000000000000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright Jiaqi Liu
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line, and also
-# from the environment for the first two.
-SPHINXOPTS ?=
-SPHINXBUILD ?= sphinx-build
-SOURCEDIR = source
-BUILDDIR = build
-
-# Put it first so that "make" without argument is like "make help".
-help:
- @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/make.bat b/docs/make.bat
deleted file mode 100644
index 9534b018135ed7d5caed6298980c55e8b1d2ec82..0000000000000000000000000000000000000000
--- a/docs/make.bat
+++ /dev/null
@@ -1,35 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
- set SPHINXBUILD=sphinx-build
-)
-set SOURCEDIR=source
-set BUILDDIR=build
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
- echo.
- echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
- echo.installed, then set the SPHINXBUILD environment variable to point
- echo.to the full path of the 'sphinx-build' executable. Alternatively you
- echo.may add the Sphinx directory to PATH.
- echo.
- echo.If you don't have Sphinx installed, grab it from
- echo.http://sphinx-doc.org/
- exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-
-:end
-popd
diff --git a/docs/source/conf.py b/docs/source/conf.py
deleted file mode 100644
index c706e792af7b2c1bb695a7b2f92acd096fad128d..0000000000000000000000000000000000000000
--- a/docs/source/conf.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Configuration file for the Sphinx documentation builder.
-#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# https://www.sphinx-doc.org/en/master/usage/configuration.html
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
-import sys
-sys.path.insert(0, os.path.abspath('../../'))
-
-
-# -- Project information -----------------------------------------------------
-
-project = 'lamassu'
-copyright = '2023, Jiaqi Liu'
-author = 'Jiaqi Liu'
-
-# The full version, including alpha/beta/rc tags
-release = '0.1.0'
-
-
-# -- General configuration ---------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
- 'sphinx.ext.autodoc',
- 'hoverxref.extension',
- 'notfound.extension',
- 'sphinx.ext.coverage',
- 'sphinx.ext.intersphinx',
- 'sphinx.ext.viewcode',
- "sphinx.ext.graphviz",
- "pyan.sphinx"
-]
-
-# add graphviz options
-graphviz_output_format = "svg"
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
-
-
-# -- Options for HTML output -------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'sphinx_rtd_theme'
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-intersphinx_mapping = {
- 'attrs': ('https://www.attrs.org/en/stable/', None),
- 'coverage': ('https://coverage.readthedocs.io/en/stable', None),
- 'cryptography': ('https://cryptography.io/en/latest/', None),
- 'cssselect': ('https://cssselect.readthedocs.io/en/latest', None),
- 'itemloaders': ('https://itemloaders.readthedocs.io/en/latest/', None),
- 'pytest': ('https://docs.pytest.org/en/latest', None),
- 'python': ('https://docs.python.org/3', None),
- 'sphinx': ('https://www.sphinx-doc.org/en/master', None),
- 'tox': ('https://tox.wiki/en/latest/', None),
- 'twisted': ('https://docs.twisted.org/en/stable/', None),
- 'twistedapi': ('https://docs.twisted.org/en/stable/api/', None),
- 'w3lib': ('https://w3lib.readthedocs.io/en/latest', None),
-}
-intersphinx_disabled_reftypes = []
-
-hoverxref_auto_ref = True
-hoverxref_role_types = {
- "class": "tooltip",
- "command": "tooltip",
- "confval": "tooltip",
- "hoverxref": "tooltip",
- "mod": "tooltip",
- "ref": "tooltip",
- "reqmeta": "tooltip",
- "setting": "tooltip",
- "signal": "tooltip",
-}
-hoverxref_roles = ['command', 'reqmeta', 'setting', 'signal']
diff --git a/docs/source/faq.rst b/docs/source/faq.rst
deleted file mode 100644
index f8a03d9d2551588cebaffa52b45c67e76178151f..0000000000000000000000000000000000000000
--- a/docs/source/faq.rst
+++ /dev/null
@@ -1,78 +0,0 @@
-.. _faq:
-
-==========================
-Frequently Asked Questions
-==========================
-
-
-Python Sphinx Autodoc Is Not Rendering on readthedocs
-=====================================================
-
-The project's dependencies are not specified on RTD, but instead have installed the dependencies locally. Visit the
-project's Builds, click a build, and click "view raw"::
-
- WARNING: autodoc: failed to import module 'rnn' from module 'lamassu'; the following exception was raised:
- No module named 'matplotlib'
-
-To remedy the situation, we must specify that the project's dependencies to be installed. See
-`Specifying Dependencies `_.
-
-
-Generate Sphinx Documentation Locally
-=====================================
-
-This site is auto-generated using `Sphinx `_ with the following command in venv::
-
- cd /path/to/lamassu/
- python3 -m venv venv
- source venv/bin/activate
- pip3 install .
- pip3 install -r docs/source/requirements.txt
- sphinx-build -a -b html docs/source/ /path/to/html/output/dir
- deactivate
-
-.. NOTE::
- The command above works for Linux/UNIX systems. Some commands will
- `differ on Windows OS `_
-
-
-Install Lamassu from Source Locally
-===================================
-
-We recommend creating a virtualenv for your application and activate it
-
-Navigate to the ``lamassu`` root directory and run::
-
- pip3 install -e .
-
-For more general information, please refer to the
-`Hitchhiker's Guide to Python `_: "Structuring Your Project".
-
-
-"module 'collections' has no attribute 'Callable' Error When Running nosetests
-==============================================================================
-
-First, uninstall nose with the following command::
-
- pip3 uninstall -y nose
-
-Second, reinstall nose but with ``--nobinaries`` flag::
-
- pip3 install -U nose --no-binary :all:
-
-Why does this work? At the time of this writing the binary generated by nose was likely generated with a version of
-Python 3.4 or older. This command forces to rebuild from source.
-
-
-No module named 'pytest' while Running Test Directly in PyCharm
-===============================================================
-
-"Right-click" run a ``test_**.py`` file results in::
-
- Traceback (most recent call last):
- File "/Applications/PyCharm CE.app/Contents/plugins/python-ce/helpers/pycharm/_jb_pytest_runner.py", line 5, in
- import pytest
- ModuleNotFoundError: No module named 'pytest'
-
-The solution is going to '**Settings** -> **Tools** -> **Python Integrated Tools**' and scroll down to where it says
-`pytest not found` and there is a **FIX** button. Clicking on it and apply the settings shall resolve the problem
diff --git a/docs/source/img/char-level-language-model.png b/docs/source/img/char-level-language-model.png
deleted file mode 100644
index c7de70483997c9b6ffa3d4066a4aeab83a313b49..0000000000000000000000000000000000000000
Binary files a/docs/source/img/char-level-language-model.png and /dev/null differ
diff --git a/docs/source/img/hello-sampling.png b/docs/source/img/hello-sampling.png
deleted file mode 100644
index f379b52bfa9deea54253a370129c7fa2f9c4a9ba..0000000000000000000000000000000000000000
Binary files a/docs/source/img/hello-sampling.png and /dev/null differ
diff --git a/docs/source/img/hello-sound.png b/docs/source/img/hello-sound.png
deleted file mode 100644
index e3b8ca07fb590c4e3a43fb08abd39ccd1beb1a92..0000000000000000000000000000000000000000
Binary files a/docs/source/img/hello-sound.png and /dev/null differ
diff --git a/docs/source/img/real-vs-sampling.png b/docs/source/img/real-vs-sampling.png
deleted file mode 100644
index 73449a86d6b7c08e00ad3b88be862158e5e5e2be..0000000000000000000000000000000000000000
Binary files a/docs/source/img/real-vs-sampling.png and /dev/null differ
diff --git a/docs/source/img/rnn-4-black-boxes-connected.drawio b/docs/source/img/rnn-4-black-boxes-connected.drawio
deleted file mode 100644
index 69d8a141c5782e813c62b78b264f10144155505d..0000000000000000000000000000000000000000
--- a/docs/source/img/rnn-4-black-boxes-connected.drawio
+++ /dev/null
@@ -1,121 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/docs/source/img/rnn-4-black-boxes-connected.png b/docs/source/img/rnn-4-black-boxes-connected.png
deleted file mode 100644
index 94c23e4e7181ca093068ba5b8cce7f77764d2643..0000000000000000000000000000000000000000
Binary files a/docs/source/img/rnn-4-black-boxes-connected.png and /dev/null differ
diff --git a/docs/source/img/rnn-4-black-boxes.drawio b/docs/source/img/rnn-4-black-boxes.drawio
deleted file mode 100644
index 0a6c1df1637dbb2d21ca1e23da74d12c43ded768..0000000000000000000000000000000000000000
--- a/docs/source/img/rnn-4-black-boxes.drawio
+++ /dev/null
@@ -1,94 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/docs/source/img/rnn-4-black-boxes.png b/docs/source/img/rnn-4-black-boxes.png
deleted file mode 100644
index 5ce50e91f02227a43d1a6dbc973efe23341c2024..0000000000000000000000000000000000000000
Binary files a/docs/source/img/rnn-4-black-boxes.png and /dev/null differ
diff --git a/docs/source/img/rnn-many-to-many-different-ltr.png b/docs/source/img/rnn-many-to-many-different-ltr.png
deleted file mode 100644
index 15694d66bc5bff1b3dfd3ea671a6e87545254127..0000000000000000000000000000000000000000
Binary files a/docs/source/img/rnn-many-to-many-different-ltr.png and /dev/null differ
diff --git a/docs/source/img/rnn-many-to-many-same-ltr.png b/docs/source/img/rnn-many-to-many-same-ltr.png
deleted file mode 100644
index 2f3a449de9c4e9166dd127f3e756769209038c5b..0000000000000000000000000000000000000000
Binary files a/docs/source/img/rnn-many-to-many-same-ltr.png and /dev/null differ
diff --git a/docs/source/img/rnn-many-to-one-ltr.png b/docs/source/img/rnn-many-to-one-ltr.png
deleted file mode 100644
index 47e15d81911c39b70e327a7cf5b9c03a4150402a..0000000000000000000000000000000000000000
Binary files a/docs/source/img/rnn-many-to-one-ltr.png and /dev/null differ
diff --git a/docs/source/img/rnn-multi-sequences.drawio b/docs/source/img/rnn-multi-sequences.drawio
deleted file mode 100644
index aeef8a4b0b5ef1d9ac51560f5a76bc68e524df83..0000000000000000000000000000000000000000
--- a/docs/source/img/rnn-multi-sequences.drawio
+++ /dev/null
@@ -1,250 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/docs/source/img/rnn-multi-sequences.png b/docs/source/img/rnn-multi-sequences.png
deleted file mode 100644
index 4cc38a87bc42179083050f212de460d0359e4049..0000000000000000000000000000000000000000
Binary files a/docs/source/img/rnn-multi-sequences.png and /dev/null differ
diff --git a/docs/source/img/rnn-one-to-many-ltr.png b/docs/source/img/rnn-one-to-many-ltr.png
deleted file mode 100644
index 44e5b80240b4fb3d93916139a0b9fdf693f7e830..0000000000000000000000000000000000000000
Binary files a/docs/source/img/rnn-one-to-many-ltr.png and /dev/null differ
diff --git a/docs/source/img/rnn-one-to-one-ltr.png b/docs/source/img/rnn-one-to-one-ltr.png
deleted file mode 100644
index 7b2bd54543a6dae51b2ee65fc9ac07a7e307941a..0000000000000000000000000000000000000000
Binary files a/docs/source/img/rnn-one-to-one-ltr.png and /dev/null differ
diff --git a/docs/source/img/rnn.drawio b/docs/source/img/rnn.drawio
deleted file mode 100644
index 9769f3267ae4ab73560c34b2b6e0b918fc8d69ff..0000000000000000000000000000000000000000
--- a/docs/source/img/rnn.drawio
+++ /dev/null
@@ -1,149 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/docs/source/img/rnn.png b/docs/source/img/rnn.png
deleted file mode 100644
index 15c41830544c2c9f90e3f557e0809cfa71c9a787..0000000000000000000000000000000000000000
Binary files a/docs/source/img/rnn.png and /dev/null differ
diff --git a/docs/source/img/sampling-sound-wave.gif b/docs/source/img/sampling-sound-wave.gif
deleted file mode 100644
index ed65ab0f4177ee2f28d27cee23a153ee0d1e892a..0000000000000000000000000000000000000000
Binary files a/docs/source/img/sampling-sound-wave.gif and /dev/null differ
diff --git a/docs/source/img/sound-wave.png b/docs/source/img/sound-wave.png
deleted file mode 100644
index bf327a44cf3c243528f22c7d414c41cd83b5959d..0000000000000000000000000000000000000000
Binary files a/docs/source/img/sound-wave.png and /dev/null differ
diff --git a/docs/source/img/speech-processing.png b/docs/source/img/speech-processing.png
deleted file mode 100644
index d1f08f59fe6da813fdaec9b9017f843aa5a78191..0000000000000000000000000000000000000000
Binary files a/docs/source/img/speech-processing.png and /dev/null differ
diff --git a/docs/source/index.rst b/docs/source/index.rst
deleted file mode 100644
index 4444a709fd7c2381fd5644042daea0045c40938f..0000000000000000000000000000000000000000
--- a/docs/source/index.rst
+++ /dev/null
@@ -1,46 +0,0 @@
-=====================
-Lamassu documentation
-=====================
-
-
-Getting help
-============
-
-Having trouble? We'd like to help!
-
-* Try the :doc:`FAQ ` -- it's got answers to some common questions.
-* Looking for specific information? Try the :ref:`genindex` or :ref:`modindex`.
-* Report bugs with lamassu in our `issue tracker`_.
-* Join the Discord community `Lamassu Discord`_.
-
-.. hint::
- * Since methods with two underscores (`__`) cannot be tested due to the
- `name mangling `_. Lamassu requires
- all private methods and attributes to be prefixed with **single underscore prefix (`_`) only**
-   * The phrase "Chinese" used throughout this documentation refers to "**Simplified Chinese**", instead of
- "Traditional Chinese"
-
-
-First Steps
-===========
-
-.. toctree::
- :caption: First steps
- :hidden:
-
- intro/install
-
-:doc:`intro/install`
- Get lamassu installed on your computer.
-
-
-Usage
-=====
-
-.. toctree::
- :maxdepth: 100
-
- lamassu
-
-.. _issue tracker: https://github.com/QubitPi/lamassu/issues
-.. _Lamassu Discord: https://discord.com/widget?id=1208960229002317934&theme=dark
diff --git a/docs/source/intro/install.rst b/docs/source/intro/install.rst
deleted file mode 100644
index 826eded954b62eee1e0b2869d97bf62d05875b0f..0000000000000000000000000000000000000000
--- a/docs/source/intro/install.rst
+++ /dev/null
@@ -1,54 +0,0 @@
-.. _intro-install:
-
-==================
-Installation guide
-==================
-
-
-Supported Python versions
-=========================
-
-Lamassu has been tested with Python 3.10. It may work with older versions of Python but it is not guaranteed.
-
-
-Installing Lamassu
-==================
-
-If you are already familiar with installation of Python packages, we can install Lamassu and its dependencies from
-`PyPI `_ with::
-
- pip3 install lamassu
-
-We strongly recommend that you install Lamassu in :ref:`a dedicated virtualenv `, to avoid
-conflicting with your system packages.
-
-If you're using `Anaconda `_ or
-`Miniconda `_, please allow me to
-apologize because I hate those two, so we won't install the package from there.
-
-
-Installing from Source
-======================
-
-When we want to apply a bug fix quickly by installing Lamassu locally, we can use::
-
- git clone https://github.com/QubitPi/lamassu.git
- cd lamassu
- pip3 install -e .
-
-
-.. _intro-using-virtualenv:
-
-Using a virtual environment (recommended)
------------------------------------------
-
-We recommend installing lamassu in a virtual environment on all platforms.
-
-Python packages can be installed either globally (a.k.a system wide), or in user-space. We do not recommend installing
-lamassu system wide. Instead, we recommend installing lamassu within a "virtual environment" (:mod:`venv`),
-which keeps you from conflicting with already-installed Python system packages.
-
-See :ref:`tut-venv` on how to create your virtual environment.
-
-Once you have created a virtual environment, we can install lamassu inside it with ``pip3``, just like any other
-Python package.
diff --git a/docs/source/lamassu.rst b/docs/source/lamassu.rst
deleted file mode 100644
index 71ced261fc4becb055f047d82d5c8b5186f8f377..0000000000000000000000000000000000000000
--- a/docs/source/lamassu.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-=======
-Lamassu
-=======
-
-.. toctree::
- :maxdepth: 100
-
- rnn/rnn
- speech/sampling.rst
diff --git a/docs/source/requirements.txt b/docs/source/requirements.txt
deleted file mode 100644
index 80ab307d6439ad49814c38493770e58b8493fadb..0000000000000000000000000000000000000000
--- a/docs/source/requirements.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-sphinx==5.0.2
-sphinx-hoverxref==1.1.1
-sphinx-notfound-page==0.8
-sphinx-rtd-theme==1.0.0
-pycodestyle
-requests
-pyan3
diff --git a/docs/source/rnn/rnn.rst b/docs/source/rnn/rnn.rst
deleted file mode 100644
index b0331451e43da532af96f2e777c3ec13867c6daf..0000000000000000000000000000000000000000
--- a/docs/source/rnn/rnn.rst
+++ /dev/null
@@ -1,612 +0,0 @@
-================================================
-Introduction to Recurrent Neural Networks (RNNs)
-================================================
-
-.. admonition:: Prerequisite
-
- This article has the following prerequisites:
-
- 1. *Chapter 4 - Artificial Neural Networks* (p. 81) of `MACHINE LEARNING by Mitchell, Thom M. (1997)`_ Paperback
- 2. *Deep Learning (Adaptive Computation and Machine Learning series), Ian Goodfellow*
-
-.. contents:: Table of Contents
- :depth: 2
-
-We have all heard of this buzzword "LLM" (Large Language Model). But let's put that aside for just a second and look at a
-much simpler one called "character-level language model" where, for example, we input a prefix of a word such as
-"hell" and the model outputs a complete word "hello". That is, this language model predicts the next character of a
-character sequence
-
-This is like a Math function where we have:
-
-.. math::
-
- f(\text{“hell"}) = \text{“hello"}
-
-.. NOTE::
-
- We call inputs like "hell" as **sequence**
-
-How do we obtain a function like this? One approach is to have 4 black boxes, each of which takes a single character as
-input and calculates an output:
-
-.. figure:: ../img/rnn-4-black-boxes.png
- :align: center
- :width: 50%
-
-But one might have noticed that if the 3rd function (box) produces :math:`f(‘l') = ‘l'`, then why would the 4th function
-(box), given the same input, gives a different output of 'o'? This suggests that we should take the "**history**" into
-account. Instead of having :math:`f` depend on 1 parameter, we now have it take 2 parameters.
-
-1: a character;
-2: a variable that summarizes the previous calculations:
-
- .. figure:: ../img/rnn-4-black-boxes-connected.png
- :align: center
- :width: 50%
-
-Now it makes much more sense with:
-
-.. math::
-
- f(\text{‘l'}, h_2) = \text{‘l'}
-
- f(\text{‘l'}, h_3) = \text{‘o'}
-
-But what if we want to predict a longer or shorter word? For example, how about predicting "cat" by "ca"? That's simple,
-we will have 2 black boxes to do the work.
-
-.. figure:: ../img/rnn-multi-sequences.png
- :align: center
-
-What if the function :math:`f` is not smart enough to produce the correct output every time? We will simply collect a lot
-of examples such as "cat" and "hello", and feed them into the boxes to train them until they can output correct
-vocabulary like "cat" and "hello".
-
-This is the idea behind RNN
-
-- It's recurrent because the boxed function gets invoked repeatedly for each element of the sequence. In the case of our
- character-level language model, element is a character such as "e" and sequence is a string like "hell"
-
- .. figure:: ../img/rnn.png
- :align: center
-
-Each function :math:`f` is a network unit containing 2 perceptrons. One perceptron computes the "history" like
-:math:`h_1`, :math:`h_2`, :math:`h_3`. Its formula is very similar to that of perceptron:
-
-.. math::
-
- h^{(t)} = g_1\left( W_{hh}h^{(t - 1)} + W_{xh}x^{(t)} + b_h \right)
-
-where :math:`t` is the index of the "black boxes" shown above. In our example of "hell",
-:math:`t \in \{ 1, 2, 3, 4 \}`
-
-The other perceptron computes the output like 'e', 'l', 'l', 'o'. We call those value :math:`y` which is computed as
-
-.. math::
-
- o^{(t)} = g_2\left( W_{yh}h^{(t)} + b_o \right)
-
-.. admonition:: What are :math:`g_1` and :math:`g_2`?
-
- They are *activation functions* which are used to change the linear function in a perceptron to a non-linear
- function. Please refer to `MACHINE LEARNING by Mitchell, Thom M. (1997)`_ Paperback (page 96) for why we bump it
- to non-linear
-
- A typical activation function for :math:`g_1` is :math:`tanh`:
-
- .. math::
-
- tanh(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}
-
-   In practice, :math:`g_2` is constant, i.e. :math:`g_2 = 1`
-
-
-Forward Propagation Equations for RNN
--------------------------------------
-
-We now develop the forward propagation equations for the RNN. We assume the hyperbolic tangent activation function and
-that the output is discrete, as if the RNN is used to predict words or characters. A natural way to represent discrete
-variables is to regard the output :math:`\boldsymbol{o}` as giving the unnormalized log probabilities of each possible value of
-the discrete variable. We can then apply the softmax (we will discuss the softmax function in the next section) operation as
-a post-processing step to obtain a vector :math:`\boldsymbol{\hat{y}}` of normalized probabilities over the output. Forward
-propagation begins with a specification of the initial state :math:`\boldsymbol{h}^{(0)}`. Then, for each time step from
-:math:`t = 1` to :math:`t = \tau`, we apply the following update equations:
-
-.. math::
-
- \color{green} \boxed{
- \begin{gather*}
- \boldsymbol{h}^{(t)} = \tanh\left( \boldsymbol{W_{hh}}h^{(t - 1)} + \boldsymbol{W_{xh}}x^{(t)} + \boldsymbol{b_h} \right) \\ \\
- \boldsymbol{o}^{(t)} = \boldsymbol{W_{yh}}\boldsymbol{h}^{(t)} + \boldsymbol{b_o} \\ \\
- \boldsymbol{\hat{y}} = softmax(\boldsymbol{o}^{(t)})
- \end{gather*}
- }
-
-Note that this recurrent network maps an input sequence to an output sequence of the same length.
-
-Loss Function of RNN
---------------------
-
-According to the discussion of `MACHINE LEARNING by Mitchell, Thom M. (1997)`_, the key for training RNN or any neural
-network is through "specifying a measure for the training error". We call this measure a *loss function*.
-
-In RNN, the total loss for a given sequence of input :math:`\boldsymbol{x}` paired with a sequence of expected
-:math:`\boldsymbol{y}` is the sum of the losses over all the time steps, i.e.
-
-.. math::
-
-    \mathcal{L}\left( \{ \boldsymbol{x}^{(1)}, ..., \boldsymbol{x}^{(\tau)} \}, \{ \boldsymbol{y}^{(1)}, ..., \boldsymbol{y}^{(\tau)} \} \right) = \sum_t^{\tau} \mathcal{L}^{(t)} = -\sum_t^{\tau}\log\boldsymbol{\hat{y}}^{(t)}
-
-Why would we have :math:`\mathcal{L}^{(t)} = -\log\boldsymbol{\hat{y}}^{(t)}`? We need to learn *Softmax Activation* first.
-
-.. admonition:: Softmax Function by `Wikipedia `_
-
- The softmax function takes as input a vector :math:`z` of :math:`K` real numbers, and normalizes it into a
- probability distribution consisting of :math:`K` probabilities proportional to the exponentials of the input
- numbers. That is, prior to applying softmax, some vector components could be negative, or greater than one; and
- might not sum to 1; but after applying softmax, each component will be in the interval :math:`(0, 1)` and the
- components will add up to 1, so that they can be interpreted as probabilities. Furthermore, the larger input
- components will correspond to larger probabilities.
-
-   For a vector :math:`z` of :math:`K` real numbers, the standard (unit) softmax function
- :math:`\sigma: \mathbb{R}^K \mapsto (0, 1)^K`, where :math:`K \ge 1` is defined by
-
- .. math::
-
- \sigma(\boldsymbol{z})_i = \frac{e^{z_i}}{\sum_{j = 1}^Ke^{z_j}}
-
- where :math:`i = 1, 2, ..., K` and :math:`\boldsymbol{x} = (x_1, x_2, ..., x_K) \in \mathbb{R}^K`
-
-In the context of RNN,
-
-.. math::
-
-    \sigma(\boldsymbol{o})_i = \frac{e^{o_i}}{\sum_{j = 1}^ne^{o_j}}
-
-where
-
-- :math:`n` is the length of a sequence feed into the RNN
-- :math:`o_i` is the output by perceptron unit `i`
-- :math:`i = 1, 2, ..., n`,
-- :math:`\boldsymbol{o} = (o_1, o_2, ..., o_n) \in \mathbb{R}^n`
-
-The softmax function takes an N-dimensional vector of arbitrary real values and produces another N-dimensional vector
-with real values in the range (0, 1) that add up to 1.0. It maps :math:`\mathbb{R}^N \rightarrow \mathbb{R}^N`
-
-.. math::
-
- \sigma(\boldsymbol{o}): \begin{pmatrix}o_1\\o_2\\\dots\\o_n\end{pmatrix} \rightarrow \begin{pmatrix}\sigma_1\\\sigma_2\\\dots\\\sigma_n\end{pmatrix}
-
-This property of softmax function that it outputs a probability distribution makes it suitable for probabilistic
-interpretation in classification tasks. Neural networks, however, are commonly trained under a log loss (or
-cross-entropy) regime
-
-We are going to compute the derivative of the softmax function because we will be using it for training our RNN model
-shortly. But before diving in, it is important to keep in mind that Softmax is fundamentally a vector function. It takes
-a vector as input and produces a vector as output; in other words, it has multiple inputs and multiple outputs.
-Therefore, we cannot just ask for "the derivative of softmax"; We should instead specify:
-
-1. Which component (output element) of softmax we're seeking to find the derivative of.
-2. Since softmax has multiple inputs, with respect to which input element the partial derivative is computed.
-
-What we're looking for is the partial derivatives of
-
-.. math::
-
- \frac{\partial \sigma_i}{\partial o_k} = \frac{\partial }{\partial o_k} \frac{e^{o_i}}{\sum_{j = 1}^ne^{o_j}}
-
-
-:math:`\frac{\partial \sigma_i}{\partial o_k}` **is the partial derivative of the i-th output with respect with the
-k-th input**.
-
-We'll be using the quotient rule of derivatives. For :math:`h(x) = \frac{f(x)}{g(x)}` where both :math:`f` and :math:`g`
-are differentiable and :math:`g(x) \ne 0`, The `quotient rule `_ states
-that the derivative of :math:`h(x)` is
-
-.. math::
-
- h'(x) = \frac{f'(x)g(x) - f(x)g'(x)}{g^2(x)}
-
-In our case, we have
-
-.. math::
-
- f'(o_k) = \frac{\partial}{\partial o_k} e^{o_i} = \begin{cases}
- e^{o_k}, & \text{if}\ i = k \\
- 0, & \text{otherwise}
- \end{cases}
-
-.. math::
-
- g'(o_k) = \frac{\partial}{\partial o_k} \sum_{j = 1}^ne^{o_j} = \left( \frac{\partial e^{o_1}}{\partial o_k} + \frac{\partial e^{o_2}}{\partial o_k} + \dots + \frac{\partial e^{o_k}}{\partial o_k} + \dots + \frac{\partial e^{o_n}}{\partial o_k} \right) = \frac{\partial e^{o_k}}{\partial o_k} = e^{o_k}
-
-The rest of it becomes trivial then. When :math:`i = k`,
-
-.. math::
-
- \frac{\partial \sigma_i}{\partial o_k} = \frac{e^{o_k} \sum_{j = 1}^ne^{o_j} - e^{o_k} e^{o_i}}{\left( \sum_{j = 1}^ne^{o_j} \right)^2}
- = \frac{e^{o_i} \sum_{j = 1}^ne^{o_j} - e^{o_i} e^{o_i}}{\left( \sum_{j = 1}^ne^{o_j} \right)^2}
- = \frac{e^{o_i}}{\sum_{j = 1}^ne^{o_j}} \frac{\sum_{j = 1}^ne^{o_j} - e^{o_i}}{\sum_{j = 1}^ne^{o_j}} \\
-
- = \sigma_i\left( \frac{\sum_{j = 1}^ne^{o_j}}{\sum_{j = 1}^ne^{o_j}} - \frac{e^{o_i}}{\sum_{j = 1}^ne^{o_j}} \right)
- = \sigma_i \left( 1 - \sigma_i \right)
-
-When :math:`i \ne k`
-
-.. math::
-
- \frac{\partial \sigma_i}{\partial o_k} = \frac{-e^{o_k} e^{o_i}}{\left( \sum_{j = 1}^ne^{o_j} \right)^2} = -\sigma_i\sigma_k
-
-This concludes the derivative of the softmax function:
-
-.. math::
-
- \frac{\partial \sigma_i}{\partial o_k} = \begin{cases}
- \sigma_i \left( 1 - \sigma_i \right), & \text{if}\ i = k \\
- -\sigma_i\sigma_k, & \text{otherwise}
- \end{cases}
-
-Cross-Entropy
-"""""""""""""
-
-.. admonition:: Cross-Entropy `Wikipedia `_
-
- In information theory, the cross-entropy between two probability distributions :math:`p` and :math:`q` over the same
- underlying set of events measures the average number of bits needed to identify an event drawn from the set if a
- coding scheme used for the set is optimized for an estimated probability distribution :math:`q`, rather than the
- true distribution :math:`p`
-
-Confused? Let's put it in the context of Machine Learning.
-
-Machine Learning sees the world based on probability. The "probability distribution" identifies the various tasks to
-learn. For example, a daily language such as English or Chinese, can be seen as a probability distribution. The
-probability of "name" followed by "is" is far greater than "are" as in "My name is Jack". We call such language
-distribution :math:`p`. The task of RNN (or Machine Learning in general) is to learn an approximated distribution of
-:math:`p`; we call this approximation :math:`q`
-
-"The average number of bits needed" can be seen as the distance between :math:`p` and :math:`q` given an event. In
-analogy of language, this can be the *quantitative* measure of the deviation between a real language phrase
-"My name is Jack" and "My name are Jack".
-
-At this point, it is easy to imagine that, in the Machine Learning world, the cross entropy indicates the distance between
-what the model believes the output distribution should be and what the original distribution really is.
-
-Now we have an intuitive understanding of cross entropy, let's formally define it.
-
-The cross-entropy of the discrete probability distribution :math:`q` relative to a distribution :math:`p` over a given
-set is defined as
-
-.. math::
-
- H(p, q) = -\sum_x p(x)\log q(x)
-
-In RNN, the probability distribution of :math:`q(x)` is exactly the softmax function we defined earlier:
-
-.. math::
-
- \mathcal{L} = -\sum_i p(i)\log\sigma(\boldsymbol{o})_i = -\sum_i \log\sigma(\boldsymbol{o})_i = -\log\boldsymbol{\hat{y}}^{(t)}
-
-where
-
-- :math:`\boldsymbol{o}` is the predicted sequence by RNN and :math:`o_i` is the i-th element of the predicted sequence
-
-.. admonition:: What is the Mathematical form of :math:`p(i)` in RNN? Why would it become 1?
-
- By definition, :math:`p(i)` is the *true* distribution whose exact functional form is unknown. In the language of
- Approximation Theory, :math:`p(i)` is the function that RNN is trying to learn or approximate mathematically.
-
- Although the :math:`p(i)` makes the exact form of :math:`\mathcal{L}` unknown, computationally :math:`p(i)` is
- perfectly defined in each training example. Taking our "hello" example:
-
- .. figure:: ../img/char-level-language-model.png
- :align: center
- :width: 60%
-
- The 4 probability distributions of :math:`q(x)` is "reflected" in the **output layer** of this example. They are
- "reflecting" the probability distribution of :math:`q(x)` because they are only :math:`o` values and have not been
- transformed to the :math:`\sigma` distribution yet. But in this case, we are 100% sure that the true probability
- distribution :math:`p(i)` for the 4 outputs are
-
- .. math::
-
- \begin{pmatrix}0\\1\\0\\0\end{pmatrix}, \begin{pmatrix}0\\0\\1\\0\end{pmatrix}, \begin{pmatrix}0\\0\\1\\0\end{pmatrix}, \begin{pmatrix}0\\0\\0\\1\end{pmatrix}
-
- respectively. *That is all we need for calculating the* :math:`\mathcal{L}`
-
-Deriving Gradient Descent Weight Update Rule
---------------------------------------------
-
-*Training a RNN model is the same thing as searching for the optimal values for the following parameters of these two
-perceptrons*:
-
-1. :math:`W_{xh}`
-2. :math:`W_{hh}`
-3. :math:`W_{yh}`
-4. :math:`b_h`
-5. :math:`b_o`
-
-By the Gradient Descent discussed in `MACHINE LEARNING by Mitchell, Thom M. (1997)`_ tells us we should derive the
-weight update rule by *taking partial derivatives with respect to all of the variables above*. Let's start with
-:math:`W_{yh}`
-
-`MACHINE LEARNING by Mitchell, Thom M. (1997)`_ has mentioned gradients and partial derivatives as being important for
-an optimization algorithm to update, say, the model weights of a neural network to reach an optimal set of weights. The
-use of partial derivatives permits each weight to be updated independently of the others, by calculating the gradient of
-the error curve with respect to each weight in turn.
-
-Many of the functions that we usually work with in machine learning are *multivariate*, *vector-valued* functions, which
-means that they map multiple real inputs :math:`n` to multiple real outputs :math:`m`:
-
-.. math::
-
- f: \mathbb{R}^n \rightarrow \mathbb{R}^m
-
-In training a neural network, the backpropagation algorithm is responsible for sharing back the error calculated at the
-output layer among the neurons comprising the different hidden layers of the neural network, until it reaches the input.
-
-If our RNN contains only 1 perceptron unit, the error is propagated back by, using the
-`Chain Rule `_ of :math:`\frac{dz}{dx} = \frac{dz}{dy}\frac{dy}{dx}`:
-
-.. math::
-
- \frac{\partial \mathcal{L}}{\partial W} = \frac{\partial \mathcal{L}}{\partial o}\frac{\partial o}{\partial W}
-
-Note that in the RNN mode, :math:`\mathcal{L}` is not a direct function of :math:`W`. Thus its first order derivative
-cannot be computed unless we connect the :math:`\mathcal{L}` to :math:`o` first and then to :math:`W`, because both the
-first order derivatives of :math:`\frac{\partial \mathcal{L}}{\partial o}` and :math:`\frac{\partial o}{\partial W}` are
-defined by the model
-
-It is more often the case that we'd have many connected perceptrons populating the network, each attributed a different
-weight. Since this is the case for RNN, we can generalise multiple inputs and multiple outputs using the **Generalized
-Chain Rule**:
-
-Consider the case where :math:`x \in \mathbb{R}^m` and :math:`u \in \mathbb{R}^n`; an inner function, :math:`f`, maps
-:math:`m` inputs to :math:`n` outputs, while an outer function, :math:`g`, receives :math:`n` inputs to produce an
-output, :math:`h \in \mathbb{R}^k`. For :math:`i = 1, \dots, m` the generalized chain rule states:
-
-.. math::
-
- \frac{\partial h}{\partial x_i} = \frac{\partial h}{\partial u_1} \frac{\partial u_1}{\partial x_i} + \frac{\partial h}{\partial u_2} \frac{\partial u_2}{\partial x_i} + \dots + \frac{\partial h}{\partial u_n} \frac{\partial u_n}{\partial x_i} = \sum_{j = 1}^n \frac{\partial h}{\partial u_j} \frac{\partial u_j}{\partial x_i}
-
-Therefore, the error propagation of Gradient Descent in RNN is
-
-.. math::
-
- \color{green} \boxed{
- \begin{align}
- \frac{\partial \mathcal{L}}{\partial W_{yh}} = \sum_{t = 1}^\tau \sum_{i = 1}^n \frac{\partial \mathcal{L}}{\partial o_i^{(t)}} \frac{\partial o_i^{(t)}}{\partial W_{yh}} \\ \\
- \frac{\partial \mathcal{L}}{\partial W_{hh}} = \sum_{t = 1}^\tau \sum_{i = 1}^n \frac{\partial \mathcal{L}}{\partial h_i^{(t)}} \frac{\partial h_i^{(t)}}{\partial W_{hh}} \\ \\
- \frac{\partial \mathcal{L}}{\partial W_{xh}} = \sum_{t = 1}^\tau \sum_{i = 1}^n \frac{\partial \mathcal{L}}{\partial h_i^{(t)}} \frac{\partial h_i^{(t)}}{\partial W_{xh}}
- \end{align}
- }
-
-where :math:`n` is the length of a RNN sequence and :math:`t` is the index of timestep
-
-.. admonition:: :math:`\sum_{t = 1}^\tau`
-
- We assume the error is the sum of all errors of each timestep, which is why we include the :math:`\sum_{t = 1}^\tau`
- term
-
-Let's look at :math:`\frac{\partial \mathcal{L}}{W_{yh}}` first
-
-.. math::
-
- \frac{\partial \mathcal{L}}{W_{yh}} = \sum_{t = 1}^\tau \sum_{i = 1}^n \frac{\partial \mathcal{L}}{\partial o_i^{(t)}} \frac{\partial o_i^{(t)}}{\partial W_{yh}}
-
-Since :math:`o_i = \left( W_{yh}h_i + b_o \right)`,
-
-.. math::
-
- \frac{\partial o_i}{W_{yh}} = \frac{\partial }{W_{yh}}\left( W_{yh}h_i + b_o \right) = h_i
-
-For the :math:`\frac{\partial \mathcal{L}}{\partial o_i}` we shall recall from the earlier discussion on softmax
-derivative that we cannot simply have
-
-.. math::
-
- \frac{\partial \mathcal{L}}{\partial o_i} = -\frac{\partial}{\partial o_i}\sum_i^np(i)\log\sigma_i
-
-because we need to
-
-1. specify which component (output element) we're seeking to find the derivative of
-2. with respect to which input element the partial derivative is computed
-
-Therefore:
-
-.. math::
-
- \frac{\partial \mathcal{L}}{\partial o_i} = -\frac{\partial}{\partial o_i}\sum_j^np(j)\log\sigma_j = -\sum_j^n\frac{\partial}{\partial o_i}p(j)\log\sigma_j = -\sum_j^np(j)\frac{\partial \log\sigma_j}{\partial o_i}
-
-where :math:`n` is the number of timesteps (or the length of a sequence such as "hell")
-
-Applying the chain rule again:
-
-.. math::
-
- -\sum_j^np(j)\frac{\partial \log\sigma_j}{\partial o_i} = -\sum_j^np(j)\frac{1}{\sigma_j}\frac{\partial\sigma_j}{\partial o_i}
-
-Recall we have already derived that
-
-.. math::
-
- \frac{\partial \sigma_i}{\partial o_j} = \begin{cases}
- \sigma_i \left( 1 - \sigma_i \right), & \text{if}\ i = j \\
- -\sigma_i\sigma_j, & \text{otherwise}
- \end{cases}
-
-.. math::
-
- -\sum_j^np(j)\frac{1}{\sigma_j}\frac{\partial\sigma_j}{\partial o_i} = -\sum_{i = j}^np(j)\frac{1}{\sigma_j}\frac{\partial\sigma_j}{\partial o_i} -\sum_{i \ne j}^np(j)\frac{1}{\sigma_j}\frac{\partial\sigma_j}{\partial o_i} = -p(i)(1 - \sigma_i) + \sum_{i \ne j}^np(j)\sigma_i
-
-Observing that
-
-.. math::
-
- \sum_{j}^np(j) = 1
-
-.. math::
-
- -p(i)(1 - \sigma_i) + \sum_{i \ne j}^np(j)\sigma_i = -p(i) + p(i)\sigma_i + \sum_{i \ne j}^np(j)\sigma_i = \sigma_i - p(i)
-
-.. math::
-
- \color{green} \boxed{\frac{\partial \mathcal{L}}{\partial o_i} = \sigma_i - p(i)}
-
-.. math::
-
- \color{green} \boxed{ \frac{\partial \mathcal{L}}{\partial W_{yh}} = \sum_{t = 1}^\tau \sum_i^n\left[ \sigma_i - p(i) \right] h_i = \sum_{t = 1}^\tau \left( \boldsymbol{\sigma} - \boldsymbol{p} \right) \boldsymbol{h}^{(t)} }
-
-.. math::
-
- \frac{\partial \mathcal{L}}{b_o} = \sum_{t = 1}^\tau \sum_i^n\frac{\partial \mathcal{L}}{\partial o_i^{(t)}}\frac{\partial o_i^{(t)}}{\partial b_o^{(t)}} = \sum_{t = 1}^\tau \sum_i^n\left[ \sigma_i - p(i) \right] \times 1
-
-.. math::
-
- \color{green} \boxed{ \frac{\partial \mathcal{L}}{\partial b_o} = \sum_{t = 1}^\tau \sum_i^n\left[ \sigma_i - p(i) \right] = \sum_{t = 1}^\tau \boldsymbol{\sigma} - \boldsymbol{p} }
-
-We have at this point derived backpropagating rule for :math:`W_{yh}` and :math:`b_o`:
-
-1. :math:`W_{xh}`
-2. :math:`W_{hh}`
-3. ✅ :math:`W_{yh}`
-4. :math:`b_h`
-5. ✅ :math:`b_o`
-
-Now let's look at :math:`\frac{\partial \mathcal{L}}{\partial W_{hh}}`:
-
-Recall from *Deep Learning*, section 6.5.2, p. 207 that the vector notation of
-:math:`\frac{\partial z}{\partial x_i} = \sum_j \frac{\partial z}{\partial y_j}\frac{\partial y_j}{\partial x_i}` is
-
-.. math::
-
- \nabla_{\boldsymbol{x}}z = \left( \frac{\partial \boldsymbol{y}}{\partial \boldsymbol{x}} \right)^\intercal \nabla_{\boldsymbol{y}}z
-
-This gives us a start with:
-
-.. math::
-
- \begin{align}
- \frac{\partial \mathcal{L}}{\partial W_{hh}} &= \sum_{t = 1}^\tau \sum_{i = 1}^n \frac{\partial \mathcal{L}}{\partial h_i^{(t)}} \frac{\partial h_i^{(t)}}{\partial W_{hh}} \\
- & = \sum_{t = 1}^\tau \left( \frac{\partial \mathcal{L}}{\partial \boldsymbol{h}^{(t)}} \right)^\intercal \nabla_{\boldsymbol{W_{hh}}}\boldsymbol{h}^{(t)} \\
- & = \sum_{t = 1}^\tau \left( \frac{\partial \mathcal{L}}{\partial \boldsymbol{h}^{(t)}} \right)^\intercal \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{W_{hh}}} \right)^\intercal \nabla_{\boldsymbol{h}^{(t)}}\boldsymbol{h}^{(t)} \\
- & = \sum_{t = 1}^\tau \left( \frac{\partial \mathcal{L}}{\partial \boldsymbol{h}^{(t)}} \right)^\intercal \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{W_{hh}}} \right)^\intercal \\
- & = \sum_{t = 1}^\tau \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{W_{hh}}} \right)^\intercal \frac{\partial \mathcal{L}}{\partial \boldsymbol{h}^{(t)}} \\
- & = \sum_{t = 1}^\tau \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{h}^{(t - 1)}} \right)^\intercal \left( \frac{\partial \boldsymbol{h}^{(t - 1)}}{\partial \boldsymbol{W_{hh}}} \right)^\intercal \frac{\partial \mathcal{L}}{\partial \boldsymbol{h}^{(t)}} \\
- & = \sum_{t = 1}^\tau \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{h}^{(t - 1)}} \right)^\intercal \left( \frac{\partial \boldsymbol{h}^{(t - 1)}}{\partial \boldsymbol{h}^{(t)}}\frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{h}^{(t)}}\frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{W_{hh}}} \right)^\intercal \frac{\partial \mathcal{L}}{\partial \boldsymbol{h}^{(t)}} \\
- & = \sum_{t = 1}^\tau \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{h}^{(t - 1)}} \right)^\intercal \left( \frac{\partial \boldsymbol{h}^{(t - 1)}}{\partial \boldsymbol{h}^{(t)}}\frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{W_{hh}}}\frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{h}^{(t)}} \right)^\intercal \frac{\partial \mathcal{L}}{\partial \boldsymbol{h}^{(t)}} \\
- & = \sum_{t = 1}^\tau \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{h}^{(t - 1)}} \right)^\intercal \left( \frac{\partial \boldsymbol{h}^{(t - 1)}}{\partial \boldsymbol{h}^{(t)}} \right)^\intercal \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{W_{hh}}} \right)^\intercal \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{h}^{(t)}} \right)^\intercal \frac{\partial \mathcal{L}}{\partial \boldsymbol{h}^{(t)}} \\
- & = \sum_{t = 1}^\tau \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{W_{hh}}} \right)^\intercal \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{h}^{(t)}} \right)^\intercal \frac{\partial \mathcal{L}}{\partial \boldsymbol{h}^{(t)}} \\
- & = \sum_{t = 1}^\tau diag\left[ 1 - \left(\boldsymbol{h}^{(t)}\right)^2 \right] \boldsymbol{h}^{(t - 1)} \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \\
- & = \sum_{t = 1}^\tau diag\left[ 1 - \left(\boldsymbol{h}^{(t)}\right)^2 \right] \left( \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \right) {\boldsymbol{h}^{(t - 1)}}^\intercal
- \end{align}
-
-.. math::
-
- \color{green} \boxed{ \frac{\partial \mathcal{L}}{\partial W_{hh}} = \sum_{t = 1}^\tau diag\left[ 1 - \left(\boldsymbol{h}^{(t)}\right)^2 \right] \left( \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \right) {\boldsymbol{h}^{(t - 1)}}^\intercal }
-
-The equation above leaves us with a term :math:`\nabla_{\boldsymbol{h}^{(t)}}\mathcal{L}`, which we calculate next. Note
-that the back propagation on :math:`\boldsymbol{h}^{(t)}` has source from both :math:`\boldsymbol{o}^{(t)}` and
-:math:`\boldsymbol{h}^{(t + 1)}`. Its gradient, therefore, is given by
-
-.. math::
-
- \begin{align}
- \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} &= \left( \frac{\partial \boldsymbol{o}^{(t)}}{\partial \boldsymbol{h}^{(t)}} \right)^\intercal \nabla_{\boldsymbol{o}^{(t)}}\mathcal{L} + \left( \frac{\partial \boldsymbol{h}^{(t + 1)}}{\partial \boldsymbol{h}^{(t)}} \right)^\intercal \nabla_{\boldsymbol{h}^{(t + 1)}}\mathcal{L} \\
- &= \left( \boldsymbol{W_{yh}} \right)^\intercal \nabla_{\boldsymbol{o}^{(t)}}\mathcal{L} + \left( diag\left[ 1 - (\boldsymbol{h}^{(t + 1)})^2 \right] \boldsymbol{W_{hh}} \right)^\intercal \nabla_{\boldsymbol{h}^{(t + 1)}}\mathcal{L} \\
- &= \left( \boldsymbol{W_{yh}} \right)^\intercal \nabla_{\boldsymbol{o}^{(t)}}\mathcal{L}+ \boldsymbol{W_{hh}}^\intercal \nabla_{\boldsymbol{h}^{(t + 1)}}\mathcal{L} \left( diag\left[ 1 - (\boldsymbol{h}^{(t + 1)})^2 \right] \right)
- \end{align}
-
-.. math::
-
- \color{green} \boxed{ \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} = \left( \boldsymbol{W_{yh}} \right)^\intercal \nabla_{\boldsymbol{o}^{(t)}}\mathcal{L} + \boldsymbol{W_{hh}}^\intercal \nabla_{\boldsymbol{h}^{(t + 1)}}\mathcal{L} \left( diag\left[ 1 - (\boldsymbol{h}^{(t + 1)})^2 \right] \right) }
-
-Note that the 2nd term
-:math:`\boldsymbol{W_{hh}}^\intercal \nabla_{\boldsymbol{h}^{(t + 1)}}\mathcal{L} \left( diag\left[ 1 - (\boldsymbol{h}^{(t + 1)})^2 \right] \right)`
-is zero at the first iteration propagating back because for the last-layer (unrolled) of RNN, there's no gradient update
-flow from the next hidden state.
-
-So far we have derived backpropagating rule for :math:`W_{hh}`
-
-1. :math:`W_{xh}`
-2. ✅ :math:`W_{hh}`
-3. ✅ :math:`W_{yh}`
-4. :math:`b_h`
-5. ✅ :math:`b_o`
-
-Let's tackle the remaining :math:`\frac{\partial \mathcal{L}}{\partial W_{xh}}` and :math:`b_h`:
-
-.. math::
-
- \begin{align}
- \frac{\partial \mathcal{L}}{\partial W_{xh}} &= \sum_{t = 1}^\tau \sum_{i = 1}^n \frac{\partial \mathcal{L}}{\partial h_i^{(t)}} \frac{\partial h_i^{(t)}}{\partial W_{xh}} \\
- &= \sum_{t = 1}^\tau \left( \frac{\partial \boldsymbol{h}^{(t)}}{\partial \boldsymbol{W_{xh}}} \right)^\intercal \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \\
- &= \sum_{t = 1}^\tau \left( diag\left[ 1 - (\boldsymbol{h}^{(t)})^2 \right] \boldsymbol{x}^{(t)} \right)^\intercal \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \\
- &= \sum_{t = 1}^\tau \left( diag\left[ 1 - (\boldsymbol{h}^{(t)})^2 \right] \right)^\intercal \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \left( \boldsymbol{x}^{(t)} \right)
- \end{align}
-
-.. math::
-
- \color{green} \boxed{ \frac{\partial \mathcal{L}}{\partial W_{xh}} = \sum_{t = 1}^\tau \left( diag\left[ 1 - (\boldsymbol{h}^{(t)})^2 \right] \right)^\intercal \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \left( \boldsymbol{x}^{(t)} \right) }
-
-.. math::
-
- \begin{align}
- \frac{\partial \mathcal{L}}{\partial b_h} &= \sum_{t = 1}^\tau \sum_{i = 1}^n \frac{\partial \mathcal{L}}{\partial h_i^{(t)}} \frac{\partial h_i^{(t)}}{\partial b_h^{(t)}} \\
- &= \sum_{t = 1}^\tau \left( \frac{\partial h_i^{(t)}}{\partial b_h^{(t)}} \right)^\intercal \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \\
- &= \sum_{t = 1}^\tau \left( diag\left[ 1 - (\boldsymbol{h}^{(t)})^2 \right] \right)^\intercal \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L}
- \end{align}
-
-.. math::
-
- \color{green} \boxed{ \frac{\partial \mathcal{L}}{\partial b_h} = \sum_{t = 1}^\tau \left( diag\left[ 1 - (\boldsymbol{h}^{(t)})^2 \right] \right)^\intercal \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} }
-
-This concludes our propagation rules for training RNN:
-
-.. math::
-
- \color{green} \boxed{
- \begin{gather*}
- \frac{\partial \mathcal{L}}{\partial W_{xh}} = \sum_{t = 1}^\tau \left( diag\left[ 1 - (\boldsymbol{h}^{(t)})^2 \right] \right)^\intercal \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \left( \boldsymbol{x}^{(t)} \right) \\ \\
- \frac{\partial \mathcal{L}}{\partial W_{hh}} = \sum_{t = 1}^\tau diag\left[ 1 - \left(\boldsymbol{h}^{(t)}\right)^2 \right] \left( \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \right) {\boldsymbol{h}^{(t - 1)}}^\intercal \\ \\
- \frac{\partial \mathcal{L}}{\partial W_{yh}} = \sum_{t = 1}^\tau \left( \boldsymbol{\sigma} - \boldsymbol{p} \right) \boldsymbol{h}^{(t)} \\ \\
- \frac{\partial \mathcal{L}}{\partial b_h} = \sum_{t = 1}^\tau \left( diag\left[ 1 - (\boldsymbol{h}^{(t)})^2 \right] \right)^\intercal \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} \\ \\
- \frac{\partial \mathcal{L}}{\partial b_o} =\sum_{t = 1}^\tau \boldsymbol{\sigma} - \boldsymbol{p}
- \end{gather*}
- }
-
-where
-
-.. math::
-
- \color{green} \boxed{ \nabla_{\boldsymbol{h}^{(t)}}\mathcal{L} = \left( \boldsymbol{W_{yh}} \right)^\intercal \nabla_{\boldsymbol{o}^{(t)}}\mathcal{L}+ \boldsymbol{W_{hh}}^\intercal \nabla_{\boldsymbol{h}^{(t + 1)}}\mathcal{L} \left( diag\left[ 1 - (\boldsymbol{h}^{(t + 1)})^2 \right] \right) }
-
-Computational Gradient Descent Weight Update Rule
--------------------------------------------------
-
-What does the propagation rules above look like in Python?
-
-Example
--------
-
-`Pride and Prejudice by Jane Austen `_
-
-
-.. code-block:: python
-
-
-
-
-
-
-
-
-
-.. _`exploding gradient`: https://qubitpi.github.io/stanford-cs231n.github.io/rnn/#vanilla-rnn-gradient-flow--vanishing-gradient-problem
-
-.. _`MACHINE LEARNING by Mitchell, Thom M. (1997)`: https://a.co/d/bjmsEOg
-
-.. _`loss function`: https://qubitpi.github.io/stanford-cs231n.github.io/neural-networks-2/#losses
-.. _`LSTM Formulation`: https://qubitpi.github.io/stanford-cs231n.github.io/rnn/#lstm-formulation
-
-.. _`Vanilla RNN Gradient Flow & Vanishing Gradient Problem`: https://qubitpi.github.io/stanford-cs231n.github.io/rnn/#vanilla-rnn-gradient-flow--vanishing-gradient-problem
diff --git a/docs/source/speech/sampling.rst b/docs/source/speech/sampling.rst
deleted file mode 100644
index 1ed648e16ab143e80b39a45e793ca0818c2ecf39..0000000000000000000000000000000000000000
--- a/docs/source/speech/sampling.rst
+++ /dev/null
@@ -1,68 +0,0 @@
-===============================
-Speech Recognition with Lamassu
-===============================
-
-.. contents:: Table of Contents
- :depth: 2
-
-Speech recognition will become a primary way that we interact with computers.
-
-One might guess that we could simply feed sound recordings into a neural network and train it to produce text:
-
-.. figure:: ../img/speech-processing.png
- :align: center
-
-That's the holy grail of speech recognition with deep learning, but we aren't quite there yet. The big problem is that
-speech varies in speed. One person might say "hello!" very quickly and another person might say
-"heeeelllllllllllllooooo!" very slowly, producing a much longer sound file with much more data. Both sounds should be
-recognized as exactly the same text - "hello!" Automatically aligning audio files of various lengths to a fixed-length
-piece of text turns out to be pretty hard. To work around this, we have to use some special tricks and extra precessing.
-
-Turning Sounds into Bits
-========================
-
-The first step in speech recognition is obvious — we need to feed sound waves into a computer. Sound is transmitted as
-waves. A sound clip of someone saying "Hello" looks like
-
-.. figure:: ../img/hello-sound.png
- :align: center
-
-Sound waves are one-dimensional. At every moment in time, they have a single value based on the height of the wave.
-Let's zoom in on one tiny part of the sound wave and take a look:
-
-.. figure:: ../img/sound-wave.png
- :align: center
-
-To turn this sound wave into numbers, we just record of the height of the wave at equally-spaced points:
-
-.. figure:: ../img/sampling-sound-wave.gif
- :align: center
-
-This is called *sampling*. We are taking a reading thousands of times a second and recording a number representing the
-height of the sound wave at that point in time. That's basically all an uncompressed .wav audio file is.
-
-"CD Quality" audio is sampled at 44.1khz (44,100 readings per second). But for speech recognition, a sampling rate of
-16khz (16,000 samples per second) is enough to cover the frequency range of human speech.
-
-Lets sample our "Hello" sound wave 16,000 times per second. Here's the first 100 samples:
-
-.. figure:: ../img/hello-sampling.png
- :align: center
-
-.. note:: Can digital samples perfectly recreate the original analog sound wave? What about those gaps?
-
- You might be thinking that sampling is only creating a rough approximation of the original sound wave because it's
- only taking occasional readings. There's gaps in between our readings so we must be losing data, right?
-
- .. figure:: ../img/real-vs-sampling.png
- :align: center
-
- But thanks to the `Nyquist theorem`_, we know that we can use math to perfectly reconstruct the original sound wave
- from the spaced-out samples — as long as we sample at least twice as fast as the highest frequency we want to record.
-
-.. automodule:: lamassu.speech.sampling
- :members:
- :undoc-members:
- :show-inheritance:
-
-.. _`Nyquist theorem`: https://en.wikipedia.org/wiki/Nyquist%E2%80%93Shannon_sampling_theorem
diff --git a/lamassu-logo.png b/lamassu-logo.png
deleted file mode 100644
index 90f51b1f24c9ac2ff9f95276e1c1fbd2239b6a27..0000000000000000000000000000000000000000
Binary files a/lamassu-logo.png and /dev/null differ
diff --git a/lamassu/rnn/__init__.py b/lamassu/rnn/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/lamassu/rnn/example.py b/lamassu/rnn/example.py
deleted file mode 100644
index a3d4cbfa1e582d51667d116373e7971badc5dd15..0000000000000000000000000000000000000000
--- a/lamassu/rnn/example.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import numpy as np
-
-from lamassu.rnn.rnn import Config
-from lamassu.rnn.rnn import RecurrentNeuralNetwork
-
-if __name__ == "__main__":
- num_hidden_perceptrons= 100
- seq_length = 25
- learning_rate = 1e-1
-
-
- data = open('pride-and-prejudice.txt', 'r').read()
- char_set = list(set(data))
- num_chars, num_unique_chars = len(data), len(char_set)
- char_to_idx = { ch:i for i,ch in enumerate(char_set) }
- idx_to_char = { i:ch for i,ch in enumerate(char_set) }
-
- rnn = RecurrentNeuralNetwork(
- Config(
- num_hidden_perceptrons=num_hidden_perceptrons,
- input_size=num_unique_chars,
- learning_rate=learning_rate
- )
- )
-
- num_iter, pointer = 0, 0
-
-
- while True:
- if pointer + seq_length + 1 >= len(data) or num_iter == 0:
- prev_history = np.zeros((num_hidden_perceptrons, 1))
- pointer = 0
- input = [char_to_idx[c] for c in data[pointer: pointer + seq_length]]
- target = [char_to_idx[c] for c in data[pointer + 1: pointer + seq_length + 1]]
-
- if num_iter % 100 == 0: # inference after every 100 trainings
- inferenced_idxes = rnn.inference(prev_history, input[0])
- inferenced = ''.join(idx_to_char[idx] for idx in inferenced_idxes)
- print("============ inference ============")
- print(inferenced)
-
- history, q, x, loss = rnn.forward_pass(input, target, prev_history)
-
- if num_iter % 100 == 0:
- print("loss: {}".format(loss))
-
- prev_history = rnn.back_propagation(input, target, history, q, x)
-
- pointer += seq_length
- num_iter += 1
\ No newline at end of file
diff --git a/lamassu/rnn/rnn.py b/lamassu/rnn/rnn.py
deleted file mode 100644
index d3430152b57feef1f32910fd9740d269eb2fd1db..0000000000000000000000000000000000000000
--- a/lamassu/rnn/rnn.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import numpy as np
-from math import exp
-from dataclasses import dataclass
-
-
-np.random.seed(0)
-
-@dataclass
-class Config():
- num_hidden_perceptrons: int
- input_size: int
- learning_rate: float
-
-
-class RecurrentNeuralNetwork(object):
- """
- Architecture is single-hidden-layer
- """
-
- def __init__(self, config: Config):
- self.config = config
-
- self.W_xh = np.random.randn(config.num_hidden_perceptrons, config.input_size)
- self.W_hh = np.random.randn(config.num_hidden_perceptrons, config.num_hidden_perceptrons)
- self.W_yh = np.random.randn(config.input_size, config.num_hidden_perceptrons)
-
- self.b_h = np.zeros((config.num_hidden_perceptrons, 1))
- self.b_o = np.zeros((config.input_size, 1))
-
- def forward_pass(self, input, target, prev_history):
- """
-
- :param input: The input vector; each element is an index
- :return:
- """
-
- history, x, o, q, loss = {}, {}, {}, {}, 0
- history[-1] = np.copy(prev_history)
-
- for t in range(len(input)):
- x[t] = np.zeros((self.config.input_size, 1))
- x[t][input[t]] = 1
-
- if t == 0:
- np.dot(self.W_hh, history[t - 1])
- np.dot(self.W_xh, x[t])
-
- history[t] = np.tanh(
- np.dot(self.W_hh, history[t - 1]) + np.dot(self.W_xh, x[t]) + self.b_h
- )
- o[t] = np.dot(self.W_yh, history[t]) + self.b_o
- q[t] = np.exp(o[t]) / np.sum(np.exp(o[t]))
- loss += -np.log(q[t][target, 0])
-
- return history, q, x, loss
-
- def back_propagation(self, input, target, history, q, x):
- gradient_loss_over_W_xh = np.zeros_like(self.W_xh)
- gradient_loss_over_W_hh = np.zeros_like(self.W_hh)
- gradient_loss_over_W_yh = np.zeros_like(self.W_yh)
-
- gradient_loss_over_b_h = np.zeros_like(self.b_h)
- gradient_loss_over_b_y = np.zeros_like(self.b_o)
-
- gradient_loss_over_next_h = np.zeros_like(history[0])
-
- for t in reversed(range(len(input))):
- gradient_loss_over_o = np.copy(q[t])
- gradient_loss_over_o[target[t]] -= 1
-
- gradient_loss_over_W_yh += np.dot(gradient_loss_over_o, history[t].T)
- gradient_loss_over_b_y += gradient_loss_over_o #
-
- gradient_loss_over_h = np.dot(self.W_yh.T, gradient_loss_over_o) + gradient_loss_over_next_h
- diag_times_gradient_loss_over_h = (1 - history[t] * history[t]) * gradient_loss_over_h
-
- gradient_loss_over_b_h += diag_times_gradient_loss_over_h #
-
- gradient_loss_over_W_xh += np.dot(diag_times_gradient_loss_over_h, x[t].T) #
- gradient_loss_over_W_hh += np.dot(diag_times_gradient_loss_over_h, history[t - 1].T) #
-
- gradient_loss_over_next_h = np.dot(self.W_hh.T, diag_times_gradient_loss_over_h)
-
- for gradient in [gradient_loss_over_W_xh, gradient_loss_over_W_hh, gradient_loss_over_W_yh, gradient_loss_over_b_h, gradient_loss_over_b_y]:
- np.clip(gradient, -5, 5, out=gradient) # avoid exploding gradients
-
- # update weights
- for param, gradient in zip(
- [self.W_xh, self.W_hh, self.W_yh, self.b_h, self.b_o],
- [gradient_loss_over_W_xh, gradient_loss_over_W_hh, gradient_loss_over_W_yh, gradient_loss_over_b_h, gradient_loss_over_b_y]):
- param += -self.config.learning_rate * gradient
-
- return history[len(input) - 1]
-
- def inference(self, history, seed_idx):
- x = np.zeros((self.config.input_size, 1))
- x[seed_idx] = 1
- idxes = []
-
- for timestep in range(200):
- history = np.tanh(np.dot(self.W_xh, x) + np.dot(self.W_hh, history) + self.b_h)
- o = np.dot(self.W_yh, history) + self.b_o
- p = np.exp(o) / np.sum(np.exp(o))
-
- next_idx = self._inference_single(p.ravel())
-
- x[next_idx] = 1
- idxes.append(next_idx)
-
- return idxes
-
-
- def _inference_single(self, probability_distribution):
- return np.random.choice(range(self.config.input_size), p=probability_distribution)
\ No newline at end of file
diff --git a/lamassu/speech/__init__.py b/lamassu/speech/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/lamassu/speech/sampling.py b/lamassu/speech/sampling.py
deleted file mode 100644
index 363f1723398a2942405e3fceabacc26c0079c236..0000000000000000000000000000000000000000
--- a/lamassu/speech/sampling.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import wave
-
-import numpy as np
-
-
-def sample_wav(file_path: str):
- """
- Sampling a .wav file
-
- :param file_path: The absolute path to the .wav file to be sampled
-
- :return: an array of sampled points
- """
- with wave.open(file_path, "rb") as f:
- frames = f.readframes(f.getnframes())
- return np.frombuffer(frames, dtype=np.int16)
diff --git a/mlflow/HanLPner.py b/mlflow/HanLPner.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c5eb022d58d75fa0d1293eb1a6d49d4738aff5f
--- /dev/null
+++ b/mlflow/HanLPner.py
@@ -0,0 +1,57 @@
+import hanlp
+import mlflow.pyfunc
+import pandas
+from parser import convert_to_knowledge_graph_spec
+
+
+class HanLPner(mlflow.pyfunc.PythonModel):
+
+ def __init__(self):
+ self.HanLP = None
+
+ def load_context(self, context):
+ HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH)
+ self.HanLP = HanLP
+
+ def predict(self, context, model_input):
+ texts = []
+ for _, row in model_input.iterrows():
+ texts.append(row["text"])
+
+ return pandas.Series(convert_to_knowledge_graph_spec(self.HanLP(texts)["srl"]))
+
+if __name__ == '__main__':
+ conda_env = {
+ 'channels': ['defaults'],
+ 'dependencies': [
+ 'python=3.10.7',
+ 'pip',
+ {
+ 'pip': [
+ 'mlflow',
+ 'mlflow-skinny',
+ 'mlflow[extras]',
+ 'pandas=={}'.format(pandas.__version__),
+ 'hanlp[amr, fasttext, full, tf]'
+ ],
+ },
+ ],
+ 'name': 'HanLPner'
+ }
+
+ # Save the MLflow Model
+ mlflow_pyfunc_model_path = "models/HanLPner"
+ mlflow.pyfunc.save_model(path=mlflow_pyfunc_model_path, python_model=HanLPner(), conda_env=conda_env)
+
+ loaded_model = mlflow.pyfunc.load_model(mlflow_pyfunc_model_path)
+
+ test_data = pandas.DataFrame(
+ {
+ "text": [
+ "我爱中国"
+ ]
+ }
+ )
+
+ test_predictions = loaded_model.predict(test_data)
+ print(test_predictions.to_markdown())
\ No newline at end of file
diff --git a/lamassu/__init__.py b/mlflow/__init__.py
similarity index 100%
rename from lamassu/__init__.py
rename to mlflow/__init__.py
diff --git a/mlflow/parser.py b/mlflow/parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8870062295b3703f7b30d4cf4eaaa2310244992
--- /dev/null
+++ b/mlflow/parser.py
@@ -0,0 +1,84 @@
+import random
+import string
+
+
+def _random_id():
+ return "n" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(5)).lower()
+
+
+def _construct_knowledge_graph_spec_node(extrapolated_entity: str):
+ return {
+ "id": _random_id(),
+ "fields": {
+ "name": extrapolated_entity,
+ "type": "entity"
+ }
+ }
+
+
+def _construct_knowledge_graph_spec_link(source: str, target: str, extrapolated_relationship: str):
+ return {
+ "id": _random_id(),
+ "source": source,
+ "target": target,
+ "fields": {
+ "type": extrapolated_relationship
+ }
+ }
+
+
+def convert_to_knowledge_graph_spec(model_results):
+ nodes = []
+ links = []
+
+ node_name_to_id_map = {}
+ link_set = set()
+ for srl_results in model_results:
+ for srl_result in srl_results:
+ subject = None
+ verb = None
+ object = None
+
+ for tuple in srl_result:
+ if tuple[1] == "ARG0":
+ subject = tuple
+ if tuple[1] == "PRED":
+ verb = tuple
+ if tuple[1] == "ARG1":
+ object = tuple
+
+ if subject and verb and object:
+ source_node = _construct_knowledge_graph_spec_node(subject[0])
+ target_node = _construct_knowledge_graph_spec_node(object[0])
+
+ source_node_id = source_node["id"]
+ source_node_name = source_node["fields"]["name"]
+ target_node_id = target_node["id"]
+ target_node_name = target_node["fields"]["name"]
+
+ if source_node_name not in node_name_to_id_map.keys():
+ node_name_to_id_map[source_node_name] = source_node_id
+ nodes.append(source_node)
+ if target_node_name not in node_name_to_id_map.keys():
+ node_name_to_id_map[target_node_name] = target_node_id
+ nodes.append(target_node)
+
+ link: str = source_node_name + target_node_name + verb[0]
+ if link not in link_set:
+ links.append(
+ _construct_knowledge_graph_spec_link(
+ node_name_to_id_map[source_node_name],
+ node_name_to_id_map[target_node_name],
+ verb[0]
+ )
+ )
+ link_set.add(link)
+
+ subject = None
+ verb = None
+ object = None
+
+ return {
+ "nodes": nodes,
+ "links": links
+ }
diff --git a/mlflow/requirements.txt b/mlflow/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..796df352357ebed7eecce3743c7a77a1a7e938e1
--- /dev/null
+++ b/mlflow/requirements.txt
@@ -0,0 +1,4 @@
+mlflow
+mlflow-skinny
+mlflow[extras]
+hanlp[amr,fasttext,full,tf]
diff --git a/mlflow/test_parser.py b/mlflow/test_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..2bbfb0c54400c7221d5c8405d5736f9a498821aa
--- /dev/null
+++ b/mlflow/test_parser.py
@@ -0,0 +1,25 @@
+import unittest
+
+from mlflow.parser import convert_to_knowledge_graph_spec
+
+
+class TestParser(unittest.TestCase):
+
+ def test_parser(self):
+ model_results: list = [
+ [
+ [['我', 'ARG0', 0, 1], ['爱', 'PRED', 1, 2], ['中国', 'ARG1', 2, 3]]
+ ]
+ ]
+
+ expected_nodes = [
+ '我',
+ '中国'
+ ]
+
+ expected_links = ['爱']
+
+ assert [node["fields"]["name"] for node in
+ convert_to_knowledge_graph_spec(model_results)["nodes"]] == expected_nodes
+ assert [node["fields"]["type"] for node in
+ convert_to_knowledge_graph_spec(model_results)["links"]] == expected_links
diff --git a/requirements.txt b/requirements.txt
index 24ce15ab7ead32f98c7ac3edcd34bb2010ff4326..0bc37093e7beaa48eed3ef0ab0cc55c0c52d2140 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-numpy
+hanlp[amr,fasttext,full,tf]
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 6907ad7a0abe2f1100042a5f73e08f0e69f7e5a2..0000000000000000000000000000000000000000
--- a/setup.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from setuptools import setup, find_packages
-
-setup(
- name="lamassu",
- version="0.0.13",
- description="Empowering individual to agnostically run machine learning algorithms to produce ad-hoc AI features",
- url="https://github.com/QubitPi/lamassu",
- author="Jiaqi liu",
- author_email="jack20220723@gmail.com",
- license="Apache-2.0",
- packages=find_packages(),
- python_requires='>=3.10',
- install_requires=[
-
- ],
- zip_safe=False,
- include_package_data=True
-)