FredZhang7 committed on
Commit
b38c8eb
1 Parent(s): 7202616

add custom tiny tfjs module for node.js v18+

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tiny-tensorflow/bert-tiny-multilingual/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
37
+ tiny-tensorflow/toxicity-tfjs/tfnapi-v8/tensorflow.dll filter=lfs diff=lfs merge=lfs -text
tiny-tensorflow/bert-tiny-multilingual/saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84cb4153c11646eb331f7707570b8b3abe1f57ae5919c0bec80990ac6c4e8961
3
+ size 14166066
tiny-tensorflow/bert-tiny-multilingual/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcddc18a03c9cf78e873c24440335a972bdb6a266b7fe8e5edceb87fb95809e4
3
+ size 755860212
tiny-tensorflow/bert-tiny-multilingual/variables/variables.index ADDED
Binary file (13.8 kB). View file
 
tiny-tensorflow/predict.js ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Demo entry point: loads the saved model, encodes one sample message with
 * the toxicity tokenizer, runs a prediction and prints the first value of
 * the `output_0` tensor.
 *
 * @returns {Promise<void>} Resolves once the prediction has been logged.
 */
async function run() {
  // The tokenizer and the bundled tfjs node module are added by this commit
  // under ./toxicity-tfjs; ./bert-tiny-multilingual only receives the
  // SavedModel files, so requiring from there cannot resolve.
  const { vitchen_toxicity_encode } = require('./toxicity-tfjs/bert-tokenizer');
  const { loadSavedModel, tensor } = require('./toxicity-tfjs/node');

  // NOTE(review): no './toxicity9' directory is visible in this commit; the
  // SavedModel added here lives in './bert-tiny-multilingual'. Confirm which
  // path is intended (it is resolved against the current working directory).
  const model = await loadSavedModel('./toxicity9');

  const inputIds = vitchen_toxicity_encode(`What's up?`);
  // The tokenizer always returns exactly 192 ids, matching the tensor shape.
  const inputTensor = tensor(inputIds, [192], 'int32');
  const prediction = model.predict({
    input_ids: inputTensor,
  })['output_0'];
  console.log(prediction.dataSync()[0]);
}

// Do not leave the promise floating: report failures and exit non-zero.
run().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
tiny-tensorflow/toxicity-tfjs/api_pb.js ADDED
The diff for this file is too large to render. See raw diff
 
tiny-tensorflow/toxicity-tfjs/bert-tokenizer.js ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Punctuation characters. When a word is not in the vocabulary as a whole,
// vitchen_toxicity_encode cuts it at each of these characters and treats the
// character itself as a separate piece.
+ const symbols = new Set(['!', '`', '@', '#', '$', '%', '^', '&', '*', '(', ')', '-', '~' , '_', '+', '=', '[', ']', '{', '}', '\\', '|', ':', ';', '\'', '"', '<', '>', ',', '.', '?', '/']);
2
+ const toxicityTokens = require('./vitchen-multilingual-uncased.json');
3
+
4
// Special ids used by the encoder.
const CLS_TOKEN_ID = 101; // [CLS] opens every sequence
const SEP_TOKEN_ID = 102; // [SEP] closes every sequence
const UNK_TOKEN_ID = 100; // [UNK] marks a piece with no vocabulary entry
const PAD_TOKEN_ID = 0; // filler appended after [SEP] up to the fixed length
const DEFAULT_SEQUENCE_LENGTH = 192;

/**
 * Encodes a message into a fixed-length array of vocabulary ids:
 * `[CLS]`, the word / word-piece ids, `[SEP]`, then zero padding. Input that
 * produces too many ids is truncated and the last slot is set to `[SEP]`.
 *
 * Every backtick is removed and every newline is treated as a space before
 * the lower-cased text is split on single spaces.
 *
 * @param {string} text The content of a Discord message
 * @param {number} [maxLength=192] Exact length of the returned array (>= 2).
 * @returns {number[]} Array of exactly `maxLength` numeric ids.
 * @throws {TypeError} If `text` is not a string.
 * @throws {RangeError} If `maxLength` is not an integer of at least 2.
 */
function vitchen_toxicity_encode(text, maxLength = DEFAULT_SEQUENCE_LENGTH) {
  if (typeof text !== 'string') {
    throw new TypeError('text must be a string');
  }
  if (!Number.isInteger(maxLength) || maxLength < 2) {
    throw new RangeError('maxLength must be an integer >= 2');
  }

  // A string pattern passed to replace() only touches the first match, so
  // global regexes are required to normalise the whole message.
  const words = text
    .replace(/`/g, '')
    .replace(/\n/g, ' ')
    .toLowerCase()
    .split(' ');

  const ids = [CLS_TOKEN_ID];
  for (const word of words) {
    if (ids.length >= maxLength) break;
    if (word === '') continue; // consecutive spaces yield empty entries

    const wholeWordId = lookupTokenId(word);
    if (wholeWordId !== undefined) {
      ids.push(wholeWordId);
      continue;
    }
    for (const fragment of splitOnSymbols(word)) {
      appendWordPieces(fragment, ids);
    }
  }

  if (ids.length < maxLength) {
    ids.push(SEP_TOKEN_ID);
    while (ids.length < maxLength) {
      ids.push(PAD_TOKEN_ID);
    }
    return ids;
  }

  // A single word can add several ids, so the list may overshoot the limit.
  const truncated = ids.slice(0, maxLength);
  truncated[maxLength - 1] = SEP_TOKEN_ID;
  return truncated;
}

/**
 * Looks a piece up through getToken and accepts only numeric results, so a
 * non-numeric value coming back from the lookup is never used as an id.
 *
 * @param {string} piece
 * @returns {number|undefined}
 */
function lookupTokenId(piece) {
  const id = getToken(piece);
  return typeof id === 'number' ? id : undefined;
}

/**
 * Cuts a word at every character contained in `symbols`, keeping each symbol
 * as its own fragment and each run of other characters as one fragment.
 *
 * @param {string} word
 * @returns {string[]} Non-empty fragments in their original order.
 */
function splitOnSymbols(word) {
  const fragments = [];
  let pending = '';
  for (const char of word) {
    if (symbols.has(char)) {
      if (pending !== '') fragments.push(pending);
      fragments.push(char);
      pending = '';
    } else {
      pending += char;
    }
  }
  if (pending !== '') fragments.push(pending);
  return fragments;
}

/**
 * Greedy longest-match-first word-piece lookup. Pieces after the first are
 * looked up with a `##` prefix. When no piece matches at the current
 * position, `[UNK]` is appended and the unmatched remainder is tried once
 * more as a standalone (un-prefixed) entry.
 *
 * @param {string} word Fragment to encode.
 * @param {number[]} ids Output list; matched ids are appended in place.
 */
function appendWordPieces(word, ids) {
  let start = 0;
  while (start < word.length) {
    let matchedEnd = -1;
    for (let end = word.length; end > start; end--) {
      const piece = word.substring(start, end);
      const id = lookupTokenId(start === 0 ? piece : '##' + piece);
      if (id !== undefined) {
        ids.push(id);
        matchedEnd = end;
        break;
      }
    }

    if (matchedEnd === -1) {
      ids.push(UNK_TOKEN_ID);
      // At start === 0 the un-prefixed remainder is the whole word, which the
      // loop above has already tried, so only retry for later positions.
      if (start > 0) {
        const standaloneId = lookupTokenId(word.substring(start));
        if (standaloneId !== undefined) ids.push(standaloneId);
      }
      return;
    }
    start = matchedEnd;
  }
}
59
+
60
/**
 * Returns the vocabulary id stored for a token string.
 *
 * Only the vocabulary's own numeric entries are returned. A plain
 * `toxicityTokens[string]` lookup would also resolve names inherited from
 * `Object.prototype` (for example `constructor` or `__proto__`), which a
 * user message can contain verbatim.
 *
 * @param {string} string Token text, e.g. `hello` or `##ing`.
 * @returns {number|undefined} The id, or `undefined` when the token is empty,
 *   not a string, or not present in the vocabulary.
 */
function getToken(string) {
  if (typeof string !== 'string' || string === '') return undefined;
  if (!Object.prototype.hasOwnProperty.call(toxicityTokens, string)) return undefined;
  const id = toxicityTokens[string];
  return typeof id === 'number' ? id : undefined;
}
68
+
69
+ module.exports.vitchen_toxicity_encode = vitchen_toxicity_encode;
tiny-tensorflow/toxicity-tfjs/config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "@tensorflow/tfjs-node",
3
+ "version": "3.19.0",
4
+ "main": "dist/index.js",
5
+ "types": "dist/index.d.ts",
6
+ "gypfile": true,
7
+ "license": "Apache-2.0",
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "https://github.com/tensorflow/tfjs.git",
11
+ "directory": "tfjs-node"
12
+ },
13
+ "engines": {
14
+ "node": ">=8.11.0"
15
+ },
16
+ "scripts": {
17
+ "build": "tsc && npx mkdirp dist/proto && cp src/proto/api_pb.js dist/proto/api_pb.js",
18
+ "build-ci": "tsc && npx mkdirp dist/proto && cp src/proto/api_pb.js dist/proto/api_pb.js",
19
+ "build-link-package": "cd ../link-package && yarn build-deps-for tfjs-node tfjs",
20
+ "build-union": "cd ../tfjs && yarn && yarn build",
21
+ "build-union-ci": "cd ../tfjs && yarn && yarn build-ci",
22
+ "build-deps": "yarn build-link-package && yarn build-union",
23
+ "build-deps-ci": "yarn build-link-package && yarn build-union-ci",
24
+ "build-npm": "./scripts/build-npm.sh",
25
+ "build-addon": "./scripts/build-and-upload-addon.sh",
26
+ "build-addon-from-source": "node-pre-gyp install --build-from-source",
27
+ "clean-deps": "rm -rf deps && rm -rf lib",
28
+ "coverage": "nyc yarn ts-node -P tsconfig.test.json src/run_tests.ts",
29
+ "enable-gpu": "node scripts/install.js gpu download && yarn && yarn build-addon-from-source",
30
+ "ensure-cpu-gpu-packages-align": "node scripts/ensure-cpu-gpu-packages-align.js",
31
+ "format": "clang-format -i -style=Google binding/*.cc binding/*.h",
32
+ "install": "node scripts/install.js",
33
+ "install-from-source": "yarn clean-deps && yarn && yarn build-addon-from-source",
34
+ "link-local": "yalc link",
35
+ "lint": "tslint -p . -t verbose",
36
+ "prep": "cd node_modules/@tensorflow/tfjs-core && yarn && yarn build",
37
+ "publish-local": "yarn prep && yalc push",
38
+ "publish-npm": "npm publish",
39
+ "test": "yarn && yarn build-deps && yarn build && ts-node --transpile-only --skip-ignore -P tsconfig.test.json src/run_tests.ts",
40
+ "test-dev": "tsc && ts-node --transpile-only --skip-ignore -P tsconfig.test.json src/run_tests.ts",
41
+ "test-ci": "ts-node --transpile-only --skip-ignore -P tsconfig.test.json src/run_tests.ts",
42
+ "upload-windows-addon": "./scripts/build-and-upload-windows-addon.bat"
43
+ },
44
+ "dependencies": {
45
+ "@mapbox/node-pre-gyp": "1.0.9",
46
+ "adm-zip": "^0.5.2",
47
+ "google-protobuf": "^3.9.2",
48
+ "https-proxy-agent": "^2.2.1",
49
+ "progress": "^2.0.0",
50
+ "rimraf": "^2.6.2",
51
+ "path": "^0.12.7",
52
+ "tar": "^4.4.6",
53
+ "util": "^0.12.4"
54
+ },
55
+ "binary": {
56
+ "module_name": "tfjs_binding",
57
+ "module_path": "./tfnapi-v{napi_build_version}",
58
+ "host": "https://storage.googleapis.com/tf-builds/pre-built-binary",
59
+ "remote_path": "./tfnapi-v{napi_build_version}/{version}/",
60
+ "napi_versions": [
61
+ 3,
62
+ 4,
63
+ 5,
64
+ 6,
65
+ 7,
66
+ 8
67
+ ]
68
+ }
69
+ }
70
+
tiny-tensorflow/toxicity-tfjs/node.js ADDED
The diff for this file is too large to render. See raw diff
 
tiny-tensorflow/toxicity-tfjs/tfnapi-v8/tensorflow.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba650c0d99b0275086789d6ec8de2c200cca8e29d800ef7cefd7f318aeb1e9e5
3
+ size 200814080
tiny-tensorflow/toxicity-tfjs/tfnapi-v8/tfjs_binding.node ADDED
Binary file (170 kB). View file
 
tiny-tensorflow/toxicity-tfjs/vitchen-multilingual-uncased.json ADDED
The diff for this file is too large to render. See raw diff