FredZhang7
committed on
Commit
•
b38c8eb
1
Parent(s):
7202616
add custom tiny tfjs module for node.js v18+
Browse files
- .gitattributes +2 -0
- tiny-tensorflow/bert-tiny-multilingual/saved_model.pb +3 -0
- tiny-tensorflow/bert-tiny-multilingual/variables/variables.data-00000-of-00001 +3 -0
- tiny-tensorflow/bert-tiny-multilingual/variables/variables.index +0 -0
- tiny-tensorflow/predict.js +12 -0
- tiny-tensorflow/toxicity-tfjs/api_pb.js +0 -0
- tiny-tensorflow/toxicity-tfjs/bert-tokenizer.js +69 -0
- tiny-tensorflow/toxicity-tfjs/config.json +70 -0
- tiny-tensorflow/toxicity-tfjs/node.js +0 -0
- tiny-tensorflow/toxicity-tfjs/tfnapi-v8/tensorflow.dll +3 -0
- tiny-tensorflow/toxicity-tfjs/tfnapi-v8/tfjs_binding.node +0 -0
- tiny-tensorflow/toxicity-tfjs/vitchen-multilingual-uncased.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tiny-tensorflow/bert-tiny-multilingual/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
37 |
+
tiny-tensorflow/toxicity-tfjs/tfnapi-v8/tensorflow.dll filter=lfs diff=lfs merge=lfs -text
|
tiny-tensorflow/bert-tiny-multilingual/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84cb4153c11646eb331f7707570b8b3abe1f57ae5919c0bec80990ac6c4e8961
|
3 |
+
size 14166066
|
tiny-tensorflow/bert-tiny-multilingual/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dcddc18a03c9cf78e873c24440335a972bdb6a266b7fe8e5edceb87fb95809e4
|
3 |
+
size 755860212
|
tiny-tensorflow/bert-tiny-multilingual/variables/variables.index
ADDED
Binary file (13.8 kB). View file
|
|
tiny-tensorflow/predict.js
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
 * Smoke test for the bundled toxicity model: loads a SavedModel, encodes one
 * sample message with the tokenizer and prints the first value of the
 * model's `output_0` tensor.
 *
 * @returns {Promise<void>} Settles once the prediction has been printed.
 */
async function run() {
    // The tokenizer and the tfjs node module are added under ./toxicity-tfjs,
    // not under ./bert-tiny-multilingual (which only holds the SavedModel files).
    const { vitchen_toxicity_encode } = require('./toxicity-tfjs/bert-tokenizer');
    const { loadSavedModel, tensor } = require('./toxicity-tfjs/node');
    // NOTE(review): './toxicity9' is not among the files added next to this
    // script; presumably it should point at the SavedModel directory
    // ('./bert-tiny-multilingual') -- confirm before relying on this path.
    const model = await loadSavedModel('./toxicity9');
    const input = vitchen_toxicity_encode(`What's up?`);
    // The encoder always returns 192 ids, matching the [192] shape used here.
    const t = tensor(input, [192], 'int32');
    const prediction = model.predict({
        input_ids: t
    })['output_0'];
    console.log(prediction.dataSync()[0]);
}

// Report load/predict failures explicitly instead of leaving the promise floating.
run().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
|
tiny-tensorflow/toxicity-tfjs/api_pb.js
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tiny-tensorflow/toxicity-tfjs/bert-tokenizer.js
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Punctuation characters that the tokenizer cuts out of a word as standalone pieces.
// Iterating the string yields one set member per character, in this order.
const symbols = new Set('!`@#$%^&*()-~_+=[]{}\\|:;\'"<>,.?/');
|
2 |
+
const toxicityTokens = require('./vitchen-multilingual-uncased.json');
|
3 |
+
|
4 |
+
/**
 * Converts a message into a fixed-length sequence of 192 vocabulary ids.
 *
 * The text is lowercased and split on spaces. A word found in the vocabulary
 * as a whole contributes one id. Any other word is first cut around the
 * characters in `symbols` (each symbol becomes its own piece), and every piece
 * is then matched greedily, longest prefix first; continuation pieces are
 * looked up with a `##` prefix. A piece that cannot be matched to its end
 * contributes 100 ([UNK]).
 *
 * The result starts with 101 ([CLS]), carries 102 ([SEP]) right after the last
 * token (or in the final slot when the input is truncated) and is filled with
 * 0 up to 192 entries.
 *
 * @param {string} text The content of a Discord message
 * @returns {number[]} An array of exactly 192 ids
 */
function vitchen_toxicity_encode(text) {
    const MAX_LENGTH = 192;
    const PADDING_ID = 0;
    const UNK_ID = 100; // [UNK]
    const CLS_ID = 101; // [CLS]
    const SEP_ID = 102; // [SEP]

    // Global regexes are required here: String.prototype.replace with a string
    // pattern only replaces the FIRST match, which left later backticks and
    // newlines inside the words.
    const uncleanWords = text.replace(/`/g, '').replace(/\n/g, ' ').toLowerCase().split(' ');
    let ids = [CLS_ID];

    for (let i = 0; i < uncleanWords.length && ids.length < MAX_LENGTH; i++) {
        const cur = uncleanWords[i];
        if (cur === '') continue;

        const curToken = getToken(cur);
        if (curToken) {
            ids.push(curToken);
            continue;
        }

        // Cut the word around symbols; each symbol is kept as its own piece.
        const splitBySymbols = [];
        let preSymbol = '';
        for (let j = 0; j < cur.length; j++) {
            if (symbols.has(cur[j])) {
                if (preSymbol !== '') splitBySymbols.push(preSymbol);
                splitBySymbols.push(cur[j]);
                preSymbol = '';
            } else {
                preSymbol += cur[j];
            }
        }
        // May push an empty string (word ended with a symbol); it yields no ids below.
        splitBySymbols.push(preSymbol);

        for (const word of splitBySymbols) {
            let k = word.length;  // exclusive end of the candidate substring
            let hangman = '';     // part of `word` already covered by emitted ids
            let checkpoint = 0;   // index in `word` where the next piece starts
            while (k !== checkpoint && hangman !== word) {
                // Try the longest candidate first and shrink it from the right.
                for (k; k >= checkpoint + 1; k--) {
                    const checkpointJourney = word.substring(checkpoint, k);
                    const token = getToken(hangman.length === 0 ? checkpointJourney : '##' + checkpointJourney);
                    if (token) {
                        ids.push(token);
                        hangman += checkpointJourney;
                        checkpoint = k;
                        k = word.length;
                        break;
                    }
                }
            }
            // The inner loop ran down to `checkpoint` without a match: the rest is unknown.
            if (k === checkpoint && hangman !== word) ids.push(UNK_ID);
            // The unmatched remainder is additionally tried as a stand-alone (non-`##`) entry.
            const lastcheckpointToken = getToken(word.substring(checkpoint));
            if (lastcheckpointToken) ids.push(lastcheckpointToken);
        }
    }

    if (ids.length < MAX_LENGTH) {
        ids.push(SEP_ID);
        while (ids.length < MAX_LENGTH) {
            ids.push(PADDING_ID);
        }
    } else {
        // A single word can emit several ids, so the list may overshoot the limit.
        ids = ids.slice(0, MAX_LENGTH);
        ids[MAX_LENGTH - 1] = SEP_ID;
    }
    return ids;
}
|
59 |
+
|
60 |
+
/**
 * Looks up the vocabulary id stored for a token string.
 *
 * @param {string} string Token text (continuation pieces include their `##` prefix)
 * @returns {number|undefined} The value stored under `string` in the
 *   vocabulary, or `undefined` for the empty string and for any key the
 *   vocabulary object does not define itself.
 */
function getToken(string) {
    if (string === '') return undefined;
    // Own-property check: a plain bracket lookup also resolves names inherited
    // from Object.prototype (e.g. "constructor", "__proto__"), which are truthy
    // non-numbers and would end up in the id array.
    return Object.hasOwn(toxicityTokens, string) ? toxicityTokens[string] : undefined;
}
|
68 |
+
|
69 |
+
module.exports.vitchen_toxicity_encode = vitchen_toxicity_encode;
|
tiny-tensorflow/toxicity-tfjs/config.json
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "@tensorflow/tfjs-node",
|
3 |
+
"version": "3.19.0",
|
4 |
+
"main": "dist/index.js",
|
5 |
+
"types": "dist/index.d.ts",
|
6 |
+
"gypfile": true,
|
7 |
+
"license": "Apache-2.0",
|
8 |
+
"repository": {
|
9 |
+
"type": "git",
|
10 |
+
"url": "https://github.com/tensorflow/tfjs.git",
|
11 |
+
"directory": "tfjs-node"
|
12 |
+
},
|
13 |
+
"engines": {
|
14 |
+
"node": ">=8.11.0"
|
15 |
+
},
|
16 |
+
"scripts": {
|
17 |
+
"build": "tsc && npx mkdirp dist/proto && cp src/proto/api_pb.js dist/proto/api_pb.js",
|
18 |
+
"build-ci": "tsc && npx mkdirp dist/proto && cp src/proto/api_pb.js dist/proto/api_pb.js",
|
19 |
+
"build-link-package": "cd ../link-package && yarn build-deps-for tfjs-node tfjs",
|
20 |
+
"build-union": "cd ../tfjs && yarn && yarn build",
|
21 |
+
"build-union-ci": "cd ../tfjs && yarn && yarn build-ci",
|
22 |
+
"build-deps": "yarn build-link-package && yarn build-union",
|
23 |
+
"build-deps-ci": "yarn build-link-package && yarn build-union-ci",
|
24 |
+
"build-npm": "./scripts/build-npm.sh",
|
25 |
+
"build-addon": "./scripts/build-and-upload-addon.sh",
|
26 |
+
"build-addon-from-source": "node-pre-gyp install --build-from-source",
|
27 |
+
"clean-deps": "rm -rf deps && rm -rf lib",
|
28 |
+
"coverage": "nyc yarn ts-node -P tsconfig.test.json src/run_tests.ts",
|
29 |
+
"enable-gpu": "node scripts/install.js gpu download && yarn && yarn build-addon-from-source",
|
30 |
+
"ensure-cpu-gpu-packages-align": "node scripts/ensure-cpu-gpu-packages-align.js",
|
31 |
+
"format": "clang-format -i -style=Google binding/*.cc binding/*.h",
|
32 |
+
"install": "node scripts/install.js",
|
33 |
+
"install-from-source": "yarn clean-deps && yarn && yarn build-addon-from-source",
|
34 |
+
"link-local": "yalc link",
|
35 |
+
"lint": "tslint -p . -t verbose",
|
36 |
+
"prep": "cd node_modules/@tensorflow/tfjs-core && yarn && yarn build",
|
37 |
+
"publish-local": "yarn prep && yalc push",
|
38 |
+
"publish-npm": "npm publish",
|
39 |
+
"test": "yarn && yarn build-deps && yarn build && ts-node --transpile-only --skip-ignore -P tsconfig.test.json src/run_tests.ts",
|
40 |
+
"test-dev": "tsc && ts-node --transpile-only --skip-ignore -P tsconfig.test.json src/run_tests.ts",
|
41 |
+
"test-ci": "ts-node --transpile-only --skip-ignore -P tsconfig.test.json src/run_tests.ts",
|
42 |
+
"upload-windows-addon": "./scripts/build-and-upload-windows-addon.bat"
|
43 |
+
},
|
44 |
+
"dependencies": {
|
45 |
+
"@mapbox/node-pre-gyp": "1.0.9",
|
46 |
+
"adm-zip": "^0.5.2",
|
47 |
+
"google-protobuf": "^3.9.2",
|
48 |
+
"https-proxy-agent": "^2.2.1",
|
49 |
+
"progress": "^2.0.0",
|
50 |
+
"rimraf": "^2.6.2",
|
51 |
+
"path": "^0.12.7",
|
52 |
+
"tar": "^4.4.6",
|
53 |
+
"util": "^0.12.4"
|
54 |
+
},
|
55 |
+
"binary": {
|
56 |
+
"module_name": "tfjs_binding",
|
57 |
+
"module_path": "./tfnapi-v{napi_build_version}",
|
58 |
+
"host": "https://storage.googleapis.com/tf-builds/pre-built-binary",
|
59 |
+
"remote_path": "./tfnapi-v{napi_build_version}/{version}/",
|
60 |
+
"napi_versions": [
|
61 |
+
3,
|
62 |
+
4,
|
63 |
+
5,
|
64 |
+
6,
|
65 |
+
7,
|
66 |
+
8
|
67 |
+
]
|
68 |
+
}
|
69 |
+
}
|
70 |
+
|
tiny-tensorflow/toxicity-tfjs/node.js
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tiny-tensorflow/toxicity-tfjs/tfnapi-v8/tensorflow.dll
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba650c0d99b0275086789d6ec8de2c200cca8e29d800ef7cefd7f318aeb1e9e5
|
3 |
+
size 200814080
|
tiny-tensorflow/toxicity-tfjs/tfnapi-v8/tfjs_binding.node
ADDED
Binary file (170 kB). View file
|
|
tiny-tensorflow/toxicity-tfjs/vitchen-multilingual-uncased.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|