Christopher Glaze
committed on
Commit
·
cbc0f63
1
Parent(s):
fbe1af4
Add nltk resource
Browse files
handler.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
|
| 2 |
from typing import Dict, List, Union, Optional
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
import json
|
| 5 |
import joblib
|
|
@@ -10,6 +11,9 @@ import torch
|
|
| 10 |
import numpy as np
|
| 11 |
from sklearn.base import TransformerMixin
|
| 12 |
|
|
|
|
|
|
|
|
|
|
| 13 |
class SimcseGenerator(TransformerMixin):
|
| 14 |
def __init__(
|
| 15 |
self, batch_size: int =16, model_name: str = "princeton-nlp/unsup-simcse-bert-base-uncased"
|
|
@@ -57,7 +61,7 @@ class EndpointHandler():
|
|
| 57 |
def __init__(self, path: str = ""):
|
| 58 |
|
| 59 |
if len(path)==0:
|
| 60 |
-
path =
|
| 61 |
else:
|
| 62 |
path = Path(path)
|
| 63 |
|
|
|
|
| 1 |
|
| 2 |
from typing import Dict, List, Union, Optional
|
| 3 |
+
import os
|
| 4 |
from pathlib import Path
|
| 5 |
import json
|
| 6 |
import joblib
|
|
|
|
| 11 |
import numpy as np
|
| 12 |
from sklearn.base import TransformerMixin
|
| 13 |
|
| 14 |
+
LOCAL_PATH = Path(__file__).parent
|
| 15 |
+
nltk.data.path.append(str(LOCAL_PATH/"nltk_data"))
|
| 16 |
+
|
| 17 |
class SimcseGenerator(TransformerMixin):
|
| 18 |
def __init__(
|
| 19 |
self, batch_size: int =16, model_name: str = "princeton-nlp/unsup-simcse-bert-base-uncased"
|
|
|
|
| 61 |
def __init__(self, path: str = ""):
|
| 62 |
|
| 63 |
if len(path)==0:
|
| 64 |
+
path = LOCAL_PATH
|
| 65 |
else:
|
| 66 |
path = Path(path)
|
| 67 |
|
nltk_data/tokenizers/punkt/PY3/english.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cad3758596392364e3be9803dbd7ebeda384b68937b488a01365f5551bb942c
|
| 3 |
+
size 406697
|
nltk_data/tokenizers/punkt/english.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dda37972ae88998a6fd3e3ec002697a6bd362b32d050fda7d7ca5276873092aa
|
| 3 |
+
size 433305
|