vickeee465 committed
Commit e390ccc · 1 Parent(s): 8cc5141
cache hf models
Files changed:
- .idea/.gitignore +3 -0
- .idea/babelmachine-dev.iml +8 -0
- .idea/inspectionProfiles/Project_Default.xml +33 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- app.py +2 -5
- interfaces/manifesto.py +2 -0
- utils.py +30 -0
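In outline, the commit warms the Hugging Face cache on the Space's persistent /data/ volume at startup, so later from_pretrained() calls load weights from disk instead of re-downloading them on every request. A minimal sketch of that pattern, reusing the model ID and the hf_read secret that appear in the diffs below (not the exact code of this commit):

import os

# Point the cache at persistent storage before transformers resolves it
# (assumption: /data/ is the Space's persistent volume, as in this commit).
os.environ["TRANSFORMERS_CACHE"] = "/data/"

from transformers import AutoModelForSequenceClassification

# The first load downloads the weights into /data/; later calls, e.g. inside
# predict(), reuse the cached copy instead of fetching it again.
model = AutoModelForSequenceClassification.from_pretrained(
    "poltextlab/xlm-roberta-large-manifesto",
    token=os.environ.get("hf_read"),  # read token stored as a Space secret
)
del model  # loaded only to warm the cache, not to keep in memory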
.idea/.gitignore
ADDED
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
.idea/babelmachine-dev.iml
ADDED
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
.idea/inspectionProfiles/Project_Default.xml
ADDED
@@ -0,0 +1,33 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="13">
+            <item index="0" class="java.lang.String" itemvalue="google-cloud-bigquery" />
+            <item index="1" class="java.lang.String" itemvalue="db-dtypes" />
+            <item index="2" class="java.lang.String" itemvalue="pytest" />
+            <item index="3" class="java.lang.String" itemvalue="google-cloud-secret-manager" />
+            <item index="4" class="java.lang.String" itemvalue="redis" />
+            <item index="5" class="java.lang.String" itemvalue="google-cloud-logging" />
+            <item index="6" class="java.lang.String" itemvalue="functions_framework" />
+            <item index="7" class="java.lang.String" itemvalue="google-cloud-compute" />
+            <item index="8" class="java.lang.String" itemvalue="pickle5" />
+            <item index="9" class="java.lang.String" itemvalue="typing_extensions" />
+            <item index="10" class="java.lang.String" itemvalue="label_studio_sdk" />
+            <item index="11" class="java.lang.String" itemvalue="gcsfs" />
+            <item index="12" class="java.lang.String" itemvalue="rq" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredIdentifiers">
+        <list>
+          <option value="mido.*" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/babelmachine-dev.iml" filepath="$PROJECT_DIR$/.idea/babelmachine-dev.iml" />
+    </modules>
+  </component>
+</project>
.idea/vcs.xml
ADDED
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
app.py
CHANGED
@@ -1,4 +1,3 @@
-import os
 import gradio as gr
 
 from spacy import glossary
@@ -8,8 +7,7 @@ from interfaces.sentiment import demo as sentiment_demo
 from interfaces.emotion import demo as emotion_demo
 from interfaces.ner import demo as ner_demo
 from interfaces.ner import download_models as download_spacy_models
-
-os.environ['TRANSFORMERS_CACHE'] = '/data/'
+from utils import download_hf_models
 
 entities = ["CARDINAL", "DATE", "EVENT", "FAC", "GPE", "LANGUAGE", "LAW", "LOC", "MONEY", "NORP", "ORDINAL", "ORG", "PERCENT", "PERSON", "PRODUCT", "QUANTITY", "TIME", "WORK_OF_ART"]
 ent_dict = glossary.GLOSSARY
@@ -35,9 +33,8 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
-
+    download_hf_models()
     download_spacy_models()
-    gr.Info("Downloaded spacy models")
     demo.launch()
 
 # TODO: add all languages & domains
interfaces/manifesto.py
CHANGED
@@ -39,6 +39,8 @@ def build_huggingface_path(language: str):
     return "poltextlab/xlm-roberta-large-manifesto"
 
 def predict(text, model_id, tokenizer_id):
+    gr.Info(os.listdir("/data/"))
+
     device = torch.device("cpu")
     with m("Loading model"):
         model = AutoModelForSequenceClassification.from_pretrained(model_id, low_cpu_mem_usage=True, device_map="auto", token=HF_TOKEN)
utils.py
ADDED
@@ -0,0 +1,30 @@
+import os
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+"""
+from interfaces.manifesto import languages as languages_manifesto
+from interfaces.manifesto import languages as languages_manifesto
+from interfaces.manifesto import languages as languages_manifesto
+"""
+
+from interfaces.cap import build_huggingface_path as hf_cap_path
+from interfaces.manifesto import build_huggingface_path as hf_manifesto_path
+from interfaces.sentiment import build_huggingface_path as hf_sentiment_path
+from interfaces.emotion import build_huggingface_path as hf_emotion_path
+
+
+os.environ['TRANSFORMERS_CACHE'] = '/data/'
+HF_TOKEN = os.environ["hf_read"]
+
+models = [hf_manifesto_path(""), hf_sentiment_path(""), hf_emotion_path("")]
+tokenizers = ["xlm-roberta-large"]
+
+def download_hf_models():
+    for model_id in models:
+        model = AutoModelForSequenceClassification.from_pretrained(model_id, low_cpu_mem_usage=True, device_map="auto",
+                                                                   token=HF_TOKEN)
+        del model
+    for tokenizer_id in tokenizers:
+        tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
+        del tokenizer
+