vickeee465 committed on
Commit
e390ccc
·
1 Parent(s): 8cc5141

cache hf models

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/babelmachine-dev.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="13">
8
+ <item index="0" class="java.lang.String" itemvalue="google-cloud-bigquery" />
9
+ <item index="1" class="java.lang.String" itemvalue="db-dtypes" />
10
+ <item index="2" class="java.lang.String" itemvalue="pytest" />
11
+ <item index="3" class="java.lang.String" itemvalue="google-cloud-secret-manager" />
12
+ <item index="4" class="java.lang.String" itemvalue="redis" />
13
+ <item index="5" class="java.lang.String" itemvalue="google-cloud-logging" />
14
+ <item index="6" class="java.lang.String" itemvalue="functions_framework" />
15
+ <item index="7" class="java.lang.String" itemvalue="google-cloud-compute" />
16
+ <item index="8" class="java.lang.String" itemvalue="pickle5" />
17
+ <item index="9" class="java.lang.String" itemvalue="typing_extensions" />
18
+ <item index="10" class="java.lang.String" itemvalue="label_studio_sdk" />
19
+ <item index="11" class="java.lang.String" itemvalue="gcsfs" />
20
+ <item index="12" class="java.lang.String" itemvalue="rq" />
21
+ </list>
22
+ </value>
23
+ </option>
24
+ </inspection_tool>
25
+ <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
26
+ <option name="ignoredIdentifiers">
27
+ <list>
28
+ <option value="mido.*" />
29
+ </list>
30
+ </option>
31
+ </inspection_tool>
32
+ </profile>
33
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/babelmachine-dev.iml" filepath="$PROJECT_DIR$/.idea/babelmachine-dev.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import os
2
  import gradio as gr
3
 
4
  from spacy import glossary
@@ -8,8 +7,7 @@ from interfaces.sentiment import demo as sentiment_demo
8
  from interfaces.emotion import demo as emotion_demo
9
  from interfaces.ner import demo as ner_demo
10
  from interfaces.ner import download_models as download_spacy_models
11
-
12
- os.environ['TRANSFORMERS_CACHE'] = '/data/'
13
 
14
  entities = ["CARDINAL", "DATE", "EVENT", "FAC", "GPE", "LANGUAGE", "LAW", "LOC", "MONEY", "NORP", "ORDINAL", "ORG", "PERCENT", "PERSON", "PRODUCT", "QUANTITY", "TIME", "WORK_OF_ART"]
15
  ent_dict = glossary.GLOSSARY
@@ -35,9 +33,8 @@ with gr.Blocks() as demo:
35
  )
36
 
37
  if __name__ == "__main__":
38
- gr.Info("Downloading spacy models")
39
  download_spacy_models()
40
- gr.Info("Downloaded spacy models")
41
  demo.launch()
42
 
43
  # TODO: add all languages & domains
 
 
1
  import gradio as gr
2
 
3
  from spacy import glossary
 
7
  from interfaces.emotion import demo as emotion_demo
8
  from interfaces.ner import demo as ner_demo
9
  from interfaces.ner import download_models as download_spacy_models
10
+ from utils import download_hf_models
 
11
 
12
  entities = ["CARDINAL", "DATE", "EVENT", "FAC", "GPE", "LANGUAGE", "LAW", "LOC", "MONEY", "NORP", "ORDINAL", "ORG", "PERCENT", "PERSON", "PRODUCT", "QUANTITY", "TIME", "WORK_OF_ART"]
13
  ent_dict = glossary.GLOSSARY
 
33
  )
34
 
35
  if __name__ == "__main__":
36
+ download_hf_models()
37
  download_spacy_models()
 
38
  demo.launch()
39
 
40
  # TODO: add all languages & domains
interfaces/manifesto.py CHANGED
@@ -39,6 +39,8 @@ def build_huggingface_path(language: str):
39
  return "poltextlab/xlm-roberta-large-manifesto"
40
 
41
  def predict(text, model_id, tokenizer_id):
 
 
42
  device = torch.device("cpu")
43
  with m("Loading model"):
44
  model = AutoModelForSequenceClassification.from_pretrained(model_id, low_cpu_mem_usage=True, device_map="auto", token=HF_TOKEN)
 
39
  return "poltextlab/xlm-roberta-large-manifesto"
40
 
41
  def predict(text, model_id, tokenizer_id):
42
+ gr.Info(os.listdir("/data/"))
43
+
44
  device = torch.device("cpu")
45
  with m("Loading model"):
46
  model = AutoModelForSequenceClassification.from_pretrained(model_id, low_cpu_mem_usage=True, device_map="auto", token=HF_TOKEN)
utils.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

# TRANSFORMERS_CACHE must be set BEFORE importing transformers: the library
# resolves its cache directory at import time, so setting it afterwards
# (as the original did) has no effect. /data/ is the persistent volume.
os.environ['TRANSFORMERS_CACHE'] = '/data/'

from transformers import AutoTokenizer, AutoModelForSequenceClassification

from interfaces.cap import build_huggingface_path as hf_cap_path  # NOTE(review): imported but unused here — presumably CAP models are resolved per-language at request time; confirm before removing
from interfaces.manifesto import build_huggingface_path as hf_manifesto_path
from interfaces.sentiment import build_huggingface_path as hf_sentiment_path
from interfaces.emotion import build_huggingface_path as hf_emotion_path

# Read token for private/gated model repos.
HF_TOKEN = os.environ["hf_read"]

# Model repos to warm into the cache at startup.
models = [hf_manifesto_path(""), hf_sentiment_path(""), hf_emotion_path("")]
tokenizers = ["xlm-roberta-large"]

def download_hf_models():
    """Eagerly download and cache every model/tokenizer listed above.

    Loading each artifact once populates the on-disk transformers cache
    (/data/), so later `from_pretrained` calls in the interfaces are fast.
    The loaded objects are discarded immediately to keep memory low.

    Fixes a NameError in the original, which referenced undefined
    `model_id` / `tokenizer_id` instead of the loop variables.
    """
    for model_id in models:
        model = AutoModelForSequenceClassification.from_pretrained(
            model_id, low_cpu_mem_usage=True, device_map="auto", token=HF_TOKEN)
        del model  # free RAM; the weights remain in the on-disk cache
    for tokenizer_id in tokenizers:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
        del tokenizer