Spaces:

dlsmallw
/

NLPinitiative-Streamlit-App

Running

App Files Files Community

dlsmallw committed 18 days ago

Commit

23428ec

1 Parent(s): 0637402

Task-359 Correct code to read new model repository structure

Browse files

Files changed (2) hide show

app.py +22 -16
scripts/predict.py +7 -28

app.py CHANGED Viewed

@@ -85,10 +85,7 @@ def load_inference_handler(api_token: str) -> InferenceHandler | None:
         Returns an instance of the InferenceHandler class if a valid token is entered, otherwise returns None.
     """
-    try:
-        return InferenceHandler(api_token)
-    except:
-        return None
 def build_result_tree(parent_elem, results: dict):
     """Loads the history of results from inference for previous inputs made by the user.
@@ -195,11 +192,10 @@ def analyze_text(input: str):
     input : str
         The text to analyze.
     """
-    if ih:
         res = None
         with rc:
             with st.spinner("Processing...", show_time=True) as spnr:
-                # time.sleep(5)
                 res = ih.classify_text(input)
                 del spnr
@@ -209,8 +205,8 @@ def analyze_text(input: str):
 @st.cache_data
 def load_datasets(_parent_elem, api_token: str):
-    if api_token is None or len(api_token) == 0:
-        raise Exception()
     cache_path = snapshot_download(repo_id=DATASET_REPO, repo_type='dataset', token=api_token)
     ds_record = pd.read_csv(os.path.join(cache_path, 'dataset_record.csv'))
@@ -263,13 +259,23 @@ def load_datasets(_parent_elem, api_token: str):
 st.title('NLPinitiative Text Classifier')
-st.sidebar.write("")
-API_KEY = st.sidebar.text_input(
-    "Enter your HuggingFace API Token",
-    help="You can get your free API token in your settings page: https://huggingface.co/settings/tokens",
-    type="password",
-)
-ih = load_inference_handler(API_KEY)
 tab1 = st.empty()
 tab2 = st.empty()
@@ -354,7 +360,7 @@ with tab3:
 with tab4:
     ds_container = st.container(border=True)
     try:
-        load_datasets(ds_container, API_KEY)
     except Exception as e:
         logger.error(f'{e}')
         ds_container.markdown(

         Returns an instance of the InferenceHandler class if a valid token is entered, otherwise returns None.
     """
+    return InferenceHandler(api_token)
 def build_result_tree(parent_elem, results: dict):
     """Loads the history of results from inference for previous inputs made by the user.
     input : str
         The text to analyze.
     """
+    if ih is not None:
         res = None
         with rc:
             with st.spinner("Processing...", show_time=True) as spnr:
                 res = ih.classify_text(input)
                 del spnr
 @st.cache_data
 def load_datasets(_parent_elem, api_token: str):
+    # if api_token is None or len(api_token) == 0:
+    #     raise Exception()
     cache_path = snapshot_download(repo_id=DATASET_REPO, repo_type='dataset', token=api_token)
     ds_record = pd.read_csv(os.path.join(cache_path, 'dataset_record.csv'))
 st.title('NLPinitiative Text Classifier')
+# st.sidebar.write("")
+# API_KEY = st.sidebar.text_input(
+#     "Enter your HuggingFace API Token",
+#     help="You can get your free API token in your settings page: https://huggingface.co/settings/tokens",
+#     type="password",
+# )
+# if API_KEY is not None and len(API_KEY) > 0:
+#     try:
+#         ih = load_inference_handler(API_KEY)
+#     except Exception as e:
+#         ih = None
+#         st.sidebar.write(f'Failed to load inference handler: {e}')
+# else:
+#     ih = None
+ih = InferenceHandler(None)
 tab1 = st.empty()
 tab2 = st.empty()
 with tab4:
     ds_container = st.container(border=True)
     try:
+        load_datasets(ds_container, None)
     except Exception as e:
         logger.error(f'{e}')
         ds_container.markdown(

scripts/predict.py CHANGED Viewed

@@ -3,18 +3,18 @@ Script file used for performing inference with an existing model.
 """
 import torch
-import json
 import nltk
 from nltk.tokenize import sent_tokenize
-import huggingface_hub
 from transformers import (
     AutoTokenizer,
     AutoModelForSequenceClassification
 )
-BIN_REPO = 'dlsmallw/NLPinitiative-Binary-Classification'
-ML_REPO = 'dlsmallw/NLPinitiative-Multilabel-Regression'
 class InferenceHandler:
     """A class that handles performing inference using the trained binary classification and multilabel regression models."""
@@ -33,28 +33,13 @@ class InferenceHandler:
         self.ml_regr_tokenizer, self.ml_regr_model = self._init_model_and_tokenizer(ML_REPO)
         nltk.download('punkt_tab')
-    def _get_config(self, repo_id: str) -> str:
-        """Retrieves the config.json file from the specified model repository.
-        Parameters
-        ----------
-        repo_id : str
-            The repository id (i.e., <owner username>/<repository name>).
-        """
-        config = None
-        if repo_id and self.api_token:
-            config = huggingface_hub.hf_hub_download(repo_id, filename='config.json', token=self.api_token)
-        return config
     def _init_model_and_tokenizer(self, repo_id: str):
         """Initializes a model and tokenizer for use in inference using the models path.
         Parameters
         ----------
-        model_path : Path
-            Directory path to the models tensor file.
         Returns
         -------
@@ -62,14 +47,8 @@ class InferenceHandler:
             A tuple containing the tokenizer and model objects.
         """
-        config = self._get_config(repo_id)
-        with open(config) as config_file:
-            config_json = json.load(config_file)
-        model_name = config_json['_name_or_path']
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
         model = AutoModelForSequenceClassification.from_pretrained(repo_id, token=self.api_token)
         model.eval()
         return tokenizer, model

 """
 import torch
 import nltk
 from nltk.tokenize import sent_tokenize
 from transformers import (
     AutoTokenizer,
     AutoModelForSequenceClassification
 )
+from scripts.config import (
+    BIN_REPO,
+    ML_REPO
+)
 class InferenceHandler:
     """A class that handles performing inference using the trained binary classification and multilabel regression models."""
         self.ml_regr_tokenizer, self.ml_regr_model = self._init_model_and_tokenizer(ML_REPO)
         nltk.download('punkt_tab')
     def _init_model_and_tokenizer(self, repo_id: str):
         """Initializes a model and tokenizer for use in inference using the models path.
         Parameters
         ----------
+        repo_id : str
+            The repository id (i.e., <owner username>/<repository name>).
         Returns
         -------
             A tuple containing the tokenizer and model objects.
         """
+        tokenizer = AutoTokenizer.from_pretrained(repo_id, token=self.api_token)
         model = AutoModelForSequenceClassification.from_pretrained(repo_id, token=self.api_token)
         model.eval()
         return tokenizer, model