Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,9 +17,6 @@ from transformers import (
|
|
17 |
from transformers.pipelines import AggregationStrategy
|
18 |
|
19 |
from functions import (
|
20 |
-
load_sentiment_pipeline,
|
21 |
-
KeyphraseExtractionPipeline,
|
22 |
-
keyword_extractor,
|
23 |
scrape_reddit_data,
|
24 |
split_text_by_token_limit,
|
25 |
safe_sentiment,
|
@@ -28,6 +25,45 @@ from functions import (
|
|
28 |
)
|
29 |
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
|
33 |
|
|
|
17 |
from transformers.pipelines import AggregationStrategy
|
18 |
|
19 |
from functions import (
|
|
|
|
|
|
|
20 |
scrape_reddit_data,
|
21 |
split_text_by_token_limit,
|
22 |
safe_sentiment,
|
|
|
25 |
)
|
26 |
|
27 |
|
28 |
+
# ---------- Cached function for loading the model pipelines ----------
@st.cache_resource
def load_sentiment_pipeline():
    """Load and cache the RoBERTa sentiment-analysis pipeline.

    Returns:
        tuple: ``(sentiment_pipeline, tokenizer, max_tokens)`` where
        ``max_tokens`` is the usable token limit for chunking input text
        before it is fed to the pipeline.
    """
    model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        # `use_auth_token` is deprecated in transformers; `token` is the
        # supported kwarg for passing a Hugging Face access token.
        token=st.secrets["hugging_face_token"],
    )
    # NOTE(review): device=0 assumes a CUDA device is available on the
    # deployment target — confirm, otherwise this raises at startup.
    sentiment_pipeline = pipeline(
        "sentiment-analysis", model=model, tokenizer=tokenizer, device=0
    )
    max_tokens = tokenizer.model_max_length
    # Some tokenizers report a huge sentinel value (e.g. ~1e30) meaning
    # "no limit"; fall back to the RoBERTa context size in that case.
    if max_tokens > 10000:
        max_tokens = 512
    return sentiment_pipeline, tokenizer, max_tokens
|
42 |
+
|
43 |
+
|
44 |
+
# class for keyword extraction
# NOTE(review): the original decorated this class with @st.cache_resource.
# That decorator is designed for functions — applying it to a class replaces
# the class object with a cached wrapper, breaking isinstance checks and
# subclassing. Instance caching is already handled by the cached
# keyword_extractor() factory, so the decorator is dropped here.
class KeyphraseExtractionPipeline(TokenClassificationPipeline):
    """Token-classification pipeline whose post-processing step collapses
    token predictions into a de-duplicated array of keyphrase strings."""

    def __init__(self, model, *args, **kwargs):
        # Load the model weights and the matching tokenizer from the same
        # checkpoint name before delegating to the base pipeline.
        super().__init__(
            model=AutoModelForTokenClassification.from_pretrained(model),
            tokenizer=AutoTokenizer.from_pretrained(model),
            *args,
            **kwargs,
        )

    def postprocess(self, all_outputs):
        """Aggregate token-level predictions and return unique keyphrases."""
        results = super().postprocess(
            all_outputs=all_outputs,
            aggregation_strategy=AggregationStrategy.SIMPLE,
        )
        # np.unique both de-duplicates and sorts the extracted phrases.
        return np.unique([result.get("word").strip() for result in results])
|
61 |
+
|
62 |
+
# Cached factory: the keyphrase model is downloaded and instantiated only
# once per Streamlit session, then reused across reruns.
@st.cache_resource
def keyword_extractor():
    """Return a cached keyphrase-extraction pipeline (KBIR fine-tuned on Inspec)."""
    return KeyphraseExtractionPipeline(
        model="ml6team/keyphrase-extraction-kbir-inspec"
    )
|
67 |
|
68 |
|
69 |
|