Spaces:

kusa04
/

g13_DL_project

Sleeping

App Files Files Community

kusa04 committed on Mar 20

Commit

a9d660f

verified ·

1 Parent(s): 052eebe

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -1

app.py CHANGED Viewed

@@ -18,7 +18,6 @@ from transformers import (
 from transformers.pipelines import AggregationStrategy
 from functions import (
-                    load_sentiment_pipeline,
                     scrape_reddit_data,
                     safe_sentiment,
                     analyze_detail,
@@ -26,6 +25,46 @@ from functions import (
                 )

 from transformers.pipelines import AggregationStrategy
 from functions import (
                     scrape_reddit_data,
                     safe_sentiment,
                     analyze_detail,
                 )
+# ---------- Cached function for loading the model pipelines ----------
+@st.cache_resource
+def load_sentiment_pipeline(): # sentiment pipeline
+    """Build the sentiment-analysis pipeline once and let Streamlit cache it.
+
+    Loads the ``cardiffnlp/twitter-roberta-base-sentiment-latest`` tokenizer
+    and model (the model download is authenticated with the
+    ``hugging_face_token`` Streamlit secret) and wraps them in a
+    ``"sentiment-analysis"`` pipeline.
+
+    Returns:
+        tuple: ``(sentiment_pipeline, tokenizer, max_tokens)`` where
+        ``max_tokens`` is ``tokenizer.model_max_length``, replaced by 512
+        when the tokenizer reports a value above 10000.
+    """
+    # Local import keeps this block self-contained; torch is only needed
+    # here to probe for a GPU.
+    import torch
+
+    model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSequenceClassification.from_pretrained(
+        model_name,
+        use_auth_token=st.secrets["hugging_face_token"],
+    )
+    # Use the first GPU when one is visible, otherwise fall back to CPU (-1).
+    # A hard-coded device=0 fails on hosts that have no CUDA device.
+    device = 0 if torch.cuda.is_available() else -1
+    sentiment_pipeline = pipeline(
+        "sentiment-analysis", model=model, tokenizer=tokenizer, device=device
+    )
+    max_tokens = tokenizer.model_max_length
+    # NOTE(review): a value this large presumably means the tokenizer has no
+    # configured limit; 512 is used as the fallback in that case.
+    if max_tokens > 10000:
+        max_tokens = 512
+    return sentiment_pipeline, tokenizer, max_tokens
+# class for keyword extraction
+class KeyphraseExtractionPipeline(TokenClassificationPipeline):
+    """Token-classification pipeline that returns the unique keyphrases found.
+
+    The class is intentionally left undecorated: ``st.cache_resource`` is a
+    function decorator, and applying it to a class rebinds the class name to
+    a caching wrapper (so the name can no longer be subclassed or used with
+    ``isinstance``). Instance caching is handled by the cached factory
+    function ``keyword_extractor``.
+    """
+
+    def __init__(self, model, *args, **kwargs):
+        """Load the model and tokenizer for the checkpoint named ``model``.
+
+        Args:
+            model: Checkpoint identifier passed to both
+                ``AutoModelForTokenClassification.from_pretrained`` and
+                ``AutoTokenizer.from_pretrained``.
+            *args: Extra positional arguments forwarded to the parent
+                pipeline constructor.
+            **kwargs: Extra keyword arguments forwarded to the parent
+                pipeline constructor.
+        """
+        super().__init__(
+            *args,
+            model=AutoModelForTokenClassification.from_pretrained(model),
+            tokenizer=AutoTokenizer.from_pretrained(model),
+            **kwargs
+        )
+
+    def postprocess(self, all_outputs):
+        """Aggregate token predictions and return the distinct phrases.
+
+        Args:
+            all_outputs: Raw model outputs handed over by the pipeline.
+
+        Returns:
+            The result of ``np.unique`` over the stripped ``"word"`` value
+            of every aggregated entity, i.e. a sorted array without
+            duplicates.
+        """
+        results = super().postprocess(
+            all_outputs=all_outputs,
+            aggregation_strategy=AggregationStrategy.SIMPLE,
+        )
+        return np.unique([result.get("word").strip() for result in results])
+@st.cache_resource
+def keyword_extractor():
+    """Create the keyphrase-extraction pipeline; Streamlit caches the result.
+
+    Returns:
+        A ``KeyphraseExtractionPipeline`` built from the
+        ``ml6team/keyphrase-extraction-kbir-inspec`` checkpoint.
+    """
+    return KeyphraseExtractionPipeline(
+        model="ml6team/keyphrase-extraction-kbir-inspec"
+    )
+# ---------------------------------------------------------------------------