Update app.py
app.py
CHANGED
@@ -1,5 +1,5 @@
 from collections import Counter
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor # palarell processing
 import matplotlib.pyplot as plt
 import pandas as pd
 import praw  # Reddit's API
@@ -18,55 +18,14 @@ from transformers import (
 from transformers.pipelines import AggregationStrategy

 from functions import (
+    load_sentiment_pipeline,
     scrape_reddit_data,
-    split_text_by_token_limit,
     safe_sentiment,
-    safe_sentiment_batch,
     analyze_detail,
     preprocess_text
 )


-# ---------- Cached function for loading the model pipelines ----------
-@st.cache_resource
-def load_sentiment_pipeline():  # sentiment pipeline
-    tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
-    model = AutoModelForSequenceClassification.from_pretrained(
-        "cardiffnlp/twitter-roberta-base-sentiment-latest",
-        use_auth_token=st.secrets["hugging_face_token"]
-    )
-    sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, device=0)  # -1 to 0
-    max_tokens = tokenizer.model_max_length
-
-    if max_tokens > 10000:
-        max_tokens = 512
-    return sentiment_pipeline, tokenizer, max_tokens
-
-
-# class for keyword extraction
-@st.cache_resource
-class KeyphraseExtractionPipeline(TokenClassificationPipeline):
-    def __init__(self, model, *args, **kwargs):
-        super().__init__(
-            model=AutoModelForTokenClassification.from_pretrained(model),
-            tokenizer=AutoTokenizer.from_pretrained(model),
-            *args,
-            **kwargs
-        )
-
-    def postprocess(self, all_outputs):
-        results = super().postprocess(
-            all_outputs=all_outputs,
-            aggregation_strategy=AggregationStrategy.SIMPLE,
-        )
-        return np.unique([result.get("word").strip() for result in results])
-
-@st.cache_resource
-def keyword_extractor():
-    model_name = "ml6team/keyphrase-extraction-kbir-inspec"
-    extractor = KeyphraseExtractionPipeline(model=model_name)
-    return extractor
-



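The deleted definitions are not lost to the app: the updated import list pulls load_sentiment_pipeline from functions, so the loader has presumably been relocated into functions.py (the keyphrase-extraction pipeline and keyword_extractor are simply removed from app.py, and this diff does not show whether they live elsewhere now). Below is a minimal sketch of what the relocated loader could look like in functions.py, assuming it keeps the same checkpoint, Streamlit caching decorator, and token-limit guard as the code removed above; this is an illustration, not the commit's actual functions.py.

# functions.py -- hypothetical sketch of the relocated, cached sentiment loader
import streamlit as st
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline


@st.cache_resource  # load the model once per Streamlit process and reuse it across reruns
def load_sentiment_pipeline():
    # Same checkpoint as the removed code; the Hub token is read from Streamlit secrets.
    model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        use_auth_token=st.secrets["hugging_face_token"],
    )
    sentiment_pipeline = pipeline(
        "sentiment-analysis", model=model, tokenizer=tokenizer, device=0
    )
    # Some tokenizers report a huge sentinel value for model_max_length;
    # fall back to the model's real 512-token context in that case.
    max_tokens = tokenizer.model_max_length
    if max_tokens > 10000:
        max_tokens = 512
    return sentiment_pipeline, tokenizer, max_tokens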