Spaces:

dejanseo
/

sentiment

Running

dejanseo committed on Jun 8

Commit

6a601a0

•

1 Parent(s): 426c5b5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,11 +2,11 @@ import streamlit as st
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import requests
-from bs4 import BeautifulSoup
 import pandas as pd
 import altair as alt
 from collections import OrderedDict
 from nltk.tokenize import sent_tokenize
 # Load the punkt tokenizer from nltk
 import nltk
@@ -41,11 +41,9 @@ background_colors = {
 # Function to get text content from a URL
 def get_text_from_url(url):
-    response = requests.get(url)
-    if response.status_code == 200:
-        soup = BeautifulSoup(response.content, 'html.parser')
-        paragraphs = soup.find_all('p')
-        return ' '.join(p.get_text() for p in paragraphs)
     return ""
 # Function to classify text
@@ -150,7 +148,7 @@ Multi-label sentiment classification model developed by [Dejan Marketing](https:
 The model is designed to be deployed in an automated pipeline capable of classifying text sentiment for thousands (or even millions) of text chunks or as a part of a scraping pipeline. This is a demo model which may occasionally misclassify some texts. In a typical commercial project, a larger model is deployed for the task, and in special cases, a domain-specific model is developed for the client.
 ### Engage Our Team
-Interested in using this in an automated pipeline for bulk query processing?
 Please [book an appointment](https://dejanmarketing.com/conference/) to discuss your needs.
 """)

 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import requests
 import pandas as pd
 import altair as alt
 from collections import OrderedDict
 from nltk.tokenize import sent_tokenize
+import trafilatura
 # Load the punkt tokenizer from nltk
 import nltk
 # Function to get text content from a URL
 def get_text_from_url(url):
+    downloaded = trafilatura.fetch_url(url)
+    if downloaded:
+        return trafilatura.extract(downloaded)
     return ""
 # Function to classify text
 The model is designed to be deployed in an automated pipeline capable of classifying text sentiment for thousands (or even millions) of text chunks or as a part of a scraping pipeline. This is a demo model which may occasionally misclassify some texts. In a typical commercial project, a larger model is deployed for the task, and in special cases, a domain-specific model is developed for the client.
 ### Engage Our Team
+Interested in using this in an automated pipeline for bulk sentiment processing?
 Please [book an appointment](https://dejanmarketing.com/conference/) to discuss your needs.
 """)