Update app.py
Browse files
app.py
CHANGED
@@ -2,11 +2,11 @@ import streamlit as st
|
|
2 |
import torch
|
3 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
4 |
import requests
|
5 |
-
from bs4 import BeautifulSoup
|
6 |
import pandas as pd
|
7 |
import altair as alt
|
8 |
from collections import OrderedDict
|
9 |
from nltk.tokenize import sent_tokenize
|
|
|
10 |
|
11 |
# Load the punkt tokenizer from nltk
|
12 |
import nltk
|
@@ -41,11 +41,9 @@ background_colors = {
|
|
41 |
|
42 |
# Function to get text content from a URL
|
43 |
def get_text_from_url(url):
|
44 |
-
|
45 |
-
if
|
46 |
-
|
47 |
-
paragraphs = soup.find_all('p')
|
48 |
-
return ' '.join(p.get_text() for p in paragraphs)
|
49 |
return ""
|
50 |
|
51 |
# Function to classify text
|
@@ -150,7 +148,7 @@ Multi-label sentiment classification model developed by [Dejan Marketing](https:
|
|
150 |
The model is designed to be deployed in an automated pipeline capable of classifying text sentiment for thousands (or even millions) of text chunks or as part of a scraping pipeline. This is a demo model which may occasionally misclassify some texts. In a typical commercial project, a larger model is deployed for the task, and in special cases, a domain-specific model is developed for the client.
|
151 |
|
152 |
### Engage Our Team
|
153 |
-
Interested in using this in an automated pipeline for bulk
|
154 |
|
155 |
Please [book an appointment](https://dejanmarketing.com/conference/) to discuss your needs.
|
156 |
""")
|
|
|
2 |
import torch
|
3 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
4 |
import requests
|
|
|
5 |
import pandas as pd
|
6 |
import altair as alt
|
7 |
from collections import OrderedDict
|
8 |
from nltk.tokenize import sent_tokenize
|
9 |
+
import trafilatura
|
10 |
|
11 |
# Load the punkt tokenizer from nltk
|
12 |
import nltk
|
|
|
41 |
|
42 |
# Function to get text content from a URL
|
43 |
def get_text_from_url(url):
    """Download a web page and return its extracted text content.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text trafilatura extracts from the page, or an empty string
        when nothing was downloaded or no text could be extracted.
    """
    downloaded = trafilatura.fetch_url(url)
    if not downloaded:
        # Nothing came back from the fetch; keep the empty-string fallback.
        return ""
    # extract() may return None when it finds no usable content; normalise
    # to "" so callers always receive a string, matching the fallback above.
    return trafilatura.extract(downloaded) or ""
|
48 |
|
49 |
# Function to classify text
|
|
|
148 |
The model is designed to be deployed in an automated pipeline capable of classifying text sentiment for thousands (or even millions) of text chunks or as part of a scraping pipeline. This is a demo model which may occasionally misclassify some texts. In a typical commercial project, a larger model is deployed for the task, and in special cases, a domain-specific model is developed for the client.
|
149 |
|
150 |
### Engage Our Team
|
151 |
+
Interested in using this in an automated pipeline for bulk sentiment processing?
|
152 |
|
153 |
Please [book an appointment](https://dejanmarketing.com/conference/) to discuss your needs.
|
154 |
""")
|