dejanseo committed on
Commit
6a601a0
1 Parent(s): 426c5b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -7
app.py CHANGED
@@ -2,11 +2,11 @@ import streamlit as st
2
  import torch
3
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
  import requests
5
- from bs4 import BeautifulSoup
6
  import pandas as pd
7
  import altair as alt
8
  from collections import OrderedDict
9
  from nltk.tokenize import sent_tokenize
 
10
 
11
  # Load the punkt tokenizer from nltk
12
  import nltk
@@ -41,11 +41,9 @@ background_colors = {
41
 
42
  # Function to get text content from a URL
43
  def get_text_from_url(url):
44
- response = requests.get(url)
45
- if response.status_code == 200:
46
- soup = BeautifulSoup(response.content, 'html.parser')
47
- paragraphs = soup.find_all('p')
48
- return ' '.join(p.get_text() for p in paragraphs)
49
  return ""
50
 
51
  # Function to classify text
@@ -150,7 +148,7 @@ Multi-label sentiment classification model developed by [Dejan Marketing](https:
150
  The model is designed to be deployed in an automated pipeline capable of classifying text sentiment for thousands (or even millions) of text chunks or as a part of a scraping pipeline. This is a demo model which may occasionally misclassify some texts. In a typical commercial project, a larger model is deployed for the task, and in special cases, a domain-specific model is developed for the client.
151
 
152
  ### Engage Our Team
153
- Interested in using this in an automated pipeline for bulk query processing?
154
 
155
  Please [book an appointment](https://dejanmarketing.com/conference/) to discuss your needs.
156
  """)
 
2
  import torch
3
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
  import requests
 
5
  import pandas as pd
6
  import altair as alt
7
  from collections import OrderedDict
8
  from nltk.tokenize import sent_tokenize
9
+ import trafilatura
10
 
11
  # Load the punkt tokenizer from nltk
12
  import nltk
 
41
 
42
  # Function to get text content from a URL
43
  def get_text_from_url(url):
44
+ downloaded = trafilatura.fetch_url(url)
45
+ if downloaded:
46
+ return trafilatura.extract(downloaded)
 
 
47
  return ""
48
 
49
  # Function to classify text
 
148
  The model is designed to be deployed in an automated pipeline capable of classifying text sentiment for thousands (or even millions) of text chunks or as a part of a scraping pipeline. This is a demo model which may occasionally misclassify some texts. In a typical commercial project, a larger model is deployed for the task, and in special cases, a domain-specific model is developed for the client.
149
 
150
  ### Engage Our Team
151
+ Interested in using this in an automated pipeline for bulk sentiment processing?
152
 
153
  Please [book an appointment](https://dejanmarketing.com/conference/) to discuss your needs.
154
  """)