Spaces:
Runtime error
Runtime error
first
Browse files- .gitignore +27 -0
- Makefile +2 -0
- app.py +113 -0
- install-node.sh +10 -0
- main.py +3 -0
- packages.txt +0 -0
- popular.txt +0 -0
- requirements.txt +10 -0
- templates/index.html +26 -0
- umap_reducer.py +37 -0
.gitignore
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.DS_Store
|
2 |
+
.env
|
3 |
+
.flaskenv
|
4 |
+
*.pyc
|
5 |
+
*.pyo
|
6 |
+
env/
|
7 |
+
venv/
|
8 |
+
.venv/
|
9 |
+
env*
|
10 |
+
dist/
|
11 |
+
build/
|
12 |
+
*.egg
|
13 |
+
*.egg-info/
|
14 |
+
_mailinglist
|
15 |
+
.tox/
|
16 |
+
.cache/
|
17 |
+
.pytest_cache/
|
18 |
+
.idea/
|
19 |
+
docs/_build/
|
20 |
+
.vscode
|
21 |
+
# Coverage reports
|
22 |
+
htmlcov/
|
23 |
+
.coverage
|
24 |
+
.coverage.*
|
25 |
+
*,cover
|
26 |
+
venv
|
27 |
+
*_cache.json
|
Makefile
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
# Start the Flask dev server on port 3000 with development mode enabled.
run:
	PORT=3000 FLASK_ENV=development python app.py
|
app.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from umap_reducer import UMAPReducer
|
2 |
+
from flask import Flask, request, render_template, jsonify, make_response
|
3 |
+
from flask_cors import CORS
|
4 |
+
import os
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
from transformers import pipeline
|
7 |
+
import feedparser
|
8 |
+
import json
|
9 |
+
from dateutil import parser
|
10 |
+
import re
|
11 |
+
import numpy as np
|
12 |
+
import gzip
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
|
16 |
+
# Load Setiment Classifier
|
17 |
+
# sentiment_analysis = pipeline(
|
18 |
+
# "sentiment-analysis", model="siebert/sentiment-roberta-large-english")
|
19 |
+
app = Flask(__name__, static_url_path='/static')
|
20 |
+
reducer = UMAPReducer()
|
21 |
+
|
22 |
+
CORS(app)
|
23 |
+
|
24 |
+
|
25 |
+
@app.route('/')
|
26 |
+
def index():
|
27 |
+
return render_template('index.html')
|
28 |
+
|
29 |
+
|
30 |
+
@app.route('/run-umap') # //methods=['POST'])
|
31 |
+
def run_umap():
|
32 |
+
data = np.random.rand(512, 4)
|
33 |
+
|
34 |
+
# UMAP embeddings
|
35 |
+
embeddings = reducer.embed(data)
|
36 |
+
|
37 |
+
content = gzip.compress(json.dumps(embeddings.tolist()).encode('utf8'), 5)
|
38 |
+
response = make_response(content)
|
39 |
+
response.headers['Content-length'] = len(content)
|
40 |
+
response.headers['Content-Encoding'] = 'gzip'
|
41 |
+
return response
|
42 |
+
|
43 |
+
|
44 |
+
# @app.route('/news')
|
45 |
+
# def get_news():
|
46 |
+
# feed_url = request.args.get('feed_url')
|
47 |
+
# # check if string is a valid
|
48 |
+
|
49 |
+
# # file name for cache
|
50 |
+
# file_name = "".join(re.split(r"https://|\.|/", feed_url))
|
51 |
+
|
52 |
+
# feed_entries = get_feed(feed_url)
|
53 |
+
# # filter only titles for sentiment analysis
|
54 |
+
# try:
|
55 |
+
# with open(f'{file_name}_cache.json') as file:
|
56 |
+
# cache = json.load(file)
|
57 |
+
# except:
|
58 |
+
# cache = {}
|
59 |
+
|
60 |
+
# # if new homepage is newer than cache, update cache and return
|
61 |
+
# print("new date", feed_entries['last_update'])
|
62 |
+
# print("old date", cache['last_update']
|
63 |
+
# if 'last_update' in cache else "None")
|
64 |
+
# if not cache or parser.parse(feed_entries['last_update']) > parser.parse(cache['last_update']):
|
65 |
+
# print("Updating cache with new preditions")
|
66 |
+
# titles = [entry['title'] for entry in feed_entries['entries']]
|
67 |
+
# # run sentiment analysis on titles
|
68 |
+
# predictions = [sentiment_analysis(sentence) for sentence in titles]
|
69 |
+
# # parse Negative and Positive, normalize to -1 to 1
|
70 |
+
# predictions = [-prediction[0]['score'] if prediction[0]['label'] ==
|
71 |
+
# 'NEGATIVE' else prediction[0]['score'] for prediction in predictions]
|
72 |
+
# # merge rss data with predictions
|
73 |
+
# entries_predicitons = [{**entry, 'sentiment': prediction}
|
74 |
+
# for entry, prediction in zip(feed_entries['entries'], predictions)]
|
75 |
+
# output = {'entries': entries_predicitons,
|
76 |
+
# 'last_update': feed_entries['last_update']}
|
77 |
+
# # update last precitions cache
|
78 |
+
# with open(f'{file_name}_cache.json', 'w') as file:
|
79 |
+
# json.dump(output, file)
|
80 |
+
# # send back json
|
81 |
+
# return jsonify(output)
|
82 |
+
# else:
|
83 |
+
# print("Returning cached predictions")
|
84 |
+
# return jsonify(cache)
|
85 |
+
|
86 |
+
|
87 |
+
# @ app.route('/predict', methods=['POST'])
|
88 |
+
# def predict():
|
89 |
+
# # get data from POST
|
90 |
+
# if request.method == 'POST':
|
91 |
+
# # get current news
|
92 |
+
# # get post body data
|
93 |
+
# data = request.get_json()
|
94 |
+
# if data.get('sentences') is None:
|
95 |
+
# return jsonify({'error': 'No text provided'})
|
96 |
+
# # get post expeceted to be under {'sentences': ['text': '...']}
|
97 |
+
# sentences = data.get('sentences')
|
98 |
+
# # prencit sentiments
|
99 |
+
# predictions = [sentiment_analysis(sentence) for sentence in sentences]
|
100 |
+
# # parse Negative and Positive, normalize to -1 to 1
|
101 |
+
# predictions = [-prediction[0]['score'] if prediction[0]['label'] ==
|
102 |
+
# 'NEGATIVE' else prediction[0]['score'] for prediction in predictions]
|
103 |
+
# output = [dict(sentence=sentence, sentiment=prediction)
|
104 |
+
# for sentence, prediction in zip(sentences, predictions)]
|
105 |
+
# # send back json
|
106 |
+
# return jsonify(output)
|
107 |
+
|
108 |
+
|
109 |
+
# def get_feed(feed_url):
|
110 |
+
# feed = feedparser.parse(feed_url)
|
111 |
+
# return {'entries': feed['entries'], 'last_update': feed["feed"]['updated']}
|
112 |
+
if __name__ == '__main__':
|
113 |
+
app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
|
install-node.sh
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Install nvm (Node Version Manager) v0.39.1.
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash
# Resolve the nvm directory (respects XDG_CONFIG_HOME when set).
export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"
# Load nvm into the current shell session.
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
# Install the latest LTS release of Node.js.
nvm install --lts
# Sanity checks: print versions and resolved binary paths.
node --version
npm --version
which node
which npm
# Symlink node/npm into a stable PATH location so later processes can find
# them without sourcing nvm. NOTE(review): /home/user is hard-coded —
# presumably the Spaces container user; confirm before reusing elsewhere.
command ln -s "$NVM_BIN/node" /home/user/.local/bin/node
command ln -s "$NVM_BIN/npm" /home/user/.local/bin/npm
|
main.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
import subprocess

# Run the Makefile's `build-all` target (e.g. to build frontend assets).
# argv list + shell=False executes `make` directly, with no shell involved.
subprocess.run(["make", "build-all"], shell=False)
|
packages.txt
ADDED
File without changes
|
popular.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
feedparser==6.0.8
|
2 |
+
Flask==2.0.3
|
3 |
+
flask_cors==3.0.10
|
4 |
+
hdbscan==0.8.28
|
5 |
+
numpy==1.22.2
|
6 |
+
python-dotenv==0.19.2
|
7 |
+
python_dateutil==2.8.2
|
8 |
+
transformers==4.16.2
|
9 |
+
umap-learn==0.5.2
|
10 |
+
torch
|
templates/index.html
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<!DOCTYPE html>
<html lang="en">
  <head>
    <base href="" />
    <meta charset="utf-8" />
    <meta name="description" content="" />
    <link rel="icon" href="favicon.png" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <!-- iframe-resizer lets a hosting page auto-size this embedded app. -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script>
    <!-- Observable inspector styles for the rendered notebook cells. -->
    <link
      rel="stylesheet"
      href="https://cdn.jsdelivr.net/npm/@observablehq/inspector@3/dist/inspector.css"
    />
  </head>
  <body>
    <!-- Mount point for the Observable notebook rendered below. -->
    <div id="observablehq-3f13b363"></div>
    <script type="module">
      import {
        Runtime,
        Inspector,
      } from "https://cdn.jsdelivr.net/npm/@observablehq/runtime@4/dist/runtime.js";
      <!-- (module script) published notebook definition; renders into the div above -->
      import define from "https://api.observablehq.com/d/843a8bdf01fc2c8f.js?v=3";
      new Runtime().module(define, Inspector.into("#observablehq-3f13b363"));
    </script>
  </body>
</html>
|
umap_reducer.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import umap
|
2 |
+
import hdbscan
|
3 |
+
import copy
|
4 |
+
|
5 |
+
class UMAPReducer:
    """Bundles a UMAP projector and an HDBSCAN clusterer behind one object."""

    def __init__(self, options=None):
        """Create the reducer.

        Args:
            options: optional dict overriding any of the defaults below
                (n_components, spread, min_dist, n_neighbors, metric,
                min_cluster_size, min_samples).
        """
        # Fix: the original used a mutable default argument (options={}).
        # Use a None sentinel and merge caller overrides over the defaults.
        options = {'n_components': 3, 'spread': 1, 'min_dist': 0.1, 'n_neighbors': 15,
                   'metric': 'hellinger', 'min_cluster_size': 60, 'min_samples': 15,
                   **(options or {})}

        print(options)
        self.reducer = umap.UMAP(
            n_neighbors=options['n_neighbors'],
            min_dist=options['min_dist'],
            n_components=options['n_components'],
            metric=options['metric'],
            verbose=True)
        # cluster init
        self.clusterer = hdbscan.HDBSCAN(
            min_cluster_size=options['min_cluster_size'],
            min_samples=options['min_samples'],
            allow_single_cluster=True
        )
        # Keep an independent copy so later setParams calls can't alias state.
        self.cluster_params = copy.deepcopy(options)

    def setParams(self, options):
        """Merge *options* into the stored parameter dict.

        NOTE(review): this only updates ``cluster_params``; the already
        constructed reducer/clusterer are NOT rebuilt with the new values.
        """
        # update params
        self.cluster_params = {**self.cluster_params, **options}

    def clusterAnalysis(self, data):
        """Fit HDBSCAN on *data* and return the fitted clusterer."""
        clusters = self.clusterer.fit(data)
        return clusters

    def embed(self, data):
        """Project *data* with UMAP and return the embedded coordinates."""
        result = self.reducer.fit_transform(data)
        return result