Spaces:
Paused
Paused
Commit
·
5efa5ee
1
Parent(s):
7942766
init
Browse files- .gitignore +6 -0
- app.py +152 -0
- hf_scraper.py +52 -0
- indexer.py +32 -0
- requirements.txt +4 -0
- templates/index.html +162 -0
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#.env
|
2 |
+
.env
|
3 |
+
# cache
|
4 |
+
tmp
|
5 |
+
# pycache
|
6 |
+
__pycache__
|
app.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, jsonify, render_template, redirect, request, send_file
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
import requests
|
5 |
+
import urllib.parse
|
6 |
+
from datetime import datetime, timedelta
|
7 |
+
from threading import Thread
|
8 |
+
from hf_scraper import get_system_proxies
|
9 |
+
from indexer import indexer
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
|
12 |
+
# --- Module configuration and startup --------------------------------------
# Pull settings from a local .env file (if any) into the environment, then
# read everything this module needs from environment variables.
load_dotenv()
INDEX_FILE = os.getenv("INDEX_FILE")
TOKEN = os.getenv("TOKEN")
REPO = os.getenv("REPO")
THETVDB_API_KEY = os.getenv("THETVDB_API_KEY")
THETVDB_API_URL = os.getenv("THETVDB_API_URL")
CACHE_DIR = os.getenv("CACHE_DIR")
# TheTVDB session state, filled in lazily by authenticate_thetvdb().
TOKEN_EXPIRY = None
THETVDB_TOKEN = None
proxies = get_system_proxies()

# Fail early with a readable message: an unset path variable would otherwise
# surface as an opaque TypeError from os.path / os.makedirs below.
_missing_settings = [
    name
    for name, value in (("INDEX_FILE", INDEX_FILE), ("CACHE_DIR", CACHE_DIR))
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(CACHE_DIR, exist_ok=True)

# Rebuild the repository index on every start.
indexer()

# Check if INDEX_FILE exists
if not os.path.exists(INDEX_FILE):
    raise FileNotFoundError(f"{INDEX_FILE} not found. Please make sure the file exists.")

with open(INDEX_FILE, 'r') as f:
    file_structure = json.load(f)
|
34 |
+
|
35 |
+
def authenticate_thetvdb():
    """Log in to TheTVDB and store the session token in module globals.

    Posts the API key to ``{THETVDB_API_URL}/login``. On success sets
    ``THETVDB_TOKEN`` to the returned token and ``TOKEN_EXPIRY`` to 30 days
    from now. On any failure (network error, HTTP error status, or a response
    body that lacks ``data.token``) both globals are reset to ``None`` and the
    error is printed; nothing is raised.
    """
    global THETVDB_TOKEN, TOKEN_EXPIRY
    auth_url = f"{THETVDB_API_URL}/login"
    auth_data = {"apikey": THETVDB_API_KEY}
    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.post(auth_url, json=auth_data, proxies=proxies, timeout=30)
        response.raise_for_status()
        # The response body is deliberately not printed: it contains the
        # bearer token, which must not end up in logs.
        token = response.json()['data']['token']
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        print(f"Authentication failed: {e}")
        THETVDB_TOKEN = None
        TOKEN_EXPIRY = None
        return

    THETVDB_TOKEN = token
    TOKEN_EXPIRY = datetime.now() + timedelta(days=30)  # token is valid for 1 month
|
52 |
+
|
53 |
+
def get_thetvdb_token():
    """Return the cached TheTVDB token, re-authenticating when needed.

    A new login is attempted when no token is stored or when the stored
    expiry time has been reached. Returns whatever ``THETVDB_TOKEN`` holds
    afterwards, which is ``None`` if authentication failed.
    """
    global THETVDB_TOKEN, TOKEN_EXPIRY
    token_is_fresh = bool(THETVDB_TOKEN) and datetime.now() < TOKEN_EXPIRY
    if not token_is_fresh:
        authenticate_thetvdb()
    return THETVDB_TOKEN
|
58 |
+
|
59 |
+
def fetch_and_cache_image(title, media_type, year=None):
    """Search TheTVDB for *title* and cache the first result's thumbnail.

    Args:
        title: Search text; also used (percent-encoded) as the cache file stem.
        media_type: Value sent as the ``type`` search parameter.
        year: Optional year; sent as the ``year`` parameter when truthy.

    On success writes ``<CACHE_DIR>/<quoted title>.jpg`` (the thumbnail bytes)
    and ``<CACHE_DIR>/<quoted title>.json`` (the raw search response).
    Returns ``None`` in every case; failures are printed, not raised.
    """
    token = get_thetvdb_token()
    if not token:
        print("Authentication failed")
        return

    # Let requests build the query string so titles containing spaces, '&',
    # '#', etc. are encoded correctly instead of corrupting the URL.
    params = {"query": title, "type": media_type}
    if year:
        params["year"] = year

    headers = {
        "Authorization": f"Bearer {token}",
        "accept": "application/json",
    }

    try:
        response = requests.get(
            f"{THETVDB_API_URL}/search",
            params=params,
            headers=headers,
            proxies=proxies,
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()

        results = data.get('data') if isinstance(data, dict) else None
        if not results:
            return
        img_url = results[0].get('thumbnail')
        if not img_url:
            return

        img_response = requests.get(img_url, proxies=proxies, timeout=30)
        # Without this check an HTTP error page would be cached as a ".jpg".
        img_response.raise_for_status()
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching data: {e}")
        return

    cache_stem = os.path.join(CACHE_DIR, urllib.parse.quote(title))
    try:
        with open(f"{cache_stem}.jpg", 'wb') as f:
            f.write(img_response.content)
        # Save JSON response to cache
        with open(f"{cache_stem}.json", 'w') as f:
            json.dump(data, f)
    except OSError as e:
        print(f"Error writing cache files for {title}: {e}")
|
93 |
+
|
94 |
+
def prefetch_images():
    """Fetch and cache artwork for every entry nested under a top-level folder.

    For each top-level item of ``file_structure`` that has ``contents``, each
    child's last path segment is used as the title. A trailing 4-digit token
    is treated as the release year and removed from the title. Children of
    folders whose path starts with ``tv`` are searched as ``series``, all
    others as ``movie``.
    """
    for item in file_structure:
        if 'contents' not in item:
            continue
        media_type = 'series' if item['path'].startswith('tv') else 'movie'
        for sub_item in item['contents']:
            title = sub_item['path'].split('/')[-1]
            year = None
            parts = title.split()
            # Require at least one word besides the year: a title that is
            # itself four digits (e.g. "1917") must not become an empty search.
            if len(parts) > 1 and len(parts[-1]) == 4 and parts[-1].isdigit():
                title = ' '.join(parts[:-1])
                year = int(parts[-1])
            fetch_and_cache_image(title, media_type, year)
|
109 |
+
|
110 |
+
# Run prefetch_images in a background thread
|
111 |
+
def start_prefetching():
    """Thread entry point: run prefetch_images() to completion."""
    prefetch_images()


# Start prefetching before running the Flask app. The thread is a daemon so
# it does not keep the process alive on shutdown.
thread = Thread(target=start_prefetching, daemon=True)
thread.start()
|
118 |
+
|
119 |
+
app = Flask(__name__)


@app.route('/')
def home():
    """Render the ``index.html`` template for the site root."""
    page = render_template('index.html')
    return page
|
124 |
+
|
125 |
+
@app.route('/films')
def list_films():
    """Return, as JSON, the children of every top-level entry under ``films``.

    Entries whose path starts with ``films`` but that carry no ``contents``
    key (for example a plain file) contribute nothing instead of raising
    ``KeyError``.
    """
    entries = []
    for item in file_structure:
        if item['path'].startswith('films'):
            entries.extend(item.get('contents', []))
    return jsonify(entries)
|
129 |
+
|
130 |
+
@app.route('/tv')
def list_tv():
    """Return, as JSON, the children of every top-level entry under ``tv``.

    Entries whose path starts with ``tv`` but that carry no ``contents`` key
    (for example a plain file) contribute nothing instead of raising
    ``KeyError``.
    """
    entries = []
    for item in file_structure:
        if item['path'].startswith('tv'):
            entries.extend(item.get('contents', []))
    return jsonify(entries)
|
134 |
+
|
135 |
+
@app.route('/play/<path:file_path>')
def play(file_path):
    """Redirect to the Hugging Face ``resolve/main`` URL for *file_path*.

    The path is percent-encoded (slashes kept) so that characters such as
    ``?``, ``#`` or spaces in a file name stay part of the path instead of
    being interpreted as a query string or fragment.
    """
    encoded_path = urllib.parse.quote(file_path)
    file_url = f"https://huggingface.co/{REPO}/resolve/main/{encoded_path}"
    return redirect(file_url)
|
139 |
+
|
140 |
+
@app.route('/get_image')
def get_image():
    """Serve the cached poster for the ``title`` query parameter.

    Looks for ``<CACHE_DIR>/<quoted title>.jpg``. Because prefetch_images()
    caches posters under the title with a trailing 4-digit year removed, the
    year-stripped form is tried as a second candidate. When nothing is cached
    (or no title was supplied) a JSON placeholder marker is returned instead.
    """
    title = request.args.get('title')
    if title:
        candidates = [title]
        parts = title.split()
        if len(parts) > 1 and len(parts[-1]) == 4 and parts[-1].isdigit():
            candidates.append(' '.join(parts[:-1]))

        for name in candidates:
            # safe='' also encodes '/', so a crafted title cannot escape
            # CACHE_DIR; names without '/' map to the same file as before.
            file_name = f"{urllib.parse.quote(name, safe='')}.jpg"
            cache_path = os.path.join(CACHE_DIR, file_name)
            if os.path.exists(cache_path):
                return send_file(cache_path, mimetype='image/jpeg')

    # If image is not found in cache, return a placeholder
    return jsonify({'poster': 'placeholder'})
|
150 |
+
|
151 |
+
if __name__ == '__main__':
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it lets anyone who can reach the port execute
    # code. Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)
|
hf_scraper.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
import json
|
4 |
+
import urllib.request
|
5 |
+
from requests.exceptions import RequestException
|
6 |
+
|
7 |
+
def get_system_proxies():
    """Return the system proxy settings in the mapping shape ``requests`` expects.

    Returns:
        A dict with ``"http"`` and ``"https"`` keys taken from
        ``urllib.request.getproxies()``. When no HTTPS proxy is configured the
        HTTP proxy is used for ``"https"`` as well. Values are ``None`` when
        nothing is configured. An empty dict is returned if the lookup fails.
    """
    try:
        proxies = urllib.request.getproxies()
        print("System proxies:", proxies)
        http_proxy = proxies.get("http")
        return {
            "http": http_proxy,
            # Use the dedicated HTTPS proxy when one is configured; fall back
            # to the HTTP proxy otherwise.
            "https": proxies.get("https", http_proxy),
        }
    except Exception as e:
        # Best effort: proxy discovery must never prevent start-up.
        print(f"Error getting system proxies: {e}")
        return {}
|
18 |
+
|
19 |
+
def download_file(file_url, token, output_path, proxies, verify=True):
    """Stream *file_url* to *output_path* using bearer-token authentication.

    Args:
        file_url: URL to download.
        token: Value sent as ``Authorization: Bearer <token>``.
        output_path: Destination file, opened in binary write mode.
        proxies: Proxy mapping passed through to ``requests``.
        verify: TLS verification setting passed to ``requests``. Defaults to
            ``True`` so the token is never sent over an unverified
            connection; pass ``False`` or a CA bundle path only for a
            trusted intercepting proxy.

    Errors are printed rather than raised; the function returns ``None``.
    """
    print(f"Downloading file from URL: {file_url} with proxies: {proxies}")
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(
            file_url,
            headers={'Authorization': f'Bearer {token}'},
            proxies=proxies,
            stream=True,
            verify=verify,
            timeout=30,
        ) as response:
            response.raise_for_status()
            with open(output_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        print(f'File {output_path} downloaded successfully.')
    except RequestException as e:
        print(f"Error downloading file: {e}")
    except IOError as e:
        print(f"Error writing file {output_path}: {e}")
|
33 |
+
|
34 |
+
def get_file_structure(repo, token, path="", proxies=None, verify=True):
    """List one directory level of a Hugging Face repository.

    Args:
        repo: Repository identifier inserted into the API URL.
        token: Value sent as ``Authorization: Bearer <token>``.
        path: Directory inside the repository; ``""`` lists the root.
        proxies: Optional proxy mapping passed through to ``requests``.
        verify: TLS verification setting passed to ``requests``. Defaults to
            ``True`` so the token is never sent over an unverified connection.

    Returns:
        The decoded JSON body of the tree endpoint, or ``[]`` when the
        request fails (the error is printed).
    """
    from urllib.parse import quote

    # Percent-encode the path (keeping '/') so names containing '?', '#' or
    # spaces are not misread as query string or fragment.
    api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{quote(path)}"
    headers = {'Authorization': f'Bearer {token}'}
    print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
    try:
        response = requests.get(
            api_url,
            headers=headers,
            proxies=proxies,
            verify=verify,
            timeout=30,
        )
        response.raise_for_status()
        return response.json()
    except RequestException as e:
        print(f"Error fetching file structure: {e}")
        return []
|
45 |
+
|
46 |
+
def write_file_structure_to_json(file_structure, file_path):
    """Serialize *file_structure* to *file_path* as JSON indented by 2 spaces.

    The destination is opened in text write mode, replacing existing content.
    An ``IOError`` while opening or writing is printed and swallowed.
    """
    try:
        with open(file_path, 'w') as out:
            json.dump(file_structure, out, indent=2)
    except IOError as err:
        print(f"Error writing file structure to JSON: {err}")
        return
    print(f'File structure written to {file_path}')
|
indexer.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from hf_scraper import get_system_proxies, get_file_structure, write_file_structure_to_json
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import os
|
5 |
+
|
6 |
+
load_dotenv()
|
7 |
+
|
8 |
+
def index_repository(token, repo, current_path="", proxies=None):
    """Recursively list *repo* starting at *current_path*.

    Each entry returned by get_file_structure() whose ``type`` is
    ``"directory"`` is replaced by a dict with ``type``, ``path`` and
    ``contents`` (the recursive listing of that directory). Every other entry
    is kept exactly as returned.
    """
    def expand(entry):
        # Non-directories pass through untouched.
        if entry['type'] != 'directory':
            return entry
        children = index_repository(token, repo, entry['path'], proxies)
        return {
            "type": "directory",
            "path": entry['path'],
            "contents": children,
        }

    listing = get_file_structure(repo, token, current_path, proxies)
    return [expand(entry) for entry in listing]
|
22 |
+
|
23 |
+
def indexer():
    """Index the configured repository and write the result to disk.

    Reads ``TOKEN``, ``REPO`` and ``INDEX_FILE`` from the environment, walks
    the repository from its root through the system proxies, and writes the
    resulting structure to ``INDEX_FILE`` as JSON.
    """
    read_env = os.getenv
    token, repo, output_path = read_env("TOKEN"), read_env("REPO"), read_env("INDEX_FILE")

    system_proxies = get_system_proxies()
    tree = index_repository(token, repo, "", system_proxies)
    write_file_structure_to_json(tree, output_path)
    print(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")
|
32 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
flask
|
2 |
+
Flask-Cors
|
3 |
+
requests
|
4 |
+
python-dotenv
|
templates/index.html
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
<head>
|
4 |
+
<title>Media Library</title>
|
5 |
+
<style>
|
6 |
+
body {
|
7 |
+
font-family: Arial, sans-serif;
|
8 |
+
background-color: #141414;
|
9 |
+
color: #fff;
|
10 |
+
margin: 0;
|
11 |
+
padding: 0;
|
12 |
+
}
|
13 |
+
.header {
|
14 |
+
background-color: #000;
|
15 |
+
padding: 20px;
|
16 |
+
text-align: center;
|
17 |
+
}
|
18 |
+
.header h1 {
|
19 |
+
margin: 0;
|
20 |
+
font-size: 24px;
|
21 |
+
}
|
22 |
+
.content {
|
23 |
+
padding: 20px;
|
24 |
+
}
|
25 |
+
.section {
|
26 |
+
margin-bottom: 40px;
|
27 |
+
}
|
28 |
+
.section h2 {
|
29 |
+
margin-bottom: 20px;
|
30 |
+
}
|
31 |
+
.grid {
|
32 |
+
display: grid;
|
33 |
+
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
34 |
+
gap: 20px;
|
35 |
+
}
|
36 |
+
.card {
|
37 |
+
background-color: #333;
|
38 |
+
border-radius: 10px;
|
39 |
+
overflow: hidden;
|
40 |
+
text-align: center;
|
41 |
+
transition: transform 0.2s;
|
42 |
+
}
|
43 |
+
.card:hover {
|
44 |
+
transform: scale(1.05);
|
45 |
+
}
|
46 |
+
.card img {
|
47 |
+
width: 100%;
|
48 |
+
height: 300px;
|
49 |
+
object-fit: cover;
|
50 |
+
}
|
51 |
+
.card h3 {
|
52 |
+
margin: 0;
|
53 |
+
padding: 10px;
|
54 |
+
font-size: 18px;
|
55 |
+
}
|
56 |
+
.card p {
|
57 |
+
margin: 0;
|
58 |
+
padding: 10px;
|
59 |
+
}
|
60 |
+
.card a {
|
61 |
+
display: block;
|
62 |
+
padding: 10px;
|
63 |
+
color: #fff;
|
64 |
+
background-color: #e50914;
|
65 |
+
text-decoration: none;
|
66 |
+
border-radius: 0 0 10px 10px;
|
67 |
+
}
|
68 |
+
.card a:hover {
|
69 |
+
background-color: #f40612;
|
70 |
+
}
|
71 |
+
</style>
|
72 |
+
</head>
|
73 |
+
<body>
|
74 |
+
<div class="header">
|
75 |
+
<h1>Media Library</h1>
|
76 |
+
</div>
|
77 |
+
<div class="content">
|
78 |
+
<div class="section" id="films">
|
79 |
+
<h2>Films</h2>
|
80 |
+
<div class="grid" id="films-grid">
|
81 |
+
<!-- Film cards will be inserted here -->
|
82 |
+
</div>
|
83 |
+
</div>
|
84 |
+
<div class="section" id="tv">
|
85 |
+
<h2>TV Shows</h2>
|
86 |
+
<div class="grid" id="tv-grid">
|
87 |
+
<!-- TV show cards will be inserted here -->
|
88 |
+
</div>
|
89 |
+
</div>
|
90 |
+
</div>
|
91 |
+
<script>
|
92 |
+
// Fetch JSON from `endpoint`. Resolves to the parsed body, or to an empty
// array when the request fails or the response status is not OK.
async function fetchData(endpoint) {
    let payload = [];
    try {
        const response = await fetch(endpoint);
        if (response.ok) {
            payload = await response.json();
        } else {
            throw new Error('Network response was not ok');
        }
    } catch (error) {
        console.error('Fetch error:', error);
        payload = [];
    }
    return payload;
}
|
104 |
+
|
105 |
+
// Resolve the poster URL for `title`.
// Asks the server's /get_image endpoint; when it answers with JSON (its
// "no cached image" marker), with an error status, or the request fails,
// a placeholder image URL carrying the title text is returned instead.
async function fetchImage(title, mediaType) {
    // Encode the title so spaces, '&' and '#' cannot corrupt the placeholder URL.
    const placeholder = `https://via.placeholder.com/200x300?text=${encodeURIComponent(title)}`;
    try {
        const query = `title=${encodeURIComponent(title)}&type=${encodeURIComponent(mediaType)}`;
        const response = await fetch(`/get_image?${query}`);
        // Prefix match tolerates a "; charset=..." suffix on the header.
        const contentType = response.headers.get('content-type') || '';
        if (!response.ok || contentType.startsWith('application/json')) {
            return placeholder;
        }
        return response.url;
    } catch (error) {
        console.error('Image fetch error:', error);
        return placeholder;
    }
}
|
119 |
+
|
120 |
+
// Build the card element for one library entry.
// The card shows the poster and the title (last path segment). Entries with
// `contents` get an item count; all others get a "Play" link.
async function createCard(item) {
    const card = document.createElement('div');
    card.className = 'card';

    const title = item.path.split('/').pop();
    // Match the server's rule (path starts with "tv"); a substring test would
    // also flag film paths that merely contain the letters "tv".
    const mediaType = item.path.startsWith('tv') ? 'series' : 'movie';

    const img = document.createElement('img');
    img.src = await fetchImage(title, mediaType);
    card.appendChild(img);

    const titleElement = document.createElement('h3');
    titleElement.textContent = title;
    card.appendChild(titleElement);

    if (item.contents) {
        const p = document.createElement('p');
        p.textContent = `Contains ${item.contents.length} items`;
        card.appendChild(p);
    } else {
        const link = document.createElement('a');
        // Encode each segment separately so '/' stays a real path separator
        // rather than "%2F", which some servers and proxies refuse.
        const encodedPath = item.path.split('/').map(encodeURIComponent).join('/');
        link.href = `/play/${encodedPath}`;
        link.textContent = 'Play';
        card.appendChild(link);
    }

    return card;
}
|
147 |
+
|
148 |
+
// Load the entries served by `endpoint` and append one card per entry to the
// element with id `containerId`, in order, one at a time.
async function loadSection(endpoint, containerId) {
    const entries = await fetchData(endpoint);
    const grid = document.getElementById(containerId);
    for (const entry of entries) {
        const card = await createCard(entry);
        grid.appendChild(card);
    }
}
|
155 |
+
|
156 |
+
// Once the DOM is ready, start loading both sections without waiting for
// either one to finish.
document.addEventListener('DOMContentLoaded', function () {
    const sections = [
        ['/films', 'films-grid'],
        ['/tv', 'tv-grid'],
    ];
    sections.forEach(function (section) {
        loadSection(section[0], section[1]);
    });
});
|
160 |
+
</script>
|
161 |
+
</body>
|
162 |
+
</html>
|