Spaces:
Paused
Paused
Commit
·
2ab5f89
1
Parent(s):
ac28d89
create tvdb.py
Browse files
README.md
CHANGED
@@ -15,6 +15,7 @@ pinned: false
|
|
15 |
### app.py -> main script that run flask server
|
16 |
### hf_scrapper.py -> script for interacting with huggingface
|
17 |
### indexer.py script to index the repo structure
|
|
|
18 |
|
19 |
## Templates
|
20 |
|
|
|
15 |
### app.py -> main script that run flask server
|
16 |
### hf_scrapper.py -> script for interacting with huggingface
|
17 |
### indexer.py script to index the repo structure
|
18 |
+
### tvdb.py -> script to interact with TheTVDB
|
19 |
|
20 |
## Templates
|
21 |
|
app.py
CHANGED
@@ -1,92 +1,32 @@
|
|
1 |
-
|
|
|
2 |
import os
|
3 |
import json
|
4 |
import re
|
5 |
-
import requests
|
6 |
import urllib.parse
|
7 |
-
from datetime import datetime, timedelta
|
8 |
from threading import Thread
|
9 |
-
from hf_scrapper import
|
10 |
from indexer import indexer
|
11 |
from dotenv import load_dotenv
|
12 |
-
import
|
13 |
|
14 |
load_dotenv()
|
15 |
INDEX_FILE = os.getenv("INDEX_FILE")
|
16 |
TOKEN = os.getenv("TOKEN")
|
17 |
REPO = os.getenv("REPO")
|
18 |
-
THETVDB_API_KEY = os.getenv("THETVDB_API_KEY")
|
19 |
-
THETVDB_API_URL = os.getenv("THETVDB_API_URL")
|
20 |
CACHE_DIR = os.getenv("CACHE_DIR")
|
21 |
-
TOKEN_EXPIRY = None
|
22 |
-
THETVDB_TOKEN = None
|
23 |
-
proxies = get_system_proxies()
|
24 |
|
25 |
if not os.path.exists(CACHE_DIR):
|
26 |
os.makedirs(CACHE_DIR)
|
27 |
|
28 |
indexer()
|
29 |
|
30 |
-
# Check if INDEX_FILE exists
|
31 |
if not os.path.exists(INDEX_FILE):
|
32 |
raise FileNotFoundError(f"{INDEX_FILE} not found. Please make sure the file exists.")
|
33 |
|
34 |
with open(INDEX_FILE, 'r') as f:
|
35 |
file_structure = json.load(f)
|
36 |
|
37 |
-
def authenticate_thetvdb():
|
38 |
-
global THETVDB_TOKEN, TOKEN_EXPIRY
|
39 |
-
auth_url = f"{THETVDB_API_URL}/login"
|
40 |
-
auth_data = {
|
41 |
-
"apikey": THETVDB_API_KEY
|
42 |
-
}
|
43 |
-
try:
|
44 |
-
response = requests.post(auth_url, json=auth_data, proxies=proxies)
|
45 |
-
response.raise_for_status()
|
46 |
-
response_data = response.json()
|
47 |
-
THETVDB_TOKEN = response_data['data']['token']
|
48 |
-
TOKEN_EXPIRY = datetime.now() + timedelta(days=30) # token is valid for 1 month
|
49 |
-
except requests.RequestException as e:
|
50 |
-
print(f"Authentication failed: {e}")
|
51 |
-
THETVDB_TOKEN = None
|
52 |
-
TOKEN_EXPIRY = None
|
53 |
-
|
54 |
-
def get_thetvdb_token():
|
55 |
-
global THETVDB_TOKEN, TOKEN_EXPIRY
|
56 |
-
if not THETVDB_TOKEN or datetime.now() >= TOKEN_EXPIRY:
|
57 |
-
authenticate_thetvdb()
|
58 |
-
return THETVDB_TOKEN
|
59 |
-
|
60 |
-
def fetch_and_cache_json(original_title, title, media_type, year=None):
|
61 |
-
if year:
|
62 |
-
search_url = f"{THETVDB_API_URL}/search?query={urllib.parse.quote(title)}&type={media_type}&year={year}"
|
63 |
-
else:
|
64 |
-
search_url = f"{THETVDB_API_URL}/search?query={urllib.parse.quote(title)}&type={media_type}"
|
65 |
-
|
66 |
-
token = get_thetvdb_token()
|
67 |
-
if not token:
|
68 |
-
print("Authentication failed")
|
69 |
-
return
|
70 |
-
|
71 |
-
headers = {
|
72 |
-
"Authorization": f"Bearer {token}",
|
73 |
-
"accept": "application/json",
|
74 |
-
}
|
75 |
-
|
76 |
-
try:
|
77 |
-
response = requests.get(search_url, headers=headers, proxies=proxies)
|
78 |
-
response.raise_for_status()
|
79 |
-
data = response.json()
|
80 |
-
|
81 |
-
if 'data' in data and data['data']:
|
82 |
-
# Use original_title to save JSON response to cache
|
83 |
-
json_cache_path = os.path.join(CACHE_DIR, f"{urllib.parse.quote(original_title)}.json")
|
84 |
-
with open(json_cache_path, 'w') as f:
|
85 |
-
json.dump(data, f)
|
86 |
-
|
87 |
-
except requests.RequestException as e:
|
88 |
-
print(f"Error fetching data: {e}")
|
89 |
-
|
90 |
def prefetch_metadata():
|
91 |
for item in file_structure:
|
92 |
if 'contents' in item:
|
@@ -96,7 +36,6 @@ def prefetch_metadata():
|
|
96 |
title = original_title
|
97 |
year = None
|
98 |
|
99 |
-
# Check if the title contains a year in parentheses
|
100 |
match = re.search(r'\((\d{4})\)', original_title)
|
101 |
if match:
|
102 |
year_str = match.group(1)
|
@@ -104,7 +43,6 @@ def prefetch_metadata():
|
|
104 |
title = original_title[:match.start()].strip()
|
105 |
year = int(year_str)
|
106 |
else:
|
107 |
-
# Check if the title contains a year at the end without parentheses
|
108 |
parts = original_title.rsplit(' ', 1)
|
109 |
if len(parts) > 1 and parts[-1].isdigit() and len(parts[-1]) == 4:
|
110 |
title = parts[0].strip()
|
@@ -113,19 +51,14 @@ def prefetch_metadata():
|
|
113 |
fetch_and_cache_json(original_title, title, media_type, year)
|
114 |
|
115 |
def get_film_file_path(title):
|
116 |
-
# URL-decode the title
|
117 |
decoded_title = urllib.parse.unquote(title)
|
118 |
-
|
119 |
-
normalized_title = decoded_title.split(' (')[0]
|
120 |
-
normalized_title = normalized_title.strip()
|
121 |
|
122 |
for item in file_structure:
|
123 |
if item['path'].startswith('films'):
|
124 |
for sub_item in item['contents']:
|
125 |
sub_item_title = sub_item['path'].split('/')[-1]
|
126 |
-
|
127 |
-
normalized_sub_item_title = sub_item_title.split(' (')[0]
|
128 |
-
normalized_sub_item_title = normalized_sub_item_title.strip()
|
129 |
|
130 |
if normalized_title == normalized_sub_item_title:
|
131 |
for file in sub_item['contents']:
|
@@ -155,15 +88,13 @@ def get_tv_show_seasons(title):
|
|
155 |
return seasons
|
156 |
return []
|
157 |
|
158 |
-
# Run prefetch_metadata in a background thread
|
159 |
def start_prefetching():
|
160 |
prefetch_metadata()
|
161 |
|
162 |
def generate(file_url):
|
163 |
-
token = TOKEN
|
164 |
output_stream = ffmpeg_stream(file_url, token)
|
165 |
|
166 |
-
# Start prefetching before running the Flask app
|
167 |
thread = Thread(target=start_prefetching)
|
168 |
thread.daemon = True
|
169 |
thread.start()
|
@@ -190,12 +121,11 @@ def list_tv():
|
|
190 |
|
191 |
@app.route('/film/<path:title>')
|
192 |
def film_page(title):
|
193 |
-
title = urllib.parse.unquote(title)
|
194 |
film_file_path = get_film_file_path(title)
|
195 |
if not film_file_path:
|
196 |
return jsonify({'error': 'Film not found'}), 404
|
197 |
|
198 |
-
# Fetch cached metadata
|
199 |
json_cache_path = os.path.join(CACHE_DIR, f"{urllib.parse.quote(title)}.json")
|
200 |
if os.path.exists(json_cache_path):
|
201 |
with open(json_cache_path, 'r') as f:
|
@@ -210,12 +140,11 @@ def film_page(title):
|
|
210 |
|
211 |
@app.route('/tv/<path:show_title>')
|
212 |
def tv_page(show_title):
|
213 |
-
show_title = urllib.parse.unquote(show_title)
|
214 |
seasons = get_tv_show_seasons(show_title)
|
215 |
if not seasons:
|
216 |
return jsonify({'error': 'TV show not found'}), 404
|
217 |
|
218 |
-
# Fetch cached metadata
|
219 |
json_cache_path = os.path.join(CACHE_DIR, f"{urllib.parse.quote(show_title)}.json")
|
220 |
if os.path.exists(json_cache_path):
|
221 |
with open(json_cache_path, 'r') as f:
|
@@ -241,19 +170,16 @@ def get_metadata():
|
|
241 |
data = json.load(f)
|
242 |
return jsonify(data)
|
243 |
|
244 |
-
# If metadata is not found in cache, return an error
|
245 |
return jsonify({'error': 'Metadata not found'}), 404
|
246 |
|
247 |
@app.route('/stream')
|
248 |
def stream_video():
|
249 |
-
# this route currently only stream the file from huggingface using ffmpy. can't play them in the web yet. need to implement later.
|
250 |
file_path = request.args.get('path')
|
251 |
if not file_path:
|
252 |
return "File path not provided", 400
|
253 |
|
254 |
file_url = f"https://huggingface.co/{REPO}/resolve/main/{file_path}"
|
255 |
-
|
256 |
-
return Response(generate(file_url), content_type='video/mp4')
|
257 |
|
258 |
if __name__ == '__main__':
|
259 |
-
app.run(debug=True
|
|
|
1 |
+
# app.py
|
2 |
+
from flask import Flask, jsonify, render_template, request, Response
|
3 |
import os
|
4 |
import json
|
5 |
import re
|
|
|
6 |
import urllib.parse
|
|
|
7 |
from threading import Thread
|
8 |
+
from hf_scrapper import ffmpeg_stream
|
9 |
from indexer import indexer
|
10 |
from dotenv import load_dotenv
|
11 |
+
from tvdb import fetch_and_cache_json
|
12 |
|
13 |
load_dotenv()
|
14 |
INDEX_FILE = os.getenv("INDEX_FILE")
|
15 |
TOKEN = os.getenv("TOKEN")
|
16 |
REPO = os.getenv("REPO")
|
|
|
|
|
17 |
CACHE_DIR = os.getenv("CACHE_DIR")
|
|
|
|
|
|
|
18 |
|
19 |
if not os.path.exists(CACHE_DIR):
|
20 |
os.makedirs(CACHE_DIR)
|
21 |
|
22 |
indexer()
|
23 |
|
|
|
24 |
if not os.path.exists(INDEX_FILE):
|
25 |
raise FileNotFoundError(f"{INDEX_FILE} not found. Please make sure the file exists.")
|
26 |
|
27 |
with open(INDEX_FILE, 'r') as f:
|
28 |
file_structure = json.load(f)
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
def prefetch_metadata():
|
31 |
for item in file_structure:
|
32 |
if 'contents' in item:
|
|
|
36 |
title = original_title
|
37 |
year = None
|
38 |
|
|
|
39 |
match = re.search(r'\((\d{4})\)', original_title)
|
40 |
if match:
|
41 |
year_str = match.group(1)
|
|
|
43 |
title = original_title[:match.start()].strip()
|
44 |
year = int(year_str)
|
45 |
else:
|
|
|
46 |
parts = original_title.rsplit(' ', 1)
|
47 |
if len(parts) > 1 and parts[-1].isdigit() and len(parts[-1]) == 4:
|
48 |
title = parts[0].strip()
|
|
|
51 |
fetch_and_cache_json(original_title, title, media_type, year)
|
52 |
|
53 |
def get_film_file_path(title):
|
|
|
54 |
decoded_title = urllib.parse.unquote(title)
|
55 |
+
normalized_title = decoded_title.split(' (')[0].strip()
|
|
|
|
|
56 |
|
57 |
for item in file_structure:
|
58 |
if item['path'].startswith('films'):
|
59 |
for sub_item in item['contents']:
|
60 |
sub_item_title = sub_item['path'].split('/')[-1]
|
61 |
+
normalized_sub_item_title = sub_item_title.split(' (')[0].strip()
|
|
|
|
|
62 |
|
63 |
if normalized_title == normalized_sub_item_title:
|
64 |
for file in sub_item['contents']:
|
|
|
88 |
return seasons
|
89 |
return []
|
90 |
|
|
|
91 |
def start_prefetching():
    """Entry point for the background thread: warm the metadata cache."""
    prefetch_metadata()
|
93 |
|
94 |
def generate(file_url):
    """Return the ffmpeg output stream for *file_url*, authenticated
    with the HuggingFace TOKEN.

    Bug fix: the original computed ``output_stream`` but never returned
    it, so the /stream route's ``Response(generate(file_url), ...)``
    received None and streamed nothing.
    """
    return ffmpeg_stream(file_url, TOKEN)
|
97 |
|
|
|
98 |
thread = Thread(target=start_prefetching)
|
99 |
thread.daemon = True
|
100 |
thread.start()
|
|
|
121 |
|
122 |
@app.route('/film/<path:title>')
|
123 |
def film_page(title):
|
124 |
+
title = urllib.parse.unquote(title)
|
125 |
film_file_path = get_film_file_path(title)
|
126 |
if not film_file_path:
|
127 |
return jsonify({'error': 'Film not found'}), 404
|
128 |
|
|
|
129 |
json_cache_path = os.path.join(CACHE_DIR, f"{urllib.parse.quote(title)}.json")
|
130 |
if os.path.exists(json_cache_path):
|
131 |
with open(json_cache_path, 'r') as f:
|
|
|
140 |
|
141 |
@app.route('/tv/<path:show_title>')
|
142 |
def tv_page(show_title):
|
143 |
+
show_title = urllib.parse.unquote(show_title)
|
144 |
seasons = get_tv_show_seasons(show_title)
|
145 |
if not seasons:
|
146 |
return jsonify({'error': 'TV show not found'}), 404
|
147 |
|
|
|
148 |
json_cache_path = os.path.join(CACHE_DIR, f"{urllib.parse.quote(show_title)}.json")
|
149 |
if os.path.exists(json_cache_path):
|
150 |
with open(json_cache_path, 'r') as f:
|
|
|
170 |
data = json.load(f)
|
171 |
return jsonify(data)
|
172 |
|
|
|
173 |
return jsonify({'error': 'Metadata not found'}), 404
|
174 |
|
175 |
@app.route('/stream')
def stream_video():
    """Stream a repository file through ffmpeg.

    NOTE(review): this pipes raw ffmpeg output only; in-browser
    playback is not implemented yet.
    """
    file_path = request.args.get('path')
    if file_path is None or file_path == "":
        return "File path not provided", 400

    file_url = f"https://huggingface.co/{REPO}/resolve/main/{file_path}"
    return Response(generate(file_url), mimetype="video/mp4")
|
|
|
183 |
|
184 |
if __name__ == '__main__':
    # Development server only; use a production WSGI server when deployed.
    app.run(debug=True)
|
tvdb.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tvdb.py
|
2 |
+
import os
|
3 |
+
import requests
|
4 |
+
import urllib.parse
|
5 |
+
from datetime import datetime, timedelta
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import json
|
8 |
+
from hf_scrapper import get_system_proxies
|
9 |
+
|
10 |
+
load_dotenv()
|
11 |
+
THETVDB_API_KEY = os.getenv("THETVDB_API_KEY")
|
12 |
+
THETVDB_API_URL = os.getenv("THETVDB_API_URL")
|
13 |
+
CACHE_DIR = os.getenv("CACHE_DIR")
|
14 |
+
TOKEN_EXPIRY = None
|
15 |
+
THETVDB_TOKEN = None
|
16 |
+
|
17 |
+
|
18 |
+
proxies = get_system_proxies()
|
19 |
+
|
20 |
+
def authenticate_thetvdb():
    """Log in to TheTVDB and refresh the module-level token.

    On success sets THETVDB_TOKEN and TOKEN_EXPIRY; on any failure
    resets both to None so get_thetvdb_token() retries on the next call.
    Returns None in either case.
    """
    global THETVDB_TOKEN, TOKEN_EXPIRY
    auth_url = f"{THETVDB_API_URL}/login"
    auth_data = {
        "apikey": THETVDB_API_KEY
    }
    try:
        response = requests.post(auth_url, json=auth_data, proxies=proxies)
        response.raise_for_status()
        response_data = response.json()
        THETVDB_TOKEN = response_data['data']['token']
        # Token is valid for one month per TheTVDB's API; refresh after 30 days.
        TOKEN_EXPIRY = datetime.now() + timedelta(days=30)
    except (requests.RequestException, KeyError, ValueError) as e:
        # Widened from RequestException alone: KeyError covers an
        # unexpected response shape ('data'/'token' missing) and
        # ValueError covers a non-JSON body from response.json() —
        # previously these crashed the caller (a background thread).
        print(f"Authentication failed: {e}")
        THETVDB_TOKEN = None
        TOKEN_EXPIRY = None
|
36 |
+
|
37 |
+
def get_thetvdb_token():
    """Return the cached TheTVDB token, re-authenticating when it is
    missing or past TOKEN_EXPIRY. May return None if login fails."""
    global THETVDB_TOKEN, TOKEN_EXPIRY
    token_is_fresh = bool(THETVDB_TOKEN) and datetime.now() < TOKEN_EXPIRY
    if token_is_fresh:
        return THETVDB_TOKEN
    authenticate_thetvdb()
    return THETVDB_TOKEN
|
42 |
+
|
43 |
+
def fetch_and_cache_json(original_title, title, media_type, year=None):
    """Search TheTVDB for *title* and cache the raw JSON response.

    Args:
        original_title: title exactly as it appears in the repo index;
            used (URL-quoted) as the cache-file name so that lookups by
            the web routes match byte-for-byte.
        title: cleaned title (year stripped) used as the search query.
        media_type: TheTVDB search ``type`` value (e.g. movie/series
            — exact accepted values per TheTVDB's API; not validated here).
        year: optional release year to narrow the search.

    Returns None. On success the response is written to
    CACHE_DIR/<quoted original_title>.json; on failure the error is
    printed and nothing is cached.
    """
    # Let requests encode the query string instead of hand-assembling
    # it — the original only percent-quoted `title`, leaving media_type
    # and year unencoded.
    params = {"query": title, "type": media_type}
    if year:
        params["year"] = year

    token = get_thetvdb_token()
    if not token:
        print("Authentication failed")
        return

    headers = {
        "Authorization": f"Bearer {token}",
        "accept": "application/json",
    }

    try:
        response = requests.get(
            f"{THETVDB_API_URL}/search",
            params=params,
            headers=headers,
            proxies=proxies,
        )
        response.raise_for_status()
        data = response.json()

        # Only cache non-empty results; an empty 'data' list means no match.
        if 'data' in data and data['data']:
            # Quote the *original* title so the app's routes, which quote
            # the same way, can find the cache file.
            json_cache_path = os.path.join(
                CACHE_DIR, f"{urllib.parse.quote(original_title)}.json"
            )
            with open(json_cache_path, 'w') as f:
                json.dump(data, f)

    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body from response.json(), which
        # previously escaped the handler and crashed the prefetch thread.
        print(f"Error fetching data: {e}")
|