ChandimaPrabath committed on
Commit
5efa5ee
·
1 Parent(s): 7942766
Files changed (6) hide show
  1. .gitignore +6 -0
  2. app.py +152 -0
  3. hf_scraper.py +52 -0
  4. indexer.py +32 -0
  5. requirements.txt +4 -0
  6. templates/index.html +162 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ #.env
2
+ .env
3
+ # cache
4
+ tmp
5
+ # pycache
6
+ __pycache__
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, jsonify, render_template, redirect, request, send_file
2
+ import os
3
+ import json
4
+ import requests
5
+ import urllib.parse
6
+ from datetime import datetime, timedelta
7
+ from threading import Thread
8
+ from hf_scraper import get_system_proxies
9
+ from indexer import indexer
10
+ from dotenv import load_dotenv
11
+
12
# Configuration is read from the process environment; load_dotenv() is called
# first so values from a .env file are available to os.getenv below.
load_dotenv()
INDEX_FILE = os.getenv("INDEX_FILE")
TOKEN = os.getenv("TOKEN")
REPO = os.getenv("REPO")
THETVDB_API_KEY = os.getenv("THETVDB_API_KEY")
THETVDB_API_URL = os.getenv("THETVDB_API_URL")
CACHE_DIR = os.getenv("CACHE_DIR")
# TheTVDB bearer token and its expiry; populated by authenticate_thetvdb().
TOKEN_EXPIRY = None
THETVDB_TOKEN = None
proxies = get_system_proxies()

# exist_ok=True replaces the racy "check, then create" sequence: two processes
# importing this module at the same time (e.g. the Flask reloader parent and
# child) can no longer fail with FileExistsError.
os.makedirs(CACHE_DIR, exist_ok=True)

# Build the repository index at import time; it is read back just below.
indexer()

# Check if INDEX_FILE exists
if not os.path.exists(INDEX_FILE):
    raise FileNotFoundError(f"{INDEX_FILE} not found. Please make sure the file exists.")

with open(INDEX_FILE, 'r') as f:
    file_structure = json.load(f)
34
+
35
def authenticate_thetvdb():
    """Log in to TheTVDB and store the bearer token in module globals.

    Posts the API key to ``{THETVDB_API_URL}/login``. On success
    ``THETVDB_TOKEN`` is set to the returned token and ``TOKEN_EXPIRY`` to
    30 days from now. On any failure (network error, HTTP error status,
    non-JSON body, or a payload without ``data.token``) both globals are
    reset to ``None`` and the error is printed; nothing is raised.
    """
    global THETVDB_TOKEN, TOKEN_EXPIRY
    auth_url = f"{THETVDB_API_URL}/login"
    auth_data = {"apikey": THETVDB_API_KEY}
    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.post(auth_url, json=auth_data, proxies=proxies, timeout=30)
        response.raise_for_status()
        # The response body is deliberately not printed: it contains the
        # bearer token, which must not end up in logs.
        THETVDB_TOKEN = response.json()['data']['token']
        TOKEN_EXPIRY = datetime.now() + timedelta(days=30)  # token is valid for 1 month
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        print(f"Authentication failed: {e}")
        THETVDB_TOKEN = None
        TOKEN_EXPIRY = None
52
+
53
def get_thetvdb_token():
    """Return the cached TheTVDB token, re-authenticating when needed.

    A new login is attempted when no token is stored or the stored expiry
    time has been reached. The return value is whatever
    ``authenticate_thetvdb`` left in ``THETVDB_TOKEN`` (``None`` if the
    login failed).
    """
    global THETVDB_TOKEN, TOKEN_EXPIRY
    token_is_fresh = bool(THETVDB_TOKEN) and datetime.now() < TOKEN_EXPIRY
    if not token_is_fresh:
        authenticate_thetvdb()
    return THETVDB_TOKEN
58
+
59
def fetch_and_cache_image(title, media_type, year=None):
    """Search TheTVDB for *title* and cache the first result's thumbnail.

    Args:
        title: Title to search for; also used (URL-quoted) as the cache
            file stem inside ``CACHE_DIR``.
        media_type: Value sent as the ``type`` search parameter.
        year: Optional year sent as the ``year`` search parameter.

    On success writes ``<quoted title>.jpg`` (thumbnail bytes) and
    ``<quoted title>.json`` (the full search response) into ``CACHE_DIR``.
    Returns ``None`` in every case; failures are printed, not raised.
    """
    token = get_thetvdb_token()
    if not token:
        print("Authentication failed")
        return

    # Let requests build the query string so titles containing spaces, '&',
    # '#', '+' etc. are encoded instead of corrupting the URL.
    params = {"query": title, "type": media_type}
    if year:
        params["year"] = year

    headers = {
        "Authorization": f"Bearer {token}",
        "accept": "application/json",
    }
    cache_stem = os.path.join(CACHE_DIR, urllib.parse.quote(title))

    try:
        response = requests.get(
            f"{THETVDB_API_URL}/search",
            params=params,
            headers=headers,
            proxies=proxies,
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()

        results = data.get('data') if isinstance(data, dict) else None
        if not results:
            return
        img_url = results[0].get('thumbnail')
        if not img_url:
            return

        img_response = requests.get(img_url, proxies=proxies, timeout=30)
        # Without this check an HTTP error page would be cached as a .jpg.
        img_response.raise_for_status()

        with open(f"{cache_stem}.jpg", 'wb') as f:
            f.write(img_response.content)
        # Save JSON response to cache
        with open(f"{cache_stem}.json", 'w') as f:
            json.dump(data, f)
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a search response that is not valid JSON.
        print(f"Error fetching data: {e}")
    except OSError as e:
        print(f"Error writing cache files for {title!r}: {e}")
93
+
94
def prefetch_images():
    """Fetch and cache a thumbnail for every entry one level below the root.

    For each top-level entry of ``file_structure`` that has ``contents``,
    every child is looked up via ``fetch_and_cache_image``. The title is the
    last path segment; the media type is ``'series'`` when the parent path
    starts with ``'tv'`` and ``'movie'`` otherwise. A trailing four-digit
    word is split off the title and passed as the year.
    """
    for item in file_structure:
        if 'contents' not in item:
            continue
        media_type = 'series' if item['path'].startswith('tv') else 'movie'
        for sub_item in item['contents']:
            title = sub_item['path'].split('/')[-1]
            year = None
            parts = title.split()
            # Only strip the year when something is left to search for:
            # a title such as "1917" or "2012" must stay the title rather
            # than become an empty query. isdecimal() (not isdigit()) keeps
            # the check in line with what int() actually accepts.
            if len(parts) > 1 and len(parts[-1]) == 4 and parts[-1].isdecimal():
                title = ' '.join(parts[:-1])
                year = int(parts[-1])
            fetch_and_cache_image(title, media_type, year)
109
+
110
# Thread entry point for the background image prefetch.
def start_prefetching():
    """Run ``prefetch_images``; used as the target of the prefetch thread."""
    prefetch_images()


# Kick off prefetching before the Flask app is created. The thread is a
# daemon so it does not keep the process alive on shutdown.
thread = Thread(target=start_prefetching, daemon=True)
thread.start()
118
+
119
app = Flask(__name__)


@app.route('/')
def home():
    """Render ``index.html`` for the site root."""
    page = render_template('index.html')
    return page
124
+
125
@app.route('/films')
def list_films():
    """Return, as JSON, the children of every top-level entry under 'films'.

    Entries are selected by ``path`` prefix ``'films'``; their ``contents``
    lists are concatenated in index order.
    """
    entries = []
    for item in file_structure:
        if item['path'].startswith('films'):
            # Entries without 'contents' (e.g. a plain file whose name merely
            # starts with "films") are skipped instead of raising KeyError.
            entries.extend(item.get('contents', []))
    return jsonify(entries)
129
+
130
@app.route('/tv')
def list_tv():
    """Return, as JSON, the children of every top-level entry under 'tv'.

    Entries are selected by ``path`` prefix ``'tv'``; their ``contents``
    lists are concatenated in index order.
    """
    entries = []
    for item in file_structure:
        if item['path'].startswith('tv'):
            # Entries without 'contents' (e.g. a plain file whose name merely
            # starts with "tv") are skipped instead of raising KeyError.
            entries.extend(item.get('contents', []))
    return jsonify(entries)
134
+
135
@app.route('/play/<path:file_path>')
def play(file_path):
    """Redirect to the Hugging Face download URL for *file_path* in ``REPO``.

    The path is percent-encoded (slashes kept) so names containing spaces,
    '#', '?' or non-ASCII characters still produce a valid URL that points
    at the intended file.
    """
    encoded_path = urllib.parse.quote(file_path)
    file_url = f"https://huggingface.co/{REPO}/resolve/main/{encoded_path}"
    return redirect(file_url)
139
+
140
@app.route('/get_image')
def get_image():
    """Serve the cached thumbnail for the ``title`` query parameter.

    Returns the JPEG when a cache file exists, a JSON placeholder marker
    when it does not, and a 400 JSON error when ``title`` is missing.
    """
    title = request.args.get('title')
    if title is None:
        # Previously this crashed inside urllib.parse.quote(None) (HTTP 500).
        return jsonify({'error': 'missing "title" query parameter'}), 400

    # prefetch_images() caches under the title with a trailing 4-digit year
    # removed, while clients send the full name; try both spellings.
    candidates = [title]
    parts = title.split()
    if len(parts) > 1 and len(parts[-1]) == 4 and parts[-1].isdigit():
        candidates.append(' '.join(parts[:-1]))

    for candidate in candidates:
        # safe='' also escapes '/', so a crafted title cannot address files
        # outside CACHE_DIR. For titles without '/', the file name is the
        # same as the one written by fetch_and_cache_image().
        file_name = f"{urllib.parse.quote(candidate, safe='')}.jpg"
        # send_file() resolves relative paths against the application root,
        # not the working directory, so hand it an absolute path.
        cache_path = os.path.abspath(os.path.join(CACHE_DIR, file_name))
        if os.path.isfile(cache_path):
            return send_file(cache_path, mimetype='image/jpeg')

    # If image is not found in cache, return a placeholder
    return jsonify({'poster': 'placeholder'})
150
+
151
if __name__ == '__main__':
    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it must not be on by default while listening on every interface.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)
hf_scraper.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import json
4
+ import urllib.request
5
+ from requests.exceptions import RequestException
6
+
7
def get_system_proxies():
    """Return the system proxy settings in the form ``requests`` expects.

    Returns:
        A dict with ``"http"`` and ``"https"`` keys taken from
        ``urllib.request.getproxies()``. The HTTPS entry falls back to the
        HTTP proxy when no HTTPS proxy is configured. Values are ``None``
        when nothing is configured. An empty dict is returned if the lookup
        itself fails.
    """
    try:
        proxies = urllib.request.getproxies()
        print("System proxies:", proxies)
        http_proxy = proxies.get("http")
        return {
            "http": http_proxy,
            # Previously this always read the "http" key, silently ignoring
            # a configured HTTPS proxy.
            "https": proxies.get("https") or http_proxy,
        }
    except Exception as e:
        print(f"Error getting system proxies: {e}")
        return {}
18
+
19
def download_file(file_url, token, output_path, proxies, verify=True):
    """Stream *file_url* to *output_path*, authenticating with *token*.

    Args:
        file_url: URL to download.
        token: Bearer token sent in the ``Authorization`` header.
        output_path: Destination file; opened in binary write mode.
        proxies: Proxy mapping passed to ``requests``.
        verify: TLS verification setting passed to ``requests``. Defaults to
            ``True`` so the bearer token is never sent to an unverified
            host; pass ``False`` (or a CA bundle path) only when that is
            really required.

    Errors are printed, not raised. A failed download may leave a partial
    file at *output_path*.
    """
    print(f"Downloading file from URL: {file_url} with proxies: {proxies}")
    try:
        # The context manager releases the connection even on error; the
        # timeout keeps a stalled server from hanging the call forever.
        with requests.get(
            file_url,
            headers={'Authorization': f'Bearer {token}'},
            proxies=proxies,
            stream=True,
            verify=verify,
            timeout=30,
        ) as response:
            response.raise_for_status()
            with open(output_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        print(f'File {output_path} downloaded successfully.')
    except RequestException as e:
        print(f"Error downloading file: {e}")
    except IOError as e:
        print(f"Error writing file {output_path}: {e}")
33
+
34
def get_file_structure(repo, token, path="", proxies=None, verify=True):
    """List one directory level of a Hugging Face model repository.

    Args:
        repo: Repository id inserted into the API URL.
        token: Bearer token sent in the ``Authorization`` header.
        path: Directory inside the repository; ``""`` is the root.
        proxies: Optional proxy mapping passed to ``requests``.
        verify: TLS verification setting passed to ``requests``. Defaults to
            ``True`` so the bearer token is never sent to an unverified host.

    Returns:
        The decoded JSON response (callers iterate it as a list of entries),
        or ``[]`` when the request fails.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # Percent-encode the path (slashes kept) so names containing '#', '?'
    # or spaces address the intended directory.
    api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{quote(path)}"
    headers = {'Authorization': f'Bearer {token}'}
    print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
    try:
        response = requests.get(
            api_url,
            headers=headers,
            proxies=proxies,
            verify=verify,
            timeout=30,
        )
        response.raise_for_status()
        return response.json()
    except (RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error fetching file structure: {e}")
        return []
45
+
46
def write_file_structure_to_json(file_structure, file_path):
    """Dump *file_structure* to *file_path* as JSON indented by two spaces.

    I/O errors are printed rather than raised.
    """
    try:
        with open(file_path, mode='w') as out:
            json.dump(file_structure, out, indent=2)
        print(f'File structure written to {file_path}')
    except IOError as exc:
        print(f"Error writing file structure to JSON: {exc}")
indexer.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from hf_scraper import get_system_proxies, get_file_structure, write_file_structure_to_json
3
+ from dotenv import load_dotenv
4
+ import os
5
+
6
+ load_dotenv()
7
+
8
def index_repository(token, repo, current_path="", proxies=None):
    """Recursively collect the file tree of *repo* starting at *current_path*.

    Each directory entry becomes ``{"type": "directory", "path": ...,
    "contents": [...]}`` with its children indexed the same way; every other
    entry is kept exactly as returned by ``get_file_structure``.
    """
    listing = get_file_structure(repo, token, current_path, proxies)
    tree = []
    for entry in listing:
        if entry['type'] != 'directory':
            tree.append(entry)
            continue
        tree.append({
            "type": "directory",
            "path": entry['path'],
            "contents": index_repository(token, repo, entry['path'], proxies),
        })
    return tree
22
+
23
def indexer():
    """Index the configured repository and write the result to disk.

    Reads ``TOKEN``, ``REPO`` and ``INDEX_FILE`` from the environment, walks
    the repository from its root using the system proxies, and saves the
    resulting structure as JSON at ``INDEX_FILE``.
    """
    token, repo, output_path = (
        os.getenv(name) for name in ("TOKEN", "REPO", "INDEX_FILE")
    )

    system_proxies = get_system_proxies()
    tree = index_repository(token, repo, "", system_proxies)
    write_file_structure_to_json(tree, output_path)
    print(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")
32
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ flask
2
+ Flask-Cors
3
+ requests
4
+ python-dotenv
templates/index.html ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Media Library</title>
5
+ <style>
6
+ body {
7
+ font-family: Arial, sans-serif;
8
+ background-color: #141414;
9
+ color: #fff;
10
+ margin: 0;
11
+ padding: 0;
12
+ }
13
+ .header {
14
+ background-color: #000;
15
+ padding: 20px;
16
+ text-align: center;
17
+ }
18
+ .header h1 {
19
+ margin: 0;
20
+ font-size: 24px;
21
+ }
22
+ .content {
23
+ padding: 20px;
24
+ }
25
+ .section {
26
+ margin-bottom: 40px;
27
+ }
28
+ .section h2 {
29
+ margin-bottom: 20px;
30
+ }
31
+ .grid {
32
+ display: grid;
33
+ grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
34
+ gap: 20px;
35
+ }
36
+ .card {
37
+ background-color: #333;
38
+ border-radius: 10px;
39
+ overflow: hidden;
40
+ text-align: center;
41
+ transition: transform 0.2s;
42
+ }
43
+ .card:hover {
44
+ transform: scale(1.05);
45
+ }
46
+ .card img {
47
+ width: 100%;
48
+ height: 300px;
49
+ object-fit: cover;
50
+ }
51
+ .card h3 {
52
+ margin: 0;
53
+ padding: 10px;
54
+ font-size: 18px;
55
+ }
56
+ .card p {
57
+ margin: 0;
58
+ padding: 10px;
59
+ }
60
+ .card a {
61
+ display: block;
62
+ padding: 10px;
63
+ color: #fff;
64
+ background-color: #e50914;
65
+ text-decoration: none;
66
+ border-radius: 0 0 10px 10px;
67
+ }
68
+ .card a:hover {
69
+ background-color: #f40612;
70
+ }
71
+ </style>
72
+ </head>
73
+ <body>
74
+ <div class="header">
75
+ <h1>Media Library</h1>
76
+ </div>
77
+ <div class="content">
78
+ <div class="section" id="films">
79
+ <h2>Films</h2>
80
+ <div class="grid" id="films-grid">
81
+ <!-- Film cards will be inserted here -->
82
+ </div>
83
+ </div>
84
+ <div class="section" id="tv">
85
+ <h2>TV Shows</h2>
86
+ <div class="grid" id="tv-grid">
87
+ <!-- TV show cards will be inserted here -->
88
+ </div>
89
+ </div>
90
+ </div>
91
+ <script>
92
// Fetch JSON from `endpoint`. Resolves to the parsed body, or to an empty
// array when the request fails or the status is not OK (the error is logged).
async function fetchData(endpoint) {
    try {
        const res = await fetch(endpoint);
        if (res.ok) {
            return await res.json();
        }
        throw new Error('Network response was not ok');
    } catch (err) {
        console.error('Fetch error:', err);
        return [];
    }
}
104
+
105
// Resolve the poster URL for `title`.
// Returns the /get_image URL when the server answers with an image, and a
// placeholder URL when it answers with JSON, with an error status, or when
// the request itself fails.
async function fetchImage(title, mediaType) {
    // Encode the title so characters such as '&', '#' or spaces do not break
    // the placeholder's query string.
    const placeholder = `https://via.placeholder.com/200x300?text=${encodeURIComponent(title)}`;
    try {
        const response = await fetch(
            `/get_image?title=${encodeURIComponent(title)}&type=${encodeURIComponent(mediaType)}`
        );
        // Use includes() rather than ===, so a header carrying parameters
        // (e.g. "application/json; charset=utf-8") is still recognised.
        const contentType = response.headers.get('content-type') || '';
        if (!response.ok || contentType.includes('application/json')) {
            return placeholder;
        }
        return response.url;
    } catch (error) {
        console.error('Image fetch error:', error);
        return placeholder;
    }
}
119
+
120
// Build the card element for one library entry.
// `item.path` supplies the title (last path segment). Entries with
// `contents` show an item count; all others get a "Play" link.
async function createCard(item) {
    const card = document.createElement('div');
    card.className = 'card';

    const title = item.path.split('/').pop();
    // Classify by path prefix, as the server does. includes('tv') also
    // matched any film whose name merely contains "tv".
    const mediaType = item.path.startsWith('tv') ? 'series' : 'movie';
    const img = document.createElement('img');
    img.alt = title;
    img.src = await fetchImage(title, mediaType);
    card.appendChild(img);

    const titleElement = document.createElement('h3');
    titleElement.textContent = title;
    card.appendChild(titleElement);

    if (item.contents) {
        const p = document.createElement('p');
        p.textContent = `Contains ${item.contents.length} items`;
        card.appendChild(p);
    } else {
        const link = document.createElement('a');
        // Encode each segment separately so '/' stays a path separator
        // instead of becoming %2F, which some servers and proxies reject.
        const encodedPath = item.path.split('/').map(encodeURIComponent).join('/');
        link.href = `/play/${encodedPath}`;
        link.textContent = 'Play';
        card.appendChild(link);
    }

    return card;
}
147
+
148
// Load the entries from `endpoint` and append one card per entry, in order,
// to the element with id `containerId`.
async function loadSection(endpoint, containerId) {
    const items = await fetchData(endpoint);
    const grid = document.getElementById(containerId);
    for (const entry of items) {
        const card = await createCard(entry);
        grid.appendChild(card);
    }
}
155
+
156
// Populate both grids once the DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
    const sections = [
        ['/films', 'films-grid'],
        ['/tv', 'tv-grid'],
    ];
    for (const [endpoint, gridId] of sections) {
        loadSection(endpoint, gridId);
    }
});
160
+ </script>
161
+ </body>
162
+ </html>