Spaces:
Paused
Paused
import json | |
from hf_scrapper import get_system_proxies, get_file_structure, write_file_structure_to_json | |
from dotenv import load_dotenv | |
import os | |
load_dotenv() | |
def index_repository(token, repo, current_path="", proxies=None):
    """Recursively build a nested listing of a repository's contents.

    Entries for ``current_path`` are obtained from ``get_file_structure``.
    Each entry whose ``'type'`` is ``'directory'`` is replaced by a dict
    holding its ``'path'`` and a ``'contents'`` list produced by indexing
    that path recursively; every other entry is kept exactly as returned.

    Args:
        token: Forwarded unchanged to ``get_file_structure``.
        repo: Forwarded unchanged to ``get_file_structure``.
        current_path: Path inside the repository to list; the empty string
            is the default starting point.
        proxies: Forwarded unchanged to ``get_file_structure``.

    Returns:
        A list with one element per entry reported for ``current_path``,
        in the order ``get_file_structure`` reported them.
    """
    # NOTE(review): assumes get_file_structure returns an iterable of dicts
    # that carry at least 'type' and 'path' keys -- confirm in hf_scrapper.

    def expand(entry):
        # Non-directory entries are passed through untouched.
        if entry['type'] != 'directory':
            return entry
        entry_path = entry['path']
        return {
            "type": "directory",
            "path": entry_path,
            "contents": index_repository(token, repo, entry_path, proxies),
        }

    return [
        expand(entry)
        for entry in get_file_structure(repo, token, current_path, proxies)
    ]
def indexer():
    """Index the configured repository and save the listing as JSON.

    Configuration is read from environment variables:

    * ``TOKEN`` -- passed to ``index_repository`` as the access token.
    * ``REPO`` -- the repository to index (required).
    * ``INDEX_FILE`` -- path handed to ``write_file_structure_to_json``
      (required).

    The full structure is gathered with ``index_repository`` starting at the
    empty path, using the proxies returned by ``get_system_proxies``, then
    written out, and a confirmation line is printed.

    Raises:
        ValueError: If ``REPO`` or ``INDEX_FILE`` is unset or empty.
    """
    token = os.getenv("TOKEN")
    repo = os.getenv("REPO")
    output_path = os.getenv("INDEX_FILE")

    # os.getenv yields None for unset variables. Fail fast with a clear
    # message instead of indexing a repository called "None" or handing a
    # None path to the writer after all the fetching has been done.
    required = (("REPO", repo), ("INDEX_FILE", output_path))
    missing = [name for name, value in required if not value]
    if missing:
        raise ValueError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    # NOTE(review): TOKEN is deliberately not validated here -- confirm
    # whether get_file_structure can work without one.
    proxies = get_system_proxies()
    full_structure = index_repository(token, repo, "", proxies)
    write_file_structure_to_json(full_structure, output_path)
    print(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")