|
import json |
|
import logging |
|
import asyncio |
|
from hf_scrapper import get_system_proxies, get_file_structure, write_file_structure_to_json |
|
from dotenv import load_dotenv |
|
import os |
|
|
|
load_dotenv() |
|
|
|
async def _collect_structure(token, repo, current_path, proxies):
    """Recursively build the nested listing rooted at ``current_path``.

    Performs no logging or error handling of its own, so a failure deep in
    the tree unwinds straight to ``index_repository`` and is reported once.
    """
    # NOTE(review): entries are assumed to be dict-like with 'type' and
    # 'path' keys, as indexed below -- confirm against get_file_structure.
    entries = await get_file_structure(repo, token, current_path, proxies)

    tree = []
    for item in entries:
        if item['type'] == 'directory':
            # Directory entries are rebuilt with only type/path plus the
            # recursively collected children under "contents".
            children = await _collect_structure(token, repo, item['path'], proxies)
            tree.append({
                "type": "directory",
                "path": item['path'],
                "contents": children,
            })
        else:
            # Non-directory entries are passed through unchanged.
            tree.append(item)
    return tree


async def index_repository(token, repo, current_path="", proxies=None):
    """Return the nested file structure of ``repo`` starting at ``current_path``.

    Each directory entry reported by ``get_file_structure`` is replaced by a
    dict ``{"type": "directory", "path": ..., "contents": [...]}`` whose
    ``contents`` is the same structure for that sub-path; every other entry
    is kept as returned.

    Args:
        token: Forwarded to ``get_file_structure``.
        repo: Forwarded to ``get_file_structure``.
        current_path: Path inside the repository to start from; ``""`` by
            default.
        proxies: Forwarded to ``get_file_structure``.

    Returns:
        A list of entries, with directories expanded recursively.

    Raises:
        Exception: Any error raised while listing or walking the tree is
            logged and then re-raised unchanged.
    """
    try:
        return await _collect_structure(token, repo, current_path, proxies)
    except Exception as e:
        # Log here only: the recursion lives in the helper, so an error at
        # any depth produces exactly one log record instead of one per level.
        logging.error("Error indexing repository: %s", e)
        raise
|
|
|
async def indexer():
    """Index the repository named in the environment and save it as JSON.

    Reads ``TOKEN``, ``REPO`` and ``INDEX_FILE`` from the environment. If any
    of them is missing or empty, an error is logged and the coroutine returns
    without doing anything else. Otherwise it obtains proxies from
    ``get_system_proxies``, builds the structure with ``index_repository``
    starting at the root path ``""``, and passes the result and the output
    path to ``write_file_structure_to_json``.

    Errors raised while indexing or writing are logged and not re-raised.
    An error from ``get_system_proxies`` is not caught here.

    Returns:
        None in every case.
    """
    token, repo, output_path = (
        os.getenv(name) for name in ("TOKEN", "REPO", "INDEX_FILE")
    )

    # All three settings are required; an empty string counts as unset.
    if not (token and repo and output_path):
        logging.error("Environment variables TOKEN, REPO, or INDEX_FILE are not set.")
        return

    proxies = await get_system_proxies()

    try:
        tree = await index_repository(token, repo, "", proxies)
        await write_file_structure_to_json(tree, output_path)
        logging.info(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")
    except Exception as err:
        # Best-effort boundary: report the failure and return normally.
        logging.error(f"Error during indexing: {err}")