import asyncio
import json
import logging
import os

from dotenv import load_dotenv

from hf_scrapper import (
    get_file_structure,
    get_system_proxies,
    write_file_structure_to_json,
)

# Load settings from a .env file (if present) before indexer() reads them.
load_dotenv()


async def _walk_repository(token, repo, current_path, proxies):
    """Recursively build the nested listing for ``current_path``.

    Performs no logging of its own so that a failure deep in the tree is
    reported once by the public entry point instead of once per recursion
    level while the exception unwinds.
    """
    entries = await get_file_structure(repo, token, current_path, proxies)
    tree = []
    for entry in entries:
        if entry["type"] == "directory":
            # Directories are re-emitted with only type/path plus their
            # recursively indexed contents; any other fields are dropped.
            tree.append(
                {
                    "type": "directory",
                    "path": entry["path"],
                    "contents": await _walk_repository(
                        token, repo, entry["path"], proxies
                    ),
                }
            )
        else:
            # Non-directory entries are passed through untouched.
            tree.append(entry)
    return tree


async def index_repository(token, repo, current_path="", proxies=None):
    """Return the nested file structure of ``repo`` below ``current_path``.

    Each level is fetched with ``get_file_structure``; entries whose
    ``'type'`` is ``'directory'`` are expanded recursively into
    ``{"type": "directory", "path": ..., "contents": [...]}``, every other
    entry is kept as returned by the helper.

    Args:
        token: Access token forwarded to ``get_file_structure``.
        repo: Repository identifier forwarded to ``get_file_structure``.
        current_path: Path inside the repository to start from; the empty
            string is what ``indexer`` passes for the top level.
        proxies: Proxy settings forwarded unchanged to ``get_file_structure``.

    Returns:
        A list of entries for ``current_path``, with directories nested.

    Raises:
        Exception: Any error raised while fetching or walking the tree is
            logged once and re-raised unchanged.
    """
    try:
        return await _walk_repository(token, repo, current_path, proxies)
    except Exception as e:
        # Logged here only (not inside the recursion) to avoid emitting the
        # same error once per directory depth.
        logging.error("Error indexing repository: %s", e)
        raise


async def indexer():
    """Index the repository named by the environment and save it as JSON.

    Reads ``TOKEN``, ``REPO`` and ``INDEX_FILE`` from the environment. If any
    of them is missing or empty, an error is logged and the function returns
    without doing any work. Otherwise the full structure is built with
    ``index_repository`` and handed to ``write_file_structure_to_json``.

    Failures during indexing or writing are logged (with traceback) and
    swallowed; errors from ``get_system_proxies`` propagate to the caller.
    """
    token = os.getenv("TOKEN")
    repo = os.getenv("REPO")
    output_path = os.getenv("INDEX_FILE")

    if not token or not repo or not output_path:
        logging.error("Environment variables TOKEN, REPO, or INDEX_FILE are not set.")
        return

    proxies = await get_system_proxies()

    try:
        full_structure = await index_repository(token, repo, "", proxies)
        await write_file_structure_to_json(full_structure, output_path)
        logging.info(
            "Full file structure for repository '%s' has been indexed and saved to %s",
            repo,
            output_path,
        )
    except Exception as e:
        # Top-level boundary: keep the best-effort behaviour (no re-raise) but
        # record the traceback so the swallowed failure can be diagnosed.
        logging.exception("Error during indexing: %s", e)