"""Gradio app that downloads images from Bing or Google and shows them in a gallery."""

import os
import re
import time
import urllib.request
from urllib.parse import quote, urlparse

import gradio as gr
import requests
from bing_image_downloader import downloader
from bs4 import BeautifulSoup

# HTTP headers sent with every outgoing request (browser-like User-Agent).
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5'
}

# Root folder under which every download directory is created.
_DATASET_DIR = 'dataset'

# File extensions (with the dot) that count as images.
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# Seconds to wait on a single HTTP request before giving up.
_REQUEST_TIMEOUT = 10

# Matches quoted http(s) URLs ending in an image extension inside the
# JavaScript payload of a Google Images result page.
_GOOGLE_IMAGE_URL_PATTERN = re.compile(
    r'\[\"(https?://[^\]\"]*\.(?:jpg|jpeg|png))\"'
)


def _safe_dirname(name):
    """Return *name* reduced to a single, harmless directory component.

    Every run of characters other than word characters, spaces and hyphens
    is replaced by ``_`` so that a user-supplied query cannot introduce path
    separators or ``..`` segments.
    """
    cleaned = re.sub(r'[^\w\- ]+', '_', name).strip()
    return cleaned or '_'


def download_bing_images(search_query, limit, adult_filter_off):
    """Download images from Bing and return the paths found on disk.

    Args:
        search_query: Text to search for.
        limit: Maximum number of images requested from the downloader.
        adult_filter_off: True when the safe-search filter must be disabled.

    Returns:
        Paths of the image files present in the query's directory, or an
        empty list when the download raised an exception.
    """
    try:
        downloader.download(
            search_query,
            limit=limit,
            output_dir=_DATASET_DIR,
            adult_filter_off=adult_filter_off,
            force_replace=False,
            timeout=60,
            # The library's keyword is ``filter``; ``filter_type`` raised a
            # TypeError that the except clause below silently swallowed.
            filter='photo',
        )
        # NOTE(review): the library derives the directory from the raw query,
        # so the same raw value must be used here to locate the files.
        output_dir = os.path.join(_DATASET_DIR, search_query)
        return get_image_paths(output_dir)
    except Exception as e:
        print(f"Erreur Bing : {str(e)}")
        return []


def download_google_images(search_query, limit):
    """Scrape a Google Images result page and download the images it lists.

    Args:
        search_query: Text to search for.
        limit: Maximum number of image URLs to download.

    Returns:
        Paths of the files that were saved, or an empty list on failure.
    """
    try:
        output_dir = os.path.join(
            _DATASET_DIR, f'google_{_safe_dirname(search_query)}'
        )
        os.makedirs(output_dir, exist_ok=True)

        url = f"https://www.google.com/search?q={quote(search_query)}&tbm=isch"
        # A timeout keeps a stalled connection from blocking the UI forever.
        response = requests.get(url, headers=HEADERS, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        image_urls = []
        for script in soup.find_all('script'):
            if 'AF_initDataCallback' in script.text:
                image_urls.extend(
                    _GOOGLE_IMAGE_URL_PATTERN.findall(script.text)
                )

        # dict.fromkeys removes duplicates while keeping first-seen order,
        # so the same page always yields the same selection.
        image_urls = list(dict.fromkeys(image_urls))[:limit]
        return download_and_save(image_urls, output_dir)
    except Exception as e:
        print(f"Erreur Google : {str(e)}")
        return []


def download_and_save(urls, output_dir):
    """Download each URL into *output_dir* and return the saved file paths.

    A failing URL is reported on stdout and skipped; it does not abort the
    remaining downloads.

    Args:
        urls: Iterable of image URLs.
        output_dir: Existing directory that receives the files.

    Returns:
        List of paths of the files that were written successfully.
    """
    saved_paths = []
    for idx, url in enumerate(urls, start=1):
        try:
            # Keep the URL's own extension when it is a known image type
            # instead of labelling every file as JPEG.
            extension = os.path.splitext(urlparse(url).path)[1].lower()
            if extension not in _IMAGE_EXTENSIONS:
                extension = '.jpg'
            filename = f"image_{idx}_{int(time.time())}{extension}"
            full_path = os.path.join(output_dir, filename)

            req = urllib.request.Request(url, headers=HEADERS)
            with urllib.request.urlopen(req, timeout=_REQUEST_TIMEOUT) as response:
                data = response.read()

            # Write only after the body was fully read, so a failed transfer
            # does not leave an empty or truncated file behind.
            with open(full_path, 'wb') as f:
                f.write(data)
            saved_paths.append(full_path)
        except Exception as e:
            print(f"Erreur téléchargement {url} : {str(e)}")
    return saved_paths


def get_image_paths(directory):
    """Return the sorted paths of the image files directly inside *directory*.

    Returns an empty list when *directory* is not an existing directory.
    """
    if not os.path.isdir(directory):
        return []
    return sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    )


def download_handler(source, query, limit, safe_mode):
    """Gradio callback: run the selected downloader and build a status line.

    Args:
        source: "Bing" or "Google"; any other value downloads nothing.
        query: Search text entered by the user.
        limit: Requested number of images; clamped to the range 1..100.
        safe_mode: "On" or "Off"; only used for Bing.

    Returns:
        A ``(image_paths, status_message)`` tuple for the gallery and the
        status textbox.
    """
    try:
        query = (query or "").strip()
        if not query:
            # Nothing to search for: skip the network round-trip.
            return [], "Aucune image téléchargée."

        # The slider may deliver a float; list slicing needs an int.
        limit = max(1, min(int(limit), 100))

        if source == "Bing":
            # safe_mode "Off" disables the filter (adult_filter_off=True).
            image_paths = download_bing_images(query, limit, safe_mode == "Off")
        elif source == "Google":
            image_paths = download_google_images(query, limit)
        else:
            image_paths = []

        status_msg = (
            f"{len(image_paths)} image(s) téléchargée(s)."
            if image_paths
            else "Aucune image téléchargée."
        )
        return image_paths, status_msg
    except Exception as e:
        print(f"Erreur globale : {str(e)}")
        return [], f"Erreur: {str(e)}"


# Build the Gradio interface with Blocks.
with gr.Blocks(theme=gr.themes.Soft(), title="Image Downloader") as app:
    gr.Markdown("# 📸 Téléchargeur d'Images Multi-Sources")
    gr.Markdown("Téléchargez des images depuis Bing ou Google (max 100)")

    # NOTE(review): the file's indentation was lost; the four input widgets
    # are assumed to share this row - confirm against the intended layout.
    with gr.Row():
        source = gr.Radio(["Bing", "Google"], label="Source", value="Bing")
        query = gr.Textbox(label="Recherche", placeholder="Entrez votre recherche...")
        limit = gr.Slider(1, 100, value=20, step=1, label="Nombre d'images")
        safe_mode = gr.Radio(["On", "Off"], label="Filtre de sécurité (Bing)", value="On")

    submit_btn = gr.Button("🚀 Lancer le téléchargement", variant="primary")
    gallery = gr.Gallery(label="Résultats", columns=5, object_fit="contain", height="auto")
    status = gr.Textbox(label="Statut", interactive=False)

    submit_btn.click(
        fn=download_handler,
        inputs=[source, query, limit, safe_mode],
        outputs=[gallery, status],
        api_name="download"
    )

    gr.Examples(
        examples=[
            ["Bing", "chatons mignons", 10, "On"],
            ["Google", "paysages montagneux", 15, "On"]
        ],
        inputs=[source, query, limit, safe_mode]
    )

if __name__ == "__main__":
    # Read the port from the environment (needed on Hugging Face Spaces).
    port = int(os.environ.get("PORT", 7860))
    app.launch(server_name="0.0.0.0", server_port=port, show_error=True)