Spaces:
Building
Building
File size: 5,243 Bytes
4cc9a2d 36b6998 5c74dbf 4cc9a2d e1f9112 5c74dbf 4cc9a2d e1f9112 5c74dbf e1f9112 5c74dbf e1f9112 5c74dbf 5be0243 5c74dbf 36b6998 5c74dbf e1f9112 5c74dbf e1f9112 5c74dbf e1f9112 5c74dbf e1f9112 5c74dbf e1f9112 5c74dbf e1f9112 5c74dbf 5be0243 e1f9112 5c74dbf e1f9112 5be0243 e1f9112 5c74dbf e1f9112 5c74dbf e1f9112 5c74dbf 36b6998 5c74dbf 36b6998 5c74dbf 36b6998 e1f9112 5c74dbf 36b6998 5c74dbf 4cc9a2d 5c74dbf 5be0243 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import gradio as gr
from bing_image_downloader import downloader
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote
import urllib.request
import re
import time
# HTTP request headers shared by the scraping requests in this module:
# a desktop-browser User-Agent plus an Accept-Language preference.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/118.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.5",
}
# Download through Bing.
def download_bing_images(search_query, limit, adult_filter_off):
    """Download images for a query with bing_image_downloader.

    Args:
        search_query: Text to search for; also used as the sub-folder name
            under ``dataset`` when collecting the results.
        limit: Maximum number of images requested from the downloader.
        adult_filter_off: Forwarded to the downloader unchanged. The caller
            passes True when the safe-mode filter is switched "Off".

    Returns:
        The image paths found in ``dataset/<search_query>``, or an empty
        list if anything raised (the error is printed, not propagated).
    """
    download_options = {
        "limit": limit,
        "adult_filter_off": adult_filter_off,
        "force_replace": False,
        "timeout": 60,
        "filter_type": "photo",
    }
    try:
        downloader.download(search_query, **download_options)
        # NOTE(review): assumes the downloader stores its files under
        # dataset/<query> (no output_dir is passed) — confirm.
        found = get_image_paths(os.path.join("dataset", search_query))
    except Exception as err:
        print(f"Erreur Bing : {err}")
        found = []
    return found
# Download through Google.
def download_google_images(search_query, limit):
    """Scrape image URLs from a Google Images result page and download them.

    The result page is fetched with the module-level HEADERS. Every
    ``<script>`` containing ``AF_initDataCallback`` is scanned for quoted
    http(s) URLs ending in jpg/jpeg/png. The first ``limit`` distinct URLs
    are handed to ``download_and_save``.

    Args:
        search_query: Text to search for. It is URL-quoted for the request
            and used (with path separators neutralised) in the folder name.
        limit: Maximum number of images to download; coerced to ``int``.

    Returns:
        The list returned by ``download_and_save``, or an empty list if
        anything raised (the error is printed, not propagated).
    """
    try:
        # The query comes straight from the UI: replace path separators so it
        # stays a single folder name and cannot point outside ``dataset``.
        # Queries without separators map to the same folder as before.
        folder_name = re.sub(r'[\\/\x00]+', '_', search_query)
        output_dir = os.path.join('dataset', f'google_{folder_name}')
        os.makedirs(output_dir, exist_ok=True)

        url = f"https://www.google.com/search?q={quote(search_query)}&tbm=isch"
        # Without a timeout, requests may block forever on a stalled server.
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        pattern = re.compile(r'\[\"(https?://[^\]\"]*\.(?:jpg|jpeg|png))\"')
        image_urls = []
        for script in soup.find_all('script'):
            script_text = script.text
            if 'AF_initDataCallback' in script_text:
                image_urls.extend(pattern.findall(script_text))

        # dict.fromkeys drops duplicates while keeping first-seen order, so
        # the selection is deterministic (a set would shuffle the URLs).
        # int() guards the slice against a float limit.
        wanted = max(0, int(limit))
        unique_urls = list(dict.fromkeys(image_urls))[:wanted]
        return download_and_save(unique_urls, output_dir)
    except Exception as e:
        print(f"Erreur Google : {e}")
        return []
# Download and save the images referenced by a list of URLs.
def download_and_save(urls, output_dir):
    """Download each URL into ``output_dir`` and return the saved paths.

    Files are named ``image_<position>_<unix time>.jpg`` (position starts
    at 1). A URL that fails is reported with ``print`` and skipped; it never
    aborts the remaining downloads.

    Args:
        urls: Iterable of URLs to fetch (requested with HEADERS, 10 s timeout).
        output_dir: Destination folder; created if it does not exist.

    Returns:
        Paths of the files that were written, in input order. Empty if the
        folder cannot be created or no download succeeded.
    """
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Erreur dossier {output_dir} : {e}")
        return []

    saved_paths = []
    for idx, url in enumerate(urls, start=1):
        full_path = os.path.join(
            output_dir, f"image_{idx}_{int(time.time())}.jpg"
        )
        try:
            req = urllib.request.Request(url, headers=HEADERS)
            # Read the whole body before opening the target file, so a failed
            # or interrupted transfer does not leave an empty file behind.
            with urllib.request.urlopen(req, timeout=10) as response:
                payload = response.read()
            if not payload:
                raise ValueError("empty response body")
            with open(full_path, 'wb') as f:
                f.write(payload)
        except Exception as e:
            print(f"Erreur téléchargement {url} : {e}")
        else:
            saved_paths.append(full_path)
    return saved_paths
# Collect the paths of the image files stored in a folder.
def get_image_paths(directory):
    """Return the sorted paths of the PNG/JPEG files directly in ``directory``.

    Only regular files whose name ends in ``.png``, ``.jpg`` or ``.jpeg``
    (case-insensitive) are returned; sub-directories are ignored and not
    descended into.

    Args:
        directory: Folder to inspect.

    Returns:
        A sorted list of ``directory``-joined file paths, or an empty list
        when ``directory`` is missing or is not a directory.
    """
    if not os.path.isdir(directory):
        return []
    # The leading dot matters: a bare "png" suffix would also match names
    # such as "fakepng" that have no image extension at all.
    image_suffixes = ('.png', '.jpg', '.jpeg')
    with os.scandir(directory) as entries:
        # Sorted so the result order does not depend on the file system.
        return sorted(
            entry.path
            for entry in entries
            if entry.is_file() and entry.name.lower().endswith(image_suffixes)
        )
# Main callback invoked by the Gradio interface.
def download_handler(source, query, limit, safe_mode):
    """Run a download for the chosen source and build the UI response.

    Args:
        source: "Bing" or "Google"; any other value downloads nothing.
        query: Search text. Surrounding whitespace is stripped; an empty
            query is rejected without contacting any backend.
        limit: Requested number of images; coerced to ``int`` and clamped
            to the range 1..100.
        safe_mode: "On" or "Off". Only used for Bing, where "Off" disables
            the adult filter.

    Returns:
        A ``(image_paths, status_message)`` tuple. On any error the paths
        list is empty and the message starts with ``"Erreur: "``.
    """
    try:
        query = (query or "").strip()
        if not query:
            # Nothing to search for: answer immediately.
            return [], "Veuillez saisir une recherche."

        # int() guards against a float coming from the slider; it sits inside
        # the try so an invalid value is reported instead of raised.
        limit = max(1, min(int(limit), 100))

        if source == "Bing":
            # safe_mode "Off" means the filter is disabled
            # (adult_filter_off=True).
            image_paths = download_bing_images(query, limit, safe_mode == "Off")
        elif source == "Google":
            image_paths = download_google_images(query, limit)
        else:
            image_paths = []

        if image_paths:
            status_msg = f"{len(image_paths)} image(s) téléchargée(s)."
        else:
            status_msg = "Aucune image téléchargée."
        return image_paths, status_msg
    except Exception as e:
        print(f"Erreur globale : {e}")
        return [], f"Erreur: {e}"
# Build the Gradio interface with Blocks.
# NOTE(review): this section's indentation was lost in the copy under review.
# The nesting below (the four inputs inside the single Row, everything else
# directly under Blocks) is a reconstruction — confirm the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Image Downloader") as app:
    gr.Markdown(value="# 📸 Téléchargeur d'Images Multi-Sources")
    gr.Markdown(value="Téléchargez des images depuis Bing ou Google (max 100)")

    # Search form.
    with gr.Row():
        source = gr.Radio(
            choices=["Bing", "Google"],
            value="Bing",
            label="Source",
        )
        query = gr.Textbox(
            label="Recherche",
            placeholder="Entrez votre recherche...",
        )
        limit = gr.Slider(
            minimum=1,
            maximum=100,
            value=20,
            step=1,
            label="Nombre d'images",
        )
        safe_mode = gr.Radio(
            choices=["On", "Off"],
            value="On",
            label="Filtre de sécurité (Bing)",
        )

    submit_btn = gr.Button(value="🚀 Lancer le téléchargement", variant="primary")

    # Output widgets filled by download_handler.
    gallery = gr.Gallery(
        label="Résultats",
        columns=5,
        object_fit="contain",
        height="auto",
    )
    status = gr.Textbox(label="Statut", interactive=False)

    # The button runs download_handler; the same call is exposed under the
    # API name "download".
    submit_btn.click(
        fn=download_handler,
        inputs=[source, query, limit, safe_mode],
        outputs=[gallery, status],
        api_name="download",
    )

    # Example rows that pre-fill the form inputs.
    gr.Examples(
        examples=[
            ["Bing", "chatons mignons", 10, "On"],
            ["Google", "paysages montagneux", 15, "On"],
        ],
        inputs=[source, query, limit, safe_mode],
    )
if __name__ == "__main__":
    # The port is read from the PORT environment variable (the original
    # author notes this is required on Hugging Face Spaces); default 7860.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("PORT", 7860)),
        "show_error": True,
    }
    app.launch(**launch_options)
|