File size: 5,243 Bytes
4cc9a2d
 
 
36b6998
 
 
 
5c74dbf
 
4cc9a2d
e1f9112
5c74dbf
 
 
 
4cc9a2d
e1f9112
5c74dbf
 
e1f9112
5c74dbf
 
 
e1f9112
5c74dbf
 
 
 
 
 
 
 
 
 
5be0243
5c74dbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36b6998
5c74dbf
 
 
 
e1f9112
5c74dbf
 
 
 
 
 
 
 
 
 
e1f9112
5c74dbf
 
 
 
e1f9112
5c74dbf
 
e1f9112
5c74dbf
 
 
e1f9112
5c74dbf
e1f9112
5c74dbf
 
5be0243
e1f9112
5c74dbf
e1f9112
 
 
 
5be0243
e1f9112
5c74dbf
 
e1f9112
5c74dbf
e1f9112
5c74dbf
 
 
36b6998
5c74dbf
 
 
 
 
36b6998
5c74dbf
36b6998
e1f9112
5c74dbf
36b6998
5c74dbf
 
 
 
 
 
 
 
 
 
 
 
 
 
4cc9a2d
5c74dbf
5be0243
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import gradio as gr
from bing_image_downloader import downloader
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote
import urllib.request
import re
import time

# HTTP request headers: a desktop-browser User-Agent plus a preferred language.
# Passed to the Google search request and to each per-image download below.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5'
}

# Download via Bing
def download_bing_images(search_query, limit, adult_filter_off):
    """Download images for *search_query* through bing_image_downloader.

    Args:
        search_query: Text to search for; also used as the sub-folder name
            under ``dataset`` that is scanned afterwards.
        limit: Maximum number of images requested from the downloader.
        adult_filter_off: True when the safe-search filter must be disabled
            (i.e. the UI "safe mode" is "Off").

    Returns:
        The image paths found in ``dataset/<search_query>`` after the
        download, or an empty list if anything went wrong (the error is
        printed, never raised).
    """
    bing_options = {
        'limit': limit,
        'adult_filter_off': adult_filter_off,
        'force_replace': False,
        'timeout': 60,
        'filter_type': 'photo',
    }
    try:
        downloader.download(search_query, **bing_options)
        # NOTE(review): 'dataset' is presumably the downloader's default
        # output folder -- confirm against the library version in use.
        return get_image_paths(os.path.join('dataset', search_query))
    except Exception as err:
        print(f"Erreur Bing : {err}")
        return []

# Download via Google
def download_google_images(search_query, limit):
    """Scrape a Google Images result page and download up to *limit* images.

    The result page is fetched with the shared ``HEADERS``; every ``<script>``
    element mentioning ``AF_initDataCallback`` is searched for quoted
    http(s) URLs ending in ``.jpg``, ``.jpeg`` or ``.png``. The first *limit*
    distinct URLs (in page order) are handed to ``download_and_save``.

    Args:
        search_query: Text to search for. It is URL-quoted for the request and
            sanitised before being used as a folder name.
        limit: Maximum number of images to download (coerced to ``int``).

    Returns:
        The list of saved file paths, or an empty list when nothing was found
        or any error occurred (the error is printed, never raised).
    """
    try:
        url = f"https://www.google.com/search?q={quote(search_query)}&tbm=isch"
        # Without a timeout a stalled connection would block the UI handler forever.
        response = requests.get(url, headers=HEADERS, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        pattern = re.compile(r'\[\"(https?://[^\]\"]*\.(?:jpg|jpeg|png))\"')

        image_urls = []
        for script in soup.find_all('script'):
            script_text = script.text
            if 'AF_initDataCallback' in script_text:
                image_urls.extend(pattern.findall(script_text))

        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the selection is deterministic (a set would shuffle the results).
        max_images = max(0, int(limit))
        image_urls = list(dict.fromkeys(image_urls))[:max_images]
        if not image_urls:
            return []

        # The query comes from the user: keep only word characters, spaces and
        # hyphens so it cannot introduce path separators or '..' components.
        folder_name = re.sub(r'[^\w\- ]', '_', search_query).strip()
        output_dir = os.path.join('dataset', f'google_{folder_name}')
        # Created only once there is something to save, so a failed request
        # does not leave an empty folder behind.
        os.makedirs(output_dir, exist_ok=True)
        return download_and_save(image_urls, output_dir)

    except Exception as e:
        print(f"Erreur Google : {str(e)}")
        return []

# Download and save images from a list of URLs
def download_and_save(urls, output_dir):
    """Download every URL in *urls* into *output_dir* and return the saved paths.

    Each file is named ``image_<position>_<unix time><ext>`` where ``<ext>`` is
    taken from the URL path when it is ``.jpg``, ``.jpeg`` or ``.png`` and
    falls back to ``.jpg`` otherwise. A URL that fails is reported with
    ``print`` and skipped; it never aborts the remaining downloads.

    Args:
        urls: Iterable of image URLs.
        output_dir: Destination folder; created if it does not exist.

    Returns:
        List of paths of the files that were written successfully.
    """
    from urllib.parse import urlsplit  # local import keeps the block self-contained

    try:
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Erreur création dossier {output_dir} : {str(e)}")
        return []

    known_extensions = ('.jpg', '.jpeg', '.png')
    saved_paths = []
    for idx, url in enumerate(urls, start=1):
        try:
            # Keep the real extension instead of labelling every file ".jpg".
            extension = os.path.splitext(urlsplit(url).path)[1].lower()
            if extension not in known_extensions:
                extension = '.jpg'
            filename = f"image_{idx}_{int(time.time())}{extension}"
            full_path = os.path.join(output_dir, filename)

            req = urllib.request.Request(url, headers=HEADERS)
            # Read the whole body before touching the disk so a failed or
            # interrupted transfer never leaves an empty/partial file behind.
            with urllib.request.urlopen(req, timeout=10) as response:
                data = response.read()
            if not data:
                raise ValueError("réponse vide")

            with open(full_path, 'wb') as f:
                f.write(data)
            saved_paths.append(full_path)
        except Exception as e:
            print(f"Erreur téléchargement {url} : {str(e)}")
    return saved_paths

# List the image files found in a folder
def get_image_paths(directory):
    """Return the paths of the image files located directly in *directory*.

    A file counts as an image when its name ends (case-insensitively) with
    ``.png``, ``.jpg`` or ``.jpeg``. Sub-directories are ignored and the
    result is sorted by file name so the order is stable between calls.

    Args:
        directory: Folder to scan.

    Returns:
        Sorted list of ``directory/<name>`` paths; empty when *directory*
        does not exist or is not a directory.
    """
    # isdir (not exists): a path pointing at a regular file must yield [],
    # not make os.listdir raise.
    if not os.path.isdir(directory):
        return []

    # The leading dot matters: without it a name such as "fakejpg" matches.
    image_extensions = ('.png', '.jpg', '.jpeg')
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(directory, name))
    ]

# Main entry point called by the Gradio interface
def download_handler(source, query, limit, safe_mode):
    """Validate the UI inputs, run the selected downloader and build a status line.

    Args:
        source: ``"Bing"`` or ``"Google"``; any other value downloads nothing.
        query: Search text; surrounding whitespace is stripped.
        limit: Requested number of images; coerced to ``int`` and clamped to
            the range 1-100.
        safe_mode: ``"On"`` or ``"Off"``. Only used for Bing, where ``"Off"``
            disables the adult-content filter.

    Returns:
        A ``(image_paths, status_message)`` tuple. On invalid input or on any
        error the list is empty and the message explains why; this function
        does not raise.
    """
    # Coerce first: a float would break slicing downstream, and None or a
    # non-numeric value must become a status message rather than an exception.
    try:
        limit = max(1, min(int(limit), 100))
    except (TypeError, ValueError, OverflowError):
        return [], "Erreur: nombre d'images invalide."

    # An empty query would otherwise be forwarded to the search backends.
    query = str(query or "").strip()
    if not query:
        return [], "Veuillez saisir une recherche."

    try:
        if source == "Bing":
            # safe_mode "Off" means the filter is disabled (adult_filter_off=True)
            image_paths = download_bing_images(query, limit, safe_mode == "Off")
        elif source == "Google":
            image_paths = download_google_images(query, limit)
        else:
            image_paths = []

        status_msg = f"{len(image_paths)} image(s) téléchargée(s)." if image_paths else "Aucune image téléchargée."
        return image_paths, status_msg
    except Exception as e:
        print(f"Erreur globale : {str(e)}")
        return [], f"Erreur: {str(e)}"

# Build the Gradio interface with Blocks; the resulting object is bound to `app`
with gr.Blocks(theme=gr.themes.Soft(), title="Image Downloader") as app:
    # Page heading and a one-line description (user-facing text is in French)
    gr.Markdown("# 📸 Téléchargeur d'Images Multi-Sources")
    gr.Markdown("Téléchargez des images depuis Bing ou Google (max 100)")

    # The four input controls, declared inside a single row
    with gr.Row():
        source = gr.Radio(["Bing", "Google"], label="Source", value="Bing")
        query = gr.Textbox(label="Recherche", placeholder="Entrez votre recherche...")
        # Slider bounds 1-100 match the clamp applied to `limit` in download_handler
        limit = gr.Slider(1, 100, value=20, step=1, label="Nombre d'images")
        # download_handler forwards this value to the Bing branch only
        safe_mode = gr.Radio(["On", "Off"], label="Filtre de sécurité (Bing)", value="On")

    submit_btn = gr.Button("🚀 Lancer le téléchargement", variant="primary")

    # Output widgets: download_handler returns (image paths, status message),
    # mapped in that order to the gallery and the read-only status box
    gallery = gr.Gallery(label="Résultats", columns=5, object_fit="contain", height="auto")
    status = gr.Textbox(label="Statut", interactive=False)

    # Wire the button to the handler; input order matches the handler's
    # (source, query, limit, safe_mode) signature.
    # NOTE(review): api_name presumably exposes this as the "download" API endpoint -- confirm
    submit_btn.click(
        fn=download_handler,
        inputs=[source, query, limit, safe_mode],
        outputs=[gallery, status],
        api_name="download"
    )

    # Example rows for the four inputs, in the same order as `inputs`
    gr.Examples(
        examples=[
            ["Bing", "chatons mignons", 10, "On"],
            ["Google", "paysages montagneux", 15, "On"]
        ],
        inputs=[source, query, limit, safe_mode]
    )

if __name__ == "__main__":
    # Take the listening port from the PORT environment variable, falling back
    # to 7860 (the original note says Hugging Face Spaces requires this).
    listen_port = int(os.getenv("PORT", 7860))
    app.launch(server_name="0.0.0.0", server_port=listen_port, show_error=True)