Abhaykoul's picture
Upload 85 files
9e7090f verified
raw
history blame
18.7 kB
import csv
import logging
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from urllib.parse import unquote
from pathlib import Path
import click
from curl_cffi import requests
from .webscout_search import WEBS
from .utils import json_dumps, json_loads
from .version import __version__
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Index -> click colour name. _print_data picks a colour for each field of a
# record by the field's position, so the order of this table matters.
COLORS = dict(
    enumerate(
        (
            "black",
            "red",
            "green",
            "yellow",
            "blue",
            "magenta",
            "cyan",
            "bright_black",
            "bright_red",
            "bright_green",
            "bright_yellow",
            "bright_blue",
            "bright_magenta",
            "bright_cyan",
            "white",
            "bright_white",
        )
    )
)
def _save_json(jsonfile, data):
    """Serialize *data* with ``json_dumps`` and write it to *jsonfile* as UTF-8.

    The file is opened in write mode, so an existing file is overwritten.
    """
    with open(jsonfile, mode="w", encoding="utf-8") as handle:
        serialized = json_dumps(data)
        handle.write(serialized)
def _save_csv(csvfile, data):
    """Write a sequence of dict records to *csvfile* as UTF-8 CSV.

    The file is always created (or truncated). If *data* is empty or None
    nothing further is written. Otherwise the keys of the first record form
    the header row and every record is written below it.
    """
    with open(csvfile, mode="w", newline="", encoding="utf-8") as handle:
        if not data:
            return
        columns = data[0].keys()
        csv_writer = csv.DictWriter(handle, fieldnames=columns, quoting=csv.QUOTE_MINIMAL)
        csv_writer.writeheader()
        for record in data:
            csv_writer.writerow(record)
def _print_data(data):
    """Print search results to the terminal, one record at a time.

    Args:
        data: sequence of dict records. Nothing is printed when it is empty
            or None.

    Each record gets a numbered separator line followed by one coloured
    ``key value`` row per field. After each record the function waits on
    ``input()`` before continuing with the next one.
    """
    if not data:
        return

    # Fields are coloured by position using COLORS[1:]. Index 0 ("black") is
    # skipped because rows are drawn on a black background. The index wraps so
    # that a record with more fields than colours no longer raises KeyError.
    usable_colors = len(COLORS) - 1

    for i, e in enumerate(data, start=1):
        click.secho(f"{i}.\t {'=' * 78}", bg="black", fg="white")
        for j, (k, v) in enumerate(e.items(), start=1):
            if v:
                # URL-like and long-text fields get a wide limit so they are not broken up.
                width = 300 if k in ("content", "href", "image", "source", "thumbnail", "url") else 78
                k = "language" if k == "detected_language" else k
                text = click.wrap_text(
                    f"{v}", width=width, initial_indent="", subsequent_indent=" " * 12, preserve_paragraphs=True
                )
            else:
                # Falsy values (None, "", 0, ...) are printed as they are.
                text = v
            color = COLORS[(j - 1) % usable_colors + 1]
            click.secho(f"{k:<12}{text}", bg="black", fg=color, overline=True)
        # Pause until the user presses Enter.
        input()
def _sanitize_keywords(keywords):
    """Reduce a search query to a string suitable for file and folder names.

    The substitutions are applied strictly in order: the ``filetype`` and
    ``site`` operator words and colons are removed, double quotes become
    single quotes, and spaces and both kinds of slash become underscores.
    """
    substitutions = (
        ("filetype", ""),
        (":", ""),
        ('"', "'"),
        ("site", ""),
        (" ", "_"),
        ("/", "_"),
        ("\\", "_"),
        (" ", ""),
    )
    cleaned = keywords
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned
def _download_file(url, dir_path, filename, proxy):
    """Fetch *url* and store the response body in *dir_path* under *filename*.

    This is best effort: any exception (request failure, HTTP error status
    raised by ``raise_for_status``, failure to open or write the file) is
    logged at DEBUG level and swallowed.

    Args:
        url: address to download.
        dir_path: directory to write into; it is not created here.
        filename: target file name. Path separators are replaced with "_"
            and the name is cut to 200 characters.
        proxy: forwarded to ``requests.get`` as ``proxies``.
    """
    # The name is derived from a remote URL by the caller, so it may contain
    # "/" or "\" (for example from percent-decoding). Neutralise them so the
    # file always lands directly inside dir_path.
    safe_name = filename.replace("/", "_").replace("\\", "_")[:200]
    try:
        resp = requests.get(url, proxies=proxy, impersonate="chrome", timeout=10)
        resp.raise_for_status()
        with open(os.path.join(dir_path, safe_name), "wb") as file:
            file.write(resp.content)
    except Exception as ex:
        logger.debug(f"download_file url={url} {type(ex).__name__} {ex}")
def _download_results(keywords, results, images=False, proxy=None, threads=None):
    """Download the target of every search result into a new timestamped folder.

    Args:
        keywords: string used in the folder name.
        results: sequence of dict records. The ``"image"`` key is read when
            *images* is true, otherwise ``"href"``. Nothing happens when it is
            empty or None.
        images: selects the ``images_`` or ``text_`` folder prefix and the
            record key that holds the URL.
        proxy: proxy URL applied to both http and https requests.
        threads: worker count for the thread pool; 10 when None.

    The folder ``<images|text>_<keywords>_<YYYYmmdd_HHMMSS>`` is created
    relative to the current directory. Each file is named
    ``<index>_<last URL path segment>``.
    """
    if not results:
        # No results: do not create an empty folder, and do not iterate over None.
        return

    path_type = "images" if images else "text"
    path = f"{path_type}_{keywords}_{datetime.now():%Y%m%d_%H%M%S}"
    os.makedirs(path, exist_ok=True)
    proxy = {"http": proxy, "https": proxy}

    threads = 10 if threads is None else threads
    with ThreadPoolExecutor(max_workers=threads) as executor:
        futures = []
        for i, res in enumerate(results, start=1):
            url = res["image"] if images else res["href"]
            # Last path segment without the query string, percent-decoded.
            filename = unquote(url.split("/")[-1].split("?")[0])
            futures.append(executor.submit(_download_file, url, path, f"{i}_{filename}", proxy))

        # Advance the progress bar as each download finishes, in completion order.
        with click.progressbar(
            length=len(futures), label="Downloading", show_percent=True, show_pos=True, width=50
        ) as bar:
            for future in as_completed(futures):
                future.result()
                bar.update(1)
# Root command group. chain=True lets several subcommands be given in one
# invocation. The docstring below is what click shows as the group's help
# text, so it is kept to a single short line.
@click.group(chain=True)
def cli():
    """duckduckgo_search CLI tool"""
def safe_entry_point():
    """Run the CLI and report any ``Exception`` as a single ``Type: message`` line."""
    try:
        cli()
    except Exception as error:
        kind = type(error).__name__
        click.echo(f"{kind}: {error}")
@cli.command()
def version():
    # Print the package version and return it to the caller as well.
    current = __version__
    print(current)
    return current
@cli.command()
@click.option("-s", "--save", is_flag=True, default=False, help="save the conversation in the json file")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
def chat(save, proxy):
    """CLI function to perform an interactive AI chat using DuckDuckGo API."""
    # Flow: choose a model, optionally restore the previous conversation from
    # the cache file, then loop reading a prompt and printing the answer.
    # The loop ends on a blank prompt, or after answering a prompt that
    # contains "exit" or "quit".
    cache_file = "WEBS_chat_conversation.json"
    models = ["gpt-3.5", "claude-3-haiku"]
    # NOTE(review): every other command builds WEBS(proxies=...); confirm that
    # WEBS also accepts the `proxy` keyword used here.
    client = WEBS(proxy=proxy)

    print("DuckDuckGo AI chat. Available models:")
    for idx, model in enumerate(models, start=1):
        print(f"{idx}. {model}")

    # Ask until the answer is blank (default: first model) or a valid 1-based
    # number. This avoids a ValueError/IndexError on bad input, and stops "0"
    # or a negative number from silently selecting a model via negative indexing.
    while True:
        choice = input("Choose a model by entering its number[1]: ").strip()
        if not choice:
            chosen_model_idx = 0
            break
        if choice.isdecimal() and 1 <= int(choice) <= len(models):
            chosen_model_idx = int(choice) - 1
            break
        print(f"Invalid choice, enter a number from 1 to {len(models)}.")
    model = models[chosen_model_idx]
    print(f"Using model: {model}")

    # With --save, resume from the cached conversation if a cache file exists.
    if save and Path(cache_file).exists():
        # The cache is written as UTF-8 by _save_json, so read it back as UTF-8
        # rather than with the platform default encoding.
        with open(cache_file, encoding="utf-8") as f:
            cache = json_loads(f.read())
        client._chat_vqd = cache.get("vqd", None)
        client._chat_messages = cache.get("messages", [])

    while True:
        user_input = input(f"{'-'*78}\nYou: ")
        if not user_input.strip():
            break

        resp_answer = client.chat(keywords=user_input, model=model)
        text = click.wrap_text(resp_answer, width=78, preserve_paragraphs=True)
        click.secho(f"AI: {text}", bg="black", fg="green", overline=True)

        # The conversation state is written after every turn, whether or not
        # --save was given.
        cache = {"vqd": client._chat_vqd, "messages": client._chat_messages}
        _save_json(cache_file, cache)

        lowered = user_input.lower()
        if "exit" in lowered or "quit" in lowered:
            break
@cli.command()
@click.option("-k", "--keywords", required=True, help="text search, keywords for query")
@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"]))
@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m", "y"]), help="day, week, month, year")
@click.option("-m", "--max_results", default=20, help="maximum number of results, default=20")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-d", "--download", is_flag=True, default=False, help="download results to 'keywords' folder")
@click.option("-b", "--backend", default="api", type=click.Choice(["api", "html", "lite"]), help="which backend to use")
@click.option("-th", "--threads", default=10, help="download threads, default=10")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
def text(keywords, region, safesearch, timelimit, backend, output, download, threads, max_results, proxy):
    """CLI function to perform a text search using DuckDuckGo API."""
    # Run the search, then either print the records or save them to
    # text_<sanitized keywords>_<timestamp>.csv / .json.
    # With --download, each result's "href" is fetched as well.
    search_kwargs = {
        "keywords": keywords,
        "region": region,
        "safesearch": safesearch,
        "timelimit": timelimit,
        "backend": backend,
        "max_results": max_results,
    }
    results = WEBS(proxies=proxy).text(**search_kwargs)

    keywords = _sanitize_keywords(keywords)
    stem = f"text_{keywords}_{datetime.now():%Y%m%d_%H%M%S}"

    savers = {"csv": _save_csv, "json": _save_json}
    if output in savers:
        savers[output](f"{stem}.{output}", results)
    elif output == "print" and not download:
        # Printing is skipped when downloading.
        _print_data(results)

    if download:
        _download_results(keywords, results, proxy=proxy, threads=threads)
@cli.command()
@click.option("-k", "--keywords", required=True, help="answers search, keywords for query")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
def answers(keywords, output, proxy):
    """CLI function to perform a answers search using DuckDuckGo API."""
    # Print the records, or save them to
    # answers_<sanitized keywords>_<timestamp>.csv / .json.
    results = WEBS(proxies=proxy).answers(keywords=keywords)
    stem = f"answers_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"

    if output == "print":
        _print_data(results)
        return
    saver = {"csv": _save_csv, "json": _save_json}.get(output)
    if saver is not None:
        saver(f"{stem}.{output}", results)
@cli.command()
@click.option("-k", "--keywords", required=True, help="keywords for query")
@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"]))
@click.option("-t", "--timelimit", default=None, type=click.Choice(["Day", "Week", "Month", "Year"]))
@click.option("-size", "--size", default=None, type=click.Choice(["Small", "Medium", "Large", "Wallpaper"]))
@click.option(
    "-c",
    "--color",
    default=None,
    type=click.Choice(
        [
            "color",
            "Monochrome",
            "Red",
            "Orange",
            "Yellow",
            "Green",
            "Blue",
            "Purple",
            "Pink",
            "Brown",
            "Black",
            "Gray",
            "Teal",
            "White",
        ]
    ),
)
@click.option(
    "-type", "--type_image", default=None, type=click.Choice(["photo", "clipart", "gif", "transparent", "line"])
)
@click.option("-l", "--layout", default=None, type=click.Choice(["Square", "Tall", "Wide"]))
@click.option(
    "-lic",
    "--license_image",
    default=None,
    type=click.Choice(["any", "Public", "Share", "Modify", "ModifyCommercially"]),
)
@click.option("-m", "--max_results", default=90, help="maximum number of results, default=90")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-d", "--download", is_flag=True, default=False, help="download and save images to 'keywords' folder")
@click.option("-th", "--threads", default=10, help="download threads, default=10")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
def images(
    keywords,
    region,
    safesearch,
    timelimit,
    size,
    color,
    type_image,
    layout,
    license_image,
    download,
    threads,
    max_results,
    output,
    proxy,
):
    """CLI function to perform a images search using DuckDuckGo API."""
    # Run the search, then either print the records or save them to
    # images_<sanitized keywords>_<timestamp>.csv / .json.
    # With --download, each result's "image" URL is fetched as well.
    data = WEBS(proxies=proxy).images(
        keywords=keywords,
        region=region,
        safesearch=safesearch,
        timelimit=timelimit,
        size=size,
        color=color,
        type_image=type_image,
        layout=layout,
        license_image=license_image,
        max_results=max_results,
    )
    # Sanitize exactly once and reuse the result for both the output file and
    # the download folder. The sanitizer is not idempotent (removing "site"
    # can expose a new "site"), so a second pass could make the two names differ.
    keywords = _sanitize_keywords(keywords)
    filename = f"images_{keywords}_{datetime.now():%Y%m%d_%H%M%S}"
    if output == "print" and not download:
        # Printing is skipped when downloading.
        _print_data(data)
    elif output == "csv":
        _save_csv(f"{filename}.csv", data)
    elif output == "json":
        _save_json(f"{filename}.json", data)
    if download:
        _download_results(keywords, data, images=True, proxy=proxy, threads=threads)
@cli.command()
@click.option("-k", "--keywords", required=True, help="keywords for query")
@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"]))
@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m"]), help="day, week, month")
@click.option("-res", "--resolution", default=None, type=click.Choice(["high", "standart"]))
@click.option("-d", "--duration", default=None, type=click.Choice(["short", "medium", "long"]))
@click.option("-lic", "--license_videos", default=None, type=click.Choice(["creativeCommon", "youtube"]))
@click.option("-m", "--max_results", default=50, help="maximum number of results, default=50")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
def videos(keywords, region, safesearch, timelimit, resolution, duration, license_videos, max_results, output, proxy):
    """CLI function to perform a videos search using DuckDuckGo API."""
    # Print the records, or save them to
    # videos_<sanitized keywords>_<timestamp>.csv / .json.
    search_kwargs = {
        "keywords": keywords,
        "region": region,
        "safesearch": safesearch,
        "timelimit": timelimit,
        "resolution": resolution,
        "duration": duration,
        "license_videos": license_videos,
        "max_results": max_results,
    }
    results = WEBS(proxies=proxy).videos(**search_kwargs)
    stem = f"videos_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"

    if output == "print":
        _print_data(results)
        return
    saver = {"csv": _save_csv, "json": _save_json}.get(output)
    if saver is not None:
        saver(f"{stem}.{output}", results)
@cli.command()
@click.option("-k", "--keywords", required=True, help="keywords for query")
@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"]))
@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m", "y"]), help="day, week, month, year")
@click.option("-m", "--max_results", default=25, help="maximum number of results, default=25")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
def news(keywords, region, safesearch, timelimit, max_results, output, proxy):
    """CLI function to perform a news search using DuckDuckGo API."""
    # Print the records, or save them to
    # news_<sanitized keywords>_<timestamp>.csv / .json.
    client = WEBS(proxies=proxy)
    results = client.news(
        keywords=keywords,
        region=region,
        safesearch=safesearch,
        timelimit=timelimit,
        max_results=max_results,
    )
    stem = f"news_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"

    savers = {"csv": _save_csv, "json": _save_json}
    if output == "print":
        _print_data(results)
    elif output in savers:
        savers[output](f"{stem}.{output}", results)
@cli.command()
@click.option("-k", "--keywords", required=True, help="keywords for query")
@click.option("-p", "--place", default=None, help="simplified search - if set, the other parameters are not used")
@click.option("-s", "--street", default=None, help="house number/street")
@click.option("-c", "--city", default=None, help="city of search")
@click.option("-county", "--county", default=None, help="county of search")
@click.option("-state", "--state", default=None, help="state of search")
@click.option("-country", "--country", default=None, help="country of search")
@click.option("-post", "--postalcode", default=None, help="postalcode of search")
@click.option("-lat", "--latitude", default=None, help="""if lat and long are set, the other params are not used""")
@click.option("-lon", "--longitude", default=None, help="""if lat and long are set, the other params are not used""")
@click.option("-r", "--radius", default=0, help="expand the search square by the distance in kilometers")
@click.option("-m", "--max_results", default=50, help="number of results, default=50")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-proxy", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
def maps(
    keywords,
    place,
    street,
    city,
    county,
    state,
    country,
    postalcode,
    latitude,
    longitude,
    radius,
    max_results,
    output,
    proxy,
):
    """CLI function to perform a maps search using DuckDuckGo API."""
    # Print the records, or save them to
    # maps_<sanitized keywords>_<timestamp>.csv / .json.
    search_kwargs = {
        "keywords": keywords,
        "place": place,
        "street": street,
        "city": city,
        "county": county,
        "state": state,
        "country": country,
        "postalcode": postalcode,
        "latitude": latitude,
        "longitude": longitude,
        "radius": radius,
        "max_results": max_results,
    }
    results = WEBS(proxies=proxy).maps(**search_kwargs)
    stem = f"maps_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"

    if output == "print":
        _print_data(results)
        return
    saver = {"csv": _save_csv, "json": _save_json}.get(output)
    if saver is not None:
        saver(f"{stem}.{output}", results)
@cli.command()
@click.option("-k", "--keywords", required=True, help="text for translation")
@click.option("-f", "--from_", help="What language to translate from (defaults automatically)")
@click.option("-t", "--to", default="en", help="de, ru, fr, etc. What language to translate, defaults='en'")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
def translate(keywords, from_, to, output, proxy):
    """CLI function to perform translate using DuckDuckGo API."""
    # Print the records, or save them to
    # translate_<sanitized keywords>_<timestamp>.csv / .json.
    client = WEBS(proxies=proxy)
    results = client.translate(keywords=keywords, from_=from_, to=to)
    stem = f"translate_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"

    savers = {"csv": _save_csv, "json": _save_json}
    if output == "print":
        _print_data(results)
    elif output in savers:
        savers[output](f"{stem}.{output}", results)
@cli.command()
@click.option("-k", "--keywords", required=True, help="keywords for query")
@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params")
@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)")
@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150")
def suggestions(keywords, region, output, proxy):
    """CLI function to perform a suggestions search using DuckDuckGo API."""
    # Print the records, or save them to
    # suggestions_<sanitized keywords>_<timestamp>.csv / .json.
    results = WEBS(proxies=proxy).suggestions(keywords=keywords, region=region)
    stem = f"suggestions_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}"

    if output == "print":
        _print_data(results)
        return
    saver = {"csv": _save_csv, "json": _save_json}.get(output)
    if saver is not None:
        saver(f"{stem}.{output}", results)
if __name__ == "__main__":
    # Executed directly as a script: start the command group, with "WEBS"
    # as the program name passed to click.
    cli(prog_name="WEBS")