Abhaykoul committed
Commit 5faeb3d · verified · 1 Parent(s): 3fac53e

Update webscout.py

Files changed (1)
  1. webscout.py +175 -142
webscout.py CHANGED
@@ -1,3 +1,8 @@
+from __future__ import annotations
+
+import html
+import http.cookiejar as cookiejar
+from xml.etree import ElementTree
 import logging
 import warnings
 from concurrent.futures import ThreadPoolExecutor
@@ -8,21 +13,11 @@ from itertools import cycle, islice
 from random import choice
 from threading import Event
 from types import TracebackType
-from typing import Dict, List, Optional, Tuple, Type, Union, cast
-import asyncio
-import json
-import aiohttp
-import requests
-import http.cookiejar as cookiejar
-import json
-from xml.etree import ElementTree
-import re
-import html.parser
-from typing import List, Dict, Union, Optional
-
-import pyreqwest_impersonate as pri
+from typing import Optional, cast
 
+import requests
 
+import primp  # type: ignore
 
 try:
     from lxml.etree import _Element
@@ -33,6 +28,34 @@ try:
 except ImportError:
     LXML_AVAILABLE = False
 
+class WebscoutE(Exception):
+    """Base exception class for search."""
+
+
+class RatelimitE(Exception):
+    """Raised for rate limit exceeded errors during API requests."""
+
+class ConversationLimitException(Exception):
+    """Raised for conversation limit exceeded errors during API requests."""
+    pass
+class TimeoutE(Exception):
+    """Raised for timeout errors during API requests."""
+
+class FailedToGenerateResponseError(Exception):
+
+    """Provider failed to fetch response"""
+class AllProvidersFailure(Exception):
+    """None of the providers generated response successfully"""
+    pass
+
+class FacebookInvalidCredentialsException(Exception):
+    pass
+
+
+class FacebookRegionBlocked(Exception):
+    pass
+
+
 import re
 from decimal import Decimal
 from html import unescape
@@ -117,24 +140,10 @@ def _calculate_distance(lat1: Decimal, lon1: Decimal, lat2: Decimal, lon2: Decim
     c = 2 * atan2(sqrt(a), sqrt(1 - a))
     return R * c
 
-class WebscoutE(Exception):
-    """Base exception class for search."""
-
-
-class RatelimitE(Exception):
-    """Raised for rate limit exceeded errors during API requests."""
-
-
-class TimeoutE(Exception):
-    """Raised for timeout errors during API requests."""
-
-class FailedToGenerateResponseError(Exception):
-
-    """Provider failed to fetch response"""
-class AllProvidersFailure(Exception):
-    """None of the providers generated response successfully"""
-
-    pass
+def _expand_proxy_tb_alias(proxy: str | None) -> str | None:
+    """Expand "tb" to a full proxy URL if applicable."""
+    return "socks5://127.0.0.1:9150" if proxy == "tb" else proxy
+
 logger = logging.getLogger("webscout.WEBS")
 
 
@@ -143,21 +152,22 @@ class WEBS:
 
     _executor: ThreadPoolExecutor = ThreadPoolExecutor()
     _impersonates = (
-        "chrome_99", "chrome_100", "chrome_101", "chrome_104", "chrome_105", "chrome_106", "chrome_108",
-        "chrome_107", "chrome_109", "chrome_114", "chrome_116", "chrome_117", "chrome_118", "chrome_119",
-        "chrome_120", #"chrome_123", "chrome_124", "chrome_126",
-        "safari_ios_16.5", "safari_ios_17.2", "safari_ios_17.4.1", "safari_15.3", "safari_15.5",
-        "safari_15.6.1", "safari_16", "safari_16.5", "safari_17.2.1", "safari_17.4.1", "safari_17.5",
-        #"okhttp_3.9", "okhttp_3.11", "okhttp_3.13", "okhttp_3.14", "okhttp_4.9", "okhttp_4.10", "okhttp_5",
-        "edge_99", "edge_101", "edge_122",
+        "chrome_100", "chrome_101", "chrome_104", "chrome_105", "chrome_106", "chrome_107", "chrome_108",
+        "chrome_109", "chrome_114", "chrome_116", "chrome_117", "chrome_118", "chrome_119", "chrome_120",
+        #"chrome_123", "chrome_124", "chrome_126",
+        "chrome_127", "chrome_128", "chrome_129",
+        "safari_ios_16.5", "safari_ios_17.2", "safari_ios_17.4.1", "safari_15.3", "safari_15.5", "safari_15.6.1",
+        "safari_16", "safari_16.5", "safari_17.0", "safari_17.2.1", "safari_17.4.1", "safari_17.5", "safari_18",
+        "safari_ipad_18",
+        "edge_101", "edge_122", "edge_127",
     )  # fmt: skip
 
     def __init__(
         self,
-        headers: Optional[Dict[str, str]] = None,
-        proxy: Optional[str] = None,
-        proxies: Union[Dict[str, str], str, None] = None,  # deprecated
-        timeout: Optional[int] = 10,
+        headers: dict[str, str] | None = None,
+        proxy: str | None = None,
+        proxies: dict[str, str] | str | None = None,  # deprecated
+        timeout: int | None = 10,
     ) -> None:
         """Initialize the WEBS object.
 
@@ -167,14 +177,14 @@ class WEBS:
                 example: "http://user:pass@example.com:3128". Defaults to None.
             timeout (int, optional): Timeout value for the HTTP client. Defaults to 10.
         """
-        self.proxy: Optional[str] = proxy
+        self.proxy: str | None = _expand_proxy_tb_alias(proxy)  # replaces "tb" with "socks5://127.0.0.1:9150"
         assert self.proxy is None or isinstance(self.proxy, str), "proxy must be a str"
         if not proxy and proxies:
             warnings.warn("'proxies' is deprecated, use 'proxy' instead.", stacklevel=1)
             self.proxy = proxies.get("http") or proxies.get("https") if isinstance(proxies, dict) else proxies
         self.headers = headers if headers else {}
         self.headers["Referer"] = "https://duckduckgo.com/"
-        self.client = pri.Client(
+        self.client = primp.Client(
             headers=self.headers,
             proxy=self.proxy,
             timeout=timeout,
@@ -185,22 +195,23 @@ class WEBS:
             verify=False,
         )
         self._exception_event = Event()
-        self._chat_messages: List[Dict[str, str]] = []
+        self._chat_messages: list[dict[str, str]] = []
+        self._chat_tokens_count = 0
         self._chat_vqd: str = ""
 
-    def __enter__(self) -> "WEBS":
+    def __enter__(self) -> WEBS:
        return self
 
     def __exit__(
         self,
-        exc_type: Optional[Type[BaseException]] = None,
-        exc_val: Optional[BaseException] = None,
-        exc_tb: Optional[TracebackType] = None,
+        exc_type: type[BaseException] | None = None,
+        exc_val: BaseException | None = None,
+        exc_tb: TracebackType | None = None,
     ) -> None:
         pass
 
     @cached_property
-    def parser(self) -> "LHTMLParser":
+    def parser(self) -> LHTMLParser:
         """Get HTML parser."""
         return LHTMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True, collect_ids=False)
 
@@ -208,9 +219,9 @@ class WEBS:
         self,
         method: str,
         url: str,
-        params: Optional[Dict[str, str]] = None,
-        content: Optional[bytes] = None,
-        data: Optional[Union[Dict[str, str], bytes]] = None,
+        params: dict[str, str] | None = None,
+        content: bytes | None = None,
+        data: dict[str, str] | bytes | None = None,
     ) -> bytes:
         if self._exception_event.is_set():
             raise WebscoutE("Exception occurred in previous call.")
@@ -234,24 +245,30 @@ class WEBS:
         resp_content = self._get_url("POST", "https://duckduckgo.com", data={"q": keywords})
         return _extract_vqd(resp_content, keywords)
 
-    def chat(self, keywords: str, model: str = "gpt-3.5", timeout: int = 20) -> str:
-        """Initiates a chat session with DuckDuckGo AI.
+    def chat(self, keywords: str, model: str = "gpt-4o-mini", timeout: int = 30) -> str:
+        """Initiates a chat session with webscout AI.
 
         Args:
             keywords (str): The initial message or question to send to the AI.
-            model (str): The model to use: "gpt-3.5", "claude-3-haiku", "llama-3-70b", "mixtral-8x7b".
-                Defaults to "gpt-3.5".
+            model (str): The model to use: "gpt-4o-mini", "claude-3-haiku", "llama-3.1-70b", "mixtral-8x7b".
+                Defaults to "gpt-4o-mini".
             timeout (int): Timeout value for the HTTP client. Defaults to 20.
 
         Returns:
             str: The response from the AI.
         """
+        models_deprecated = {
+            "gpt-3.5": "gpt-4o-mini",
+            "llama-3-70b": "llama-3.1-70b",
+        }
+        if model in models_deprecated:
+            logger.info(f"{model=} is deprecated, using {models_deprecated[model]}")
+            model = models_deprecated[model]
         models = {
             "claude-3-haiku": "claude-3-haiku-20240307",
-            "gpt-3.5": "gpt-3.5-turbo-0125",
-            "llama-3-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-            "mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
             "gpt-4o-mini": "gpt-4o-mini",
+            "llama-3.1-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+            "mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         }
         # vqd
         if not self._chat_vqd:
@@ -259,6 +276,7 @@ class WEBS:
             self._chat_vqd = resp.headers.get("x-vqd-4", "")
 
         self._chat_messages.append({"role": "user", "content": keywords})
+        self._chat_tokens_count += len(keywords) // 4 if len(keywords) >= 4 else 1  # approximate number of tokens
 
         json_data = {
             "model": models[model],
@@ -272,10 +290,26 @@ class WEBS:
         )
         self._chat_vqd = resp.headers.get("x-vqd-4", "")
 
-        data = ",".join(x for line in resp.text.rstrip("[DONE]\n").split("data:") if (x := line.strip()))
-        result = "".join(x.get("message", "") for x in json_loads("[" + data + "]"))
+        data = ",".join(x for line in resp.text.rstrip("[DONE]LIMT_CVRSA\n").split("data:") if (x := line.strip()))
+        data = json_loads("[" + data + "]")
+
+        results = []
+        for x in data:
+            if x.get("action") == "error":
+                err_message = x.get("type", "")
+                if x.get("status") == 429:
+                    raise (
+                        ConversationLimitException(err_message)
+                        if err_message == "ERR_CONVERSATION_LIMIT"
+                        else RatelimitE(err_message)
+                    )
+                raise WebscoutE(err_message)
+            elif message := x.get("message"):
+                results.append(message)
+        result = "".join(results)
 
         self._chat_messages.append({"role": "assistant", "content": result})
+        self._chat_tokens_count += len(results)
         return result
 
     def text(
@@ -283,11 +317,11 @@ class WEBS:
         keywords: str,
         region: str = "wt-wt",
         safesearch: str = "moderate",
-        timelimit: Optional[str] = None,
+        timelimit: str | None = None,
         backend: str = "api",
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """DuckDuckGo text search. Query params: https://duckduckgo.com/params.
+        max_results: int | None = None,
+    ) -> list[dict[str, str]]:
+        """webscout text search. Query params: https://duckduckgo.com/params.
 
         Args:
             keywords: keywords for query.
@@ -325,10 +359,10 @@
         keywords: str,
         region: str = "wt-wt",
         safesearch: str = "moderate",
-        timelimit: Optional[str] = None,
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """DuckDuckGo text search. Query params: https://duckduckgo.com/params.
+        timelimit: str | None = None,
+        max_results: int | None = None,
+    ) -> list[dict[str, str]]:
+        """webscout text search. Query params: https://duckduckgo.com/params.
 
         Args:
             keywords: keywords for query.
@@ -371,9 +405,9 @@
             payload["df"] = timelimit
 
         cache = set()
-        results: List[Dict[str, str]] = []
+        results: list[dict[str, str]] = []
 
-        def _text_api_page(s: int) -> List[Dict[str, str]]:
+        def _text_api_page(s: int) -> list[dict[str, str]]:
             payload["s"] = f"{s}"
             resp_content = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
             page_data = _text_extract_json(resp_content, keywords)
@@ -408,10 +442,10 @@
         self,
         keywords: str,
         region: str = "wt-wt",
-        timelimit: Optional[str] = None,
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """DuckDuckGo text search. Query params: https://duckduckgo.com/params.
+        timelimit: str | None = None,
+        max_results: int | None = None,
+    ) -> list[dict[str, str]]:
+        """webscout text search. Query params: https://duckduckgo.com/params.
 
         Args:
             keywords: keywords for query.
@@ -445,9 +479,9 @@
         payload["vqd"] = vqd
 
         cache = set()
-        results: List[Dict[str, str]] = []
+        results: list[dict[str, str]] = []
 
-        def _text_html_page(s: int) -> List[Dict[str, str]]:
+        def _text_html_page(s: int) -> list[dict[str, str]]:
             payload["s"] = f"{s}"
             resp_content = self._get_url("POST", "https://html.duckduckgo.com/html", data=payload)
             if b"No results." in resp_content:
@@ -456,12 +490,12 @@
             page_results = []
             tree = document_fromstring(resp_content, self.parser)
             elements = tree.xpath("//div[h2]")
-            if not isinstance(elements, List):
+            if not isinstance(elements, list):
                 return []
             for e in elements:
                 if isinstance(e, _Element):
                     hrefxpath = e.xpath("./a/@href")
-                    href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, List) else None
+                    href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
                     if (
                         href
                         and href not in cache
@@ -471,9 +505,9 @@
                     ):
                         cache.add(href)
                         titlexpath = e.xpath("./h2/a/text()")
-                        title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, List) else ""
+                        title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
                         bodyxpath = e.xpath("./a//text()")
-                        body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath, List) else ""
+                        body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath, list) else ""
                         result = {
                             "title": _normalize(title),
                             "href": _normalize_url(href),
@@ -498,10 +532,10 @@
         self,
         keywords: str,
         region: str = "wt-wt",
-        timelimit: Optional[str] = None,
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """DuckDuckGo text search. Query params: https://duckduckgo.com/params.
+        timelimit: str | None = None,
+        max_results: int | None = None,
+    ) -> list[dict[str, str]]:
+        """webscout text search. Query params: https://duckduckgo.com/params.
 
         Args:
             keywords: keywords for query.
@@ -532,9 +566,9 @@
             payload["df"] = timelimit
 
         cache = set()
-        results: List[Dict[str, str]] = []
+        results: list[dict[str, str]] = []
 
-        def _text_lite_page(s: int) -> List[Dict[str, str]]:
+        def _text_lite_page(s: int) -> list[dict[str, str]]:
             payload["s"] = f"{s}"
             resp_content = self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
             if b"No more results." in resp_content:
@@ -543,7 +577,7 @@
             page_results = []
             tree = document_fromstring(resp_content, self.parser)
             elements = tree.xpath("//table[last()]//tr")
-            if not isinstance(elements, List):
+            if not isinstance(elements, list):
                 return []
 
             data = zip(cycle(range(1, 5)), elements)
@@ -551,7 +585,7 @@
                 if isinstance(e, _Element):
                     if i == 1:
                         hrefxpath = e.xpath(".//a//@href")
-                        href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, List) else None
+                        href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
                         if (
                             href is None
                             or href in cache
@@ -563,12 +597,12 @@
                         else:
                             cache.add(href)
                             titlexpath = e.xpath(".//a//text()")
-                            title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, List) else ""
+                            title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
                     elif i == 2:
                         bodyxpath = e.xpath(".//td[@class='result-snippet']//text()")
                         body = (
                             "".join(str(x) for x in bodyxpath).strip()
-                            if bodyxpath and isinstance(bodyxpath, List)
+                            if bodyxpath and isinstance(bodyxpath, list)
                             else ""
                         )
                         if href:
@@ -597,15 +631,15 @@
         keywords: str,
         region: str = "wt-wt",
         safesearch: str = "moderate",
-        timelimit: Optional[str] = None,
-        size: Optional[str] = None,
-        color: Optional[str] = None,
-        type_image: Optional[str] = None,
-        layout: Optional[str] = None,
-        license_image: Optional[str] = None,
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """DuckDuckGo images search. Query params: https://duckduckgo.com/params.
+        timelimit: str | None = None,
+        size: str | None = None,
+        color: str | None = None,
+        type_image: str | None = None,
+        layout: str | None = None,
+        license_image: str | None = None,
+        max_results: int | None = None,
+    ) -> list[dict[str, str]]:
+        """webscout images search. Query params: https://duckduckgo.com/params.
 
         Args:
             keywords: keywords for query.
@@ -653,9 +687,9 @@
         }
 
         cache = set()
-        results: List[Dict[str, str]] = []
+        results: list[dict[str, str]] = []
 
-        def _images_page(s: int) -> List[Dict[str, str]]:
+        def _images_page(s: int) -> list[dict[str, str]]:
             payload["s"] = f"{s}"
             resp_content = self._get_url("GET", "https://duckduckgo.com/i.js", params=payload)
             resp_json = json_loads(resp_content)
@@ -695,13 +729,13 @@
         keywords: str,
         region: str = "wt-wt",
         safesearch: str = "moderate",
-        timelimit: Optional[str] = None,
-        resolution: Optional[str] = None,
-        duration: Optional[str] = None,
-        license_videos: Optional[str] = None,
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """DuckDuckGo videos search. Query params: https://duckduckgo.com/params.
+        timelimit: str | None = None,
+        resolution: str | None = None,
+        duration: str | None = None,
+        license_videos: str | None = None,
+        max_results: int | None = None,
+    ) -> list[dict[str, str]]:
+        """webscout videos search. Query params: https://duckduckgo.com/params.
 
         Args:
             keywords: keywords for query.
@@ -740,9 +774,9 @@
         }
 
         cache = set()
-        results: List[Dict[str, str]] = []
+        results: list[dict[str, str]] = []
 
-        def _videos_page(s: int) -> List[Dict[str, str]]:
+        def _videos_page(s: int) -> list[dict[str, str]]:
             payload["s"] = f"{s}"
             resp_content = self._get_url("GET", "https://duckduckgo.com/v.js", params=payload)
             resp_json = json_loads(resp_content)
@@ -772,10 +806,10 @@
         keywords: str,
         region: str = "wt-wt",
         safesearch: str = "moderate",
-        timelimit: Optional[str] = None,
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """DuckDuckGo news search. Query params: https://duckduckgo.com/params.
+        timelimit: str | None = None,
+        max_results: int | None = None,
+    ) -> list[dict[str, str]]:
+        """webscout news search. Query params: https://duckduckgo.com/params.
 
         Args:
             keywords: keywords for query.
@@ -809,9 +843,9 @@
             payload["df"] = timelimit
 
         cache = set()
-        results: List[Dict[str, str]] = []
+        results: list[dict[str, str]] = []
 
-        def _news_page(s: int) -> List[Dict[str, str]]:
+        def _news_page(s: int) -> list[dict[str, str]]:
             payload["s"] = f"{s}"
             resp_content = self._get_url("GET", "https://duckduckgo.com/news.js", params=payload)
             resp_json = json_loads(resp_content)
@@ -844,8 +878,8 @@
 
         return list(islice(results, max_results))
 
-    def answers(self, keywords: str) -> List[Dict[str, str]]:
-        """DuckDuckGo instant answers. Query params: https://duckduckgo.com/params.
+    def answers(self, keywords: str) -> list[dict[str, str]]:
+        """webscout instant answers. Query params: https://duckduckgo.com/params.
 
         Args:
             keywords: keywords for query,
@@ -915,8 +949,8 @@
 
         return results
 
-    def suggestions(self, keywords: str, region: str = "wt-wt") -> List[Dict[str, str]]:
-        """DuckDuckGo suggestions. Query params: https://duckduckgo.com/params.
+    def suggestions(self, keywords: str, region: str = "wt-wt") -> list[dict[str, str]]:
+        """webscout suggestions. Query params: https://duckduckgo.com/params.
 
         Args:
             keywords: keywords for query.
@@ -943,19 +977,19 @@
     def maps(
         self,
         keywords: str,
-        place: Optional[str] = None,
-        street: Optional[str] = None,
-        city: Optional[str] = None,
-        county: Optional[str] = None,
-        state: Optional[str] = None,
-        country: Optional[str] = None,
-        postalcode: Optional[str] = None,
-        latitude: Optional[str] = None,
-        longitude: Optional[str] = None,
+        place: str | None = None,
+        street: str | None = None,
+        city: str | None = None,
+        county: str | None = None,
+        state: str | None = None,
+        country: str | None = None,
+        postalcode: str | None = None,
+        latitude: str | None = None,
+        longitude: str | None = None,
         radius: int = 0,
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """DuckDuckGo maps search. Query params: https://duckduckgo.com/params.
+        max_results: int | None = None,
+    ) -> list[dict[str, str]]:
+        """webscout maps search. Query params: https://duckduckgo.com/params.
 
         Args:
             keywords: keywords for query
@@ -1038,11 +1072,11 @@
         logger.debug(f"bbox coordinates\n{lat_t} {lon_l}\n{lat_b} {lon_r}")
 
         cache = set()
-        results: List[Dict[str, str]] = []
+        results: list[dict[str, str]] = []
 
         def _maps_page(
-            bbox: Tuple[Decimal, Decimal, Decimal, Decimal],
-        ) -> Optional[List[Dict[str, str]]]:
+            bbox: tuple[Decimal, Decimal, Decimal, Decimal],
+        ) -> list[dict[str, str]] | None:
             if max_results and len(results) >= max_results:
                 return None
             lat_t, lon_l, lat_b, lon_r = bbox
@@ -1129,10 +1163,8 @@
 
         return list(islice(results, max_results))
 
-    def translate(
-        self, keywords: Union[List[str], str], from_: Optional[str] = None, to: str = "en"
-    ) -> List[Dict[str, str]]:
-        """DuckDuckGo translate.
+    def translate(self, keywords: list[str] | str, from_: str | None = None, to: str = "en") -> list[dict[str, str]]:
+        """webscout translate.
 
         Args:
             keywords: string or list of strings to translate.
@@ -1159,14 +1191,14 @@
         if from_:
             payload["from"] = from_
 
-        def _translate_keyword(keyword: str) -> Dict[str, str]:
+        def _translate_keyword(keyword: str) -> dict[str, str]:
             resp_content = self._get_url(
                 "POST",
                 "https://duckduckgo.com/translation.js",
                 params=payload,
                 content=keyword.encode(),
             )
-            page_data: Dict[str, str] = json_loads(resp_content)
+            page_data: dict[str, str] = json_loads(resp_content)
             page_data["original"] = keyword
             return page_data
 
@@ -1182,6 +1214,7 @@
 
         return results
 
+
 html_parser = html.parser.HTMLParser()
 
 
 
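For a quick smoke test of the reworked chat() path, here is a minimal usage sketch. It assumes this file is importable as the `webscout` module and that the DuckDuckGo chat endpoint is reachable; names and defaults are taken from the diff above.

```python
# Minimal sketch of the updated chat() flow (not part of the commit itself).
from webscout import WEBS

with WEBS() as webs:
    # "gpt-4o-mini" is the new default model; deprecated aliases such as
    # "gpt-3.5" are remapped via models_deprecated and logged at INFO level.
    reply = webs.chat("What is the capital of France?", model="gpt-4o-mini")
    print(reply)
```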
 
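The new streaming parser also distinguishes conversation exhaustion from ordinary rate limiting on a 429 payload. A handling sketch, assuming the exception classes stay importable from the same module:

```python
# Sketch of the new chat() error paths: a 429 payload raises
# ConversationLimitException for "ERR_CONVERSATION_LIMIT", RatelimitE
# otherwise; any other {"action": "error"} payload raises WebscoutE.
from webscout import WEBS, ConversationLimitException, RatelimitE, WebscoutE

webs = WEBS()
try:
    print(webs.chat("hello"))
except ConversationLimitException:
    webs = WEBS()  # conversation limit hit: drop state and start fresh
except RatelimitE:
    pass  # rate limited: back off and retry later
except WebscoutE as err:
    print(f"chat failed: {err}")
```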
 
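Finally, the new "tb" proxy alias is expanded by _expand_proxy_tb_alias() in __init__. A sketch of the equivalence; 127.0.0.1:9150 is the SOCKS endpoint the helper hard-codes (the Tor Browser default).

```python
# The "tb" alias and the explicit SOCKS URL configure the client identically,
# per _expand_proxy_tb_alias() in the diff above.
from webscout import WEBS

webs_alias = WEBS(proxy="tb")
webs_explicit = WEBS(proxy="socks5://127.0.0.1:9150")
assert webs_alias.proxy == webs_explicit.proxy
```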