import { decode } from "html-entities";
import { convert as htmlToText } from "html-to-text";
import { Tool } from "langchain/tools";

// Patterns that pull the individual JSON payloads out of the `d.js` response
// body. Each payload is delimited by the loader call that follows it.
const SEARCH_REGEX =
  /DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'/;
const IMAGES_REGEX =
  /;DDG\.duckbar\.load\('images', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('news/;
const NEWS_REGEX =
  /;DDG\.duckbar\.load\('news', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('videos/;
const VIDEOS_REGEX =
  /;DDG\.duckbar\.load\('videos', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.loadModule\('related_searches/;
const RELATED_SEARCHES_REGEX =
  /DDG\.duckbar\.loadModule\('related_searches', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('products/;
/** Extracts the VQD token embedded in the DuckDuckGo landing page. */
const VQD_REGEX = /vqd=['"](\d+-\d+(?:-\d+)?)['"]/;

/**
 * Shape accepted for a caller-supplied VQD. Mirrors the capture group of
 * `VQD_REGEX` so that any token returned by `getVQD` can be passed back in
 * through `SearchOptions.vqd`.
 */
const VQD_FORMAT_REGEX = /^\d+-\d+(?:-\d+)?$/;

/** A custom date range such as "2021-03-16..2021-03-30". */
const DATE_RANGE_REGEX = /^\d{4}-\d{2}-\d{2}\.\.\d{4}-\d{2}-\d{2}$/;

interface CallbackSearchResult {
  /** Website description */
  a: string;
  /** Unknown */
  ae: null;
  /** ddg!bang information (ex. w Wikipedia en.wikipedia.org) */
  b?: string;
  /** URL */
  c: string;
  /** URL of some sort. */
  d: string;
  /** Class name associations. */
  da?: string;
  /** Unknown */
  h: number;
  /** Website hostname */
  i: string;
  /** Unknown */
  k: null;
  /** Unknown */
  m: number;
  /** Unknown */
  o: number;
  /** Unknown */
  p: number;
  /** Unknown */
  s: string;
  /** Website Title */
  t: string;
  /** Website URL */
  u: string;
}

interface CallbackNextSearch {
  /** URL to the next page of results */
  n: string;
}

/** Envelope shared by the image, news, video and related-search payloads. */
interface CallbackDuckbarPayload<T> {
  ads: null | any[];
  query: string;
  queryEncoded: string;
  response_type: string;
  results: T[];
  vqd: {
    [query: string]: string;
  };
}

interface DuckbarImageResult {
  /** The height of the image in pixels. */
  height: number;
  /** The image URL. */
  image: string;
  /** The source of the image. */
  source: string;
  /** The thumbnail URL. */
  thumbnail: string;
  /** The title (or caption) of the image. */
  title: string;
  /** The website URL of where the image came from. */
  url: string;
  /** The width of the image in pixels. */
  width: number;
}

interface DuckbarVideoResult {
  /** URL of the video */
  content: string;
  /** Description of the video */
  description: string;
  /** Duration of the video */
  duration: string;
  /** Embed HTML for the video */
  embed_html: string;
  /** Embed URL for the video */
  embed_url: string;
  /** Thumbnail images of the video */
  images: {
    large: string;
    medium: string;
    motion: string;
    small: string;
  };
  /** Where this search result came from */
  provider: string;
  /** ISO timestamp of the upload */
  published: string;
  /** What site the video was on */
  publisher: string;
  /** Various statistics */
  statistics: {
    /** View count of the video */
    viewCount: number | null;
  };
  /** Title of the video */
  title: string;
  /** Name of the video uploader(?) */
  uploader: string;
}

interface DuckbarRelatedSearch {
  display_text: string;
  text: string;
  web_search_url: string;
}

interface DuckbarNewsResult {
  date: number;
  excerpt: string;
  image?: string;
  relative_time: string;
  syndicate: string;
  title: string;
  url: string;
  use_relevancy: number;
  is_old?: number;
  fetch_image?: number;
}

interface SearchResults {
  /** Whether there were no results found. */
  noResults: boolean;
  /** The VQD of the search query. */
  vqd: string;
  /** The web results of the search. */
  results: SearchResult[];
  /** The image results of the search. */
  images?: DuckbarImageResult[];
  /** The news article results of the search. */
  news?: NewsResult[];
  /** The video results of the search. */
  videos?: VideoResult[];
  /** The related searches of the query. */
  related?: RelatedResult[];
}

interface VideoResult {
  /** The URL of the video. */
  url: string;
  /** The title of the video. */
  title: string;
  /** The description of the video. */
  description: string;
  /** The image URL of the video. */
  image: string;
  /** The duration of the video. (i.e. "9:20") */
  duration: string;
  /** The ISO timestamp of when the video was published. */
  published: string;
  /** Where the video was published on. (i.e. "YouTube") */
  publishedOn: string;
  /** The name of who uploaded the video. */
  publisher: string;
  /** The view count of the video. */
  viewCount?: number;
}

interface NewsResult {
  /** The timestamp of when the article was created. */
  date: number;
  /** An excerpt of the article. */
  excerpt: string;
  /** The image URL used in the article. */
  image?: string;
  /** The relative time of when the article was posted, in human readable format. */
  relativeTime: string;
  /** Where this article was indexed from. */
  syndicate: string;
  /** The title of the article. */
  title: string;
  /** The URL of the article. */
  url: string;
  /** Whether this article is classified as old. */
  isOld: boolean;
}

interface SearchResult {
  /** The hostname of the website. (i.e. "google.com") */
  hostname: string;
  /** The URL of the result. */
  url: string;
  /** The title of the result. */
  title: string;
  /**
   * The sanitized description of the result.
   * Bold tags will still be present in this string.
   */
  description: string;
  /** The description of the result. */
  rawDescription: string;
  /** The icon of the website. */
  icon: string;
  /** The ddg!bang information of the website, if any. */
  bang?: SearchResultBang;
}

interface SearchResultBang {
  /** The prefix of the bang. (i.e. "w" for !w) */
  prefix: string;
  /** The title of the bang. */
  title: string;
  /** The domain of the bang. */
  domain: string;
}

interface RelatedResult {
  text: string;
  raw: string;
}

enum SearchTimeType {
  /** From any time. */
  ALL = "a",
  /** From the past day. */
  DAY = "d",
  /** From the past week. */
  WEEK = "w",
  /** From the past month. */
  MONTH = "m",
  /** From the past year. */
  YEAR = "y",
}

interface SearchOptions {
  /** The safe search type of the search. */
  safeSearch?: SafeSearchType;
  /** The time range of the searches, can be a SearchTimeType or a date range ("2021-03-16..2021-03-30") */
  time?: SearchTimeType | string;
  /** The locale(?) of the search. Defaults to "en-us". */
  locale?: string;
  /** The region of the search. Defaults to "wt-wt" or all regions. */
  region?: string;
  /** The market region(?) of the search. Defaults to "US". */
  marketRegion?: string;
  /** The number to offset the results to. */
  offset?: number;
  /**
   * The string that acts like a key to a search.
   * Set this if you made a search with the same query.
   */
  vqd?: string;
}

enum SafeSearchType {
  /** Strict filtering, no NSFW content. */
  STRICT = 0,
  /** Moderate filtering. */
  MODERATE = -1,
  /** No filtering. */
  OFF = -2,
}

const defaultOptions: SearchOptions = {
  safeSearch: SafeSearchType.OFF,
  time: SearchTimeType.ALL,
  locale: "en-us",
  region: "wt-wt",
  offset: 0,
  marketRegion: "us",
};

/**
 * Locates one duckbar payload in the response body and parses it.
 *
 * @param pattern Regex whose first capture group is the payload's JSON text.
 * @param data Raw body returned by the `d.js` endpoint.
 * @returns The parsed payload, or `undefined` when the pattern does not match.
 * @throws SyntaxError if the captured text is not valid JSON.
 */
function parseDuckbarPayload<T>(
  pattern: RegExp,
  data: string,
): CallbackDuckbarPayload<T> | undefined {
  const match = pattern.exec(data);
  if (!match) return undefined;
  // Literal tab characters are replaced before parsing, as they are not
  // permitted inside JSON string literals.
  return JSON.parse(match[1].replace(/\t/g, " ")) as CallbackDuckbarPayload<T>;
}

/**
 * Runs a DuckDuckGo search and collects the web results together with any
 * image, news, video and related-search payloads found in the response.
 *
 * @param query The search query; must be non-empty.
 * @param options Optional search settings. When omitted, `defaultOptions` is
 *   used as-is; otherwise the options are merged with the defaults and
 *   validated by `sanityCheck`.
 * @returns The parsed results. `noResults` is `true` when the response holds
 *   only a single placeholder entry.
 * @throws Error if the query is empty, the VQD cannot be obtained, the server
 *   reports an error, or the web-results payload cannot be located.
 * @throws TypeError | RangeError if `options` fails validation.
 */
async function search(
  query: string,
  options?: SearchOptions,
): Promise<SearchResults> {
  if (!query) throw new Error("Query cannot be empty!");

  const opts = options ? sanityCheck(options) : defaultOptions;
  const vqd = opts.vqd || (await getVQD(query, "web"));
  const isStrict = opts.safeSearch === SafeSearchType.STRICT;

  // Key order is preserved from the reference request so the produced query
  // string is unchanged. The non-null assertions are safe: both
  // `defaultOptions` and `sanityCheck` guarantee these fields are set.
  const queryObject: Record<string, string> = {
    q: query,
    ...(!isStrict ? { t: "D" } : {}),
    l: opts.locale!,
    ...(isStrict ? { p: "1" } : {}),
    kl: opts.region || "wt-wt",
    s: String(opts.offset),
    dl: "en",
    ct: "US",
    ss_mkt: opts.marketRegion!,
    df: opts.time as string,
    vqd,
    ...(!isStrict ? { ex: String(opts.safeSearch) } : {}),
    sp: "1",
    bpa: "1",
    biaexp: "b",
    msvrtexp: "b",
    ...(isStrict
      ? {
          videxp: "a",
          nadse: "b",
          eclsexp: "a",
          stiaexp: "a",
          tjsexp: "b",
          related: "b",
          msnexp: "a",
        }
      : {
          nadse: "b",
          eclsexp: "b",
          tjsexp: "b",
          // cdrexp: 'b'
        }),
  };

  const response = await fetch(
    `https://links.duckduckgo.com/d.js?${queryString(queryObject)}`,
  );
  const data = await response.text();

  if (data.includes("DDG.deep.is506"))
    throw new Error("A server error occurred!");

  // Fail with a descriptive error instead of dereferencing a null match when
  // the response body does not have the expected shape.
  const searchMatch = SEARCH_REGEX.exec(data);
  if (!searchMatch)
    throw new Error(`Failed to parse the search results for query "${query}".`);

  const searchResults = JSON.parse(searchMatch[1].replace(/\t/g, " ")) as (
    | CallbackSearchResult
    | CallbackNextSearch
  )[];

  // A lone entry that is not a "next page" marker may be a placeholder that
  // stands for "nothing found".
  if (searchResults.length === 1 && !("n" in searchResults[0])) {
    const onlyResult = searchResults[0] as CallbackSearchResult;
    /* istanbul ignore next */
    if (
      (!onlyResult.da && onlyResult.t === "EOF") ||
      !onlyResult.a ||
      onlyResult.d === "google.com search"
    )
      return {
        noResults: true,
        vqd,
        results: [],
      };
  }

  const results: SearchResults = {
    noResults: false,
    vqd,
    results: [],
  };

  for (const entry of searchResults) {
    // Skip the "next page" marker; only real results are collected.
    if ("n" in entry) continue;

    let bang: SearchResultBang | undefined;
    if (entry.b) {
      const [prefix, title, domain] = entry.b.split("\t");
      bang = { prefix, title, domain };
    }

    results.results.push({
      title: entry.t,
      description: decode(entry.a),
      rawDescription: entry.a,
      hostname: entry.i,
      icon: `https://external-content.duckduckgo.com/ip3/${entry.i}.ico`,
      url: entry.u,
      bang,
    });
  }

  // Images
  const imagesPayload = parseDuckbarPayload<DuckbarImageResult>(
    IMAGES_REGEX,
    data,
  );
  if (imagesPayload) {
    results.images = imagesPayload.results.map((image) => ({
      ...image,
      title: decode(image.title),
    }));
  }

  // News
  const newsPayload = parseDuckbarPayload<DuckbarNewsResult>(NEWS_REGEX, data);
  if (newsPayload) {
    results.news = newsPayload.results.map((article) => ({
      date: article.date,
      excerpt: decode(article.excerpt),
      image: article.image,
      relativeTime: article.relative_time,
      syndicate: article.syndicate,
      title: decode(article.title),
      url: article.url,
      isOld: !!article.is_old,
    }));
  }

  // Videos
  const videosPayload = parseDuckbarPayload<DuckbarVideoResult>(
    VIDEOS_REGEX,
    data,
  );
  if (videosPayload) {
    /* istanbul ignore next */
    results.videos = videosPayload.results.map((video) => ({
      url: video.content,
      title: decode(video.title),
      description: decode(video.description),
      image:
        video.images.large ||
        video.images.medium ||
        video.images.small ||
        video.images.motion,
      duration: video.duration,
      publishedOn: video.publisher,
      published: video.published,
      publisher: video.uploader,
      // Only a missing count maps to undefined; a real count of 0 is kept.
      viewCount:
        video.statistics.viewCount == null
          ? undefined
          : video.statistics.viewCount,
    }));
  }

  // Related Searches
  const relatedPayload = parseDuckbarPayload<DuckbarRelatedSearch>(
    RELATED_SEARCHES_REGEX,
    data,
  );
  if (relatedPayload) {
    results.related = relatedPayload.results.map((related) => ({
      text: related.text,
      raw: related.display_text,
    }));
  }

  return results;
}

/** Serializes a flat string map into a URL query string. */
function queryString(query: Record<string, string>): string {
  return new URLSearchParams(query).toString();
}

/**
 * Fetches the DuckDuckGo landing page for a query and extracts its VQD token.
 *
 * @param query The search query.
 * @param ia Value sent as the `ia` query parameter. Defaults to "web".
 * @returns The VQD token captured by `VQD_REGEX`.
 * @throws Error if the request fails or no token is present in the page.
 */
async function getVQD(query: string, ia = "web"): Promise<string> {
  try {
    const response = await fetch(
      `https://duckduckgo.com/?${queryString({ q: query, ia })}`,
    );
    const data = await response.text();
    const match = VQD_REGEX.exec(data);
    if (!match) throw new Error("VQD token not found in response.");
    return match[1];
  } catch (e) {
    // Every failure (network or parsing) surfaces as the same error.
    throw new Error(`Failed to get the VQD for query "${query}".`);
  }
}

/**
 * Merges caller options over `defaultOptions` and validates the result.
 *
 * @param options Caller-supplied options; the object itself is not mutated.
 * @returns A new options object with defaults filled in.
 * @throws TypeError for an invalid safe-search value, offset type, time,
 *   locale, region or market region.
 * @throws RangeError if the offset is negative.
 * @throws Error if a supplied VQD is malformed.
 */
function sanityCheck(options: SearchOptions): SearchOptions {
  options = Object.assign({}, defaultOptions, options);

  if (!(options.safeSearch! in SafeSearchType))
    throw new TypeError(
      `${options.safeSearch} is an invalid safe search type!`,
    );

  // Untyped callers may pass the enum member *name* (e.g. "OFF"); map it to
  // its numeric value.
  const rawSafeSearch: unknown = options.safeSearch;
  /* istanbul ignore next */
  if (typeof rawSafeSearch === "string")
    options.safeSearch =
      SafeSearchType[rawSafeSearch as keyof typeof SafeSearchType];

  if (typeof options.offset !== "number")
    throw new TypeError(`Search offset is not a number!`);

  if (options.offset < 0)
    throw new RangeError("Search offset cannot be below zero!");

  // The range pattern is anchored and its ".." separator escaped, so only an
  // exact "YYYY-MM-DD..YYYY-MM-DD" string is accepted.
  if (
    options.time &&
    !Object.values(SearchTimeType).includes(options.time as SearchTimeType) &&
    !DATE_RANGE_REGEX.test(options.time as string)
  )
    throw new TypeError(`${options.time} is an invalid search time!`);

  if (!options.locale || typeof options.locale !== "string")
    throw new TypeError("Search locale must be a string!");

  if (!options.region || typeof options.region !== "string")
    throw new TypeError("Search region must be a string!");

  if (!options.marketRegion || typeof options.marketRegion !== "string")
    throw new TypeError("Search market region must be a string!");

  // Accept exactly the token shapes that `getVQD` can return.
  if (options.vqd && !VQD_FORMAT_REGEX.test(options.vqd))
    throw new Error(`${options.vqd} is an invalid VQD!`);

  return options;
}

/**
 * LangChain tool that answers a query with the plain-text descriptions of the
 * top DuckDuckGo web results.
 */
export class DuckDuckGo extends Tool {
  name = "duckduckgo_search";

  /** Maximum number of web results included in the tool output. */
  maxResults = 4;

  /** @ignore */
  async _call(input: string) {
    const searchResults = await search(input, {
      safeSearch: SafeSearchType.OFF,
    });

    if (searchResults.noResults) {
      return "No good search result found";
    }

    // Only the description is emitted; it is converted from HTML to plain
    // text and the entries are separated by a blank line.
    return searchResults.results
      .slice(0, this.maxResults)
      .map(({ description }) => htmlToText(description))
      .join("\n\n");
  }

  description =
    "a search engine. useful for when you need to answer questions about current events. input should be a search query.";
}