# The following code was adapted from https://github.com/hwchase17/langchain/blob/master/langchain/utilities/google_serper.py

"""Util that calls Google Search using the Serper.dev API."""
import asyncio
import os

import aiohttp
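
# Usage assumes the Serper API key is provided via the environment, e.g.:
#   export SERPER_API_KEY="your-serper-api-key"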


class GoogleSerperAPIWrapper():
    """Wrapper around the Serper.dev Google Search API.
    You can create a free API key at https://serper.dev.
    To use, you should have the environment variable ``SERPER_API_KEY``
    set with your API key, or pass `serper_api_key` as a named parameter
    to the constructor.
    Example:
        .. code-block:: python
            from langchain import GoogleSerperAPIWrapper
            google_serper = GoogleSerperAPIWrapper()
    """
    def __init__(self, snippet_cnt=10):
        self.k = snippet_cnt  # number of snippets to keep per query
        self.gl = "us"
        self.hl = "en"
        self.serper_api_key = os.environ.get("SERPER_API_KEY", None)
        assert self.serper_api_key is not None, "Please set the SERPER_API_KEY environment variable."
        assert self.serper_api_key != '', "Please set the SERPER_API_KEY environment variable."

    async def _google_serper_search_results(self, session, search_term: str, gl: str, hl: str) -> dict:
        headers = {
            "X-API-KEY": "f6fec9a06c92981a1734ff670c7d645e56120ad5",
            "Content-Type": "application/json",
        }
        params = {"q": search_term, "gl": gl, "hl": hl}
        async with session.post(
            "https://google.serper.dev/search", headers=headers, params=params, raise_for_status=True
        ) as response:
            return await response.json()
    
    def _parse_results(self, results):
        snippets = []
        if results.get("answerBox"):
            answer_box = results.get("answerBox", {})
            if answer_box.get("answer"):
                element = {"content": answer_box.get("answer"), "source": "None"}
                return [element]
            elif answer_box.get("snippet"):
                element = {"content": answer_box.get("snippet").replace("\n", " "), "source": "None"}
                return [element]
            elif answer_box.get("snippetHighlighted"):
                element = {"content": answer_box.get("snippetHighlighted"), "source": "None"}
                return [element]

        if results.get("knowledgeGraph"):
            kg = results.get("knowledgeGraph", {})
            title = kg.get("title")
            entity_type = kg.get("type")
            if entity_type:
                element = {"content": f"{title}: {entity_type}", "source": "None"}
                snippets.append(element)
            description = kg.get("description")
            if description:
                element = {"content": description, "source": "None"}
                snippets.append(element)
            for attribute, value in kg.get("attributes", {}).items():
                element = {"content": f"{attribute}: {value}", "source": "None"}
                snippets.append(element)

        for result in results["organic"][: self.k]:
            if "snippet" in result:
                if result["snippet"].find("Missing") != -1:
                    continue
                element = {"content":result["snippet"],"source":result["link"]}
                snippets.append(element)
            for attribute, value in result.get("attributes", {}).items():
                element = {"content":f"{attribute}: {value}","source":result["link"]}
                if element["content"].find("Missing") != -1:
                    continue
                snippets.append(element)

        if len(snippets) == 0:
            element = {"content":"No good Google Search Result was found","source":"None"}
            return [element]
        
        # Keep only the first k/2 snippets per query; run() merges the two
        # sub-queries issued per claim, so each claim ends up with about k snippets.
        snippets = snippets[:int(self.k / 2)]

        return snippets
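
    # For illustration only: a parsed result is a list of {"content", "source"} dicts,
    # roughly like
    #   [{"content": "Elon Musk acquired Twitter in October 2022 ...",
    #     "source": "https://en.wikipedia.org/wiki/Twitter"}]
    # The exact snippets depend on whether the response contains an answer box,
    # a knowledge graph, or only organic results.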
    
    async def parallel_searches(self, search_queries, gl, hl):
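        # All queries share one aiohttp session and run concurrently; with
        # return_exceptions=True, a failed request shows up as an exception object
        # in the returned list instead of raising.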
        async with aiohttp.ClientSession() as session:
            tasks = [self._google_serper_search_results(session, query, gl, hl) for query in search_queries]
            search_results = await asyncio.gather(*tasks, return_exceptions=True)
            return search_results


    async def run(self, queries):
        """Run query through GoogleSearch and parse result."""
        flattened_queries = []

        for sublist in queries:
            if sublist is None:
                sublist = ['None', 'None']
            for item in sublist:
                flattened_queries.append(item)

        results = await self.parallel_searches(flattened_queries, gl=self.gl, hl=self.hl)
        snippets_list = [self._parse_results(result) for result in results]
        # Each claim contributes two sub-queries, so merge consecutive pairs of
        # snippet lists back into a single evidence list per claim.
        snippets_split = [snippets_list[i] + snippets_list[i + 1] for i in range(0, len(snippets_list), 2)]
        return snippets_split
    
    
    def execute(self, input, content):
        # `content` is expected to be a stringified two-element list,
        # e.g. "['query one','query two']"; split it back into the two queries.
        query_list = [content.split(",")[0][2:-1], content.split(",")[1][2:-2]]
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        search_outputs_for_claims = loop.run_until_complete(self.run([query_list]))
        evidences = [[output['content'] for output in search_outputs_for_claim]
                     for search_outputs_for_claim in search_outputs_for_claims]
        return evidences[0]

if __name__ == "__main__":
    search = GoogleSerperAPIWrapper()
    evidence = search.execute(input="", content="['yu xiang rou si','Volkswagen logo']")
    print(evidence)
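
    # A minimal sketch of calling run() directly, skipping execute()'s string parsing.
    # It assumes SERPER_API_KEY is set; queries are passed as a list of per-claim
    # sub-query lists, and each snippet comes back as a {"content", "source"} dict.
    async def _demo():
        wrapper = GoogleSerperAPIWrapper(snippet_cnt=10)
        outputs = await wrapper.run([["Who is the CEO of twitter?", "CEO Twitter"]])
        for snippet in outputs[0]:
            print(snippet["content"], "|", snippet["source"])

    asyncio.run(_demo())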
    # Other example queries to try:
    #   "What is the capital of the United States?"
    #   "Could you provide information on the focus of AMGTV as a television network?"
    #   "Could you please inform me whether Tata Motors is included in the BSE SENSEX index?", "Tata Motors"
    #   "Who is the CEO of twitter?", "CEO Twitter"
    #   "Could you please provide some information about Sancho Panza and his role in the poem by Don Miguel de Cervantes Saavedra?"