File size: 3,134 Bytes
7a8b33f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import argparse
import json
import multiprocessing as mp
from datetime import datetime
import time
from zsvision.zs_multiproc import starmap_with_kwargs
import weaviate
import os


class CohereEvidence:
    """Fetch Wikipedia passages from a Weaviate index to use as claim evidence.

    The constructor only stores its arguments; none of the methods defined in
    this class read ``processes``, ``filter_str`` or ``refresh``.
    """

    def __init__(self, processes=8, filter_str="", refresh=False):
        self.processes = processes
        self.filter_str = filter_str
        self.refresh = refresh

    def semantic_search(self, query, client, results_lang="", limit=5):
        """Query the vector database and return the top results.

        Args:
            query: Text used as the single ``nearText`` concept.
            client: Weaviate client exposing ``client.query.get(...)``.
            results_lang: Value the ``lang`` property must equal. The empty
                string (default) searches all languages.
            limit: Maximum number of articles to request (default 5).

        Returns:
            The list stored under ``data -> Get -> Articles`` in the response.

        Raises:
            RuntimeError: If the response carries no article list, e.g. because
                the server answered with an ``errors`` payload.
        """
        near_text = {"concepts": [query]}
        properties = ["text", "title", "url", "views", "lang", "_additional {distance}"]

        # Build the query once; the language filter is the only optional step.
        search = client.query.get("Articles", properties)
        if results_lang != "":
            search = search.with_where(
                {
                    "path": ["lang"],
                    "operator": "Equal",
                    "valueString": results_lang,
                }
            )
        response = search.with_near_text(near_text).with_limit(limit).do()

        # Surface a failed query with its details instead of an opaque
        # KeyError here or a TypeError later when the caller iterates.
        try:
            articles = response["data"]["Get"]["Articles"]
        except (KeyError, TypeError) as err:
            articles = None
            cause = err
        else:
            cause = None

        if articles is None:
            errors = response.get("errors") if isinstance(response, dict) else None
            details = errors if errors else response
            raise RuntimeError(
                f"Weaviate query for {query!r} failed: {details}"
            ) from cause

        return articles

    def fetch_cohere_semantic_search_results_to_gather_evidence(
        self,
        queryset: list,
    ):
        """Attach English semantic-search results to every query in ``queryset``.

        Each item must be a dict with a ``"claim"`` key. The item is updated in
        place with a ``"search_results"`` list in which every hit's ``"url"``
        key has been renamed to ``"link"``.

        API keys are read from the ``WEAVIATE_API_KEY`` and ``COHERE_API_KEY``
        environment variables. The method blocks, polling once per second,
        until the Weaviate client reports it is ready.

        Args:
            queryset: Iterable of query dicts (mutated in place).

        Returns:
            A dict with the updated ``queryset`` under ``"documents"`` and the
            fetch date (``YYYY-MM-DD``) under ``"dates"``.
        """
        # 10M wiki embeddings (1M in English)
        weaviate_api_key = os.environ.get("WEAVIATE_API_KEY")
        cohere_api_key = os.environ.get("COHERE_API_KEY")

        client = weaviate.Client(
            url="https://cohere-demo.weaviate.network/",
            auth_client_secret=weaviate.auth.AuthApiKey(api_key=weaviate_api_key),
            additional_headers={"X-Cohere-Api-Key": cohere_api_key},
        )

        while not client.is_ready():
            print("Waiting for client to be ready")
            time.sleep(1)

        for item in queryset:
            hits = self.semantic_search(
                item["claim"], client=client, results_lang="en"
            )
            # rename "url" to "link" to be consistent with google results
            for hit in hits:
                hit["link"] = hit.pop("url")
            item["search_results"] = hits

        # update the queryset with new information
        date_str = datetime.now().strftime("%Y-%m-%d")
        summary = {
            "documents": queryset,
            "dates": {"results_fetched_from_wikipedia_1M_with_cohere-22-12": date_str},
        }
        print(f"Returning Cohere Wikipedia paragraph for {len(queryset)} queries")
        return summary