File size: 4,523 Bytes
7a8b33f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import argparse
import re
from collections import defaultdict
import json
from text_utils import find_matching_indices
from pathlib import Path


class FormatDocument:
    """Annotate a document with Markdown footnotes describing fact-check verdicts.

    Each verdict is attached to the passage it was extracted from by inserting
    a ``[^label]`` marker right after that passage and appending a matching
    ``[^label]: ...`` definition at the end of the document.

    Two footnote styles are supported:

    * ``"terse"``   - numeric labels; footnote shows claim, verdict and URLs.
    * ``"verbose"`` - labels derived from the claim text; footnote additionally
      shows the supporting quotes, the justification and the access date.
    """

    # Emoji are spelled as escapes so the literals cannot be corrupted when the
    # file is read or written with the wrong text encoding.
    _VERDICT_PREFIXES = {
        "Fully supported": "\u2705",  # white heavy check mark
        "Partially supported": "\u2753",  # question mark ornament
        "Unsupported": "\u2757",  # heavy exclamation mark
    }
    _POINTER = "\U0001F449"  # backhand index pointing right
    _SUPPORTED_STYLES = ("terse", "verbose")
    # Footnote labels cannot contain much punctuation in Jekyll (even though it
    # is valid in GitHub-flavoured markdown): spaces become hyphens and the
    # listed punctuation characters are dropped.
    _LABEL_TRANSLATION = str.maketrans(" ", "-", ",.\"':;()[]{}*")

    def __init__(
        self,
        footnote_style: str,
        temperature=0.0,
        model="gpt-4",
        dest_dir=Path("data/extracted_claims"),
        filter_str="",
        refresh=False,
    ):
        """Store the formatting configuration.

        Args:
            footnote_style: Default footnote style, ``"terse"`` or ``"verbose"``.
            temperature, model, dest_dir, filter_str, refresh: Stored on the
                instance unchanged; none of the methods of this class read them.
        """
        self.temperature = temperature
        self.model = model
        self.dest_dir = dest_dir
        self.filter_str = filter_str
        self.refresh = refresh
        self.footnote_style = footnote_style

    def cleanup_explanation(self, claim_assessment: dict, mode: str) -> str:
        """Render the footnote text for a single assessed claim.

        Args:
            claim_assessment: Mapping with a ``"claim"`` string and an
                ``"assessment"`` mapping holding ``"verdict"`` and ``"URLs"``
                (plus ``"quotes"``, ``"justification"`` and ``"date_accessed"``
                in verbose mode).
            mode: ``"terse"`` or ``"verbose"``.

        Returns:
            The footnote text, prefixed with an emoji reflecting the verdict.

        Raises:
            ValueError: If ``mode`` is not a supported footnote style.
            KeyError: If a required key is missing or the verdict is not one of
                the known categories.
        """
        if mode not in self._SUPPORTED_STYLES:
            raise ValueError(
                f"Unsupported footnote style {mode!r}; "
                f"expected one of {self._SUPPORTED_STYLES}"
            )

        claim = claim_assessment["claim"]
        assessment = claim_assessment["assessment"]
        category = assessment["verdict"]
        urls = assessment["URLs"]
        prefix = self._VERDICT_PREFIXES[category]

        if mode == "terse":
            footnote = f"Claim: {claim} {self._POINTER} {category} {urls}"
        else:
            quotes = ",".join(f'"{quote}"' for quote in assessment["quotes"])
            # A footnote definition must stay on one line. Newlines are turned
            # into spaces (not removed) so neighbouring words are not fused.
            justification = assessment["justification"].replace("\n", " ")
            date_accessed = assessment["date_accessed"]
            footnote = (
                f"Claim: {claim} {self._POINTER} {category} {quotes} "
                f"{justification}, URLs: {urls}, date accessed: {date_accessed}"
            )
        return f"{prefix} {footnote}"

    def _footnote_label(self, fact_idx: int, claim_assessment: dict, style: str) -> str:
        """Return the label placed inside ``[^...]`` for one claim."""
        if style == "terse":
            return f"{fact_idx + 1}"
        if style == "verbose":
            # NOTE: two claims with identical text yield identical labels.
            return claim_assessment["claim"].translate(self._LABEL_TRANSLATION)
        raise ValueError(
            f"Unsupported footnote style {style!r}; "
            f"expected one of {self._SUPPORTED_STYLES}"
        )

    def reformat_document_to_include_claims(
        self,
        original_text,
        fact_verdicts,
        footnote_style=None,
    ):
        """Insert footnote markers into ``original_text`` and append footnotes.

        Args:
            original_text: The document the claims were extracted from.
            fact_verdicts: Iterable of claim assessments. Each needs a
                ``"verbatim_quote"`` occurring exactly once in
                ``original_text``, plus the keys read by
                ``cleanup_explanation``.
            footnote_style: Optional style override. When truthy it also
                replaces ``self.footnote_style`` for subsequent calls.

        Returns:
            The annotated document followed by a blank line and one footnote
            definition per line, or a fixed notice when ``fact_verdicts`` is
            empty.

        Raises:
            AssertionError: If a verbatim quote does not occur exactly once.
            ValueError: If the footnote style is unsupported (only detected
                when there is at least one verdict to format).
        """
        if footnote_style:
            self.footnote_style = footnote_style
        style = self.footnote_style

        insertions = []
        bibliography = []
        for fact_idx, claim_assessment in enumerate(fact_verdicts):
            label = self._footnote_label(fact_idx, claim_assessment, style)
            explanation = self.cleanup_explanation(claim_assessment, mode=style)
            marker = f"[^{label}]"
            query = claim_assessment["verbatim_quote"]

            # Raised explicitly rather than via `assert` so the check still
            # runs under `python -O`; the exception type is unchanged.
            match_count = original_text.count(query)
            if match_count != 1:
                raise AssertionError(
                    f"Found {match_count} matches for {query}, rather than 1"
                )
            end_pos = original_text.find(query) + len(query)
            insertions.append((end_pos, marker))
            bibliography.append(f"{marker}: {explanation} ")

        if not bibliography:
            print("No objective facts were found.")
            return "No clear-cut objective claims were detected."

        # Stitch the text together in one forward pass. Markers that share a
        # position come out in ascending string order.
        pieces = []
        cursor = 0
        for char_pos, marker in sorted(insertions):
            pieces.append(original_text[cursor:char_pos])
            pieces.append(marker)
            cursor = char_pos
        pieces.append(original_text[cursor:])

        return "".join(pieces) + "\n\n" + "\n".join(bibliography)