File size: 2,673 Bytes
06d7446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef3d4ad
 
06d7446
 
 
 
 
 
ef3d4ad
06d7446
 
 
e1118d8
 
 
06d7446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef3d4ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import collections as cl
from dataclasses import dataclass

#
#
#
@dataclass
class Citation:
    text: str
    refn: str
    cite: str

def unique(values):
    seen = set()
    for v in values:
        if v not in seen:
            yield v
            seen.add(v)

#
#
#
class CitationParser:
    def __init__(self, client, start=1):
        self.client = client
        self.start = start

    def __next__(self):
        value = f'[{self.start}]'
        self.start += 1
        return value

    def __call__(self, annotations):
        for a in annotations:
            document = self.client.files.retrieve(a.file_citation.file_id)
            yield Citation(a.text, *self.extract(a, document.filename))

    def extract(self, annotation, document):
        raise NotImplementedError()

class StandardCitationParser(CitationParser):
    def extract(self, annotation, document):
        reference = next(self)
        citation = '{} {}:{}--{}'.format(
            reference,
            document,
            annotation.start_index,
            annotation.end_index,
        )

        return (reference, citation)

class SimpleCitationParser(CitationParser):
    def __init__(self, client, start=1):
        super().__init__(client, start)
        self.citations = {}

    def extract(self, annotation, document):
        if document in self.citations:
            reference = self.citations[document]
        else:
            reference = next(self)
            self.citations[document] = reference
        citation = f'{reference} {document}'

        return (reference, citation)

#
#
#
class CitationManager:
    # _c_parser = StandardCitationParser
    _c_parser = SimpleCitationParser

    def __init__(self, annotations, client, start):
        self.body = {}

        c_parser = self._c_parser(client, start)
        citations = []
        for c in c_parser(annotations):
            self.body[c.text] = c.refn
            citations.append(c.cite)

        self.citations = list(unique(citations))

    def __len__(self):
        return len(self.citations)

    def __str__(self):
        raise NotImplementedError()

    def __iter__(self):
        raise NotImplementedError()

    def replace(self, body):
        for i in self:
            body = body.replace(*i)

        return body

class NumericCitations(CitationManager):
    def __str__(self):
        return '\n\n{}'.format('\n'.join(self.citations))

    def __iter__(self):
        for (k, v) in self.body.items():
            yield (k, f' {v}')

class NoCitations(CitationManager):
    def __str__(self):
        return ''

    def __iter__(self):
        yield from zip(self.body, it.repeat(''))