openai-file-chat / mylib /_citations.py
jerome-white's picture
Make the parent abstract to follow the template pattern more closely
e1118d8
import collections as cl
from dataclasses import dataclass
#
#
#
@dataclass
class Citation:
text: str
refn: str
cite: str
def unique(values):
seen = set()
for v in values:
if v not in seen:
yield v
seen.add(v)
#
#
#
class CitationParser:
def __init__(self, client, start=1):
self.client = client
self.start = start
def __next__(self):
value = f'[{self.start}]'
self.start += 1
return value
def __call__(self, annotations):
for a in annotations:
document = self.client.files.retrieve(a.file_citation.file_id)
yield Citation(a.text, *self.extract(a, document.filename))
def extract(self, annotation, document):
raise NotImplementedError()
class StandardCitationParser(CitationParser):
def extract(self, annotation, document):
reference = next(self)
citation = '{} {}:{}--{}'.format(
reference,
document,
annotation.start_index,
annotation.end_index,
)
return (reference, citation)
class SimpleCitationParser(CitationParser):
def __init__(self, client, start=1):
super().__init__(client, start)
self.citations = {}
def extract(self, annotation, document):
if document in self.citations:
reference = self.citations[document]
else:
reference = next(self)
self.citations[document] = reference
citation = f'{reference} {document}'
return (reference, citation)
#
#
#
class CitationManager:
# _c_parser = StandardCitationParser
_c_parser = SimpleCitationParser
def __init__(self, annotations, client, start):
self.body = {}
c_parser = self._c_parser(client, start)
citations = []
for c in c_parser(annotations):
self.body[c.text] = c.refn
citations.append(c.cite)
self.citations = list(unique(citations))
def __len__(self):
return len(self.citations)
def __str__(self):
raise NotImplementedError()
def __iter__(self):
raise NotImplementedError()
def replace(self, body):
for i in self:
body = body.replace(*i)
return body
class NumericCitations(CitationManager):
def __str__(self):
return '\n\n{}'.format('\n'.join(self.citations))
def __iter__(self):
for (k, v) in self.body.items():
yield (k, f' {v}')
class NoCitations(CitationManager):
def __str__(self):
return ''
def __iter__(self):
yield from zip(self.body, it.repeat(''))