Spaces:
Runtime error
Runtime error
File size: 2,738 Bytes
11bd448 25bf2cc 11bd448 25bf2cc 11bd448 b4f5e30 25bf2cc b4f5e30 11bd448 b4f5e30 11bd448 b4f5e30 25bf2cc b4f5e30 25bf2cc 11bd448 b4f5e30 11bd448 b4f5e30 11bd448 b4f5e30 25bf2cc b4f5e30 11bd448 25bf2cc 11bd448 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
from abc import ABC, abstractmethod
import markdown
from bs4 import BeautifulSoup, Comment
class ComplianceCheck(ABC):
    """Abstract base class for a single check run against a parsed model card."""

    @abstractmethod
    def run_check(self, card: BeautifulSoup):
        """Evaluate this check against ``card``.

        Args:
            card: The model card, already parsed into a BeautifulSoup tree.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError
class ModelProviderIdentityCheck(ComplianceCheck):
    """Check that the model card names who developed the model."""

    def run_check(self, card: BeautifulSoup):
        """Look up the value that follows the bold "Developed by:" label.

        Args:
            card: The model card, already parsed into a BeautifulSoup tree.

        Returns:
            ``(True, developer)`` where ``developer`` is the stripped,
            stringified content that follows the label, or ``(False, None)``
            when the label is absent or the value is still the
            "[More Information Needed]" placeholder.
        """
        not_provided = (False, None)
        try:
            label = card.find("strong", string="Developed by:")
            # `label` is None when the card has no such label; the attribute
            # access below then raises AttributeError, handled just after.
            developer = "".join(map(str, label.next_siblings)).strip()
        except AttributeError:
            return not_provided

        if developer == "[More Information Needed]":
            return not_provided
        return True, developer
def walk_to_next_heading(card, heading, heading_text):
    """Collect the text of the section that follows a given heading.

    The section starts right after the ``heading`` element whose string is
    exactly ``heading_text`` and ends at the next sibling heading of the same
    or any higher level (for an ``h3`` section: the next ``h3``, ``h2`` or
    ``h1``), or at the end of the document, whichever comes first.

    Args:
        card: Parsed model card (a BeautifulSoup tree).
        heading: Heading tag name of the section, e.g. ``"h2"`` or ``"h3"``.
        heading_text: Exact text of the heading to look for.

    Returns:
        ``(True, content)`` when the heading exists, where ``content`` is the
        stripped text of every non-comment sibling in the section,
        concatenated without a separator (an empty section yields ``""``).
        ``(False, None)`` when the heading is missing or the section only
        contains the "[More Information Needed]" placeholder.

    Raises:
        ValueError, IndexError: If ``heading`` is not of the form ``"h<digit>"``.
    """
    level = int(heading[1])
    # Any heading at this level or above closes the section. Looking only one
    # level up would let an h3 section run straight through a following h1.
    stop_at = {f"h{n}" for n in range(1, level + 1)}

    try:
        heading_node = card.find(heading, string=heading_text)
        parts = []
        # Iterating `next_siblings` directly (instead of priming the generator
        # with a bare `next()`) means a heading that is the last node of the
        # document yields an empty section rather than an uncaught
        # StopIteration.
        for sibling in heading_node.next_siblings:
            # Text nodes have `name` None, so they never match `stop_at`.
            if sibling.name in stop_at:
                break
            if not isinstance(sibling, Comment):
                parts.append(sibling.text.strip())
    except AttributeError:
        # Heading not found (`find` returned None) or the card is not a
        # searchable tree: report the section as missing.
        return False, None

    content = "".join(parts)
    if content.strip() == "[More Information Needed]":
        return False, None
    return True, content
class IntendedPurposeCheck(ComplianceCheck):
    """Check that the model card documents its intended and out-of-scope uses."""

    def run_check(self, card: BeautifulSoup):
        """Gather the "Direct Use", "Downstream Use" and "Out-of-Scope Use" sections.

        Args:
            card: The model card, already parsed into a BeautifulSoup tree.

        Returns:
            A ``(passed, contents)`` tuple. ``passed`` is true only when both
            the "Direct Use" and "Out-of-Scope Use" sections are filled in;
            the downstream section does not affect it. ``contents`` is the list
            ``[direct_use, downstream_use, out_of_scope_use]``, with ``None``
            for any section that is missing or still a placeholder.
        """
        direct_use_check, direct_use_content = walk_to_next_heading(card, "h3", "Direct Use")

        # The heading may carry an "[optional]" suffix, but some cards
        # (BLOOM, for example) omit it, so fall back to the bare heading
        # when the suffixed one yields nothing.
        _, downstream_use_content = walk_to_next_heading(card, "h3", "Downstream Use [optional]")
        if downstream_use_content is None:
            _, downstream_use_content = walk_to_next_heading(card, "h3", "Downstream Use")

        out_of_scope_use_check, out_of_scope_use_content = walk_to_next_heading(card, "h3", "Out-of-Scope Use")

        return (
            direct_use_check and out_of_scope_use_check,
            [direct_use_content, downstream_use_content, out_of_scope_use_content],
        )
class GeneralLimitationsCheck(ComplianceCheck):
    """Check that the model card has a "Bias, Risks, and Limitations" section."""

    def run_check(self, card: BeautifulSoup):
        """Return the ``(passed, content)`` result for the limitations section.

        Args:
            card: The model card, already parsed into a BeautifulSoup tree.
        """
        section_found, section_text = walk_to_next_heading(
            card, "h2", "Bias, Risks, and Limitations"
        )
        return section_found, section_text
class ComputationalRequirementsCheck(ComplianceCheck):
    """Check that the model card has a "Compute infrastructure" section."""

    def run_check(self, card: BeautifulSoup):
        """Return the ``(passed, content)`` result for the compute section.

        Args:
            card: The model card, already parsed into a BeautifulSoup tree.
        """
        section_found, section_text = walk_to_next_heading(
            card, "h3", "Compute infrastructure"
        )
        return section_found, section_text
class ComplianceSuite:
    """Run a fixed collection of compliance checks against a model card."""

    def __init__(self, checks):
        """Store the checks to run.

        Args:
            checks: Iterable of objects exposing ``run_check(card)``.
        """
        self.checks = checks

    def run(self, model_card):
        """Render a Markdown model card and run every check against it.

        Args:
            model_card: The model card as Markdown text.

        Returns:
            A list with the result of each check's ``run_check``, in the same
            order as ``self.checks``.
        """
        rendered_html = markdown.markdown(model_card)
        soup = BeautifulSoup(rendered_html, features="html.parser")

        results = []
        for check in self.checks:
            results.append(check.run_check(soup))
        return results
|