Spaces:
Runtime error
Runtime error
import pytest | |
from unittest.mock import MagicMock | |
import markdown | |
from bs4 import BeautifulSoup | |
from compliance_checks import ( | |
ComplianceSuite, | |
ModelProviderIdentityCheck, ModelProviderIdentityResult, | |
IntendedPurposeCheck, IntendedPurposeResult, | |
GeneralLimitationsCheck, GeneralLimitationsResult, | |
ComputationalRequirementsCheck, ComputationalRequirementsResult, | |
) | |
# Plain-text form of the "Compute infrastructure" section used in
# ``computational_requirements_model_card``: the section's text with the
# markdown markup removed and the pieces joined with no separator at all
# (no spaces or newlines between them).
# NOTE(review): not referenced in the visible part of this file; presumably
# it is the expected value for the computational-requirements check — confirm.
expected_infrastructure = (
    "Jean Zay Public Supercomputer, provided by the French government."
    "Hardware"
    "384 A100 80GB GPUs (48 nodes)"
    "Software"
    "Megatron-DeepSpeed (Github link)"
)
class TestComplianceCheck:
    """Run individual compliance checks against small sample model cards.

    Each fixture returns a markdown model card as a string. The "bad" variants
    replace the relevant section's content with the ``[More Information
    Needed]`` placeholder.

    NOTE(review): the card text is kept at column 0 inside the string literals
    so that the markdown headers stay at the start of their lines. Blank lines
    inside the cards may have been lost in this copy of the file — compare
    against the upstream source if a check depends on paragraph breaks.
    """

    @pytest.fixture
    def provider_identity_model_card(self):
        """Card whose "Developed by" field names a developer."""
        return """
# Model Card for Sample Model
Some random info...
## Model Details
### Model Description
<!-- Provide a longer summary of what this model is. -->
- **Developed by:** Nima Boscarino
- **Model type:** Yada yada yada
"""

    @pytest.fixture
    def bad_provider_identity_model_card(self):
        """Card whose "Developed by" field is still the placeholder."""
        return """
# Model Card for Sample Model
Some random info...
## Model Details
### Model Description
- **Developed by:** [More Information Needed]
- **Model type:** Yada yada yada
"""

    @pytest.fixture
    def intended_purpose_model_card(self):
        """Card with text under "Direct Use" and "Out-of-Scope Use"."""
        return """
# Model Card for Sample Model
Some random info...
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
### Direct Use
Here is some info about direct uses...
### Downstream Use [optional]
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
[More Information Needed]
### Out-of-Scope Use
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
Here is some info about out-of-scope uses...
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
"""

    @pytest.fixture
    def bad_intended_purpose_model_card(self):
        """Card where every "Uses" subsection is still the placeholder."""
        return """
# Model Card for Sample Model
Some random info...
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
### Direct Use
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
[More Information Needed]
### Downstream Use [optional]
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
[More Information Needed]
### Out-of-Scope Use
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
[More Information Needed]
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
"""

    @pytest.fixture
    def general_limitations_model_card(self):
        """Card with text under "Bias, Risks, and Limitations"."""
        return """
# Model Card for Sample Model
## Some Random Header
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
Hello world! These are some risks...
## More Things
"""

    @pytest.fixture
    def bad_general_limitations_model_card(self):
        """Card whose "Bias, Risks, and Limitations" section is the placeholder."""
        return """
# Model Card for Sample Model
## Some Random Header
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
## More Things
"""

    @pytest.fixture
    def computational_requirements_model_card(self):
        """Card with a filled-in "Compute infrastructure" section."""
        # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
        return """
# Model Card for Sample Model
## Some Random Header
## Technical Specifications
### Compute infrastructure
Jean Zay Public Supercomputer, provided by the French government.
#### Hardware
* 384 A100 80GB GPUs (48 nodes)
#### Software
* Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed))
</details>
## Intended Use
Etc..
"""

    @pytest.fixture
    def bad_computational_requirements_model_card(self):
        """Card whose "Compute infrastructure" section is the placeholder."""
        # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
        return """
# Model Card for Sample Model
## Some Random Header
## Technical Specifications
### Compute infrastructure
[More Information Needed]
## Intended Use
Etc..
"""

    # NOTE(review): `check`, `card` and `expected` are not fixtures defined in
    # this file, so they are presumably supplied by a
    # `@pytest.mark.parametrize("check,card,expected", [...])` decorator that is
    # missing from this copy. The expected result objects cannot be rebuilt
    # from what is visible here — restore the decorator from the upstream file.
    def test_run_checks(self, check, card, expected, request):
        """Run ``check`` on a rendered card and compare with ``expected``.

        Args:
            check: Object exposing ``run_check(soup)``.
            card: Name of a fixture that returns the markdown model card.
            expected: Value the check's result must equal.
            request: pytest's built-in ``request`` fixture, used to resolve
                ``card`` by name.
        """
        card = request.getfixturevalue(card)

        # Checks receive parsed HTML, so render the markdown before parsing.
        model_card_html = markdown.markdown(card)
        card_soup = BeautifulSoup(model_card_html, features="html.parser")

        results = check.run_check(card_soup)

        assert results == expected
class TestComplianceSuite:
    """Tests for constructing and running a ``ComplianceSuite`` with mock checks."""

    @pytest.fixture
    def mock_compliance_check(self):
        """Stand-in check whose ``run_check`` returns ``True``."""
        mock_check = MagicMock()
        mock_check.run_check = MagicMock(return_value=True)
        return mock_check

    @pytest.fixture
    def empty_compliance_suite(self):
        """Suite constructed with no checks."""
        return ComplianceSuite(checks=[])

    @pytest.fixture
    def compliance_suite(self, mock_compliance_check):
        """Suite constructed with the single mock check."""
        return ComplianceSuite(checks=[mock_compliance_check])

    @pytest.fixture
    def empty_compliance_results(self):
        """Results expected from running the empty suite."""
        return []

    @pytest.fixture
    def compliance_results(self):
        """Results expected from running the single-mock suite."""
        return [True]

    def test_create_empty_compliance_suite(self, empty_compliance_suite):
        """A suite built from an empty list exposes no checks."""
        assert len(empty_compliance_suite.checks) == 0

    def test_create_compliance_suite(self, compliance_suite):
        """A suite built from one check exposes exactly one check."""
        assert len(compliance_suite.checks) == 1

    # NOTE(review): the parametrization below is missing from this copy of the
    # file and has been reconstructed by pairing the suite fixtures with the
    # result fixtures of the matching name — confirm against the upstream file.
    @pytest.mark.parametrize(
        "suite,results",
        [
            ("empty_compliance_suite", "empty_compliance_results"),
            ("compliance_suite", "compliance_results"),
        ],
    )
    def test_run_compliance_suite(self, suite, results, request):
        """Run a suite on an empty card and compare against the expected results.

        Args:
            suite: Name of a fixture returning the ``ComplianceSuite`` to run.
            results: Name of a fixture returning the expected result list.
            request: pytest's built-in ``request`` fixture, used to resolve
                both fixture names.
        """
        resolved_suite: ComplianceSuite = request.getfixturevalue(suite)
        expected_results: list = request.getfixturevalue(results)

        assert resolved_suite.run("") == expected_results

        # Every check held by the suite must have been invoked exactly once.
        for check in resolved_suite.checks:
            check.run_check.assert_called_once()
class TestEndToEnd:
    """Run the four concrete checks together through one ``ComplianceSuite``."""

    # NOTE(review): `card` and `fixture` are not fixtures defined in this file,
    # so they are presumably supplied by a
    # `@pytest.mark.parametrize("card,fixture", [...])` decorator that is
    # missing from this copy — restore it from the upstream file.
    def test_end_to_end_compliance_suite(self, card, fixture, request):
        """Assert that every check in the full suite reports a truthy status.

        Args:
            card: Either the model card itself or, when ``fixture`` is truthy,
                the name of a fixture that returns it.
            fixture: Whether ``card`` must be resolved as a fixture name.
            request: pytest's built-in ``request`` fixture.
        """
        if fixture:
            card = request.getfixturevalue(card)

        suite = ComplianceSuite(
            checks=[
                ModelProviderIdentityCheck(),
                IntendedPurposeCheck(),
                GeneralLimitationsCheck(),
                ComputationalRequirementsCheck(),
            ]
        )

        results = suite.run(card)

        assert all(r.status for r in results)