NimaBoscarino committed on
Commit
e814211
·
1 Parent(s): 93711b8

WIP: Split up unit tests into their own files

Browse files
requirements.txt CHANGED
@@ -1,7 +1,4 @@
1
- # fastapi
2
- # uvicorn
3
  markdown
4
  beautifulsoup4
5
- # tabulate
6
- # pytest
7
- gradio
 
 
 
1
  markdown
2
  beautifulsoup4
3
+ pytest
4
+ gradio
 
tests/test_compliance_checks.py CHANGED
@@ -1,241 +1,14 @@
1
  import pytest
2
  from unittest.mock import MagicMock
3
 
4
- import markdown
5
- from bs4 import BeautifulSoup
6
  from compliance_checks import (
7
  ComplianceSuite,
8
- ModelProviderIdentityCheck, ModelProviderIdentityResult,
9
- IntendedPurposeCheck, IntendedPurposeResult,
10
- GeneralLimitationsCheck, GeneralLimitationsResult,
11
- ComputationalRequirementsCheck, ComputationalRequirementsResult,
12
  )
13
 
14
 
15
- expected_infrastructure = """\
16
- Jean Zay Public Supercomputer, provided by the French government.\
17
- Hardware\
18
- 384 A100 80GB GPUs (48 nodes)\
19
- Software\
20
- Megatron-DeepSpeed (Github link)\
21
- """
22
-
23
-
24
- class TestComplianceCheck:
25
- @pytest.fixture
26
- def provider_identity_model_card(self):
27
- return """
28
- # Model Card for Sample Model
29
-
30
- Some random info...
31
-
32
- ## Model Details
33
-
34
- ### Model Description
35
-
36
- <!-- Provide a longer summary of what this model is. -->
37
-
38
- - **Developed by:** Nima Boscarino
39
- - **Model type:** Yada yada yada
40
- """
41
-
42
- @pytest.fixture
43
- def bad_provider_identity_model_card(self):
44
- return """
45
- # Model Card for Sample Model
46
-
47
- Some random info...
48
-
49
- ## Model Details
50
-
51
- ### Model Description
52
-
53
- - **Developed by:** [More Information Needed]
54
- - **Model type:** Yada yada yada
55
- """
56
-
57
- @pytest.fixture
58
- def intended_purpose_model_card(self):
59
- return """
60
- # Model Card for Sample Model
61
-
62
- Some random info...
63
-
64
- ## Uses
65
-
66
- <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
67
-
68
- ### Direct Use
69
-
70
- Here is some info about direct uses...
71
-
72
- ### Downstream Use [optional]
73
-
74
- <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
75
-
76
- [More Information Needed]
77
-
78
- ### Out-of-Scope Use
79
-
80
- <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
81
-
82
- Here is some info about out-of-scope uses...
83
-
84
- ## Bias, Risks, and Limitations
85
-
86
- <!-- This section is meant to convey both technical and sociotechnical limitations. -->
87
-
88
- [More Information Needed]
89
- """
90
-
91
- @pytest.fixture
92
- def bad_intended_purpose_model_card(self):
93
- return """
94
- # Model Card for Sample Model
95
-
96
- Some random info...
97
-
98
- ## Uses
99
-
100
- <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
101
-
102
- ### Direct Use
103
-
104
- <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
105
-
106
- [More Information Needed]
107
-
108
- ### Downstream Use [optional]
109
-
110
- <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
111
-
112
- [More Information Needed]
113
-
114
- ### Out-of-Scope Use
115
-
116
- <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
117
-
118
- [More Information Needed]
119
-
120
- ## Bias, Risks, and Limitations
121
-
122
- <!-- This section is meant to convey both technical and sociotechnical limitations. -->
123
-
124
- [More Information Needed]
125
- """
126
-
127
- @pytest.fixture
128
- def general_limitations_model_card(self):
129
- return """
130
- # Model Card for Sample Model
131
-
132
- ## Some Random Header
133
-
134
- ## Bias, Risks, and Limitations
135
-
136
- <!-- This section is meant to convey both technical and sociotechnical limitations. -->
137
-
138
- Hello world! These are some risks...
139
-
140
- ## More Things
141
- """
142
-
143
- @pytest.fixture
144
- def bad_general_limitations_model_card(self):
145
- return """
146
- # Model Card for Sample Model
147
-
148
- ## Some Random Header
149
-
150
- ## Bias, Risks, and Limitations
151
-
152
- <!-- This section is meant to convey both technical and sociotechnical limitations. -->
153
-
154
- [More Information Needed]
155
-
156
- ## More Things
157
- """
158
-
159
- @pytest.fixture
160
- def computational_requirements_model_card(self):
161
- # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
162
- return """
163
- # Model Card for Sample Model
164
-
165
- ## Some Random Header
166
-
167
- ## Technical Specifications
168
-
169
- ### Compute infrastructure
170
- Jean Zay Public Supercomputer, provided by the French government.
171
-
172
- #### Hardware
173
-
174
- * 384 A100 80GB GPUs (48 nodes)
175
-
176
- #### Software
177
-
178
- * Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed))
179
- </details>
180
-
181
- ## Intended Use
182
-
183
- Etc..
184
- """
185
-
186
- @pytest.fixture
187
- def bad_computational_requirements_model_card(self):
188
- # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
189
- return """
190
- # Model Card for Sample Model
191
-
192
- ## Some Random Header
193
-
194
- ## Technical Specifications
195
-
196
- ### Compute infrastructure
197
- [More Information Needed]
198
-
199
- ## Intended Use
200
-
201
- Etc..
202
- """
203
-
204
- @pytest.mark.parametrize("check,card,expected", [
205
- (ModelProviderIdentityCheck(), "provider_identity_model_card", ModelProviderIdentityResult(
206
- status=True,
207
- provider="Nima Boscarino",
208
- )),
209
- (ModelProviderIdentityCheck(), "bad_provider_identity_model_card", ModelProviderIdentityResult()),
210
- (IntendedPurposeCheck(), "intended_purpose_model_card", IntendedPurposeResult(
211
- status=True,
212
- direct_use="Here is some info about direct uses...",
213
- downstream_use=None,
214
- out_of_scope_use="Here is some info about out-of-scope uses...",
215
- )),
216
- (IntendedPurposeCheck(), "bad_intended_purpose_model_card", IntendedPurposeResult()),
217
- (GeneralLimitationsCheck(), "general_limitations_model_card", GeneralLimitationsResult(
218
- status=True,
219
- limitations="Hello world! These are some risks..."
220
- )),
221
- (GeneralLimitationsCheck(), "bad_general_limitations_model_card", GeneralLimitationsResult()),
222
- (ComputationalRequirementsCheck(), "computational_requirements_model_card", ComputationalRequirementsResult(
223
- status=True,
224
- requirements=expected_infrastructure,
225
- )),
226
- (ComputationalRequirementsCheck(), "bad_computational_requirements_model_card", ComputationalRequirementsResult()),
227
- ])
228
- def test_run_checks(self, check, card, expected, request):
229
- card = request.getfixturevalue(card)
230
-
231
- model_card_html = markdown.markdown(card)
232
- card_soup = BeautifulSoup(model_card_html, features="html.parser")
233
-
234
- results = check.run_check(card_soup)
235
-
236
- assert results == expected
237
-
238
-
239
  class TestComplianceSuite:
240
  @pytest.fixture
241
  def mock_compliance_check(self):
 
1
  import pytest
2
  from unittest.mock import MagicMock
3
 
 
 
4
  from compliance_checks import (
5
  ComplianceSuite,
6
+ IntendedPurposeCheck,
7
+ GeneralLimitationsCheck,
8
+ ComputationalRequirementsCheck,
 
9
  )
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  class TestComplianceSuite:
13
  @pytest.fixture
14
  def mock_compliance_check(self):
tests/test_computational_requirements_check.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ import markdown
4
+ from bs4 import BeautifulSoup
5
+ from compliance_checks import (
6
+ ComputationalRequirementsCheck, ComputationalRequirementsResult,
7
+ )
8
+
9
+
10
+ expected_infrastructure = """\
11
+ Jean Zay Public Supercomputer, provided by the French government.\
12
+ Hardware\
13
+ 384 A100 80GB GPUs (48 nodes)\
14
+ Software\
15
+ Megatron-DeepSpeed (Github link)\
16
+ """
17
+
18
+
19
+ @pytest.fixture
20
+ def computational_requirements_model_card():
21
+ # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
22
+ return """
23
+ # Model Card for Sample Model
24
+
25
+ ## Some Random Header
26
+
27
+ ## Technical Specifications
28
+
29
+ ### Compute infrastructure
30
+ Jean Zay Public Supercomputer, provided by the French government.
31
+
32
+ #### Hardware
33
+
34
+ * 384 A100 80GB GPUs (48 nodes)
35
+
36
+ #### Software
37
+
38
+ * Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed))
39
+ </details>
40
+
41
+ ## Intended Use
42
+
43
+ Etc..
44
+ """
45
+
46
+
47
+ @pytest.fixture
48
+ def bad_computational_requirements_model_card():
49
+ # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
50
+ return """
51
+ # Model Card for Sample Model
52
+
53
+ ## Some Random Header
54
+
55
+ ## Technical Specifications
56
+
57
+ ### Compute infrastructure
58
+ [More Information Needed]
59
+
60
+ ## Intended Use
61
+
62
+ Etc..
63
+ """
64
+
65
+
66
+ @pytest.mark.parametrize("check,card,expected", [
67
+ (ComputationalRequirementsCheck(), "computational_requirements_model_card", ComputationalRequirementsResult(
68
+ status=True,
69
+ requirements=expected_infrastructure,
70
+ )),
71
+ (ComputationalRequirementsCheck(), "bad_computational_requirements_model_card", ComputationalRequirementsResult()),
72
+ ])
73
+ def test_run_checks(check, card, expected, request):
74
+ card = request.getfixturevalue(card)
75
+
76
+ model_card_html = markdown.markdown(card)
77
+ card_soup = BeautifulSoup(model_card_html, features="html.parser")
78
+
79
+ results = check.run_check(card_soup)
80
+
81
+ assert results == expected
tests/test_general_limitations_check.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ import markdown
4
+ from bs4 import BeautifulSoup
5
+ from compliance_checks import (
6
+ GeneralLimitationsCheck, GeneralLimitationsResult,
7
+ )
8
+
9
+
10
+ @pytest.fixture
11
+ def general_limitations_model_card():
12
+ return """
13
+ # Model Card for Sample Model
14
+
15
+ ## Some Random Header
16
+
17
+ ## Bias, Risks, and Limitations
18
+
19
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
20
+
21
+ Hello world! These are some risks...
22
+
23
+ ## More Things
24
+ """
25
+
26
+
27
+ @pytest.fixture
28
+ def bad_general_limitations_model_card():
29
+ return """
30
+ # Model Card for Sample Model
31
+
32
+ ## Some Random Header
33
+
34
+ ## Bias, Risks, and Limitations
35
+
36
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
37
+
38
+ [More Information Needed]
39
+
40
+ ## More Things
41
+ """
42
+
43
+
44
+ @pytest.mark.parametrize("check,card,expected", [
45
+ (GeneralLimitationsCheck(), "general_limitations_model_card", GeneralLimitationsResult(
46
+ status=True,
47
+ limitations="Hello world! These are some risks..."
48
+ )),
49
+ (GeneralLimitationsCheck(), "bad_general_limitations_model_card", GeneralLimitationsResult()),
50
+ ])
51
+ def test_run_checks(check, card, expected, request):
52
+ card = request.getfixturevalue(card)
53
+
54
+ model_card_html = markdown.markdown(card)
55
+ card_soup = BeautifulSoup(model_card_html, features="html.parser")
56
+
57
+ results = check.run_check(card_soup)
58
+
59
+ assert results == expected
tests/test_intended_purpose_check.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ import markdown
4
+ from bs4 import BeautifulSoup
5
+ from compliance_checks import (
6
+ IntendedPurposeCheck, IntendedPurposeResult,
7
+ )
8
+
9
+
10
+ @pytest.fixture
11
+ def intended_purpose_model_card():
12
+ return """
13
+ # Model Card for Sample Model
14
+
15
+ Some random info...
16
+
17
+ ## Uses
18
+
19
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
20
+
21
+ ### Direct Use
22
+
23
+ Here is some info about direct uses...
24
+
25
+ ### Downstream Use [optional]
26
+
27
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
28
+
29
+ [More Information Needed]
30
+
31
+ ### Out-of-Scope Use
32
+
33
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
34
+
35
+ Here is some info about out-of-scope uses...
36
+
37
+ ## Bias, Risks, and Limitations
38
+
39
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
40
+
41
+ [More Information Needed]
42
+ """
43
+
44
+
45
+ @pytest.fixture
46
+ def bad_intended_purpose_model_card():
47
+ return """
48
+ # Model Card for Sample Model
49
+
50
+ Some random info...
51
+
52
+ ## Uses
53
+
54
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
55
+
56
+ ### Direct Use
57
+
58
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
59
+
60
+ [More Information Needed]
61
+
62
+ ### Downstream Use [optional]
63
+
64
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
65
+
66
+ [More Information Needed]
67
+
68
+ ### Out-of-Scope Use
69
+
70
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
71
+
72
+ [More Information Needed]
73
+
74
+ ## Bias, Risks, and Limitations
75
+
76
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
77
+
78
+ [More Information Needed]
79
+ """
80
+
81
+
82
+ @pytest.mark.parametrize("check,card,expected", [
83
+ (IntendedPurposeCheck(), "intended_purpose_model_card", IntendedPurposeResult(
84
+ status=True,
85
+ direct_use="Here is some info about direct uses...",
86
+ downstream_use=None,
87
+ out_of_scope_use="Here is some info about out-of-scope uses...",
88
+ )),
89
+ (IntendedPurposeCheck(), "bad_intended_purpose_model_card", IntendedPurposeResult()),
90
+ ])
91
+ def test_run_checks(check, card, expected, request):
92
+ card = request.getfixturevalue(card)
93
+
94
+ model_card_html = markdown.markdown(card)
95
+ card_soup = BeautifulSoup(model_card_html, features="html.parser")
96
+
97
+ results = check.run_check(card_soup)
98
+
99
+ assert results == expected