rookiemango commited on
Commit
1d2a897
·
verified ·
1 Parent(s): dddc1ae

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -68,3 +68,6 @@ gpt_result/lean_random/gpt4/2.jsonl filter=lfs diff=lfs merge=lfs -text
68
  gpt_result/lean_random/gpt4/3.jsonl filter=lfs diff=lfs merge=lfs -text
69
  gpt_result/lean_random/gpt4/4.jsonl filter=lfs diff=lfs merge=lfs -text
70
  gpt_result/lean_random/gpt4/5.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
68
  gpt_result/lean_random/gpt4/3.jsonl filter=lfs diff=lfs merge=lfs -text
69
  gpt_result/lean_random/gpt4/4.jsonl filter=lfs diff=lfs merge=lfs -text
70
  gpt_result/lean_random/gpt4/5.jsonl filter=lfs diff=lfs merge=lfs -text
71
+ data/updated_lean4_kv.json filter=lfs diff=lfs merge=lfs -text
72
+ pass_rate_results/compile_result/lean4_basic_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
73
+ pass_rate_results/compile_result/lean4_random_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -5,3 +5,4 @@
5
  /test/Mathlib/.lake
6
  /test/*.olean
7
  /test/*.olean.tmp
 
 
5
  /test/Mathlib/.lake
6
  /test/*.olean
7
  /test/*.olean.tmp
8
+ pass_rate_results/*/*/*.json
__pycache__/code.cpython-39.pyc CHANGED
Binary files a/__pycache__/code.cpython-39.pyc and b/__pycache__/code.cpython-39.pyc differ
 
compile_result/lean4_basic_test/output.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+
2
+ Pass@1: 0.21311475409836064
3
+ Pass@5: 0.3155737704918033
4
+
5
+
6
+
compile_result/lean4_random_test/output.log ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Traceback (most recent call last):
3
+ File "/opt/tiger/lean4-compile/pass_rate_new_test.py", line 165, in multi
4
+ result = future.result()
5
+ File "/usr/lib/python3.9/concurrent/futures/_base.py", line 435, in result
6
+ self._condition.wait(timeout)
7
+ File "/usr/lib/python3.9/threading.py", line 312, in wait
8
+ waiter.acquire()
9
+ KeyboardInterrupt
10
+
11
+ During handling of the above exception, another exception occurred:
12
+
13
+ Traceback (most recent call last):
14
+ File "/opt/tiger/lean4-compile/pass_rate_new_test.py", line 266, in <module>
15
+ main(args)
16
+ File "/opt/tiger/lean4-compile/pass_rate_new_test.py", line 252, in main
17
+ multi(command_list, args.output_path, args.k)
18
+ File "/opt/tiger/lean4-compile/pass_rate_new_test.py", line 166, in multi
19
+ results.append(result)
20
+ File "/usr/lib/python3.9/concurrent/futures/_base.py", line 628, in __exit__
21
+ self.shutdown(wait=True)
22
+ File "/usr/lib/python3.9/concurrent/futures/thread.py", line 229, in shutdown
23
+ t.join()
24
+ File "/usr/lib/python3.9/threading.py", line 1033, in join
25
+ self._wait_for_tstate_lock()
26
+ File "/usr/lib/python3.9/threading.py", line 1049, in _wait_for_tstate_lock
27
+ elif lock.acquire(block, timeout):
28
+ KeyboardInterrupt
29
+
30
+ Pass@1: 0.14285714285714285
31
+ Pass@5: 0.23949579831932774
32
+
33
+
34
+
data/updated_lean4_kv.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c798624b063042e0cf22453432c9d640a8e72204203b89ee1e40b915838e7dd2
3
+ size 352638720
pass_rate_atp_pass.py CHANGED
@@ -89,9 +89,6 @@ input_path_lists = [
89
  # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
90
  # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
91
  # Add more input paths as needed
92
- "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/5/",
93
- "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/5/",
94
- "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/5/",
95
  ]
96
 
97
  # Iterate through the input paths and run the command
 
89
  # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
90
  # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
91
  # Add more input paths as needed
 
 
 
92
  ]
93
 
94
  # Iterate through the input paths and run the command
pass_rate_multi_pass.py CHANGED
@@ -1,18 +1,10 @@
1
  import pdb
2
  import subprocess
3
  import re
 
4
 
5
- # Output file
6
- output_file = "pass_rate_output.txt"
7
 
8
- # Clearing the output file before appending new content
9
- with open(output_file, "w") as file:
10
- file.write("")
11
 
12
- # List of input paths
13
- input_path_lists = [
14
- "test/zero_shot/wild_test/generation/lean4_random_15k_all/2/1/",
15
- ]
16
 
17
  def get_output(input_string, k):
18
  pattern = r"zero_shot/(\w+)/(.+?)/(\w+)"
@@ -20,7 +12,12 @@ def get_output(input_string, k):
20
  if match:
21
  part1 = match.group(1)
22
  part2 = match.group(3) + f"pass{k}.jsonl"
23
- result = "/".join([part1, part2])
 
 
 
 
 
24
  print(result)
25
  else:
26
  print("No match found.")
@@ -89,24 +86,25 @@ input_path_lists = [
89
  # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
90
  # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
91
  # Add more input paths as needed
92
- "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/5/",
93
- "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/5/",
94
- "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/5/",
95
  ]
96
 
97
  # Iterate through the input paths and run the command
98
  for input_path in input_path_lists:
99
  k = 5
100
- if "wild_test" in input_path or "gsm8k_train" in input_path or "math_train" in input_path:
101
- print(f"wild")
102
- print(f"Running for input path: {input_path}", file=open(output_file, "a"))
103
- command = f"python3 pass_rate_notlean_test.py --input_path {input_path} --output_path {get_output(input_path,k)} --k {k}"
104
- subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
105
- print("\n\n",file=open(output_file, "a"))
106
 
 
 
107
  else:
108
- print(f"lean")
109
- print(f"Running for input path: {input_path}", file=open(output_file, "a"))
110
- command = f"python3 pass_rate_new_test.py --input_path {input_path} --output_path {get_output(input_path, k)} --k {k}"
111
- subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
112
- print("\n\n",file=open(output_file, "a"))
 
 
 
 
1
  import pdb
2
  import subprocess
3
  import re
4
+ import os
5
 
 
 
6
 
 
 
 
7
 
 
 
 
 
8
 
9
  def get_output(input_string, k):
10
  pattern = r"zero_shot/(\w+)/(.+?)/(\w+)"
 
12
  if match:
13
  part1 = match.group(1)
14
  part2 = match.group(3) + f"pass{k}.jsonl"
15
+ result = os.path.join("compile_result", part1, part2)
16
+
17
+ # Create the parent directory if it doesn't exist
18
+ parent_dir = os.path.dirname(result)
19
+ os.makedirs(parent_dir, exist_ok=True)
20
+
21
  print(result)
22
  else:
23
  print("No match found.")
 
86
  # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
87
  # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
88
  # Add more input paths as needed
89
+ "/opt/tiger/formal-align/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k/5",
90
+ "/opt/tiger/formal-align/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k/5"
 
91
  ]
92
 
93
  # Iterate through the input paths and run the command
94
  for input_path in input_path_lists:
95
  k = 5
96
+ output_path = get_output(input_path, k)
97
+ output_log_path = os.path.join(os.path.dirname(output_path), 'output.log')
98
+
 
 
 
99
 
100
+ if "wild_test" in input_path or "gsm8k_train" in input_path or "math_train" in input_path:
101
+ script_name = "pass_rate_notlean_test.py"
102
  else:
103
+ script_name = "pass_rate_new_test.py"
104
+
105
+ command = f"python3 {script_name} --input_path {input_path} --output_path {output_path} --k {k}"
106
+ print(command)
107
+
108
+ with open(output_log_path, "w") as output_file:
109
+ subprocess.run(command, shell=True, stdout=output_file, stderr=subprocess.STDOUT)
110
+ print("\n\n", file=output_file)
pass_rate_new_test.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import subprocess
3
  from argparse import ArgumentParser
4
  import json
5
- from concurrent.futures import ThreadPoolExecutor
6
  from tqdm import tqdm
7
  import glob
8
  import tempfile
@@ -110,7 +110,7 @@ def multi(command_list, output_path, k ):
110
  return filtered_data
111
  result_dict = filter_json(item)
112
  result_dict['results'] = []
113
-
114
  for i, cmd in enumerate(item['cmd']):
115
  temp_file = os.path.join(temp_dir,f"{index}_test_{i}.lean") # Ensure unique filenames
116
  with open(temp_file, "w") as f:
@@ -136,16 +136,14 @@ def multi(command_list, output_path, k ):
136
  elif not len(stderr) and "messages" in stdout:
137
  flag = 0
138
  for me in stdout['messages']:
139
- import pdb
140
- pdb.set_trace()
141
  if me['severity'] == 'error':
142
  flag = 1
143
  start_line = me['pos']['line'] - 1
144
- current_column =me['pos']['column'] -1
145
  for line_n in range(start_line - 1, 0 , -1):
146
  line_len = len(cmd.split('\n')[line_n])
147
  current_column += line_len + 1
148
- if not line_len:
149
  break
150
  result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos':current_column}
151
  break
@@ -161,14 +159,21 @@ def multi(command_list, output_path, k ):
161
 
162
  total = len(command_list)
163
 
164
- with ThreadPoolExecutor(max_workers=1) as executor:
 
 
 
 
 
165
  futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
166
- for future in tqdm(futures, total=total, desc="Processing Commands"):
167
- result = future.result()
168
- results.append(result)
169
- # if result['status'] == 'pass':
170
- # passed += 1
171
-
 
 
172
  def calculate_pass(result_list, k):
173
  pass_1_count = 0
174
  pass_k_count = 0
@@ -190,7 +195,7 @@ def multi(command_list, output_path, k ):
190
  pass_k = pass_k_count / len(result_list) if result_list else 0
191
 
192
  return pass_1, pass_k
193
-
194
  pass_1, pass_k = calculate_pass(results, k)
195
  print("Pass@1:", pass_1)
196
  print(f"Pass@{k}:", pass_k)
@@ -199,7 +204,7 @@ def multi(command_list, output_path, k ):
199
  # print(f"total test: {total}")
200
  # print(f"Pass rate: {pass_rate}%")
201
 
202
- output_file = f"pass_rate_results/{output_path}"
203
  # Create the directory if it doesn't exist
204
  os.makedirs(os.path.dirname(output_file), exist_ok=True)
205
 
@@ -211,22 +216,38 @@ def remove_simp_pattern_from_end(s):
211
  pattern = r'@\[simp\s*.*?\]$'
212
  return re.sub(pattern, '', s)
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  def main(args):
215
  command_list = []
 
216
  file_pattern = os.path.join(args.input_path, '[0-1]*.json')
217
  for file_path in glob.glob(file_pattern):
218
- with open(file_path, 'r', encoding='utf-8') as rf:
 
219
  for line in rf.readlines():
220
  try:
221
  json_item = json.loads(line)
222
- working_env = json_item['content']['working_file']
223
- # pdb.set_trace()
224
  # statement = json_item['total output'][0]
225
  json_item['cmd'] = []
226
  for output in json_item['total output'][:min(args.k, len(json_item['total output']))]:
227
  statement = output.split("#align")[0]
228
  json_item['cmd'].append('\n\n'.join([working_env, statement]))
229
- json_item['answer'] = json_item['content']['statement_poof']
230
  assert len(statement) > 0
231
  # json_item['cmd'] = '\n'.join([working_env, json_item['total output'][0]])
232
  except:
 
2
  import subprocess
3
  from argparse import ArgumentParser
4
  import json
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
  from tqdm import tqdm
7
  import glob
8
  import tempfile
 
110
  return filtered_data
111
  result_dict = filter_json(item)
112
  result_dict['results'] = []
113
+
114
  for i, cmd in enumerate(item['cmd']):
115
  temp_file = os.path.join(temp_dir,f"{index}_test_{i}.lean") # Ensure unique filenames
116
  with open(temp_file, "w") as f:
 
136
  elif not len(stderr) and "messages" in stdout:
137
  flag = 0
138
  for me in stdout['messages']:
 
 
139
  if me['severity'] == 'error':
140
  flag = 1
141
  start_line = me['pos']['line'] - 1
142
+ current_column =me['pos']['column'] -1
143
  for line_n in range(start_line - 1, 0 , -1):
144
  line_len = len(cmd.split('\n')[line_n])
145
  current_column += line_len + 1
146
+ if not line_len:
147
  break
148
  result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos':current_column}
149
  break
 
159
 
160
  total = len(command_list)
161
 
162
+
163
+ # Get the number of available CPU cores
164
+ max_workers = os.cpu_count()
165
+
166
+ results = []
167
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
168
  futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
169
+
170
+ with tqdm(total=len(futures), desc="Processing Commands") as progress_bar:
171
+ for future in as_completed(futures):
172
+ result = future.result()
173
+ results.append(result)
174
+ progress_bar.update(1)
175
+
176
+
177
  def calculate_pass(result_list, k):
178
  pass_1_count = 0
179
  pass_k_count = 0
 
195
  pass_k = pass_k_count / len(result_list) if result_list else 0
196
 
197
  return pass_1, pass_k
198
+
199
  pass_1, pass_k = calculate_pass(results, k)
200
  print("Pass@1:", pass_1)
201
  print(f"Pass@{k}:", pass_k)
 
204
  # print(f"total test: {total}")
205
  # print(f"Pass rate: {pass_rate}%")
206
 
207
+ output_file = f"output_path"
208
  # Create the directory if it doesn't exist
209
  os.makedirs(os.path.dirname(output_file), exist_ok=True)
210
 
 
216
  pattern = r'@\[simp\s*.*?\]$'
217
  return re.sub(pattern, '', s)
218
 
219
+ def update_dict(lean_kv):
220
+ update_kv = {}
221
+ for k, v in lean_kv.items():
222
+ # print(k)
223
+ # print(k.split("#align")[0])
224
+ update_kv[k.split("#align")[0]] = v
225
+ # Write the combined data to a new JSON file
226
+ with open('up_lean4_kv.json', 'w') as output_file:
227
+ json.dump(update_kv, output_file, indent=4)
228
+ return update_kv
229
+
230
+
231
+
232
+
233
  def main(args):
234
  command_list = []
235
+ retrieval_path = "data/updated_lean4_kv.json"
236
  file_pattern = os.path.join(args.input_path, '[0-1]*.json')
237
  for file_path in glob.glob(file_pattern):
238
+ with open(file_path, 'r', encoding='utf-8') as rf, open(retrieval_path, 'r', encoding='utf-8') as retrival_kv:
239
+ lean4_kv = json.load(retrival_kv)
240
  for line in rf.readlines():
241
  try:
242
  json_item = json.loads(line)
243
+ # working_env = content']['working_file']
244
+ working_env = lean4_kv[json_item['content']['formal']]
245
  # statement = json_item['total output'][0]
246
  json_item['cmd'] = []
247
  for output in json_item['total output'][:min(args.k, len(json_item['total output']))]:
248
  statement = output.split("#align")[0]
249
  json_item['cmd'].append('\n\n'.join([working_env, statement]))
250
+ json_item['answer'] = json_item['content']['formal']
251
  assert len(statement) > 0
252
  # json_item['cmd'] = '\n'.join([working_env, json_item['total output'][0]])
253
  except:
pass_rate_output.txt CHANGED
@@ -1,82 +1 @@
1
- Running for input path: ../auto-info/generate_result/zero_shot/lean4_basic_test/generation/deepseek-math-7b-base/5/
2
-
3
-
4
-
5
- Running for input path: ../auto-info/generate_result/zero_shot/lean4_random_test/generation/deepseek-math-7b-base/5/
6
-
7
-
8
-
9
- Running for input path: ../auto-info/generate_result/zero_shot/wild_test/generation/deepseek-math-7b-base/5/
10
-
11
- total len: 1000
12
- Pass@1: 0.0
13
- Pass@5: 0.0
14
-
15
-
16
-
17
- Running for input path: ../auto-info/generate_result/zero_shot/lean4_basic_test/generation/deepseek-math-7b-instruct/5/
18
-
19
- total len: 981
20
- Pass@1: 0.004077471967380225
21
- Pass@5: 0.029561671763506627
22
-
23
-
24
-
25
- Running for input path: ../auto-info/generate_result/zero_shot/lean4_random_test/generation/deepseek-math-7b-instruct/5/
26
-
27
- total len: 970
28
- Pass@1: 0.002061855670103093
29
- Pass@5: 0.016494845360824743
30
-
31
-
32
-
33
- Running for input path: ../auto-info/generate_result/zero_shot/lean4_basic_test/generation/llemma_7b/5/
34
-
35
- total len: 981
36
- Pass@1: 0.0010193679918450561
37
- Pass@5: 0.004077471967380225
38
-
39
-
40
-
41
- Running for input path: ../auto-info/generate_result/zero_shot/lean4_random_test/generation/llemma_7b/5/
42
-
43
- total len: 970
44
- Pass@1: 0.0
45
- Pass@5: 0.007216494845360825
46
-
47
-
48
-
49
- Running for input path: ../auto-info/generate_result/zero_shot/wild_test/generation/llemma_7b/5/
50
-
51
- total len: 1000
52
- Pass@1: 0.0
53
- Pass@5: 0.0
54
-
55
-
56
-
57
- Running for input path: ../auto-info/generate_result/zero_shot/lean4_basic_test/generation/llemma_34b/5/
58
-
59
- total len: 981
60
- Pass@1: 0.0
61
- Pass@5: 0.0
62
-
63
-
64
-
65
- Running for input path: ../auto-info/generate_result/zero_shot/lean4_random_test/generation/llemma_34b/5/
66
-
67
- total len: 970
68
- Pass@1: 0.0
69
- Pass@5: 0.0010309278350515464
70
-
71
-
72
-
73
- Running for input path: ../auto-info/generate_result/zero_shot/wild_test/generation/llemma_34b/5/
74
-
75
- total len: 1000
76
- Pass@1: 0.0
77
- Pass@5: 0.0
78
-
79
-
80
-
81
- Running for input path: ../auto-info/generate_result/zero_shot/lean4_basic_test/generation/internlm2-math-7b/5/
82
-
 
1
+ Running for input path: /opt/tiger/formal-align/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k/5/opt/tiger/formal-align/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k/5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pass_rate_results/compile_result/lean4_basic_test/lean4_random_15kpass5.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4040c382e0ee3c796951b7c60b55f91616ea8653c1730c75978c3464d9562ecc
3
+ size 12184988
pass_rate_results/compile_result/lean4_random_test/lean4_random_15kpass5.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ba08a0aea4c769c9f16378f48121a50f7df9379fec11518773896607a9ad1a0
3
+ size 55140396
update_lean4_kv.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import glob
8
+ import tempfile
9
+ import random
10
+
11
+ def wrapped_function(item):
12
+ results = []
13
+ passed = 0
14
+ total = 0
15
+
16
+ temp_dir = tempfile.gettempdir()
17
+ temp_file = os.path.join(temp_dir, f"test.lean")
18
+
19
+ with open(temp_file, "w") as f:
20
+ f.write(item['cmd'])
21
+
22
+ # Rest of the function code...
23
+ # Process the item using the temporary file
24
+ # ...
25
+
26
+ # Clean up the temporary file
27
+ data = '{"path": "%s", "allTactics": true}' %(temp_file)
28
+ command = 'echo \'%s\' | lake exe repl' % data
29
+
30
+ try:
31
+ result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
32
+ stdout = result.stdout.decode('utf-8')
33
+ stderr = result.stderr.decode('utf-8')
34
+ # stdout = result.stdout.decode('utf-8')
35
+ json_stdout = json.loads(stdout)
36
+ if "messages" not in json_stdout.keys():
37
+ passed += 1
38
+ # results.append({'item': item['content'], 'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
39
+ results.append({ 'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
40
+ except subprocess.CalledProcessError as e:
41
+ # results.append({'item': item['content'], 'error': str(e), 'status': 'nopass'})
42
+ results.append({ 'error': str(e), 'status': 'nopass'})
43
+ total += 1
44
+
45
+ pass_rate = passed / (passed + total) * 100
46
+
47
+
48
+ return {'results': results, 'pass_rate': pass_rate}
49
+
50
+ # Set the directory where your .lean files are located
51
+
52
+ # Get a list of all .lean files in the directory
53
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
54
+ # lean_files = ["test/file.lean"]
55
+ def single(command_list, args):
56
+ results = []
57
+ passed = 0
58
+ total = 0
59
+ for item in tqdm(command_list):
60
+ with open("test/test.lean", "w", encoding = 'utf-8') as f:
61
+ f.write(item['cmd'])
62
+ data = '{"path": "test/test.lean", "allTactics": true}'
63
+ # data = '{"cmd": "%s", "allTactics": true}' % item['cmd']
64
+ command = 'echo \'%s\' | lake exe repl' % data
65
+ try:
66
+ # process = subprocess.Popen(['lake', 'exe', 'repl'], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
67
+ # stderr=subprocess.PIPE)
68
+ # stdout, stderr = process.communicate(input=data.encode(encoding='utf-8'))
69
+ # stdout = stdout.decode('utf-8')
70
+ result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
71
+ stdout = result.stdout.decode('utf-8')
72
+ json_stdout = json.loads(stdout)
73
+ if "messages" not in json_stdout.keys():
74
+ passed += 1
75
+ stderr = result.stderr.decode('utf-8')
76
+ results.append({
77
+ # 'item': item['content'],
78
+ 'stdout': stdout,
79
+ 'stderr': stderr,
80
+ 'status': 'pass'
81
+ })
82
+ except subprocess.CalledProcessError as e:
83
+ results.append({
84
+ # 'item': item['content'],
85
+ 'error': str(e),
86
+ 'status': 'nopass'
87
+ })
88
+ total += 1
89
+
90
+ # Calculate pass rate
91
+ pass_rate = passed / total * 100
92
+ print(pass_rate)
93
+
94
+ # Save results to a JSON file
95
+ with open('results.json', 'w') as f:
96
+ json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
97
+
98
+
99
+ def multi(command_list, output_path, k ):
100
+ results = []
101
+ passed = 0
102
+ total = 0
103
+ def execute_command(item, index):
104
+ temp_dir = '/opt/jianqiao'
105
+ def filter_json(json_data):
106
+ filtered_data = {}
107
+ for key in json_data.keys():
108
+ if key in ['question', 'answer', 'total output', 'results']:
109
+ filtered_data[key] = json_data[key]
110
+ return filtered_data
111
+ result_dict = filter_json(item)
112
+ result_dict['results'] = []
113
+
114
+ for i, cmd in enumerate(item['cmd']):
115
+ temp_file = os.path.join(temp_dir,f"{index}_test_{i}.lean") # Ensure unique filenames
116
+ with open(temp_file, "w") as f:
117
+ f.write(cmd)
118
+
119
+ data = '{"path": "%s", "allTactics": true}' % temp_file
120
+ command = f'echo \'{data}\' | lake exe repl'
121
+
122
+ try:
123
+ result = subprocess.run(command, shell=True, check=True,timeout=600, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
124
+ stdout = json.loads(result.stdout.decode('utf-8'))
125
+ stderr = result.stderr.decode('utf-8')
126
+
127
+ except subprocess.TimeoutExpired as e:
128
+ result_item = {'error': str(e), 'status': 'nopass_limit'}
129
+
130
+ except subprocess.CalledProcessError as e:
131
+ result_item = {'error': str(e), 'status': 'nopass_error'}
132
+
133
+ else:
134
+ if "messages" not in stdout and not len(stderr):
135
+ result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass' }
136
+ elif not len(stderr) and "messages" in stdout:
137
+ flag = 0
138
+ for me in stdout['messages']:
139
+ import pdb
140
+ pdb.set_trace()
141
+ if me['severity'] == 'error':
142
+ flag = 1
143
+ start_line = me['pos']['line'] - 1
144
+ current_column =me['pos']['column'] -1
145
+ for line_n in range(start_line - 1, 0 , -1):
146
+ line_len = len(cmd.split('\n')[line_n])
147
+ current_column += line_len + 1
148
+ if not line_len:
149
+ break
150
+ result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos':current_column}
151
+ break
152
+ if not flag :
153
+ result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
154
+ else:
155
+ assert len(stderr)
156
+ result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos': 0 }
157
+
158
+ result_dict['results'].append(result_item)
159
+ return result_dict
160
+
161
+
162
+ total = len(command_list)
163
+
164
+ with ThreadPoolExecutor(max_workers=1) as executor:
165
+ futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
166
+ for future in tqdm(futures, total=total, desc="Processing Commands"):
167
+ result = future.result()
168
+ results.append(result)
169
+ # if result['status'] == 'pass':
170
+ # passed += 1
171
+
172
+ def calculate_pass(result_list, k):
173
+ pass_1_count = 0
174
+ pass_k_count = 0
175
+
176
+ for result in result_list:
177
+ results = result.get('results', [])
178
+ if results:
179
+ for j in range(min(1, len(results))):
180
+ if results[j].get('status') == 'pass':
181
+ pass_1_count += 1
182
+ break
183
+
184
+ for j in range(min(k, len(results))):
185
+ if results[j].get('status') == 'pass':
186
+ pass_k_count += 1
187
+ break
188
+
189
+ pass_1 = pass_1_count / len(result_list) if result_list else 0
190
+ pass_k = pass_k_count / len(result_list) if result_list else 0
191
+
192
+ return pass_1, pass_k
193
+
194
+ pass_1, pass_k = calculate_pass(results, k)
195
+ print("Pass@1:", pass_1)
196
+ print(f"Pass@{k}:", pass_k)
197
+
198
+ # pass_rate = (passed / total) * 100
199
+ # print(f"total test: {total}")
200
+ # print(f"Pass rate: {pass_rate}%")
201
+
202
+ output_file = f"pass_rate_results/{output_path}"
203
+ # Create the directory if it doesn't exist
204
+ os.makedirs(os.path.dirname(output_file), exist_ok=True)
205
+
206
+ with open(f"{output_file}", 'w') as f:
207
+ json.dump({'results': results, 'pass_1': pass_1, f"pass_{k}":pass_k}, f, indent=2, ensure_ascii=False)
208
+
209
+ import re
210
+ def remove_simp_pattern_from_end(s):
211
+ pattern = r'@\[simp\s*.*?\]$'
212
+ return re.sub(pattern, '', s)
213
+
214
+ def update_dict(lean_kv):
215
+ update_kv = {}
216
+ for k, v in lean_kv.items():
217
+ # print(k)
218
+ # print(k.split("#align")[0])
219
+ update_kv[k.split("#align")[0]] = v
220
+ # Write the combined data to a new JSON file
221
+ with open('up_lean4_kv.json', 'w') as output_file:
222
+ json.dump(update_kv, output_file, indent=4)
223
+ return update_kv
224
+
225
+
226
+ def find_key(lean_kv):
227
+ update_kv = {}
228
+ for k, v in lean_kv.items():
229
+ if "theorem odd_sub" in k:
230
+ import pdb
231
+ pdb.set_trace()
232
+
233
+
234
+ import os
235
+ import json
236
+ import tqdm
237
+
238
+ def main(args):
239
+ # Define the directory and file names
240
+ file_dir = '../formal-align/data'
241
+ file_names = ['FormL4_basic_test.json', 'FormL4_random_test.json', 'FormL4_train.json']
242
+
243
+ # Initialize a list to store all items from JSON files
244
+ all_items = []
245
+
246
+ # Loop through each file and load its content
247
+ for file_name in file_names:
248
+ file_path = os.path.join(file_dir, file_name)
249
+
250
+ with open(file_path, 'r', encoding='utf-8') as file:
251
+ data = json.load(file)
252
+ if isinstance(data, list):
253
+ all_items.extend(data) # Assuming each file contains a list of items
254
+ else:
255
+ all_items.append(data) # If the file contains a single item
256
+
257
+ # Load the original key-value pairs from the file
258
+ filename = 'data/lean4_kv.json'
259
+ with open(filename, 'r') as file:
260
+ original_kv = json.load(file)
261
+
262
+ new_kv = {}
263
+
264
+ # Iterate over each item in all_items and update the new_kv dictionary
265
+ for item in tqdm.tqdm(all_items):
266
+ formal = item['formal']
267
+ for k, v in list(original_kv.items()):
268
+ if formal in k:
269
+ new_kv[formal] = v
270
+ del original_kv[k]
271
+ break # Exit the inner loop since a match is found
272
+
273
+ # Save the updated key-value pairs to a new file
274
+ output_filename = 'data/updated_lean4_kv.json'
275
+ with open(output_filename, 'w') as file:
276
+ json.dump(new_kv, file, indent=4)
277
+
278
+ print(f"Updated key-value pairs saved to: {output_filename}")
279
+
280
+ if __name__ == '__main__':
281
+ arg_parser = ArgumentParser()
282
+ arg_parser.add_argument('--data_path', type=str,
283
+ default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
284
+ arg_parser.add_argument('--input_path', type=str, default='')
285
+ arg_parser.add_argument('--cuda_num', type=int, default=8)
286
+ arg_parser.add_argument('--k', type=int, default=1)
287
+ arg_parser.add_argument('--output_path', type=str, default='total.json')
288
+ arg_parser.add_argument('--generate_method', type=str,
289
+ choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
290
+ arg_parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
291
+ args = arg_parser.parse_args()
292
+ main(args)
293
+
294
+
295
+