rookiemango
/

lean4-compile

Model card Files Files and versions Community

rookiemango committed on Sep 23, 2024

Commit

1d2a897

verified ·

1 Parent(s): dddc1ae

Upload folder using huggingface_hub

Browse files

Files changed (13) hide show

.gitattributes +3 -0
.gitignore +1 -0
__pycache__/code.cpython-39.pyc +0 -0
compile_result/lean4_basic_test/output.log +6 -0
compile_result/lean4_random_test/output.log +34 -0
data/updated_lean4_kv.json +3 -0
pass_rate_atp_pass.py +0 -3
pass_rate_multi_pass.py +22 -24
pass_rate_new_test.py +40 -19
pass_rate_output.txt +1 -82
pass_rate_results/compile_result/lean4_basic_test/lean4_random_15kpass5.jsonl +3 -0
pass_rate_results/compile_result/lean4_random_test/lean4_random_15kpass5.jsonl +3 -0
update_lean4_kv.py +295 -0

.gitattributes CHANGED Viewed

@@ -68,3 +68,6 @@ gpt_result/lean_random/gpt4/2.jsonl filter=lfs diff=lfs merge=lfs -text
 gpt_result/lean_random/gpt4/3.jsonl filter=lfs diff=lfs merge=lfs -text
 gpt_result/lean_random/gpt4/4.jsonl filter=lfs diff=lfs merge=lfs -text
 gpt_result/lean_random/gpt4/5.jsonl filter=lfs diff=lfs merge=lfs -text

 gpt_result/lean_random/gpt4/3.jsonl filter=lfs diff=lfs merge=lfs -text
 gpt_result/lean_random/gpt4/4.jsonl filter=lfs diff=lfs merge=lfs -text
 gpt_result/lean_random/gpt4/5.jsonl filter=lfs diff=lfs merge=lfs -text
+data/updated_lean4_kv.json filter=lfs diff=lfs merge=lfs -text
+pass_rate_results/compile_result/lean4_basic_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
+pass_rate_results/compile_result/lean4_random_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text

.gitignore CHANGED Viewed

@@ -5,3 +5,4 @@
 /test/Mathlib/.lake
 /test/*.olean
 /test/*.olean.tmp

 /test/Mathlib/.lake
 /test/*.olean
 /test/*.olean.tmp
+pass_rate_results/*/*/*.json

__pycache__/code.cpython-39.pyc CHANGED Viewed

Binary files a/__pycache__/code.cpython-39.pyc and b/__pycache__/code.cpython-39.pyc differ

compile_result/lean4_basic_test/output.log ADDED Viewed

	@@ -0,0 +1,6 @@


1	+
2	+ Pass@1: 0.21311475409836064
3	+ Pass@5: 0.3155737704918033
4	+
5	+
6	+

compile_result/lean4_random_test/output.log ADDED Viewed

	@@ -0,0 +1,34 @@

+Traceback (most recent call last):
+  File "/opt/tiger/lean4-compile/pass_rate_new_test.py", line 165, in multi
+    result = future.result()
+  File "/usr/lib/python3.9/concurrent/futures/_base.py", line 435, in result
+    self._condition.wait(timeout)
+  File "/usr/lib/python3.9/threading.py", line 312, in wait
+    waiter.acquire()
+KeyboardInterrupt
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "/opt/tiger/lean4-compile/pass_rate_new_test.py", line 266, in <module>
+    main(args)
+  File "/opt/tiger/lean4-compile/pass_rate_new_test.py", line 252, in main
+    multi(command_list, args.output_path, args.k)
+  File "/opt/tiger/lean4-compile/pass_rate_new_test.py", line 166, in multi
+    results.append(result)
+  File "/usr/lib/python3.9/concurrent/futures/_base.py", line 628, in __exit__
+    self.shutdown(wait=True)
+  File "/usr/lib/python3.9/concurrent/futures/thread.py", line 229, in shutdown
+    t.join()
+  File "/usr/lib/python3.9/threading.py", line 1033, in join
+    self._wait_for_tstate_lock()
+  File "/usr/lib/python3.9/threading.py", line 1049, in _wait_for_tstate_lock
+    elif lock.acquire(block, timeout):
+KeyboardInterrupt
+Pass@1: 0.14285714285714285
+Pass@5: 0.23949579831932774

data/updated_lean4_kv.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c798624b063042e0cf22453432c9d640a8e72204203b89ee1e40b915838e7dd2
+size 352638720

pass_rate_atp_pass.py CHANGED Viewed

@@ -89,9 +89,6 @@ input_path_lists = [
     # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
     # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
     # Add more input paths as needed
-    "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/5/",
-    "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/5/",
-    "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/5/",
 ]
 # Iterate through the input paths and run the command

     # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
     # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
     # Add more input paths as needed
 ]
 # Iterate through the input paths and run the command

pass_rate_multi_pass.py CHANGED Viewed

@@ -1,18 +1,10 @@
 import pdb
 import subprocess
 import re
-# Output file
-output_file = "pass_rate_output.txt"
-# Clearing the output file before appending new content
-with open(output_file, "w") as file:
-    file.write("")
-# List of input paths
-input_path_lists = [
-    "test/zero_shot/wild_test/generation/lean4_random_15k_all/2/1/",
-]
 def get_output(input_string, k):
     pattern = r"zero_shot/(\w+)/(.+?)/(\w+)"
@@ -20,7 +12,12 @@ def get_output(input_string, k):
     if match:
         part1 = match.group(1)
         part2 = match.group(3) + f"pass{k}.jsonl"
-        result = "/".join([part1, part2])
         print(result)
     else:
         print("No match found.")
@@ -89,24 +86,25 @@ input_path_lists = [
     # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
     # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
     # Add more input paths as needed
-    "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/5/",
-    "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/5/",
-    "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/5/",
 ]
 # Iterate through the input paths and run the command
 for input_path in input_path_lists:
     k = 5
-    if "wild_test" in input_path or "gsm8k_train" in input_path or "math_train" in input_path:
-        print(f"wild")
-        print(f"Running for input path: {input_path}", file=open(output_file, "a"))
-        command = f"python3 pass_rate_notlean_test.py --input_path {input_path} --output_path {get_output(input_path,k)}  --k {k}"
-        subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
-        print("\n\n",file=open(output_file, "a"))
     else:
-        print(f"lean")
-        print(f"Running for input path: {input_path}", file=open(output_file, "a"))
-        command = f"python3 pass_rate_new_test.py --input_path {input_path} --output_path {get_output(input_path, k)} --k {k}"
-        subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
-        print("\n\n",file=open(output_file, "a"))

 import pdb
 import subprocess
 import re
+import os
 def get_output(input_string, k):
     pattern = r"zero_shot/(\w+)/(.+?)/(\w+)"
     if match:
         part1 = match.group(1)
         part2 = match.group(3) + f"pass{k}.jsonl"
+        result = os.path.join("compile_result", part1, part2)
+        # Create the parent directory if it doesn't exist
+        parent_dir = os.path.dirname(result)
+        os.makedirs(parent_dir, exist_ok=True)
         print(result)
     else:
         print("No match found.")
     # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
     # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
     # Add more input paths as needed
+    "/opt/tiger/formal-align/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k/5",
+    "/opt/tiger/formal-align/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k/5"
 ]
 # Iterate through the input paths and run the command
 for input_path in input_path_lists:
     k = 5
+    output_path = get_output(input_path, k)
+    output_log_path = os.path.join(os.path.dirname(output_path), 'output.log')
+    if "wild_test" in input_path or "gsm8k_train" in input_path or "math_train" in input_path:
+        script_name = "pass_rate_notlean_test.py"
     else:
+        script_name = "pass_rate_new_test.py"
+    command = f"python3 {script_name} --input_path {input_path} --output_path {output_path} --k {k}"
+    print(command)
+    with open(output_log_path, "w") as output_file:
+        subprocess.run(command, shell=True, stdout=output_file, stderr=subprocess.STDOUT)
+        print("\n\n", file=output_file)

pass_rate_new_test.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import subprocess
 from argparse import ArgumentParser
 import json
-from concurrent.futures import ThreadPoolExecutor
 from tqdm import tqdm
 import glob
 import tempfile
@@ -110,7 +110,7 @@ def multi(command_list, output_path, k ):
             return filtered_data
         result_dict = filter_json(item)
         result_dict['results'] = []
         for i, cmd in enumerate(item['cmd']):
             temp_file = os.path.join(temp_dir,f"{index}_test_{i}.lean")  # Ensure unique filenames
             with open(temp_file, "w") as f:
@@ -136,16 +136,14 @@ def multi(command_list, output_path, k ):
                 elif not len(stderr) and "messages" in stdout:
                     flag = 0
                     for me in stdout['messages']:
-                        import pdb
-                        pdb.set_trace()
                         if me['severity'] == 'error':
                             flag = 1
                             start_line = me['pos']['line'] - 1
-                            current_column =me['pos']['column'] -1
                             for line_n in range(start_line - 1, 0 , -1):
                                 line_len = len(cmd.split('\n')[line_n])
                                 current_column  += line_len + 1
-                                if not line_len:
                                     break
                             result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos':current_column}
                             break
@@ -161,14 +159,21 @@ def multi(command_list, output_path, k ):
     total = len(command_list)
-    with ThreadPoolExecutor(max_workers=1) as executor:
         futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
-        for future in tqdm(futures, total=total, desc="Processing Commands"):
-            result = future.result()
-            results.append(result)
-            # if result['status'] == 'pass':
-            #     passed += 1
     def calculate_pass(result_list, k):
         pass_1_count = 0
         pass_k_count = 0
@@ -190,7 +195,7 @@ def multi(command_list, output_path, k ):
         pass_k = pass_k_count / len(result_list) if result_list else 0
         return pass_1, pass_k
     pass_1, pass_k = calculate_pass(results, k)
     print("Pass@1:", pass_1)
     print(f"Pass@{k}:", pass_k)
@@ -199,7 +204,7 @@ def multi(command_list, output_path, k ):
     # print(f"total test: {total}")
     # print(f"Pass rate: {pass_rate}%")
-    output_file = f"pass_rate_results/{output_path}"
     # Create the directory if it doesn't exist
     os.makedirs(os.path.dirname(output_file), exist_ok=True)
@@ -211,22 +216,38 @@ def remove_simp_pattern_from_end(s):
     pattern = r'@\[simp\s*.*?\]$'
     return re.sub(pattern, '', s)
 def main(args):
     command_list = []
     file_pattern = os.path.join(args.input_path, '[0-1]*.json')
     for file_path in glob.glob(file_pattern):
-        with open(file_path, 'r', encoding='utf-8') as rf:
             for line in rf.readlines():
                 try:
                     json_item = json.loads(line)
-                    working_env = json_item['content']['working_file']
-                    # pdb.set_trace()
                     # statement = json_item['total output'][0]
                     json_item['cmd'] = []
                     for output in json_item['total output'][:min(args.k, len(json_item['total output']))]:
                         statement = output.split("#align")[0]
                         json_item['cmd'].append('\n\n'.join([working_env,  statement]))
-                    json_item['answer'] = json_item['content']['statement_poof']
                     assert len(statement) > 0
                     # json_item['cmd']  = '\n'.join([working_env, json_item['total output'][0]])
                 except:

 import subprocess
 from argparse import ArgumentParser
 import json
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
 import glob
 import tempfile
             return filtered_data
         result_dict = filter_json(item)
         result_dict['results'] = []
         for i, cmd in enumerate(item['cmd']):
             temp_file = os.path.join(temp_dir,f"{index}_test_{i}.lean")  # Ensure unique filenames
             with open(temp_file, "w") as f:
                 elif not len(stderr) and "messages" in stdout:
                     flag = 0
                     for me in stdout['messages']:
                         if me['severity'] == 'error':
                             flag = 1
                             start_line = me['pos']['line'] - 1
+                            current_column =me['pos']['column'] -1
                             for line_n in range(start_line - 1, 0 , -1):
                                 line_len = len(cmd.split('\n')[line_n])
                                 current_column  += line_len + 1
+                                if not line_len:
                                     break
                             result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos':current_column}
                             break
     total = len(command_list)
+    # Get the number of available CPU cores
+    max_workers = os.cpu_count()
+    results = []
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
         futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
+        with tqdm(total=len(futures), desc="Processing Commands") as progress_bar:
+            for future in as_completed(futures):
+                result = future.result()
+                results.append(result)
+                progress_bar.update(1)
     def calculate_pass(result_list, k):
         pass_1_count = 0
         pass_k_count = 0
         pass_k = pass_k_count / len(result_list) if result_list else 0
         return pass_1, pass_k
     pass_1, pass_k = calculate_pass(results, k)
     print("Pass@1:", pass_1)
     print(f"Pass@{k}:", pass_k)
     # print(f"total test: {total}")
     # print(f"Pass rate: {pass_rate}%")
+    output_file = f"output_path"
     # Create the directory if it doesn't exist
     os.makedirs(os.path.dirname(output_file), exist_ok=True)
     pattern = r'@\[simp\s*.*?\]$'
     return re.sub(pattern, '', s)
def update_dict(lean_kv):
    """Re-key *lean_kv* by the text that precedes ``#align`` in each key.

    The re-keyed mapping is also dumped to ``up_lean4_kv.json`` in the
    current working directory (4-space indented JSON).

    Args:
        lean_kv: Mapping whose string keys may contain an ``#align`` marker.

    Returns:
        A new dict with each key truncated at its first ``#align``; when two
        keys truncate to the same text, the later value wins.
    """
    trimmed = {key.split("#align")[0]: value for key, value in lean_kv.items()}
    # Persist the re-keyed mapping next to the script for later reuse.
    with open('up_lean4_kv.json', 'w') as sink:
        json.dump(trimmed, sink, indent=4)
    return trimmed
 def main(args):
     command_list = []
+    retrieval_path = "data/updated_lean4_kv.json"
     file_pattern = os.path.join(args.input_path, '[0-1]*.json')
     for file_path in glob.glob(file_pattern):
+        with open(file_path, 'r', encoding='utf-8') as rf, open(retrieval_path, 'r', encoding='utf-8') as retrival_kv:
+            lean4_kv = json.load(retrival_kv)
             for line in rf.readlines():
                 try:
                     json_item = json.loads(line)
+                    # working_env = content']['working_file']
+                    working_env = lean4_kv[json_item['content']['formal']]
                     # statement = json_item['total output'][0]
                     json_item['cmd'] = []
                     for output in json_item['total output'][:min(args.k, len(json_item['total output']))]:
                         statement = output.split("#align")[0]
                         json_item['cmd'].append('\n\n'.join([working_env,  statement]))
+                    json_item['answer'] = json_item['content']['formal']
                     assert len(statement) > 0
                     # json_item['cmd']  = '\n'.join([working_env, json_item['total output'][0]])
                 except:

pass_rate_output.txt CHANGED Viewed

@@ -1,82 +1 @@
-Running for input path: ../auto-info/generate_result/zero_shot/lean4_basic_test/generation/deepseek-math-7b-base/5/
-Running for input path: ../auto-info/generate_result/zero_shot/lean4_random_test/generation/deepseek-math-7b-base/5/
-Running for input path: ../auto-info/generate_result/zero_shot/wild_test/generation/deepseek-math-7b-base/5/
-total len: 1000
-Pass@1: 0.0
-Pass@5: 0.0
-Running for input path: ../auto-info/generate_result/zero_shot/lean4_basic_test/generation/deepseek-math-7b-instruct/5/
-total len: 981
-Pass@1: 0.004077471967380225
-Pass@5: 0.029561671763506627
-Running for input path: ../auto-info/generate_result/zero_shot/lean4_random_test/generation/deepseek-math-7b-instruct/5/
-total len: 970
-Pass@1: 0.002061855670103093
-Pass@5: 0.016494845360824743
-Running for input path: ../auto-info/generate_result/zero_shot/lean4_basic_test/generation/llemma_7b/5/
-total len: 981
-Pass@1: 0.0010193679918450561
-Pass@5: 0.004077471967380225
-Running for input path: ../auto-info/generate_result/zero_shot/lean4_random_test/generation/llemma_7b/5/
-total len: 970
-Pass@1: 0.0
-Pass@5: 0.007216494845360825
-Running for input path: ../auto-info/generate_result/zero_shot/wild_test/generation/llemma_7b/5/
-total len: 1000
-Pass@1: 0.0
-Pass@5: 0.0
-Running for input path: ../auto-info/generate_result/zero_shot/lean4_basic_test/generation/llemma_34b/5/
-total len: 981
-Pass@1: 0.0
-Pass@5: 0.0
-Running for input path: ../auto-info/generate_result/zero_shot/lean4_random_test/generation/llemma_34b/5/
-total len: 970
-Pass@1: 0.0
-Pass@5: 0.0010309278350515464
-Running for input path: ../auto-info/generate_result/zero_shot/wild_test/generation/llemma_34b/5/
-total len: 1000
-Pass@1: 0.0
-Pass@5: 0.0
-Running for input path: ../auto-info/generate_result/zero_shot/lean4_basic_test/generation/internlm2-math-7b/5/


1	+ Running for input path: /opt/tiger/formal-align/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k/5/opt/tiger/formal-align/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k/5

pass_rate_results/compile_result/lean4_basic_test/lean4_random_15kpass5.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4040c382e0ee3c796951b7c60b55f91616ea8653c1730c75978c3464d9562ecc
+size 12184988

pass_rate_results/compile_result/lean4_random_test/lean4_random_15kpass5.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ba08a0aea4c769c9f16378f48121a50f7df9379fec11518773896607a9ad1a0
+size 55140396

update_lean4_kv.py ADDED Viewed

	@@ -0,0 +1,295 @@

+import os
+import subprocess
+from argparse import ArgumentParser
+import json
+from concurrent.futures import ThreadPoolExecutor
+from tqdm import tqdm
+import glob
+import tempfile
+import random
def wrapped_function(item):
    """Check one Lean snippet with ``lake exe repl`` and report the outcome.

    Args:
        item: Mapping whose ``'cmd'`` entry holds the Lean source text.

    Returns:
        A dict with two entries:

        * ``'results'`` - a one-element list. On a successful run it holds
          the raw ``stdout``/``stderr`` text with status ``'pass'``; when the
          REPL cannot be run, exits non-zero, or replies with something that
          is not JSON, it holds the error text with status ``'nopass'``.
        * ``'pass_rate'`` - ``100.0`` when the REPL reply has no
          ``"messages"`` key, otherwise ``0.0``.
    """
    results = []
    passed = 0
    total = 1  # exactly one snippet is evaluated per call

    # A uniquely named file keeps concurrent callers from overwriting each
    # other's source, which a fixed "test.lean" in the shared temp dir allowed.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".lean", encoding="utf-8", delete=False
    ) as f:
        f.write(item['cmd'])
        temp_file = f.name

    # json.dumps escapes the path correctly; the trailing newline mirrors what
    # `echo` used to append to the request.
    request = json.dumps({"path": temp_file, "allTactics": True}) + "\n"
    try:
        result = subprocess.run(
            ["lake", "exe", "repl"],
            input=request.encode('utf-8'),
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
    except (subprocess.CalledProcessError, OSError, ValueError) as e:
        # OSError: `lake` is missing or not executable.
        # ValueError: the reply was not valid UTF-8 / JSON.
        results.append({'error': str(e), 'status': 'nopass'})
    else:
        if "messages" not in json_stdout:
            passed += 1
        # NOTE(review): status is 'pass' even when the reply carries
        # "messages"; only `pass_rate` reflects them. Kept as in the original.
        results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
    finally:
        # Best-effort cleanup of the scratch file.
        try:
            os.remove(temp_file)
        except OSError:
            pass

    # The denominator is the number of attempts, not passed + attempts.
    pass_rate = passed / total * 100
    return {'results': results, 'pass_rate': pass_rate}
+# Set the directory where your .lean files are located
+# Get a list of all .lean files in the directory
+# lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
+# lean_files = ["test/file.lean"]
def single(command_list, args):
    """Check each item's Lean source sequentially and write ``results.json``.

    Every item's ``'cmd'`` text is written to ``test/test.lean`` and handed to
    ``lake exe repl``. An item counts as passed when the REPL exits cleanly
    and its JSON reply has no ``"messages"`` key.

    Args:
        command_list: Iterable of mappings with a ``'cmd'`` string.
        args: Unused; kept so existing callers keep working.

    Side effects:
        Prints the pass rate (a percentage) and writes it, together with the
        per-item records, to ``results.json`` in the working directory.
    """
    # Imported locally: a later module-level `import tqdm` rebinds the name
    # `tqdm` to the module object, which is not callable.
    from tqdm import tqdm as progress

    lean_path = "test/test.lean"
    os.makedirs(os.path.dirname(lean_path), exist_ok=True)
    # The request never changes, so build it once outside the loop.
    data = '{"path": "test/test.lean", "allTactics": true}'
    command = 'echo \'%s\' | lake exe repl' % data

    results = []
    passed = 0
    total = 0
    for item in progress(command_list):
        with open(lean_path, "w", encoding='utf-8') as f:
            f.write(item['cmd'])
        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            stderr = result.stderr.decode('utf-8')
            json_stdout = json.loads(stdout)
        except (subprocess.CalledProcessError, ValueError) as e:
            # ValueError covers a reply that is not valid UTF-8 / JSON, which
            # previously aborted the whole run.
            results.append({
                'error': str(e),
                'status': 'nopass'
            })
        else:
            if "messages" not in json_stdout:
                passed += 1
            # NOTE(review): status is 'pass' even when "messages" is present;
            # only the `passed` counter reflects them. Kept as in the original.
            results.append({
                'stdout': stdout,
                'stderr': stderr,
                'status': 'pass'
            })
        total += 1

    # An empty command list yields 0.0 instead of a ZeroDivisionError.
    pass_rate = passed / total * 100 if total else 0.0
    print(pass_rate)

    # ensure_ascii=False may emit non-ASCII text, so pin the file encoding.
    with open('results.json', 'w', encoding='utf-8') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
def multi(command_list, output_path, k ):
    """Check every candidate of every item and report Pass@1 / Pass@k.

    Args:
        command_list: Sequence of mappings. Each has a ``'cmd'`` list of Lean
            source strings (one per candidate); the keys ``'question'``,
            ``'answer'`` and ``'total output'`` are copied into the report
            when present.
        output_path: Report location relative to ``pass_rate_results/``.
        k: Number of leading candidates considered for Pass@k.

    Side effects:
        Writes one ``.lean`` file per candidate under ``/opt/jianqiao``,
        prints Pass@1 and Pass@k, and writes the JSON report.
    """
    # Imported locally: a later module-level `import tqdm` rebinds the name
    # `tqdm` to the module object, which is not callable.
    from tqdm import tqdm as progress

    temp_dir = '/opt/jianqiao'
    kept_keys = ('question', 'answer', 'total output', 'results')

    def classify(cmd, stdout, stderr):
        """Turn a parsed REPL reply into a per-candidate result record."""
        if stderr:
            return {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos': 0}
        if "messages" not in stdout:
            return {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
        lines = cmd.split('\n')  # split once instead of once per visited line
        for me in stdout['messages']:
            if me['severity'] != 'error':
                continue
            # Walk backwards from the line above the error, adding each
            # line's length (+1 for its newline) until a blank line is hit.
            # NOTE(review): presumably this yields the error's character
            # offset within the generated statement - confirm with consumers.
            start_line = me['pos']['line'] - 1
            current_column = me['pos']['column'] - 1
            for line_n in range(start_line - 1, 0, -1):
                line_len = len(lines[line_n])
                current_column += line_len + 1
                if not line_len:
                    break
            return {'stdout': stdout, 'stderr': stderr, 'status': 'nopass',
                    'string_pos': current_column}
        # Messages were present but none had severity 'error'.
        return {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}

    def execute_command(item, index):
        """Run the REPL on each candidate of *item* and collect the records."""
        result_dict = {key: item[key] for key in item if key in kept_keys}
        result_dict['results'] = []
        os.makedirs(temp_dir, exist_ok=True)
        for i, cmd in enumerate(item['cmd']):
            temp_file = os.path.join(temp_dir, f"{index}_test_{i}.lean")  # Ensure unique filenames
            with open(temp_file, "w", encoding='utf-8') as f:
                f.write(cmd)
            data = '{"path": "%s", "allTactics": true}' % temp_file
            command = f'echo \'{data}\' | lake exe repl'
            try:
                result = subprocess.run(command, shell=True, check=True, timeout=600,
                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout = json.loads(result.stdout.decode('utf-8'))
                stderr = result.stderr.decode('utf-8')
            except subprocess.TimeoutExpired as e:
                result_item = {'error': str(e), 'status': 'nopass_limit'}
            except subprocess.CalledProcessError as e:
                result_item = {'error': str(e), 'status': 'nopass_error'}
            except ValueError as e:
                # A reply that is not valid UTF-8 / JSON used to escape the
                # handlers and abort the run; record it as a failure instead.
                result_item = {'error': str(e), 'status': 'nopass_error'}
            else:
                result_item = classify(cmd, stdout, stderr)
            result_dict['results'].append(result_item)
        return result_dict

    def calculate_pass(result_list, k):
        """Return (Pass@1, Pass@k) as fractions of ``len(result_list)``."""
        pass_1_count = 0
        pass_k_count = 0
        for result in result_list:
            candidates = result.get('results', [])
            if candidates and candidates[0].get('status') == 'pass':
                pass_1_count += 1
            if any(c.get('status') == 'pass' for c in candidates[:max(k, 0)]):
                pass_k_count += 1
        pass_1 = pass_1_count / len(result_list) if result_list else 0
        pass_k = pass_k_count / len(result_list) if result_list else 0
        return pass_1, pass_k

    results = []
    total = len(command_list)
    with ThreadPoolExecutor(max_workers=1) as executor:
        futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
        # Futures are consumed in submission order, so `results` lines up
        # with `command_list`.
        for future in progress(futures, total=total, desc="Processing Commands"):
            results.append(future.result())

    pass_1, pass_k = calculate_pass(results, k)
    print("Pass@1:", pass_1)
    print(f"Pass@{k}:", pass_k)

    output_file = f"pass_rate_results/{output_path}"
    # Create the directory if it doesn't exist
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    # ensure_ascii=False may emit non-ASCII text, so pin the file encoding.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump({'results': results, 'pass_1': pass_1, f"pass_{k}": pass_k}, f, indent=2, ensure_ascii=False)
+import re
def remove_simp_pattern_from_end(s):
    """Return *s* without a trailing ``@[simp ...]`` attribute.

    Only an attribute whose closing ``]`` ends the string is removed; any
    other occurrence is left untouched.
    """
    trailing_simp = re.compile(r'@\[simp\s*.*?\]$')
    return trailing_simp.sub('', s)
def update_dict(lean_kv):
    """Re-key *lean_kv* by the text that precedes ``#align`` in each key.

    The re-keyed mapping is also dumped to ``up_lean4_kv.json`` in the
    current working directory (4-space indented JSON).

    Args:
        lean_kv: Mapping whose string keys may contain an ``#align`` marker.

    Returns:
        A new dict with each key truncated at its first ``#align``; when two
        keys truncate to the same text, the later value wins.
    """
    trimmed = {key.split("#align")[0]: value for key, value in lean_kv.items()}
    # Persist the re-keyed mapping next to the script for later reuse.
    with open('up_lean4_kv.json', 'w') as sink:
        json.dump(trimmed, sink, indent=4)
    return trimmed
def find_key(lean_kv):
    """Debugging aid: drop into pdb for each key containing ``theorem odd_sub``.

    Args:
        lean_kv: Mapping whose keys are searched.

    Returns:
        None. The function exists only for its interactive breakpoint.
    """
    needle = "theorem odd_sub"
    for name, _value in lean_kv.items():
        if needle not in name:
            continue
        # Interactive inspection point; `name` and `_value` are in scope.
        import pdb
        pdb.set_trace()
+import os
+import json
+import tqdm
def main(args):
    """Build ``data/updated_lean4_kv.json`` keyed by each item's ``formal`` text.

    Loads the three FormL4 JSON files from ``../formal-align/data`` and the
    existing ``data/lean4_kv.json`` mapping. For each dataset item, the first
    remaining key of that mapping which contains the item's ``formal`` string
    is consumed, and its value is stored under ``formal`` in the new mapping.

    Args:
        args: Parsed command-line namespace; not read by this function.

    Side effects:
        Writes ``data/updated_lean4_kv.json`` and prints its path.
    """
    # Define the directory and file names
    file_dir = '../formal-align/data'
    file_names = ['FormL4_basic_test.json', 'FormL4_random_test.json', 'FormL4_train.json']

    # Collect the items of every dataset file into one list.
    all_items = []
    for file_name in file_names:
        file_path = os.path.join(file_dir, file_name)
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        if isinstance(data, list):
            all_items.extend(data)  # Assuming each file contains a list of items
        else:
            all_items.append(data)  # If the file contains a single item

    # Load the original key-value pairs from the file
    filename = 'data/lean4_kv.json'
    with open(filename, 'r', encoding='utf-8') as file:
        original_kv = json.load(file)

    new_kv = {}
    for item in tqdm.tqdm(all_items):
        formal = item['formal']
        # Find the first key containing `formal` without copying the whole
        # mapping for every item; JSON keys are strings, so None is a safe
        # "no match" marker.
        matched_key = next((k for k in original_kv if formal in k), None)
        if matched_key is not None:
            # Each original entry is consumed at most once.
            new_kv[formal] = original_kv.pop(matched_key)

    # Save the updated key-value pairs to a new file
    output_filename = 'data/updated_lean4_kv.json'
    with open(output_filename, 'w', encoding='utf-8') as file:
        json.dump(new_kv, file, indent=4)
    print(f"Updated key-value pairs saved to: {output_filename}")
if __name__ == '__main__':
    # Command-line options, declared as (flag, add_argument keyword) pairs and
    # registered in this order. main() as written in this file does not read
    # any of them.
    option_specs = [
        ('--data_path', dict(type=str,
                             default='data/grade-school-math-master/grade_school_math/data/test.jsonl')),
        ('--input_path', dict(type=str, default='')),
        ('--cuda_num', dict(type=int, default=8)),
        ('--k', dict(type=int, default=1)),
        ('--output_path', dict(type=str, default='total.json')),
        ('--generate_method', dict(type=str,
                                   choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])),
        ('--method', dict(type=str, choices=['main', 'test', 'get_data'])),
    ]
    arg_parser = ArgumentParser()
    for flag, spec in option_specs:
        arg_parser.add_argument(flag, **spec)
    args = arg_parser.parse_args()
    main(args)