rookiemango
/

lean4-compile

Model card Files Files and versions Community

rookiemango committed on Sep 24, 2024

Commit

2c223a5

verified ·

1 Parent(s): 1d2a897

Upload folder using huggingface_hub

Browse files

Files changed (16) hide show

.gitattributes +3 -0
compile_result/gsm8k_train/lean4_random_15kpass10.jsonl +3 -0
compile_result/gsm8k_train/output.log +0 -0
compile_result/gsm8k_train/pass_for_train.jsonl +0 -0
compile_result/math_train/lean4_random_15kpass10.jsonl +3 -0
compile_result/math_train/output.log +0 -0
compile_result/math_train/pass_for_train.jsonl +0 -0
compile_result/wild_test/lean4_random_15kpass5.jsonl +3 -0
compile_result/wild_test/output.log +6 -0
data/leandojo.txt +55 -0
nvcc.sh +32 -0
nvcc_use.txt +0 -0
pass_rate_multi_pass.py +12 -63
pass_rate_notlean_test.py +20 -15
run.sh +8 -0
trans_from_compile_4_training.py +63 -0

.gitattributes CHANGED Viewed

@@ -68,6 +68,9 @@ gpt_result/lean_random/gpt4/2.jsonl filter=lfs diff=lfs merge=lfs -text
 gpt_result/lean_random/gpt4/3.jsonl filter=lfs diff=lfs merge=lfs -text
 gpt_result/lean_random/gpt4/4.jsonl filter=lfs diff=lfs merge=lfs -text
 gpt_result/lean_random/gpt4/5.jsonl filter=lfs diff=lfs merge=lfs -text
 data/updated_lean4_kv.json filter=lfs diff=lfs merge=lfs -text
 pass_rate_results/compile_result/lean4_basic_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
 pass_rate_results/compile_result/lean4_random_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text

 gpt_result/lean_random/gpt4/3.jsonl filter=lfs diff=lfs merge=lfs -text
 gpt_result/lean_random/gpt4/4.jsonl filter=lfs diff=lfs merge=lfs -text
 gpt_result/lean_random/gpt4/5.jsonl filter=lfs diff=lfs merge=lfs -text
+compile_result/gsm8k_train/lean4_random_15kpass10.jsonl filter=lfs diff=lfs merge=lfs -text
+compile_result/math_train/lean4_random_15kpass10.jsonl filter=lfs diff=lfs merge=lfs -text
+compile_result/wild_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
 data/updated_lean4_kv.json filter=lfs diff=lfs merge=lfs -text
 pass_rate_results/compile_result/lean4_basic_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
 pass_rate_results/compile_result/lean4_random_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text

compile_result/gsm8k_train/lean4_random_15kpass10.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff6336bbc2d08fe999675305c9ee393aa28004e314b37a2385406a35a4dd72c0
+size 127482765

compile_result/gsm8k_train/output.log ADDED Viewed

The diff for this file is too large to render. See raw diff

compile_result/gsm8k_train/pass_for_train.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

compile_result/math_train/lean4_random_15kpass10.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2e7b131af39172049b12c1845d7c20ef04c9c39472cea61e8210bf96c7c3baf
+size 191377984

compile_result/math_train/output.log ADDED Viewed

The diff for this file is too large to render. See raw diff

compile_result/math_train/pass_for_train.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

compile_result/wild_test/lean4_random_15kpass5.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9024dbfcb5579c7465c2c5f9ed64aec3ea260e9a5a734c9690d1d115a6cfd6ad
+size 10953749

compile_result/wild_test/output.log ADDED Viewed

	@@ -0,0 +1,6 @@


1	+
2	+ Pass@1: 0.009
3	+ Pass@5: 0.053
4	+
5	+
6	+

data/leandojo.txt ADDED Viewed

	@@ -0,0 +1,55 @@

+import Mathlib.Algebra.Algebra.Basic
+import Mathlib.Algebra.Order.Floor
+import Mathlib.Algebra.Associated
+import Mathlib.Algebra.BigOperators.Pi
+import Mathlib.Algebra.GeomSum
+import Mathlib.Algebra.Group.Pi.Basic
+import Mathlib.Algebra.Group.Commute.Basic
+import Mathlib.Algebra.Order.Floor
+import Mathlib.Algebra.QuadraticDiscriminant
+import Mathlib.Algebra.Ring.Basic
+import Mathlib.Analysis.Asymptotics.AsymptoticEquivalent
+import Mathlib.Analysis.NormedSpace.Basic
+import Mathlib.Analysis.SpecialFunctions.Log.Basic
+import Mathlib.Analysis.SpecialFunctions.Log.Base
+import Mathlib.Combinatorics.SimpleGraph.Basic
+import Mathlib.Data.Complex.Basic
+import Mathlib.Data.Complex.Exponential
+import Mathlib.Data.Finset.Basic
+import Mathlib.Data.Fintype.Card
+import Mathlib.Data.Int.GCD
+import Mathlib.Data.Int.ModEq
+import Mathlib.Data.List.Intervals
+import Mathlib.Data.List.Palindrome
+import Mathlib.Data.Multiset.Basic
+import Mathlib.Data.Nat.Choose.Basic
+import Mathlib.Data.Nat.Digits
+import Mathlib.Data.Nat.Factorial.Basic
+import Mathlib.Data.Nat.ModEq
+import Mathlib.Data.Nat.Multiplicity
+import Mathlib.Data.PNat.Basic
+import Mathlib.Data.PNat.Prime
+import Mathlib.Data.Rat.Lemmas
+import Mathlib.Data.Real.Basic
+import Mathlib.Data.Real.Irrational
+import Mathlib.Data.Real.Sqrt
+import Mathlib.Data.Set.Finite
+import Mathlib.Data.Sym.Sym2
+import Mathlib.Data.ZMod.Basic
+import Mathlib.Dynamics.FixedPoints.Basic
+import Mathlib.LinearAlgebra.AffineSpace.AffineMap
+import Mathlib.LinearAlgebra.AffineSpace.Independent
+import Mathlib.LinearAlgebra.AffineSpace.Ordered
+import Mathlib.LinearAlgebra.FiniteDimensional
+import Mathlib.Logic.Equiv.Basic
+import Mathlib.Order.Filter.Basic
+import Mathlib.Order.WellFounded
+import Mathlib.Topology.Basic
+import Mathlib.Data.Complex.Basic
+import Mathlib.Data.Nat.Log
+import Mathlib.Data.Complex.Exponential
+import Mathlib.NumberTheory.Divisors
+import Mathlib.Data.ZMod.Defs
+import Mathlib.Tactic
+import Mathlib.Util.Delaborators
+import Mathlib.Data.Real.Irrational

nvcc.sh ADDED Viewed

	@@ -0,0 +1,32 @@

#!/bin/bash
# Run two parallel copies of the `nvcc_use.txt` executable, each over every GPU
# reported by `nvidia-smi -L`. The executable is fetched from HDFS when it is
# not already present in the working directory.

readonly NVCC_USE_HDFS="hdfs://harunava/home/byte_data_seed_azure/seed_foundation_model/user/lujianqiao/nvcc_use.txt"
readonly NVCC_USE_BIN="./nvcc_use.txt"

# Make sure the executable exists locally and is runnable.
# This runs once, before the parallel launches: `hdfs dfs -get` refuses to
# overwrite an existing local file, and two concurrent downloads to the same
# path would race with each other.
fetch_binary() {
  if [ ! -e "$NVCC_USE_BIN" ]; then
    hdfs dfs -get "$NVCC_USE_HDFS" "$NVCC_USE_BIN" || return 1
  fi
  # Make the file executable
  sudo chmod +x "$NVCC_USE_BIN"
}

# Run the executable once across all detected GPUs.
# Returns the executable's exit status, or 1 when no GPU is visible.
run_command() {
  local num_gpus gpu_list
  # Fixed positional parameters forwarded to the executable.
  local param1=10
  local param2=96

  # `nvidia-smi -L` prints one line per GPU.
  num_gpus=$(nvidia-smi -L | wc -l)
  if [ "$num_gpus" -lt 1 ]; then
    # Without this guard `seq` would yield an empty list and the executable
    # would be started with a missing argument.
    echo "No GPUs detected; nothing to run" >&2
    return 1
  fi

  # Comma-separated GPU indices: 0,1,...,num_gpus-1
  gpu_list=$(seq -s, 0 $((num_gpus - 1)))

  # Keep the command as an array so every argument is passed through verbatim
  # instead of being re-split by the shell.
  local -a command=("$NVCC_USE_BIN" "$param1" "$param2" "$gpu_list")
  echo "Running command: ${command[*]}"
  "${command[@]}"
}

if ! fetch_binary; then
  echo "Failed to prepare $NVCC_USE_BIN" >&2
  exit 1
fi

# Run the command twice in parallel
run_command &
run_command &
# Wait for both commands to finish
wait

nvcc_use.txt ADDED Viewed

Binary file (714 kB). View file

pass_rate_multi_pass.py CHANGED Viewed

@@ -26,73 +26,22 @@ def get_output(input_string, k):
 # List of input paths
 input_path_lists = [
-    # "../auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k_all/2/10/",
-    # "../auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all/2/10/",
-    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/1/1",
-    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/2/1",
-    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/3/1",
-    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/1/1",
-    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/2/1",
-    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/3/1",
-    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/1/1",
-    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/2/1",
-    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/3/1",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all/2/20/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/5/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/5/",
-    # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_5k/2/1/",
-    # "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
-    # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/1/",
-    # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/1/",
-    # "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
-    # "test/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/1/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/3/1/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k_all_mathrft/2/10/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
-    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
-    # Add more input paths as needed
-    "/opt/tiger/formal-align/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k/5",
-    "/opt/tiger/formal-align/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k/5"
 ]
 # Iterate through the input paths and run the command
 for input_path in input_path_lists:
-    k = 5
     output_path = get_output(input_path, k)
     output_log_path = os.path.join(os.path.dirname(output_path), 'output.log')

 # List of input paths
 input_path_lists = [
+    # "/opt/tiger/formal-align/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k/5",
+    # "/opt/tiger/formal-align/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k/5"
+    "/opt/tiger/formal-align/generate_result/zero_shot/wild_test/generation/lean4_random_15k/5"
+    # "/opt/tiger/formal-align/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k/10",
+    # "/opt/tiger/formal-align/generate_result/zero_shot/math_train/generation/lean4_random_15k/10",
 ]
def extract_k(input_path):
    """Return the last component of ``input_path``, which the caller uses as ``k``.

    Trailing path separators are ignored, so ``".../lean4_random_15k/5/"``
    yields ``"5"`` rather than the empty string that ``os.path.basename``
    returns for a path ending in a separator.

    Args:
        input_path: Path of a generation directory whose final component is
            the value to extract.

    Returns:
        The final path component as a string (it is not converted to ``int``).
    """
    separators = os.sep + (os.altsep or "")
    return os.path.basename(input_path.rstrip(separators))
 # Iterate through the input paths and run the command
 for input_path in input_path_lists:
+    k = extract_k(input_path)
+    print(k)
     output_path = get_output(input_path, k)
     output_log_path = os.path.join(os.path.dirname(output_path), 'output.log')

pass_rate_notlean_test.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import subprocess
 from argparse import ArgumentParser
 import json
-from concurrent.futures import ThreadPoolExecutor
 from tqdm import tqdm
 import tempfile
 import glob
@@ -62,7 +62,7 @@ def single(command_list, output_path):
         data = '{"path": "test/test.lean", "allTactics": true}'
         # data = '{"cmd": "%s", "allTactics": true}' % item['cmd']
         command = 'echo \'%s\' | lake exe repl' % data
         try:
             # process = subprocess.Popen(['lake', 'exe', 'repl'], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
             #                            stderr=subprocess.PIPE)
@@ -116,7 +116,7 @@ def multi(command_list, output_path, k ):
             return filtered_data
         result_dict = filter_json(item)
         result_dict['results'] = []
         for i, cmd in enumerate(item['cmd']):
             temp_file = os.path.join(temp_dir,f"{index}_test_{i}.lean")  # Ensure unique filenames
             with open(temp_file, "w") as f:
@@ -145,11 +145,11 @@ def multi(command_list, output_path, k ):
                         if me['severity'] == 'error':
                             flag = 1
                             start_line = me['pos']['line'] - 1
-                            current_column =me['pos']['column'] -1
                             for line_n in range(start_line - 1, 0 , -1):
                                 line_len = len(cmd.split('\n')[line_n])
                                 current_column  += line_len + 1
-                                if not line_len:
                                     break
                             result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos':current_column}
                             break
@@ -163,17 +163,22 @@ def multi(command_list, output_path, k ):
         return result_dict
-    total = len(command_list)
-    with ThreadPoolExecutor(max_workers=128) as executor:
         futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
-        for future in tqdm(futures, total=total, desc="Processing Commands"):
-            result = future.result()
-            results.append(result)
-            # if result['status'] == 'pass':
-            #     passed += 1
     def calculate_pass(result_list, k):
         pass_1_count = 0
         pass_k_count = 0
@@ -195,7 +200,7 @@ def multi(command_list, output_path, k ):
         pass_k = pass_k_count / len(result_list) if result_list else 0
         return pass_1, pass_k
     pass_1, pass_k = calculate_pass(results, k)
     print("Pass@1:", pass_1)
     print(f"Pass@{k}:", pass_k)
@@ -204,7 +209,7 @@ def multi(command_list, output_path, k ):
     # print(f"total test: {total}")
     # print(f"Pass rate: {pass_rate}%")
-    output_file = f"pass_rate_results/{output_path}"
     # Create the directory if it doesn't exist
     os.makedirs(os.path.dirname(output_file), exist_ok=True)

 import subprocess
 from argparse import ArgumentParser
 import json
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
 import tempfile
 import glob
         data = '{"path": "test/test.lean", "allTactics": true}'
         # data = '{"cmd": "%s", "allTactics": true}' % item['cmd']
         command = 'echo \'%s\' | lake exe repl' % data
         try:
             # process = subprocess.Popen(['lake', 'exe', 'repl'], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
             #                            stderr=subprocess.PIPE)
             return filtered_data
         result_dict = filter_json(item)
         result_dict['results'] = []
         for i, cmd in enumerate(item['cmd']):
             temp_file = os.path.join(temp_dir,f"{index}_test_{i}.lean")  # Ensure unique filenames
             with open(temp_file, "w") as f:
                         if me['severity'] == 'error':
                             flag = 1
                             start_line = me['pos']['line'] - 1
+                            current_column =me['pos']['column'] -1
                             for line_n in range(start_line - 1, 0 , -1):
                                 line_len = len(cmd.split('\n')[line_n])
                                 current_column  += line_len + 1
+                                if not line_len:
                                     break
                             result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos':current_column}
                             break
         return result_dict
+    # Get the number of available CPU cores
+    max_workers = os.cpu_count()
+    results = []
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
         futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
+        with tqdm(total=len(futures), desc="Processing Commands") as progress_bar:
+            for future in as_completed(futures):
+                result = future.result()
+                results.append(result)
+                progress_bar.update(1)
     def calculate_pass(result_list, k):
         pass_1_count = 0
         pass_k_count = 0
         pass_k = pass_k_count / len(result_list) if result_list else 0
         return pass_1, pass_k
     pass_1, pass_k = calculate_pass(results, k)
     print("Pass@1:", pass_1)
     print(f"Pass@{k}:", pass_k)
     # print(f"total test: {total}")
     # print(f"Pass rate: {pass_rate}%")
+    output_file = f"{output_path}"
     # Create the directory if it doesn't exist
     os.makedirs(os.path.dirname(output_file), exist_ok=True)

run.sh ADDED Viewed

	@@ -0,0 +1,8 @@

# Compute the pass rates, upload the compile results to HDFS, then run nvcc.sh.
python3 pass_rate_multi_pass.py
# FsShell subcommands carry a leading dash: `put` alone is rejected as an
# unknown command, so the upload never happened.
hdfs dfs -put compile_result hdfs://harunava/home/byte_data_seed_azure/seed_foundation_model/user/lujianqiao/compile_result
bash nvcc.sh

trans_from_compile_4_training.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import json
+import os
+import tqdm
+input_list = [
+    "compile_result/gsm8k_train/lean4_random_15kpass10.jsonl",
+    "compile_result/math_train/lean4_random_15kpass10.jsonl",
+]
def get_statement_proof(text):
    """Split a question string into its statement and proof sections.

    The text must contain a ``statement:`` header line followed by the
    statement body, then a blank line and a ``proof:`` header line followed by
    the proof body, which runs to the end of the text.

    Args:
        text: The question text to split.

    Returns:
        A ``(statement, proof)`` tuple with surrounding whitespace stripped
        from each part.

    Raises:
        ValueError: If the statement or the proof section cannot be located.
    """
    import re

    # The statement ends at the blank line that precedes the "proof:" header.
    statement_match = re.search(r"statement:\n(.*?)(?=\n\nproof:)", text, re.DOTALL)
    # The proof is everything after its header, across newlines.
    proof_match = re.search(r"proof:\n(.*)", text, re.DOTALL)

    # Fail with a message that names the missing section instead of an
    # AttributeError raised from calling .group() on None.
    if statement_match is None:
        raise ValueError("no 'statement:' section followed by 'proof:' found in text")
    if proof_match is None:
        raise ValueError("no 'proof:' section found in text")

    return statement_match.group(1).strip(), proof_match.group(1).strip()
def save_passed_results(input_list):
    """Write the distinct passing outputs of each compile result as training data.

    Every path in ``input_list`` is parsed as a single JSON document whose
    ``results`` entry is a list of items. Each item supplies a ``question``
    (split with ``get_statement_proof``), a ``total output`` list and a
    parallel ``results`` list. Repeated outputs within an item are considered
    only once, keeping the result paired with the first occurrence. Each
    remaining output whose result has ``status == 'pass'`` is written as one
    JSON line to ``pass_for_train.jsonl`` in the same directory as the input.

    Args:
        input_list: Paths of the compile-result files to convert.
    """
    for input_file in input_list:
        save_file = os.path.join(os.path.dirname(input_file), 'pass_for_train.jsonl')

        # Load everything first so the input handle is released before writing.
        with open(input_file, 'r', encoding='utf-8') as file:
            data = json.load(file)

        with open(save_file, 'w', encoding='utf-8') as output_file:
            for item in tqdm.tqdm(data['results']):
                statement, proof = get_statement_proof(item['question'])

                seen_outputs = set()
                for output, result in zip(item['total output'], item['results']):
                    # Only the first occurrence of an output is considered,
                    # whatever status later duplicates carry.
                    if output in seen_outputs:
                        continue
                    seen_outputs.add(output)

                    if result.get("status") != 'pass':
                        continue

                    result_dict = {
                        'nl_statement': statement,
                        'nl_proof': proof,
                        'formal': output
                    }
                    output_file.write(json.dumps(result_dict) + '\n')
+# Example usage
+input_list = [
+    "compile_result/gsm8k_train/lean4_random_15kpass10.jsonl",
+    "compile_result/math_train/lean4_random_15kpass10.jsonl",
+]
+save_passed_results(input_list)