rookiemango
commited on
Upload folder using huggingface_hub
Browse files- .gitattributes +3 -0
- compile_result/gsm8k_train/lean4_random_15kpass10.jsonl +3 -0
- compile_result/gsm8k_train/output.log +0 -0
- compile_result/gsm8k_train/pass_for_train.jsonl +0 -0
- compile_result/math_train/lean4_random_15kpass10.jsonl +3 -0
- compile_result/math_train/output.log +0 -0
- compile_result/math_train/pass_for_train.jsonl +0 -0
- compile_result/wild_test/lean4_random_15kpass5.jsonl +3 -0
- compile_result/wild_test/output.log +6 -0
- data/leandojo.txt +55 -0
- nvcc.sh +32 -0
- nvcc_use.txt +0 -0
- pass_rate_multi_pass.py +12 -63
- pass_rate_notlean_test.py +20 -15
- run.sh +8 -0
- trans_from_compile_4_training.py +63 -0
.gitattributes
CHANGED
@@ -68,6 +68,9 @@ gpt_result/lean_random/gpt4/2.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
68 |
gpt_result/lean_random/gpt4/3.jsonl filter=lfs diff=lfs merge=lfs -text
|
69 |
gpt_result/lean_random/gpt4/4.jsonl filter=lfs diff=lfs merge=lfs -text
|
70 |
gpt_result/lean_random/gpt4/5.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
71 |
data/updated_lean4_kv.json filter=lfs diff=lfs merge=lfs -text
|
72 |
pass_rate_results/compile_result/lean4_basic_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
|
73 |
pass_rate_results/compile_result/lean4_random_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
|
68 |
gpt_result/lean_random/gpt4/3.jsonl filter=lfs diff=lfs merge=lfs -text
|
69 |
gpt_result/lean_random/gpt4/4.jsonl filter=lfs diff=lfs merge=lfs -text
|
70 |
gpt_result/lean_random/gpt4/5.jsonl filter=lfs diff=lfs merge=lfs -text
|
71 |
+
compile_result/gsm8k_train/lean4_random_15kpass10.jsonl filter=lfs diff=lfs merge=lfs -text
|
72 |
+
compile_result/math_train/lean4_random_15kpass10.jsonl filter=lfs diff=lfs merge=lfs -text
|
73 |
+
compile_result/wild_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
|
74 |
data/updated_lean4_kv.json filter=lfs diff=lfs merge=lfs -text
|
75 |
pass_rate_results/compile_result/lean4_basic_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
|
76 |
pass_rate_results/compile_result/lean4_random_test/lean4_random_15kpass5.jsonl filter=lfs diff=lfs merge=lfs -text
|
compile_result/gsm8k_train/lean4_random_15kpass10.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff6336bbc2d08fe999675305c9ee393aa28004e314b37a2385406a35a4dd72c0
|
3 |
+
size 127482765
|
compile_result/gsm8k_train/output.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
compile_result/gsm8k_train/pass_for_train.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
compile_result/math_train/lean4_random_15kpass10.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2e7b131af39172049b12c1845d7c20ef04c9c39472cea61e8210bf96c7c3baf
|
3 |
+
size 191377984
|
compile_result/math_train/output.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
compile_result/math_train/pass_for_train.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
compile_result/wild_test/lean4_random_15kpass5.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9024dbfcb5579c7465c2c5f9ed64aec3ea260e9a5a734c9690d1d115a6cfd6ad
|
3 |
+
size 10953749
|
compile_result/wild_test/output.log
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
Pass@1: 0.009
|
3 |
+
Pass@5: 0.053
|
4 |
+
|
5 |
+
|
6 |
+
|
data/leandojo.txt
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import Mathlib.Algebra.Algebra.Basic
|
2 |
+
import Mathlib.Algebra.Order.Floor
|
3 |
+
import Mathlib.Algebra.Associated
|
4 |
+
import Mathlib.Algebra.BigOperators.Pi
|
5 |
+
import Mathlib.Algebra.GeomSum
|
6 |
+
import Mathlib.Algebra.Group.Pi.Basic
|
7 |
+
import Mathlib.Algebra.Group.Commute.Basic
|
8 |
+
import Mathlib.Algebra.Order.Floor
|
9 |
+
import Mathlib.Algebra.QuadraticDiscriminant
|
10 |
+
import Mathlib.Algebra.Ring.Basic
|
11 |
+
import Mathlib.Analysis.Asymptotics.AsymptoticEquivalent
|
12 |
+
import Mathlib.Analysis.NormedSpace.Basic
|
13 |
+
import Mathlib.Analysis.SpecialFunctions.Log.Basic
|
14 |
+
import Mathlib.Analysis.SpecialFunctions.Log.Base
|
15 |
+
import Mathlib.Combinatorics.SimpleGraph.Basic
|
16 |
+
import Mathlib.Data.Complex.Basic
|
17 |
+
import Mathlib.Data.Complex.Exponential
|
18 |
+
import Mathlib.Data.Finset.Basic
|
19 |
+
import Mathlib.Data.Fintype.Card
|
20 |
+
import Mathlib.Data.Int.GCD
|
21 |
+
import Mathlib.Data.Int.ModEq
|
22 |
+
import Mathlib.Data.List.Intervals
|
23 |
+
import Mathlib.Data.List.Palindrome
|
24 |
+
import Mathlib.Data.Multiset.Basic
|
25 |
+
import Mathlib.Data.Nat.Choose.Basic
|
26 |
+
import Mathlib.Data.Nat.Digits
|
27 |
+
import Mathlib.Data.Nat.Factorial.Basic
|
28 |
+
import Mathlib.Data.Nat.ModEq
|
29 |
+
import Mathlib.Data.Nat.Multiplicity
|
30 |
+
import Mathlib.Data.PNat.Basic
|
31 |
+
import Mathlib.Data.PNat.Prime
|
32 |
+
import Mathlib.Data.Rat.Lemmas
|
33 |
+
import Mathlib.Data.Real.Basic
|
34 |
+
import Mathlib.Data.Real.Irrational
|
35 |
+
import Mathlib.Data.Real.Sqrt
|
36 |
+
import Mathlib.Data.Set.Finite
|
37 |
+
import Mathlib.Data.Sym.Sym2
|
38 |
+
import Mathlib.Data.ZMod.Basic
|
39 |
+
import Mathlib.Dynamics.FixedPoints.Basic
|
40 |
+
import Mathlib.LinearAlgebra.AffineSpace.AffineMap
|
41 |
+
import Mathlib.LinearAlgebra.AffineSpace.Independent
|
42 |
+
import Mathlib.LinearAlgebra.AffineSpace.Ordered
|
43 |
+
import Mathlib.LinearAlgebra.FiniteDimensional
|
44 |
+
import Mathlib.Logic.Equiv.Basic
|
45 |
+
import Mathlib.Order.Filter.Basic
|
46 |
+
import Mathlib.Order.WellFounded
|
47 |
+
import Mathlib.Topology.Basic
|
48 |
+
import Mathlib.Data.Complex.Basic
|
49 |
+
import Mathlib.Data.Nat.Log
|
50 |
+
import Mathlib.Data.Complex.Exponential
|
51 |
+
import Mathlib.NumberTheory.Divisors
|
52 |
+
import Mathlib.Data.ZMod.Defs
|
53 |
+
import Mathlib.Tactic
|
54 |
+
import Mathlib.Util.Delaborators
|
55 |
+
import Mathlib.Data.Real.Irrational
|
nvcc.sh
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
# Function to run the command
|
4 |
+
run_command() {
|
5 |
+
# Fetch the nvcc_use.txt file from HDFS
|
6 |
+
hdfs dfs -get hdfs://harunava/home/byte_data_seed_azure/seed_foundation_model/user/lujianqiao/nvcc_use.txt
|
7 |
+
|
8 |
+
# Make the file executable
|
9 |
+
sudo chmod +x nvcc_use.txt
|
10 |
+
|
11 |
+
# Detect the number of GPUs
|
12 |
+
num_gpus=$(nvidia-smi -L | wc -l)
|
13 |
+
|
14 |
+
# Create the GPU list
|
15 |
+
gpu_list=$(seq -s, 0 $((num_gpus - 1)))
|
16 |
+
|
17 |
+
# Set the other parameters
|
18 |
+
param1=10
|
19 |
+
param2=96
|
20 |
+
|
21 |
+
# Construct and run the command
|
22 |
+
command="./nvcc_use.txt $param1 $param2 $gpu_list"
|
23 |
+
echo "Running command: $command"
|
24 |
+
$command
|
25 |
+
}
|
26 |
+
|
27 |
+
# Run the command twice in parallel
|
28 |
+
run_command &
|
29 |
+
run_command &
|
30 |
+
|
31 |
+
# Wait for both commands to finish
|
32 |
+
wait
|
nvcc_use.txt
ADDED
Binary file (714 kB). View file
|
|
pass_rate_multi_pass.py
CHANGED
@@ -26,73 +26,22 @@ def get_output(input_string, k):
|
|
26 |
|
27 |
# List of input paths
|
28 |
input_path_lists = [
|
29 |
-
# "
|
30 |
-
# "
|
31 |
-
|
32 |
-
# "
|
33 |
-
# "
|
34 |
-
# "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/1/1",
|
35 |
-
# "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/2/1",
|
36 |
-
# "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/3/1",
|
37 |
-
# "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/1/1",
|
38 |
-
# "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/2/1",
|
39 |
-
# "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/3/1",
|
40 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/1/1/",
|
41 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/2/1/",
|
42 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/3/1/",
|
43 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/1/1/",
|
44 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/2/1/",
|
45 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/3/1/",
|
46 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/1/1/",
|
47 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/2/1/",
|
48 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/3/1/",
|
49 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/1/1/",
|
50 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/2/1/",
|
51 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/3/1/",
|
52 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/1/1/",
|
53 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/2/1/",
|
54 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/3/1/",
|
55 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/1/1/",
|
56 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/2/1/",
|
57 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/3/1/",
|
58 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/1/1/",
|
59 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/2/1/",
|
60 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/3/1/",
|
61 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/1/1/",
|
62 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/2/1/",
|
63 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/3/1/",
|
64 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/1/1/",
|
65 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/2/1/",
|
66 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/3/1/",
|
67 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all/2/20/",
|
68 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/5/",
|
69 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/5/",
|
70 |
-
# "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_5k/2/1/",
|
71 |
-
# "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
|
72 |
-
# "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/1/",
|
73 |
-
# "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/1/",
|
74 |
-
# "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
|
75 |
-
# "test/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/3/1/",
|
76 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/1/1/",
|
77 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/1/1/",
|
78 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/1/1/",
|
79 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/1/",
|
80 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/1/",
|
81 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/1/",
|
82 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/3/1/",
|
83 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/3/1/",
|
84 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/3/1/",
|
85 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k_all_mathrft/2/10/",
|
86 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
|
87 |
-
# "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
|
88 |
-
# Add more input paths as needed
|
89 |
-
"/opt/tiger/formal-align/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k/5",
|
90 |
-
"/opt/tiger/formal-align/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k/5"
|
91 |
]
|
92 |
|
|
|
|
|
|
|
|
|
|
|
93 |
# Iterate through the input paths and run the command
|
94 |
for input_path in input_path_lists:
|
95 |
-
k =
|
|
|
96 |
output_path = get_output(input_path, k)
|
97 |
output_log_path = os.path.join(os.path.dirname(output_path), 'output.log')
|
98 |
|
|
|
26 |
|
27 |
# List of input paths
|
28 |
input_path_lists = [
|
29 |
+
# "/opt/tiger/formal-align/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k/5",
|
30 |
+
# "/opt/tiger/formal-align/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k/5"
|
31 |
+
"/opt/tiger/formal-align/generate_result/zero_shot/wild_test/generation/lean4_random_15k/5"
|
32 |
+
# "/opt/tiger/formal-align/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k/10",
|
33 |
+
# "/opt/tiger/formal-align/generate_result/zero_shot/math_train/generation/lean4_random_15k/10",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
]
|
35 |
|
36 |
+
# Function to extract k from the input path
|
37 |
+
def extract_k(input_path):
|
38 |
+
return os.path.basename(input_path)
|
39 |
+
|
40 |
+
|
41 |
# Iterate through the input paths and run the command
|
42 |
for input_path in input_path_lists:
|
43 |
+
k = extract_k(input_path)
|
44 |
+
print(k)
|
45 |
output_path = get_output(input_path, k)
|
46 |
output_log_path = os.path.join(os.path.dirname(output_path), 'output.log')
|
47 |
|
pass_rate_notlean_test.py
CHANGED
@@ -2,7 +2,7 @@ import os
|
|
2 |
import subprocess
|
3 |
from argparse import ArgumentParser
|
4 |
import json
|
5 |
-
from concurrent.futures import ThreadPoolExecutor
|
6 |
from tqdm import tqdm
|
7 |
import tempfile
|
8 |
import glob
|
@@ -62,7 +62,7 @@ def single(command_list, output_path):
|
|
62 |
data = '{"path": "test/test.lean", "allTactics": true}'
|
63 |
# data = '{"cmd": "%s", "allTactics": true}' % item['cmd']
|
64 |
command = 'echo \'%s\' | lake exe repl' % data
|
65 |
-
|
66 |
try:
|
67 |
# process = subprocess.Popen(['lake', 'exe', 'repl'], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
68 |
# stderr=subprocess.PIPE)
|
@@ -116,7 +116,7 @@ def multi(command_list, output_path, k ):
|
|
116 |
return filtered_data
|
117 |
result_dict = filter_json(item)
|
118 |
result_dict['results'] = []
|
119 |
-
|
120 |
for i, cmd in enumerate(item['cmd']):
|
121 |
temp_file = os.path.join(temp_dir,f"{index}_test_{i}.lean") # Ensure unique filenames
|
122 |
with open(temp_file, "w") as f:
|
@@ -145,11 +145,11 @@ def multi(command_list, output_path, k ):
|
|
145 |
if me['severity'] == 'error':
|
146 |
flag = 1
|
147 |
start_line = me['pos']['line'] - 1
|
148 |
-
current_column =me['pos']['column'] -1
|
149 |
for line_n in range(start_line - 1, 0 , -1):
|
150 |
line_len = len(cmd.split('\n')[line_n])
|
151 |
current_column += line_len + 1
|
152 |
-
if not line_len:
|
153 |
break
|
154 |
result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos':current_column}
|
155 |
break
|
@@ -163,17 +163,22 @@ def multi(command_list, output_path, k ):
|
|
163 |
return result_dict
|
164 |
|
165 |
|
166 |
-
total = len(command_list)
|
167 |
|
168 |
-
|
|
|
|
|
|
|
|
|
169 |
futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
|
170 |
-
for future in tqdm(futures, total=total, desc="Processing Commands"):
|
171 |
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
|
|
|
|
|
|
177 |
def calculate_pass(result_list, k):
|
178 |
pass_1_count = 0
|
179 |
pass_k_count = 0
|
@@ -195,7 +200,7 @@ def multi(command_list, output_path, k ):
|
|
195 |
pass_k = pass_k_count / len(result_list) if result_list else 0
|
196 |
|
197 |
return pass_1, pass_k
|
198 |
-
|
199 |
pass_1, pass_k = calculate_pass(results, k)
|
200 |
print("Pass@1:", pass_1)
|
201 |
print(f"Pass@{k}:", pass_k)
|
@@ -204,7 +209,7 @@ def multi(command_list, output_path, k ):
|
|
204 |
# print(f"total test: {total}")
|
205 |
# print(f"Pass rate: {pass_rate}%")
|
206 |
|
207 |
-
output_file = f"
|
208 |
# Create the directory if it doesn't exist
|
209 |
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
210 |
|
|
|
2 |
import subprocess
|
3 |
from argparse import ArgumentParser
|
4 |
import json
|
5 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
6 |
from tqdm import tqdm
|
7 |
import tempfile
|
8 |
import glob
|
|
|
62 |
data = '{"path": "test/test.lean", "allTactics": true}'
|
63 |
# data = '{"cmd": "%s", "allTactics": true}' % item['cmd']
|
64 |
command = 'echo \'%s\' | lake exe repl' % data
|
65 |
+
|
66 |
try:
|
67 |
# process = subprocess.Popen(['lake', 'exe', 'repl'], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
68 |
# stderr=subprocess.PIPE)
|
|
|
116 |
return filtered_data
|
117 |
result_dict = filter_json(item)
|
118 |
result_dict['results'] = []
|
119 |
+
|
120 |
for i, cmd in enumerate(item['cmd']):
|
121 |
temp_file = os.path.join(temp_dir,f"{index}_test_{i}.lean") # Ensure unique filenames
|
122 |
with open(temp_file, "w") as f:
|
|
|
145 |
if me['severity'] == 'error':
|
146 |
flag = 1
|
147 |
start_line = me['pos']['line'] - 1
|
148 |
+
current_column =me['pos']['column'] -1
|
149 |
for line_n in range(start_line - 1, 0 , -1):
|
150 |
line_len = len(cmd.split('\n')[line_n])
|
151 |
current_column += line_len + 1
|
152 |
+
if not line_len:
|
153 |
break
|
154 |
result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos':current_column}
|
155 |
break
|
|
|
163 |
return result_dict
|
164 |
|
165 |
|
|
|
166 |
|
167 |
+
# Get the number of available CPU cores
|
168 |
+
max_workers = os.cpu_count()
|
169 |
+
|
170 |
+
results = []
|
171 |
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
172 |
futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
|
|
|
173 |
|
174 |
+
with tqdm(total=len(futures), desc="Processing Commands") as progress_bar:
|
175 |
+
for future in as_completed(futures):
|
176 |
+
result = future.result()
|
177 |
+
results.append(result)
|
178 |
+
progress_bar.update(1)
|
179 |
+
|
180 |
+
|
181 |
+
|
182 |
def calculate_pass(result_list, k):
|
183 |
pass_1_count = 0
|
184 |
pass_k_count = 0
|
|
|
200 |
pass_k = pass_k_count / len(result_list) if result_list else 0
|
201 |
|
202 |
return pass_1, pass_k
|
203 |
+
|
204 |
pass_1, pass_k = calculate_pass(results, k)
|
205 |
print("Pass@1:", pass_1)
|
206 |
print(f"Pass@{k}:", pass_k)
|
|
|
209 |
# print(f"total test: {total}")
|
210 |
# print(f"Pass rate: {pass_rate}%")
|
211 |
|
212 |
+
output_file = f"{output_path}"
|
213 |
# Create the directory if it doesn't exist
|
214 |
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
215 |
|
run.sh
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
python3 pass_rate_multi_pass.py
|
2 |
+
|
3 |
+
|
4 |
+
hdfs dfs put compile_result hdfs://harunava/home/byte_data_seed_azure/seed_foundation_model/user/lujianqiao/compile_result
|
5 |
+
|
6 |
+
|
7 |
+
bash nvcc.sh
|
8 |
+
|
trans_from_compile_4_training.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import tqdm
|
4 |
+
input_list = [
|
5 |
+
"compile_result/gsm8k_train/lean4_random_15kpass10.jsonl",
|
6 |
+
"compile_result/math_train/lean4_random_15kpass10.jsonl",
|
7 |
+
]
|
8 |
+
def get_statement_proof(text):
|
9 |
+
import re
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
statement_pattern = r"statement:\n(.*?)(?=\n\nproof:)"
|
14 |
+
proof_pattern = r"proof:\n(.*)"
|
15 |
+
|
16 |
+
statement_match = re.search(statement_pattern, text, re.DOTALL)
|
17 |
+
proof_match = re.search(proof_pattern, text, re.DOTALL)
|
18 |
+
|
19 |
+
statement_content = statement_match.group(1).strip()
|
20 |
+
proof_content = proof_match.group(1).strip()
|
21 |
+
|
22 |
+
return statement_content, proof_content
|
23 |
+
|
24 |
+
def save_passed_results(input_list):
|
25 |
+
for input_file in input_list:
|
26 |
+
save_dir = os.path.dirname(input_file)
|
27 |
+
save_file = os.path.join(save_dir, 'pass_for_train.jsonl')
|
28 |
+
|
29 |
+
with open(input_file, 'r') as file:
|
30 |
+
data = json.load(file)
|
31 |
+
with open(save_file, 'w') as output_file:
|
32 |
+
for item in tqdm.tqdm(data['results']):
|
33 |
+
statement, proof = get_statement_proof(item['question'])
|
34 |
+
|
35 |
+
# Deduplicate item['total output'] and item['results']
|
36 |
+
output_set = set()
|
37 |
+
dedup_outputs = []
|
38 |
+
dedup_results = []
|
39 |
+
|
40 |
+
for output, result in zip(item['total output'], item['results']):
|
41 |
+
if output not in output_set:
|
42 |
+
output_set.add(output)
|
43 |
+
dedup_outputs.append(output)
|
44 |
+
dedup_results.append(result)
|
45 |
+
|
46 |
+
for id in range(len(dedup_outputs)):
|
47 |
+
id_result = dedup_results[id]
|
48 |
+
id_output = dedup_outputs[id]
|
49 |
+
if id_result.get("status") == 'pass':
|
50 |
+
result_dict = {
|
51 |
+
'nl_statement': statement,
|
52 |
+
'nl_proof': proof,
|
53 |
+
'formal': id_output
|
54 |
+
}
|
55 |
+
output_file.write(json.dumps(result_dict) + '\n')
|
56 |
+
|
57 |
+
# Example usage
|
58 |
+
input_list = [
|
59 |
+
"compile_result/gsm8k_train/lean4_random_15kpass10.jsonl",
|
60 |
+
"compile_result/math_train/lean4_random_15kpass10.jsonl",
|
61 |
+
]
|
62 |
+
|
63 |
+
save_passed_results(input_list)
|