Upload 4 files
scripts/evaluate_negative_rejection.py (CHANGED)

@@ -15,10 +15,10 @@ def evaluate_negative_rejection(config):
     noise_rate = config['noise_rate']
     passage_num = config['passage_num']

-    if
-        model = GroqClient(plm=
+    if modelname in config['models']:
+        model = GroqClient(plm=modelname)
     else:
-        logging.warning(f"Skipping unknown model: {
+        logging.warning(f"Skipping unknown model: {modelname}")
         return

     # File paths
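All four scripts read the active model and the run parameters from a single config dict. For orientation, here is a hypothetical config covering the keys referenced across these diffs; the key names come from the code, but every value below is a placeholder, not something taken from the repository:

# Hypothetical example: key names appear in the diffs, all values are made up.
config = {
    'model_name': 'llama3-8b-8192',   # assumed Groq model id, for illustration only
    'models': ['llama3-8b-8192'],     # allow-list checked before a GroqClient is built
    'noise_rate': 0.4,                # placeholder
    'passage_num': 5,                 # placeholder
    'num_queries': 10,                # placeholder, used in get_prediction_result.py
    'result_path': 'results/',        # prediction_*.json and scores_*.json are written under this
}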
scripts/evaluate_noise_robustness.py (CHANGED)

@@ -13,9 +13,10 @@ def evaluate_noise_robustness(config):
     result_path = config['result_path'] + 'Noise Robustness/'
     noise_rate = config['noise_rate']
     passage_num = config['passage_num']
+    model_name = config['model_name']

     # Iterate over each model specified in the config
-    filename = os.path.join(result_path, f'prediction_{
+    filename = os.path.join(result_path, f'prediction_{model_name}_noise_{noise_rate}_passage_{passage_num}.json')
     ensure_directory_exists(filename)

     # Load existing results if file exists
@@ -56,7 +57,7 @@ def evaluate_noise_robustness(config):
     logging.info(f"score: {scores}")
     logging.info(f"Noise Robustness Accuracy: {accuracy:.2%}")

-    score_filename = os.path.join(result_path, f'scores_{
+    score_filename = os.path.join(result_path, f'scores_{model_name}_noise_{noise_rate}_passage_{passage_num}.json')
     with open(score_filename, 'w') as f:
         json.dump(scores, f, ensure_ascii=False, indent=4)
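Both the prediction path and the score path now embed the model name alongside the noise rate and passage count. A quick sketch of the resulting filename, with placeholder values (the model id and numbers are assumptions, not taken from the diff):

import os

# Hypothetical values; only the filename pattern comes from the diff above.
result_path = 'results/Noise Robustness/'
model_name, noise_rate, passage_num = 'llama3-8b-8192', 0.4, 5

filename = os.path.join(result_path, f'prediction_{model_name}_noise_{noise_rate}_passage_{passage_num}.json')
print(filename)
# results/Noise Robustness/prediction_llama3-8b-8192_noise_0.4_passage_5.json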
scripts/get_factual_evaluation.py (CHANGED)

@@ -13,9 +13,10 @@ def get_factual_evaluation(config):
     result_path = config['result_path'] + 'Counterfactual Robustness/'
     noise_rate = config['noise_rate']
     passage_num = config['passage_num']
+    model_name = config['model_name']

     # Iterate over each model specified in the config
-    filename = os.path.join(result_path, f'prediction_{
+    filename = os.path.join(result_path, f'prediction_{model_name}_noise_{noise_rate}_passage_{passage_num}.json')
     ensure_directory_exists(filename)

     # Load existing results if file exists
@@ -61,7 +62,7 @@ def get_factual_evaluation(config):
     scores['correct_tt'] = correct_tt

     #logging.info(f"score: {scores}")
-    score_filename = os.path.join(result_path, f'scores_{
+    score_filename = os.path.join(result_path, f'scores_{model_name}_noise_{noise_rate}_passage_{passage_num}.json')
     with open(score_filename, 'w') as f:
         json.dump(scores, f, ensure_ascii=False, indent=4)
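Each script calls ensure_directory_exists(filename) before writing results. The helper itself is defined elsewhere in the repository and is not part of this change; a minimal implementation consistent with how it is used here might look like this (a sketch, not the project's actual code):

import os

def ensure_directory_exists(filepath: str) -> None:
    # Create the parent directory of the target file if it does not already exist.
    directory = os.path.dirname(filepath)
    if directory:
        os.makedirs(directory, exist_ok=True)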
scripts/get_prediction_result.py (CHANGED)

@@ -12,16 +12,18 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
 def get_prediction_result(config, data_file_name):
     results = []
     dataset = load_dataset(data_file_name)
+    modelname = config['model_name']
+
     # Create GroqClient instance for supported models
-    if
-        model = GroqClient(plm=
+    if modelname in config['models']:
+        model = GroqClient(plm=modelname)
     else:
-        logging.warning(f"Skipping unknown model: {
+        logging.warning(f"Skipping unknown model: {modelname}")
         return

     # Iterate through dataset and process queries
     for idx, instance in enumerate(dataset[:config['num_queries']], start=0):
-        logging.info(f"Executing Query {idx + 1} for Model: {
+        logging.info(f"Executing Query {idx + 1} for Model: {modelname}")

         query, ans, docs = process_data(instance, config['noise_rate'], config['passage_num'], data_file_name)
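The allow-list guard now appears in both evaluate_negative_rejection.py and get_prediction_result.py. If a third copy is ever needed, it could be factored into a small helper along these lines; this is only a sketch, and it assumes the client constructor takes just the plm keyword, as in the diffs above:

import logging

def build_model(config, client_factory):
    # Return a client for config['model_name'], or None if the model is not allow-listed.
    # client_factory stands in for whatever constructor the scripts use
    # (GroqClient in the diffs above); passing it in keeps this sketch self-contained.
    modelname = config['model_name']
    if modelname not in config['models']:
        logging.warning(f"Skipping unknown model: {modelname}")
        return None
    return client_factory(plm=modelname)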