# secure_code_leaderboard_archived/test_security_eval.py
from src.leaderboard.security_eval import check_safetensors, load_model_and_tokenizer, evaluate_secure_coding
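

# Smoke test: exercises the three stages of the security evaluation pipeline
# (safetensors check, model load, secure-coding evaluation) against a small model.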
def main():
    # You can replace this with any HuggingFace model you want to test
    model_path = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Using a small model for testing
    revision = "main"
print(f"Running security evaluation on {model_path}...")
print("Step 1: Checking safetensors...")
safetensors_result = check_safetensors(model_path, revision)
print(f"Safetensors check complete: {safetensors_result}")
print("\nStep 2: Loading model and tokenizer...")
    try:
        model, tokenizer = load_model_and_tokenizer(model_path, revision)
        print("Model and tokenizer loaded successfully")
    except Exception as e:
        print(f"Error loading model: {e}")
        return
print("\nStep 3: Running security evaluation...")
    try:
        security_score = evaluate_secure_coding(model_path, revision)
        print(f"Security evaluation complete. Score: {security_score}")
    except Exception as e:
        print(f"Error during security evaluation: {e}")
        return
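
    # Assemble both results in a nested dict, assumed here to mirror the payload
    # shape the leaderboard itself reports.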
    results = {
        "results": {
            "safetensors_check": {"compliant": safetensors_result},
            "secure_coding": {"security_score": security_score},
        }
    }
print("\nFinal Results:")
print(f"Using safetensors: {results['results']['safetensors_check']['compliant']}")
print(f"Security score: {results['results']['secure_coding']['security_score']}")


if __name__ == "__main__":
    main()