# codesearchBase / validate_data.py
# Forrest99's picture
# Create validate_data.py
# 144fc9e verified
def validate_inputs(snippets, *, min_length=10, max_errors_shown=3):
    """Clean code snippets and keep the ones that are long enough.

    Each item is converted to text if needed, has every literal ``...``
    removed, and is stripped of surrounding whitespace. Items whose cleaned
    text is shorter than ``min_length`` characters are dropped. A short
    report is printed to stdout before returning.

    Args:
        snippets: Iterable of snippets. Items that are not ``str`` are
            converted and recorded as a type problem: ``bytes`` and
            ``bytearray`` are decoded as UTF-8 (undecodable bytes are
            replaced), everything else goes through ``str()``.
        min_length: Minimum number of characters a cleaned snippet must
            have to be kept.
        max_errors_shown: Maximum number of problem messages printed in the
            report (expected to be non-negative).

    Returns:
        A list of the cleaned snippets that passed the length check, in
        input order.
    """
    errors = []
    valid_data = []

    for idx, s in enumerate(snippets):
        # Type check: coerce anything that is not already text.
        if not isinstance(s, str):
            errors.append(f"索引 {idx}: 类型错误 ({type(s)}) → 已转换")
            if isinstance(s, (bytes, bytearray)):
                # str(b"...") would yield the repr ("b'...'") rather than
                # the text itself, so decode byte strings explicitly.
                s = s.decode("utf-8", errors="replace")
            else:
                s = str(s)

        # Clean-up: drop "..." markers and surrounding whitespace.
        cleaned = s.replace("...", "").strip()

        # Validity check: reject snippets that are too short to be useful.
        if len(cleaned) < min_length:
            errors.append(f"索引 {idx}: 代码过短 ({len(cleaned)} 字符)")
            continue

        valid_data.append(cleaned)

    # Report. The error count is the number of recorded messages, so one
    # item can contribute twice (converted and then rejected as too short).
    print(f"验证结果:\n成功 {len(valid_data)} 条\n错误 {len(errors)} 条")
    for err in errors[:max_errors_shown]:
        print(f" - {err}")
    if len(errors) > max_errors_shown:
        print(f" ...(共 {len(errors)} 个错误)")

    return valid_data
if __name__ == "__main__":
    # Demo inputs covering both accepted and rejected cases.
    samples = [
        "def example(): pass",
        12345,  # not a string
        "print(...)",  # contains "..."
        " ",  # whitespace only
        "def valid_func():\nreturn '正确代码'",  # multi-line snippet
    ]
    accepted = validate_inputs(samples)

    print("\n有效数据示例:")
    # Preview at most the first two accepted snippets, 50 characters each.
    preview = accepted[:2]
    for position, snippet in enumerate(preview):
        head = snippet[:50]
        print(f"[{position}] {head}...")