import argparse import re from helpers import write_lines def filter_line(line): if "-LRB-" in line and "-RRB-" in line: rep = re.sub(r'\-.*?LRB.*?\-.*?\-.*?RRB.*?\-', '', line) line_cleaned = rep elif ("-LRB-" in line and "-RRB-" not in line) or ( "-LRB-" not in line and "-RRB-" in line): line_cleaned = line.replace("-LRB-", '"').replace("-RRB-", '"') else: line_cleaned = line return line_cleaned def main(args): with open(args.source) as f: data = [row.rstrip() for row in f] write_lines(args.output, [filter_line(row) for row in data]) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-s', '--source', help='Path to the source file', required=True) parser.add_argument('-o', '--output', help='Path to the output file', required=True) args = parser.parse_args() main(args)