"""Convert a raw text corpus into a lower-cased, punctuation-tagged stream.

Each input line is tokenised so that punctuation runs are separated from
the surrounding words by a single space, lower-cased, and then the marks
'.', ',' and '?' are suffixed with the tags PER, COM and QUE.  All lines
are written back-to-back (no newline is emitted), producing one
continuous stream of text in the output file.
"""
import os
import re

# Default locations used when the module is run as a script.
INPUT_PATH = '/home/dlindvai/work/train.txt'
OUTPUT_PATH = '/home/dlindvai/work/text.txt'

# Alternating runs of "word-like" characters (word chars, / ' + $ - and
# whitespace) and runs of everything else; compiled once, not per line.
_TOKEN_RE = re.compile(r"([\w/'+$\s-]+|[^\w/'+$\s-]+)\s*")

# Single-pass equivalent of chained .replace() calls; safe because none of
# the tags contains '.', ',' or '?'.
_PUNCT_TAGS = str.maketrans({'.': '.PER', ',': ',COM', '?': '?QUE'})


def normalize_line(line):
    """Return *line* tokenised, lower-cased and punctuation-tagged.

    Newline characters are removed, every run matched by the token pattern
    is followed by exactly one space (whitespace trailing a punctuation
    run is absorbed into that space), the text is lower-cased, and finally
    '.', ',' and '?' become '.PER', ',COM' and '?QUE'.  Tagging happens
    after lower-casing so the tags stay upper-case.
    """
    line = line.replace('\n', '')
    line = _TOKEN_RE.sub(r"\1 ", line)
    return line.lower().translate(_PUNCT_TAGS)


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Normalise every line of *input_path* and write the result to *output_path*.

    The output file is opened in 'w' mode so each run starts from an empty
    file.  This replaces the earlier removal of a relative 'text.txt',
    which did not match the absolute path being appended to and therefore
    let repeated runs accumulate duplicate output.

    Raises:
        OSError: if either file cannot be opened.
    """
    # The input is opened first, so a missing input leaves any existing
    # output file untouched.
    with open(input_path, 'r') as input_file, \
            open(output_path, 'w') as output_file:
        for line in input_file:
            output_file.write(normalize_line(line))


if __name__ == "__main__":
    main()