zpwiki/pages/students/2016/darius_lindvai/dp2021/text.py
2020-05-05 20:37:06 +02:00

15 lines
454 B
Python

import re
import os
# Corpus locations. The output file is rebuilt from scratch on every run.
INPUT_PATH = '/home/dlindvai/work/train.txt'
OUTPUT_PATH = '/home/dlindvai/work/text.txt'

# Matches alternating runs of "word" characters (anything matched by \w or \s,
# plus / ' + $ -) and runs of everything else (i.e. punctuation), together
# with any whitespace that trails the run.  Compiled once, outside the loop.
_TOKEN_RUN = re.compile(r"([\w/'+$\s-]+|[^\w/'+$\s-]+)\s*")

# Punctuation marks that receive a label suffix.  A single translate() pass is
# equivalent to the chained replace() calls because no label contains another
# key character.
_PUNCT_LABELS = str.maketrans({'.': '.PER', ',': ',COM', '?': '?QUE'})


def tag_punctuation(line):
    """Return *line* lower-cased, space-separated and with labelled punctuation.

    Newline characters are dropped, every word run and every punctuation run
    is followed by exactly one added space, the text is lower-cased, and the
    marks ``.``, ``,`` and ``?`` become ``.PER``, ``,COM`` and ``?QUE``.

    Args:
        line: One raw line of input text (with or without its newline).

    Returns:
        The transformed text.  It does not end with a newline, so consecutive
        results concatenate into one continuous token stream.
    """
    line = line.replace('\n', '')
    line = _TOKEN_RUN.sub(r"\1 ", line)
    # Lower-case first so that the appended labels stay upper-case.
    return line.lower().translate(_PUNCT_LABELS)


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Convert the text at *input_path* and write the result to *output_path*.

    The output is opened in write mode, so whatever an earlier run left in
    that file is discarded.  (Previously the clean-up step removed a relative
    ``text.txt`` while the data was appended to the absolute output path, so
    old output was only cleared when the working directory happened to be the
    output directory.)

    Args:
        input_path: Path of the UTF-8 text file to read.
        output_path: Path of the UTF-8 text file to (re)create.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Explicit encoding: do not depend on the locale default for corpus text.
    with open(input_path, 'r', encoding='utf-8') as input_file, \
            open(output_path, 'w', encoding='utf-8') as output_file:
        for line in input_file:
            output_file.write(tag_punctuation(line))


if __name__ == '__main__':
    main()