forked from KEMT/zpwiki
15 lines
454 B
Python
15 lines
454 B
Python
|
import os
import re

# Prepare a punctuation-tagged text stream from a raw training file.
#
# Every input line is tokenised (word runs and punctuation runs are separated
# by a single space), lower-cased, and each '.', ',' and '?' gets an
# upper-case tag appended ('.PER', ',COM', '?QUE').  All lines are written
# back-to-back into one output file.

WORK_DIR = '/home/dlindvai/work'
INPUT_PATH = os.path.join(WORK_DIR, 'train.txt')
OUTPUT_PATH = os.path.join(WORK_DIR, 'text.txt')

# Matches either a run of "word-like" characters (letters, digits, '_', '/',
# "'", '+', '$', '-', whitespace) or a run of everything else, plus any
# whitespace that follows.  Compiled once instead of on every line.
_TOKEN_RE = re.compile(r"([\w/'+$\s-]+|[^\w/'+$\s-]+)\s*")

# Single-pass replacement table: punctuation mark -> mark followed by its tag.
_PUNCT_TAGS = str.maketrans({'.': '.PER', ',': ',COM', '?': '?QUE'})


def annotate_line(line: str) -> str:
    """Return *line* tokenised, lower-cased and with punctuation tags added.

    Newline characters are removed; every token run is followed by exactly
    one space, so a non-empty result always ends with a space.  Tags are
    appended after lower-casing, which is why they stay upper-case.
    """
    line = line.replace('\n', '')
    line = _TOKEN_RE.sub(r"\1 ", line)
    line = line.lower()
    return line.translate(_PUNCT_TAGS)


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Convert *input_path* line by line and write the result to *output_path*.

    The output file is opened in 'w' mode so that any previous result at the
    very same path is discarded.  (The earlier version deleted a relative
    'text.txt' but appended to the absolute output path, so stale output was
    only cleared when the script happened to run from the work directory.)

    No newline is written between lines: the trailing space produced by the
    tokenisation is the only separator in the output stream.
    """
    # NOTE(review): files are opened with the platform default encoding, as
    # before -- confirm whether an explicit encoding (e.g. UTF-8) is required.
    with open(input_path, 'r') as input_file, \
            open(output_path, 'w') as output_file:
        for line in input_file:
            output_file.write(annotate_line(line))


if __name__ == '__main__':
    main()
|