2024-10-23 11:36:14 +00:00
|
|
|
|
import csv
import itertools
|
|
|
|
|
|
|
|
|
|
# Pair every line of cleaned_book.txt with the line at the same position in
# cleaned_book_typos.txt and write the pairs to dataset_book_typos.csv under
# the columns 'correct' and 'incorrect'.
#
# Both inputs are decoded as UTF-8 with errors='replace', so undecodable bytes
# are substituted instead of raising UnicodeDecodeError.
with open('cleaned_book.txt', 'r', encoding='utf-8', errors='replace') as correct_file, \
        open('cleaned_book_typos.txt', 'r', encoding='utf-8', errors='replace') as incorrect_file:
    # newline='' leaves line-ending handling to the csv module.
    with open('dataset_book_typos.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['correct', 'incorrect']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # Write the header row.
        writer.writeheader()

        # Stream both files in lockstep instead of loading them into memory.
        # When one file has fewer lines, zip_longest keeps going and supplies
        # '' for the exhausted side, so the extra lines of the longer file
        # are still written with an empty counterpart.
        for correct_line, incorrect_line in itertools.zip_longest(
                correct_file, incorrect_file, fillvalue=''):
            # strip() removes the trailing newline and any surrounding
            # whitespace from each line before it is written.
            writer.writerow({'correct': correct_line.strip(), 'incorrect': incorrect_line.strip()})

print("CSV файл успешно создан!")
|
|
|
|
|
|