book dataset was generated
This commit is contained in:
parent
6137eeef63
commit
794c7d4c4d
@ -1,10 +1,10 @@
|
||||
import csv
|
||||
|
||||
# Открываем файлы с правильными и неправильными строками
|
||||
with open('slovak_no_caps.txt', 'r', encoding='utf-8', errors='replace') as correct_file:
|
||||
with open('cleaned_book.txt', 'r', encoding='utf-8', errors='replace') as correct_file:
|
||||
correct_lines = correct_file.readlines()
|
||||
|
||||
with open('no_slovak_no_caps.txt', 'r', encoding='utf-8', errors='replace') as incorrect_file:
|
||||
with open('book_no_slovak_output.txt', 'r', encoding='utf-8', errors='replace') as incorrect_file:
|
||||
incorrect_lines = incorrect_file.readlines()
|
||||
|
||||
# Убедимся, что оба списка строк имеют одинаковую длину
|
||||
@ -15,7 +15,7 @@ correct_lines += [''] * (max_length - len(correct_lines))
|
||||
incorrect_lines += [''] * (max_length - len(incorrect_lines))
|
||||
|
||||
# Открываем CSV файл для записи
|
||||
with open('dataset.csv', 'w', newline='', encoding='utf-8') as csvfile:
|
||||
with open('dataset_book.csv', 'w', newline='', encoding='utf-8') as csvfile:
|
||||
fieldnames = ['correct', 'incorrect']
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
|
||||
|
12716
dataset_book.csv
Normal file
12716
dataset_book.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user