book dataset was generated
This commit is contained in:
parent
6137eeef63
commit
794c7d4c4d
@ -1,10 +1,10 @@
|
|||||||
import csv
|
import csv
|
||||||
|
|
||||||
# Открываем файлы с правильными и неправильными строками
|
# Открываем файлы с правильными и неправильными строками
|
||||||
with open('slovak_no_caps.txt', 'r', encoding='utf-8', errors='replace') as correct_file:
|
with open('cleaned_book.txt', 'r', encoding='utf-8', errors='replace') as correct_file:
|
||||||
correct_lines = correct_file.readlines()
|
correct_lines = correct_file.readlines()
|
||||||
|
|
||||||
with open('no_slovak_no_caps.txt', 'r', encoding='utf-8', errors='replace') as incorrect_file:
|
with open('book_no_slovak_output.txt', 'r', encoding='utf-8', errors='replace') as incorrect_file:
|
||||||
incorrect_lines = incorrect_file.readlines()
|
incorrect_lines = incorrect_file.readlines()
|
||||||
|
|
||||||
# Убедимся, что оба списка строк имеют одинаковую длину
|
# Убедимся, что оба списка строк имеют одинаковую длину
|
||||||
@ -15,7 +15,7 @@ correct_lines += [''] * (max_length - len(correct_lines))
|
|||||||
incorrect_lines += [''] * (max_length - len(incorrect_lines))
|
incorrect_lines += [''] * (max_length - len(incorrect_lines))
|
||||||
|
|
||||||
# Открываем CSV файл для записи
|
# Открываем CSV файл для записи
|
||||||
with open('dataset.csv', 'w', newline='', encoding='utf-8') as csvfile:
|
with open('dataset_book.csv', 'w', newline='', encoding='utf-8') as csvfile:
|
||||||
fieldnames = ['correct', 'incorrect']
|
fieldnames = ['correct', 'incorrect']
|
||||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||||
|
|
||||||
|
12716
dataset_book.csv
Normal file
12716
dataset_book.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user