book dataset was generated

2024-11-08 17:59:22 +01:00 · 2024-11-08 17:59:22 +01:00 · 794c7d4c4d
commit 794c7d4c4d
parent 6137eeef63
2 changed files with 12719 additions and 3 deletions
--- a/csv_create.py
+++ b/csv_create.py
@@ -1,10 +1,10 @@
 import csv

 # Открываем файлы с правильными и неправильными строками
-with open('slovak_no_caps.txt', 'r', encoding='utf-8', errors='replace') as correct_file:
+with open('cleaned_book.txt', 'r', encoding='utf-8', errors='replace') as correct_file:
    correct_lines = correct_file.readlines()

-with open('no_slovak_no_caps.txt', 'r', encoding='utf-8', errors='replace') as incorrect_file:
+with open('book_no_slovak_output.txt', 'r', encoding='utf-8', errors='replace') as incorrect_file:
    incorrect_lines = incorrect_file.readlines()

 # Убедимся, что оба списка строк имеют одинаковую длину
@@ -15,7 +15,7 @@ correct_lines += [''] * (max_length - len(correct_lines))
 incorrect_lines += [''] * (max_length - len(incorrect_lines))

 # Открываем CSV файл для записи
-with open('dataset.csv', 'w', newline='', encoding='utf-8') as csvfile:
+with open('dataset_book.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['correct', 'incorrect']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

--- a/dataset_book.csv
+++ b/dataset_book.csv