# Bakalarska_praca/convert_html.py

from bs4 import BeautifulSoup

# Input HTML page and the destination file for the extracted text.
SOURCE_HTML = 'skoly_ludskych_vztah_59421.html'
OUTPUT_TXT = 'skoly.txt'

# Read the whole HTML document, decoding it as Windows-1250.
with open(SOURCE_HTML, encoding='windows-1250') as html_file:
    markup = html_file.read()

# Parse the markup with the 'html.parser' backend and pull out the
# document text via get_text(), leaving the tags behind.
plain_text = BeautifulSoup(markup, 'html.parser').get_text()

# Store the extracted text as UTF-8.
with open(OUTPUT_TXT, 'w', encoding='utf-8') as txt_file:
    txt_file.write(plain_text)

print("Текст успешно извлечён!")