# DP2024/slovak_punction2.py

# import modulov
from transformers import pipeline
from nltk.tokenize import sent_tokenize
import nltk

# Download the NLTK 'punkt' resource every time this module is loaded.
# NOTE(review): this is presumably the data that sent_tokenize (used in
# restore_punctuation) relies on; newer NLTK releases may expect 'punkt_tab'
# instead — confirm against the installed version.
nltk.download('punkt')

# Function that restores punctuation
def restore_punctuation(text, unmasker=None):
    """Re-insert punctuation into *text* using a fill-mask language model.

    The text is split into sentences with ``sent_tokenize``. Every character
    listed in ``labels`` is removed from each sentence, then a ``<mask>``
    token is placed after each word in turn. When the first candidate
    returned by the model for that slot is one of the punctuation labels,
    the mark is appended to the preceding word.

    Args:
        text: Input text whose punctuation should be rebuilt.
        unmasker: Optional callable used instead of the default
            ``pipeline('fill-mask', model='gerulata/slovakbert')``. It is
            called with a string containing one ``<mask>`` and must return
            a sequence of dicts carrying a ``'token_str'`` key; the first
            element is taken as the prediction. Passing an already loaded
            pipeline avoids re-loading the model on every call.

    Returns:
        The processed sentences joined by single spaces, or ``""`` when
        *text* is empty or contains no words.
    """
    # Nothing to restore: return early so neither the tokenizer nor the
    # (expensive) model is touched for empty input.
    if not text or not text.strip():
        return ""

    labels = ['.', '!', ',', ':', '?', '-', ";"]
    # One C-level pass removes every punctuation label from a sentence.
    strip_table = str.maketrans('', '', ''.join(labels))

    restored_sentences = []
    for sent in sent_tokenize(text):
        words = sent.translate(strip_table).split()
        if not words:
            # Sentence consisted only of punctuation; nothing to emit.
            continue

        # Load the model lazily and only once, not once per sentence.
        if unmasker is None:
            unmasker = pipeline('fill-mask', model='gerulata/slovakbert')

        for i in range(1, len(words) + 1):
            # Query the model with a mask right after the i-th word.
            # `words` already contains marks restored at earlier positions,
            # so later predictions see them as context.
            masked = words[:i] + ['<mask>'] + words[i:]
            predictions = unmasker(" ".join(masked))

            best_token = predictions[0]['token_str']
            if best_token in labels:
                words[i - 1] += best_token

        restored_sentences.append(" ".join(words))

    # Separate sentences with a space; plain concatenation glued the end of
    # one sentence to the start of the next ("... koniec.Nová veta ...").
    return " ".join(restored_sentences)

# Script entry point: prompt only when the file is executed directly, so that
# importing this module to reuse restore_punctuation does not block on input().
if __name__ == "__main__":
    # Read the text whose punctuation should be repaired
    input_text = input("Zadajte text na opravu interpunkcie: ")
    output_text = restore_punctuation(input_text)

    # Print the original and the repaired text
    print("Pôvodný text:", input_text)
    print("Opravený text:", output_text)
Upload files to '' 2023-11-28 13:31:11 +00:00
			`# import modulov`
			`from transformers import pipeline`
			`from nltk.tokenize import sent_tokenize`
			`import nltk`

			`nltk.download('punkt')`

			`# funkcia na obnovu interpunkcie`
			`def restore_punctuation(text):`
			`sents = sent_tokenize(text)`
			`new_text = ""`
			`labels = ['.', '!', ',', ':', '?', '-', ";"]`

			`for sent in sents:`
			`sent = ''.join(ch for ch in sent if ch not in labels)`
			`text_word = sent.split()`
			`words = text_word[:]`

			`unmasker = pipeline('fill-mask', model='gerulata/slovakbert')`

			`for i in range(1, len(text_word) + 1):`
			`text_word.insert(i, '<mask>')`
			`sent = " ".join(text_word)`
			`text_with_punc = unmasker(sent)`

			`if text_with_punc[0]['token_str'] in labels:`
			`words[i - 1] = words[i - 1] + text_with_punc[0]['token_str']`

			`text_word = words[:]`

			`new_text += " ".join(words)`

			`return new_text`

			`# Zadanie textu pre opravu interpunkcie`
			`input_text = input("Zadajte text na opravu interpunkcie: ")`
			`output_text = restore_punctuation(input_text)`

			`# Výpis pôvodného a opraveného textu`
			`print("Pôvodný text:", input_text)`
			`print("Opravený text:", output_text)`