"""Print summary statistics for a JSONL annotation export.

The script reads the whole export as text and reports two tables on the
terminal: how many records were accepted / rejected / skipped (plus the
total), and how often each entity label occurs.

Usage:
    python count.py [path-to-export.jsonl]

NOTE(review): all numbers are raw substring counts over the file text
(``str.count``), exactly as in the original script.  If an article body
happens to contain e.g. the word "accept" or the letters "PER", it is
counted as well.  Parsing each JSON line would be more precise, but the
record schema is not visible from this file -- confirm before changing.
"""
import sys

# Export analysed when no path is given on the command line.
DEFAULT_PATH = 'ner/skner/skner.jsonl'

# ANSI escape sequences used to style the terminal output.
UNDERLINE = '\033[04m'
RESET = '\033[0m'
RED = '\033[31m'
GREEN = '\033[32m'
GRAY = '\033[37m'

# Substrings counted for the "articles" table.
ANSWER_MARKERS = ('accept', 'reject', 'ignore')
# Substring whose number of occurrences is reported as the total ("Spolu").
TOTAL_MARKER = 'tokens'
# Substrings counted for the "entities" table, in display order.
ENTITY_LABELS = ('PER', 'LOC', 'ORG', 'MISC')


def count_markers(text):
    """Count every tracked marker in *text*.

    Args:
        text: Full contents of the annotation export.

    Returns:
        Dict mapping each marker in ``ANSWER_MARKERS``, ``TOTAL_MARKER``
        and ``ENTITY_LABELS`` to its number of non-overlapping
        occurrences in *text*.
    """
    markers = ANSWER_MARKERS + (TOTAL_MARKER,) + ENTITY_LABELS
    return {marker: text.count(marker) for marker in markers}


def build_report(counts):
    """Return the report as a list of lines, in the order they are printed.

    Args:
        counts: Mapping as produced by ``count_markers``.

    Returns:
        List of strings; each one is printed on its own line by ``main``.
    """
    lines = [
        # count articles
        UNDERLINE + '\nPočet anotovaných článkov:' + RESET,
        GREEN + "%-15s %-20s" % ("Prijatých", counts['accept']) + RESET,
        RED + "%-15s %-15s" % ("Zamietnutých", counts['reject']) + RESET,
        GRAY + "%-15s %-15s" % ("Preskočených", counts['ignore']) + RESET,
        "%-15s" % ("---------------------"),
        "%-15s %-15s" % ("Spolu", counts[TOTAL_MARKER]),
        # count entities
        UNDERLINE + '\nPočet jednotlivých entít:' + RESET,
        "%-10s %-10s" % ("Entita:", "Počet:"),
        "%-10s" % ("----------------"),
    ]
    lines.extend(
        "%-10s %-10s" % (label, counts[label]) for label in ENTITY_LABELS
    )
    return lines


def main(path=DEFAULT_PATH):
    """Read the export at *path* and print both summary tables.

    Args:
        path: Location of the JSONL export; defaults to ``DEFAULT_PATH``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even if reading fails.
    with open(path, 'rt', encoding='utf-8') as source:
        text = source.read()

    for line in build_report(count_markers(text)):
        print(line)


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_PATH)