From 893a25d5f076344fb24f1b0cb8ccd5a497416c64 Mon Sep 17 00:00:00 2001
From: Jakub Maruniak
Date: Tue, 24 Nov 2020 10:32:09 +0000
Subject: [PATCH] update

---
 .../jakub_maruniak/dp2021/annotation/count.py | 58 +++++++++++++++++--
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/pages/students/2016/jakub_maruniak/dp2021/annotation/count.py b/pages/students/2016/jakub_maruniak/dp2021/annotation/count.py
index c4f7fe0dbe..0178df39d9 100644
--- a/pages/students/2016/jakub_maruniak/dp2021/annotation/count.py
+++ b/pages/students/2016/jakub_maruniak/dp2021/annotation/count.py
@@ -1,14 +1,60 @@
 # load data
-filename = 'ner/annotations.jsonl'
+
+filename = 'ner/skner/skner.jsonl'
 file = open(filename, 'rt', encoding='utf-8')
 text = file.read()
 
-# count entity PER
+# count articles
+countAccept = text.count('accept')
+countReject = text.count('reject')
+countSkip = text.count('ignore')
+countSpans = text.count('tokens')
+# count entities
 countPER = text.count('PER')
 countLOC = text.count('LOC')
 countORG = text.count('ORG')
 countMISC = text.count('MISC')
-print('Počet anotovaných entít typu PER:', countPER,'\n',
-      'Počet anotovaných entít typu LOC:', countLOC,'\n',
-      'Počet anotovaných entít typu ORG:', countORG,'\n',
-      'Počet anotovaných entít typu MISC:', countMISC,'\n')
\ No newline at end of file
+
+underline = '\033[04m'
+reset = '\033[0m'
+red = '\033[31m'
+green='\033[32m'
+gray='\033[37m'
+
+# table v1
+#from lib import TableIt
+#table1 = [
+#    ['Prijatých', countAccept],
+#    ['Zamietnutých', countReject],
+#    ['Preskočených', countSkip],
+#    ['------------', '------------'],
+#    ['Spolu', countSpans]
+#]
+#
+#table = [
+#    ['Entita', 'Počet'],
+#    ['PER', countPER],
+#    ['LOC', countLOC],
+#    ['ORG', countORG],
+#    ['MISC', countMISC]
+#]
+#print('\nPočet anotovaných článkov:')
+#TableIt.printTable(table1)
+#print('\nPočet jednotlivých entít:')
+#TableIt.printTable(table, useFieldNames=True, color=(26, 156, 171))
+
+# table v2
+print(underline + '\nPočet anotovaných článkov:' + reset)
+print(green + "%-15s %-20s" %("Prijatých", countAccept) + reset)
+print(red + "%-15s %-15s" %("Zamietnutých", countReject) + reset)
+print(gray + "%-15s %-15s" %("Preskočených", countSkip) + reset)
+print("%-15s" %("---------------------"))
+print("%-15s %-15s" %("Spolu", countSpans))
+
+print(underline + '\nPočet jednotlivých entít:' + reset)
+print("%-10s %-10s" %("Entita:", "Počet:"))
+print("%-10s" %("----------------"))
+print("%-10s %-10s" %("PER", countPER))
+print("%-10s %-10s" %("LOC", countLOC))
+print("%-10s %-10s" %("ORG", countORG))
+print("%-10s %-10s" %("MISC", countMISC))
\ No newline at end of file