zpwiki/pages/students/2016/jakub_maruniak/dp2021/annotation/count.py

60 lines
1.7 KiB
Python
Raw Normal View History

# load data
2020-11-24 10:32:09 +00:00
# Path of the JSONL annotation file whose contents are analysed below.
filename = 'ner/skner/skner.jsonl'
# Read the entire file into memory as one string. The context manager
# guarantees the handle is closed even if reading raises (the previous
# version left the file open for the lifetime of the process).
with open(filename, 'rt', encoding='utf-8') as file:
    text = file.read()
2020-11-24 10:32:09 +00:00
# Article statistics: each counter is the number of times the marker string
# occurs anywhere in the loaded text. The 'tokens' count is reported as the
# overall total further down.
# NOTE(review): these are raw substring matches over the whole file, so a
# marker appearing inside article text would be counted as well - confirm
# this is acceptable for the data.
countAccept, countReject, countSkip, countSpans = (
    text.count(marker) for marker in ('accept', 'reject', 'ignore', 'tokens')
)
# Entity statistics: one counter per label, obtained the same way.
countPER, countLOC, countORG, countMISC = (
    text.count(label) for label in ('PER', 'LOC', 'ORG', 'MISC')
)
2020-11-24 10:32:09 +00:00
# ANSI escape sequences used to decorate the terminal report.
# Text attributes: switch underlining on / clear all attributes.
underline, reset = '\033[04m', '\033[0m'
# Foreground colours (code 37 is the standard "white", which many terminals
# render as light gray).
red, green, gray = '\033[31m', '\033[32m', '\033[37m'
# table v1 (disabled): TableIt-based rendering, kept commented out; table v2 below is the active output
#from lib import TableIt
#table1 = [
# ['Prijatých', countAccept],
# ['Zamietnutých', countReject],
# ['Preskočených', countSkip],
# ['------------', '------------'],
# ['Spolu', countSpans]
#]
#
#table = [
# ['Entita', 'Počet'],
# ['PER', countPER],
# ['LOC', countLOC],
# ['ORG', countORG],
# ['MISC', countMISC]
#]
#print('\nPočet anotovaných článkov:')
#TableIt.printTable(table1)
#print('\nPočet jednotlivých entít:')
#TableIt.printTable(table, useFieldNames=True, color=(26, 156, 171))
# table v2: plain-text report written to stdout with ANSI colouring.
# Section 1 - annotated articles: underlined heading, then one coloured row
# per outcome (each row is colour + formatted text + reset).
print(underline + '\nPočet anotovaných článkov:' + reset)
print('\n'.join(
    colour + layout % (label, value) + reset
    for colour, layout, label, value in (
        (green, "%-15s %-20s", "Prijatých", countAccept),
        (red, "%-15s %-15s", "Zamietnutých", countReject),
        (gray, "%-15s %-15s", "Preskočených", countSkip),
    )
))
# Separator and the uncoloured total row.
print("%-15s" % "---------------------")
print("%-15s %-15s" % ("Spolu", countSpans))
# Section 2 - entities: underlined heading, column titles, separator, then
# one left-aligned row per entity label.
print(underline + '\nPočet jednotlivých entít:' + reset)
print("%-10s %-10s" % ("Entita:", "Počet:"))
print("%-10s" % "----------------")
print('\n'.join(
    "%-10s %-10s" % (label, count)
    for label, count in (
        ("PER", countPER),
        ("LOC", countLOC),
        ("ORG", countORG),
        ("MISC", countMISC),
    )
))