# Original listing metadata (from the file viewer): 46 lines, 1.1 KiB, Python
"""Print size statistics (item counts and character counts) for a SQuAD file."""

import json

# Default input file, resolved relative to the current working directory.
DEFAULT_SQUAD_PATH = "squad-v2-dev.json"


def collect_squad_stats(squad):
    """Count items and characters in a loaded SQuAD-style dataset.

    Args:
        squad: Parsed JSON object with a ``data`` list of articles. Each
            article has a ``paragraphs`` list; each paragraph has a
            ``context`` string and a ``qas`` list; each qa has a ``question``
            string and an ``answers`` list whose entries carry a ``text``
            string.

    Returns:
        A dict of integer totals with the keys ``articles``, ``paragraphs``,
        ``qas``, ``answers``, ``context_chars``, ``question_chars`` and
        ``answer_chars``. Character totals are sums of ``len()`` over the
        corresponding strings.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    stats = {
        "articles": len(squad['data']),
        "paragraphs": 0,
        "qas": 0,
        "answers": 0,
        "context_chars": 0,
        "question_chars": 0,
        "answer_chars": 0,
    }

    for article in squad['data']:
        paragraphs = article['paragraphs']
        stats["paragraphs"] += len(paragraphs)

        for paragraph in paragraphs:
            stats["context_chars"] += len(paragraph['context'])
            stats["qas"] += len(paragraph['qas'])

            for qa in paragraph['qas']:
                stats["question_chars"] += len(qa['question'])
                # An empty ``answers`` list simply contributes nothing.
                stats["answers"] += len(qa['answers'])

                for answer in qa['answers']:
                    stats["answer_chars"] += len(answer['text'])

    return stats


def format_report(stats):
    """Render the totals from ``collect_squad_stats`` as report lines.

    Args:
        stats: Dict as returned by ``collect_squad_stats``.

    Returns:
        A list of strings, one per report line, in print order. The final
        line is the sum of the context, question and answer character totals.
    """
    total_chars = (
        stats["context_chars"]
        + stats["question_chars"]
        + stats["answer_chars"]
    )
    return [
        f"total articles: {stats['articles']}",
        f"total paragraphs: {stats['paragraphs']}",
        f"total qas: {stats['qas']}",
        f"total answers: {stats['answers']}",
        f"chars in contexts: {stats['context_chars']}",
        f"chars in questions: {stats['question_chars']}",
        f"chars in answers: {stats['answer_chars']}",
        f"total chars: {total_chars}",
    ]


def main(path=DEFAULT_SQUAD_PATH):
    """Load the JSON dataset at *path* and print its statistics to stdout.

    Args:
        path: Location of the JSON file to analyse; defaults to
            ``squad-v2-dev.json`` in the current working directory.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document lacks the keys the counters read.
    """
    with open(path, "r", encoding="utf-8") as f:
        squad = json.load(f)

    for line in format_report(collect_squad_stats(squad)):
        print(line)


# Run only when executed as a script, so importing this module has no
# side effects (no file access, no printing).
if __name__ == "__main__":
    main()