# Listing metadata from the original file viewer: 46 lines, 1.1 KiB, Python.
import json


def compute_stats(squad):
    """Count items and characters in a loaded SQuAD-style JSON object.

    The object is expected to have the nested layout this function reads:
    ``squad['data']`` is a list of articles, each with a ``'paragraphs'``
    list; each paragraph has a ``'context'`` string and a ``'qas'`` list;
    each qas entry has a ``'question'`` string and an ``'answers'`` list
    whose items carry a ``'text'`` string.

    Args:
        squad: The parsed JSON object (a dict with a ``'data'`` key).

    Returns:
        A dict with the integer keys ``articles``, ``paragraphs``, ``qas``,
        ``answers``, ``context_chars``, ``question_chars``, ``answer_chars``
        and ``total_chars`` (the sum of the three ``*_chars`` values).

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    articles = squad['data']
    stats = {
        "articles": len(articles),
        "paragraphs": 0,
        "qas": 0,
        "answers": 0,
        "context_chars": 0,
        "question_chars": 0,
        "answer_chars": 0,
    }

    for article in articles:
        paragraphs = article['paragraphs']
        stats["paragraphs"] += len(paragraphs)

        for paragraph in paragraphs:
            stats["context_chars"] += len(paragraph['context'])

            qas_list = paragraph['qas']
            stats["qas"] += len(qas_list)

            for qas in qas_list:
                stats["question_chars"] += len(qas['question'])

                # Every entry of the answers list is counted as-is; no
                # de-duplication of repeated answer texts is attempted.
                answers = qas['answers']
                stats["answers"] += len(answers)
                stats["answer_chars"] += sum(
                    len(answer['text']) for answer in answers
                )

    stats["total_chars"] = (
        stats["context_chars"]
        + stats["question_chars"]
        + stats["answer_chars"]
    )
    return stats


def main(path="squad-v2-dev.json"):
    """Load the JSON file at *path* and print its item and character counts.

    Args:
        path: Location of the JSON file to analyse. Defaults to the file
            name the script originally hard-coded.
    """
    with open(path, "r", encoding="utf-8") as f:
        squad = json.load(f)

    stats = compute_stats(squad)

    print(f"total articles: {stats['articles']}")

    print(f"total paragraphs: {stats['paragraphs']}")
    print(f"total qas: {stats['qas']}")
    print(f"total answers: {stats['answers']}")

    print(f"chars in contexts: {stats['context_chars']}")
    print(f"chars in questions: {stats['question_chars']}")
    print(f"chars in answers: {stats['answer_chars']}")

    print(f"total chars: {stats['total_chars']}")


# Only run when executed as a script, so importing the module has no
# side effects (no file access, no output).
if __name__ == "__main__":
    main()