working special character addition to context
This commit is contained in:
		
							parent
							
								
									ffc2e6fc51
								
							
						
					
					
						commit
						f5538d4882
					
				
							
								
								
									
										180
									
								
								squad-v2-dev-test.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										180
									
								
								squad-v2-dev-test.json
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,180 @@
 | 
				
			|||||||
 | 
					{
 | 
				
			||||||
 | 
					    "data": [
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            "paragraphs": [
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    "qas": [
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "When did Beyonce start becoming popular?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 269,
 | 
				
			||||||
 | 
					                                    "text": "in the late 1990s"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56be85543aeaaa14008c9063"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "What areas did Beyonce compete in when she was growing up?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 207,
 | 
				
			||||||
 | 
					                                    "text": "singing and dancing"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56be85543aeaaa14008c9065"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "When did Beyonce leave Destiny's Child and become a solo singer?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 526,
 | 
				
			||||||
 | 
					                                    "text": "2003"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56be85543aeaaa14008c9066"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "In what city and state did Beyonce  grow up? ",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 166,
 | 
				
			||||||
 | 
					                                    "text": "Houston, Texas"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56bf6b0f3aeaaa14008c9601"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "In which decade did Beyonce become famous?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 276,
 | 
				
			||||||
 | 
					                                    "text": "late 1990s"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56bf6b0f3aeaaa14008c9602"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "In what R&B group was she the lead singer?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 320,
 | 
				
			||||||
 | 
					                                    "text": "Destiny's Child"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56bf6b0f3aeaaa14008c9603"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "What album made her a worldwide known artist?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 505,
 | 
				
			||||||
 | 
					                                    "text": "Dangerously in Love"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56bf6b0f3aeaaa14008c9604"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "Who managed the Destiny's Child group?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 360,
 | 
				
			||||||
 | 
					                                    "text": "Mathew Knowles"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56bf6b0f3aeaaa14008c9605"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "When did Beyonc\u00e9 rise to fame?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 276,
 | 
				
			||||||
 | 
					                                    "text": "late 1990s"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56d43c5f2ccc5a1400d830a9"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "What role did Beyonc\u00e9 have in Destiny's Child?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 290,
 | 
				
			||||||
 | 
					                                    "text": "lead singer"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56d43c5f2ccc5a1400d830aa"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "What was the first album Beyonc\u00e9 released as a solo artist?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 505,
 | 
				
			||||||
 | 
					                                    "text": "Dangerously in Love"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56d43c5f2ccc5a1400d830ab"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "When did Beyonc\u00e9 release Dangerously in Love?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 526,
 | 
				
			||||||
 | 
					                                    "text": "2003"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56d43c5f2ccc5a1400d830ac"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "How many Grammy awards did Beyonc\u00e9 win for her first solo album?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 590,
 | 
				
			||||||
 | 
					                                    "text": "five"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56d43c5f2ccc5a1400d830ad"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "What was Beyonc\u00e9's role in Destiny's Child?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 290,
 | 
				
			||||||
 | 
					                                    "text": "lead singer"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56d43ce42ccc5a1400d830b4"
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            "is_impossible": false,
 | 
				
			||||||
 | 
					                            "question": "What was the name of Beyonc\u00e9's first solo album?",
 | 
				
			||||||
 | 
					                            "answers": [
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    "answer_start": 505,
 | 
				
			||||||
 | 
					                                    "text": "Dangerously in Love"
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            ],
 | 
				
			||||||
 | 
					                            "id": "56d43ce42ccc5a1400d830b5"
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    ],
 | 
				
			||||||
 | 
					                    "context": "Beyonc\u00e9 Giselle Knowles-Carter (/bi\u02d0\u02c8j\u0252nse\u026a/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyonc\u00e9's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles \"Crazy in Love\" and \"Baby Boy\"."
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            ],
 | 
				
			||||||
 | 
					            "title": "Beyonc\u00e9"
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    ],
 | 
				
			||||||
 | 
					    "version": "v2.0"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@ -1,19 +1,23 @@
 | 
				
			|||||||
import json
 | 
					import json
 | 
				
			||||||
from dotenv import load_dotenv
 | 
					from dotenv import load_dotenv
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import six
 | 
					import six
 | 
				
			||||||
from google.cloud import translate_v2 as translate
 | 
					from google.cloud import translate_v2 as translate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def load_squad(filename):
 | 
					
 | 
				
			||||||
 | 
					def load(filename):
 | 
				
			||||||
    with open(filename, "r") as f:
 | 
					    with open(filename, "r") as f:
 | 
				
			||||||
        squad = json.load(f)
 | 
					        squad = json.load(f)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return squad
 | 
					    return squad
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def save_squad(filename, squad):
 | 
					 | 
				
			||||||
    with open(filename, "w") as f:
 | 
					 | 
				
			||||||
        json.dump(squad, f)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def print_squad(squad, article_limit=2, paragraph_limit=3, qas_limit=5):
 | 
					def save(filename, squad):
 | 
				
			||||||
 | 
					    with open(filename, "w") as f:
 | 
				
			||||||
 | 
					        json.dump(squad, f, indent=2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def print_squad(squad, article_limit=100, paragraph_limit=100, qas_limit=100):
 | 
				
			||||||
    for article in squad['data'][:article_limit]:
 | 
					    for article in squad['data'][:article_limit]:
 | 
				
			||||||
        print("="*40)
 | 
					        print("="*40)
 | 
				
			||||||
        print(f"Article title: {article['title']}\n\n")
 | 
					        print(f"Article title: {article['title']}\n\n")
 | 
				
			||||||
@ -21,16 +25,23 @@ def print_squad(squad, article_limit=2, paragraph_limit=3, qas_limit=5):
 | 
				
			|||||||
        for paragraph in article['paragraphs'][:paragraph_limit]:
 | 
					        for paragraph in article['paragraphs'][:paragraph_limit]:
 | 
				
			||||||
            print(f"{paragraph['context']}\n")
 | 
					            print(f"{paragraph['context']}\n")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            for qas in paragraph['qas'][:qas_limit]:
 | 
					            # index = 0
 | 
				
			||||||
                print(f"Question: {qas['question']}")
 | 
					            # for qas in paragraph['qas'][:qas_limit]:
 | 
				
			||||||
 | 
					            #     print(f"Question: {qas['question']}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                print(f"Answers:")
 | 
					            #     print(f"Answers:")
 | 
				
			||||||
                for answer in qas['answers']:
 | 
					            #     answer = qas['answers'][0]
 | 
				
			||||||
                    print(f"\t{answer['text']}")
 | 
					            #     print(f"#{index} @{answer['answer_start']}: \t{answer['text']}")
 | 
				
			||||||
 | 
					            #     print(f"#{index} ends @{answer['answer_end']}")
 | 
				
			||||||
 | 
					            #     start = answer['answer_start']
 | 
				
			||||||
 | 
					            #     end = start + len(answer['text'])
 | 
				
			||||||
 | 
					            #     print(f"from context: \t{paragraph['context'][start:end]}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                print("\n")
 | 
					            #     print("\n")
 | 
				
			||||||
 | 
					            #     index += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def translate_text(target, text):
 | 
					
 | 
				
			||||||
 | 
					def translate_text(text):
 | 
				
			||||||
    """Translates text into the target language.
 | 
					    """Translates text into the target language.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Target must be an ISO 639-1 language code.
 | 
					    Target must be an ISO 639-1 language code.
 | 
				
			||||||
@ -44,17 +55,87 @@ def translate_text(target, text):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    # Text can also be a sequence of strings, in which case this method
 | 
					    # Text can also be a sequence of strings, in which case this method
 | 
				
			||||||
    # will return a sequence of results for each text.
 | 
					    # will return a sequence of results for each text.
 | 
				
			||||||
    result = translate_client.translate(text, target_language=target)
 | 
					    result = translate_client.translate(text, target_language="sk")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    print(u"Text: {}".format(result["input"]))
 | 
					    print(u"Text: {}".format(result["input"]))
 | 
				
			||||||
    print(u"Translation: {}".format(result["translatedText"]))
 | 
					    print(u"Translation: {}".format(result["translatedText"]))
 | 
				
			||||||
    print(u"Detected source language: {}".format(result["detectedSourceLanguage"]))
 | 
					    print(u"Detected source language: {}".format(result["detectedSourceLanguage"]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def sort_qas_by_answer_index(squad):
					    """Order each paragraph's questions by where their first answer starts.

					    Mutates ``squad`` in place and returns nothing. Within every
					    paragraph, questions that are not flagged ``is_impossible`` and have
					    at least one answer are sorted (stably) by
					    ``answers[0]['answer_start']``. All remaining questions keep their
					    relative order and are appended after the sorted ones.

					    The first answer of each sorted question also gains an
					    ``answer_end`` key: the exclusive end offset
					    ``answer_start + len(text)``.

					    Args:
					        squad: Parsed SQuAD-style dict laid out as
					            ``data -> paragraphs -> qas -> answers``.
					    """
					    for article in squad['data']:
					        for paragraph in article['paragraphs']:
					            answerable, unanswerable = [], []
					            for qas in paragraph['qas']:
					                # A missing or non-boolean flag must not drop the question,
					                # and an empty answer list must not crash the sort key.
					                has_answer = (
					                    not qas.get('is_impossible', False)
					                    and bool(qas.get('answers'))
					                )
					                (answerable if has_answer else unanswerable).append(qas)

					            # list.sort is stable, so ties keep their original order.
					            answerable.sort(key=lambda qas: qas['answers'][0]['answer_start'])

					            for qas in answerable:
					                first = qas['answers'][0]
					                first['answer_end'] = first['answer_start'] + len(first['text'])

					            paragraph['qas'] = answerable + unanswerable
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def transform_squad(squad):
					    """Run ``add_special_chars_to_paragraph`` on every paragraph of ``squad``.

					    Paragraphs are visited article by article, in file order, and are
					    modified in place by the helper; nothing is returned.
					    """
					    every_paragraph = (
					        para
					        for entry in squad['data']
					        for para in entry['paragraphs']
					    )
					    for para in every_paragraph:
					        add_special_chars_to_paragraph(para)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _remap_index(pos, start, end, shift, is_end):
					    """Map an offset in the old context to its offset after marker insertion.

					    Characters in ``[start, end)`` move right by ``shift`` (the opening
					    marker plus one space); characters at or after ``end`` move by
					    ``2 * shift`` (both markers). An exclusive end offset follows the
					    last character it covers, so it is classified by ``pos - 1``.
					    """
					    probe = pos - 1 if is_end else pos
					    if probe < start:
					        return pos
					    if probe < end:
					        return pos + shift
					    return pos + 2 * shift


					def add_special_chars_to_paragraph(paragraph):
					    """Wrap each single-answer span in the context with ``[i]`` markers.

					    For the question at position ``i`` in ``paragraph['qas']`` the answer
					    text in ``paragraph['context']`` becomes ``[i] <answer> [i]``.
					    Questions flagged ``is_impossible`` and questions that do not have
					    exactly one answer get no markers.

					    After every insertion the offsets of all other answers in the
					    paragraph are re-mapped, so each ``answer_start``/``answer_end`` pair
					    keeps covering the same original characters (plus any markers that
					    were inserted between them). The wrapped answer itself keeps pointing
					    at its bare text, without its own markers.

					    Args:
					        paragraph: Dict with a ``context`` string and a ``qas`` list. Each
					            answer needs ``answer_start``; ``answer_end`` (exclusive) is
					            used when present and derived from ``text`` otherwise.
					            The dict is modified in place.
					    """
					    for counter, qas in enumerate(paragraph['qas']):
					        # Skip if impossible question
					        if qas.get('is_impossible', False):
					            continue

					        answers = qas['answers']
					        # Only questions with exactly one answer are marked.
					        if len(answers) != 1:
					            continue

					        current = answers[0]
					        special_char = f"[{counter}]"
					        # Each marker is written together with one separating space.
					        shift = len(special_char) + 1

					        start = current['answer_start']
					        end = current.get('answer_end', start + len(current['text']))

					        # Add special chars to context
					        context = paragraph['context']
					        paragraph['context'] = (
					            f"{context[:start]}{special_char} {context[start:end]} "
					            f"{special_char}{context[end:]}"
					        )

					        # Recalculate indexes of every other answer, earlier and later
					        # questions alike, since any of them may overlap the new markers.
					        for q in paragraph['qas']:
					            for other in q.get('answers', []):
					                if other is current:
					                    continue
					                new_start = _remap_index(
					                    other['answer_start'], start, end, shift, False
					                )
					                other['answer_start'] = new_start
					                if 'answer_end' in other:
					                    new_end = _remap_index(
					                        other['answer_end'], start, end, shift, True
					                    )
					                    # Guard zero-length spans sitting exactly on `start`.
					                    other['answer_end'] = max(new_start, new_end)

					        # Fix indexes in current answer: only the opening marker precedes it.
					        current['answer_start'] = start + shift
					        if 'answer_end' in current:
					            current['answer_end'] = end + shift
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == "__main__":
 | 
					if __name__ == "__main__":
 | 
				
			||||||
    load_dotenv()
 | 
					    load_dotenv()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    orig_squad = load_squad('./squad-v2-dev.json')
 | 
					    squad = load('./squad-v2-dev.json')
 | 
				
			||||||
    # print_squad(orig_squad)
 | 
					    sort_qas_by_answer_index(squad)
 | 
				
			||||||
 | 
					    transform_squad(squad)
 | 
				
			||||||
 | 
					    print_squad(squad)
 | 
				
			||||||
 | 
					    save("./squad-v2-dev-test-out.json", squad)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # translate_text("my name is tomas")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user