Compare commits
No commits in common. "f5538d4882c894b056b7e3c1705e9c205ac48895" and "9a9f30f5f6913107606af61f4213e9e886149a98" have entirely different histories.
f5538d4882
...
9a9f30f5f6
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +0,0 @@
|
|||||||
google_api_key.json
|
|
@ -1,7 +0,0 @@
|
|||||||
# Prerequisites
|
|
||||||
Install the Google Cloud SDK from: [https://cloud.google.com/sdk/docs/install](https://cloud.google.com/sdk/docs/install)
|
|
||||||
|
|
||||||
Set up a project in console.google.com
|
|
||||||
Create a service account
|
|
||||||
Create and download a key in JSON format
|
|
||||||
Export the GOOGLE_APPLICATION_CREDENTIALS environment variable with the path to the key
|
|
@ -1,2 +0,0 @@
|
|||||||
python-dotenv==0.19.1
|
|
||||||
google-cloud-translate==2.0.1
|
|
@ -1,180 +0,0 @@
|
|||||||
{
|
|
||||||
"data": [
|
|
||||||
{
|
|
||||||
"paragraphs": [
|
|
||||||
{
|
|
||||||
"qas": [
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "When did Beyonce start becoming popular?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 269,
|
|
||||||
"text": "in the late 1990s"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56be85543aeaaa14008c9063"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "What areas did Beyonce compete in when she was growing up?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 207,
|
|
||||||
"text": "singing and dancing"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56be85543aeaaa14008c9065"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "When did Beyonce leave Destiny's Child and become a solo singer?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 526,
|
|
||||||
"text": "2003"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56be85543aeaaa14008c9066"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "In what city and state did Beyonce grow up? ",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 166,
|
|
||||||
"text": "Houston, Texas"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56bf6b0f3aeaaa14008c9601"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "In which decade did Beyonce become famous?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 276,
|
|
||||||
"text": "late 1990s"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56bf6b0f3aeaaa14008c9602"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "In what R&B group was she the lead singer?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 320,
|
|
||||||
"text": "Destiny's Child"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56bf6b0f3aeaaa14008c9603"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "What album made her a worldwide known artist?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 505,
|
|
||||||
"text": "Dangerously in Love"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56bf6b0f3aeaaa14008c9604"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "Who managed the Destiny's Child group?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 360,
|
|
||||||
"text": "Mathew Knowles"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56bf6b0f3aeaaa14008c9605"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "When did Beyonc\u00e9 rise to fame?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 276,
|
|
||||||
"text": "late 1990s"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56d43c5f2ccc5a1400d830a9"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "What role did Beyonc\u00e9 have in Destiny's Child?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 290,
|
|
||||||
"text": "lead singer"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56d43c5f2ccc5a1400d830aa"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "What was the first album Beyonc\u00e9 released as a solo artist?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 505,
|
|
||||||
"text": "Dangerously in Love"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56d43c5f2ccc5a1400d830ab"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "When did Beyonc\u00e9 release Dangerously in Love?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 526,
|
|
||||||
"text": "2003"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56d43c5f2ccc5a1400d830ac"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "How many Grammy awards did Beyonc\u00e9 win for her first solo album?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 590,
|
|
||||||
"text": "five"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56d43c5f2ccc5a1400d830ad"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "What was Beyonc\u00e9's role in Destiny's Child?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 290,
|
|
||||||
"text": "lead singer"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56d43ce42ccc5a1400d830b4"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"is_impossible": false,
|
|
||||||
"question": "What was the name of Beyonc\u00e9's first solo album?",
|
|
||||||
"answers": [
|
|
||||||
{
|
|
||||||
"answer_start": 505,
|
|
||||||
"text": "Dangerously in Love"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"id": "56d43ce42ccc5a1400d830b5"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"context": "Beyonc\u00e9 Giselle Knowles-Carter (/bi\u02d0\u02c8j\u0252nse\u026a/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyonc\u00e9's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles \"Crazy in Love\" and \"Baby Boy\"."
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"title": "Beyonc\u00e9"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"version": "v2.0"
|
|
||||||
}
|
|
@ -1,141 +0,0 @@
|
|||||||
import json
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
import six
|
|
||||||
from google.cloud import translate_v2 as translate
|
|
||||||
|
|
||||||
|
|
||||||
def load(filename):
    """Read a JSON file and return the parsed object.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's contents.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # locale encoding, which may differ between machines.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)
|
|
||||||
|
|
||||||
|
|
||||||
def save(filename, squad):
    """Serialize *squad* to *filename* as JSON indented by two spaces.

    Args:
        filename: Path of the file to create or overwrite.
        squad: JSON-serializable object to write.
    """
    # Explicit encoding so the output does not depend on the locale.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(squad, f, indent=2)
|
|
||||||
|
|
||||||
|
|
||||||
def print_squad(squad, article_limit=100, paragraph_limit=100, qas_limit=100):
    """Print article titles and paragraph contexts of a SQuAD-style dict.

    Args:
        squad: Dict with a ``data`` list of articles; every article has a
            ``title`` and a ``paragraphs`` list whose items have a
            ``context`` string.
        article_limit: Maximum number of articles to print.
        paragraph_limit: Maximum number of paragraphs printed per article.
        qas_limit: Kept so existing callers keep working; currently unused
            because questions and answers are not printed.
    """
    for article in squad['data'][:article_limit]:
        print("=" * 40)
        print(f"Article title: {article['title']}\n\n")

        for paragraph in article['paragraphs'][:paragraph_limit]:
            print(f"{paragraph['context']}\n")
|
|
||||||
|
|
||||||
|
|
||||||
def translate_text(text, target_language="sk"):
    """Translate *text* with the Google Cloud Translation v2 client and print it.

    Args:
        text: A ``str``, UTF-8 encoded ``bytes``, or a sequence of strings.
        target_language: ISO 639-1 code of the language to translate into.
            Defaults to ``"sk"``, the value that used to be hard-coded.
            See https://g.co/cloud/translate/v2/translate-reference#supported_languages

    Returns:
        None. The input, the translation and the detected source language
        are printed for every translated string.
    """
    translate_client = translate.Client()

    if isinstance(text, bytes):
        text = text.decode("utf-8")

    result = translate_client.translate(text, target_language=target_language)

    # A single string yields one result dict; a sequence yields a list of
    # dicts. Normalise so both shapes are printed the same way instead of
    # failing on result["input"] for a list.
    results = result if isinstance(result, list) else [result]
    for item in results:
        print(u"Text: {}".format(item["input"]))
        print(u"Translation: {}".format(item["translatedText"]))
        print(u"Detected source language: {}".format(item["detectedSourceLanguage"]))
|
|
||||||
|
|
||||||
|
|
||||||
def sort_qas_by_answer_index(squad):
    """Reorder each paragraph's questions by the start of their first answer.

    For every paragraph in ``squad['data'][*]['paragraphs']`` the ``qas``
    list is replaced by:

    1. answerable questions with at least one answer, sorted (stably) by
       ``answers[0]['answer_start']``;
    2. answerable questions whose ``answers`` list is empty, in their
       original order (previously these raised ``IndexError``);
    3. impossible questions, in their original order.

    The first answer of every question in group 1 also receives an
    ``answer_end`` key equal to ``answer_start + len(text)`` (exclusive end).

    Args:
        squad: SQuAD-style dict; modified in place.
    """
    for article in squad['data']:
        for paragraph in article['paragraphs']:
            answered, unanswered, impossible = [], [], []
            for qas in paragraph['qas']:
                if qas['is_impossible']:
                    impossible.append(qas)
                elif qas['answers']:
                    answered.append(qas)
                else:
                    unanswered.append(qas)

            answered.sort(key=lambda qas: qas['answers'][0]['answer_start'])

            for qas in answered:
                first = qas['answers'][0]
                first['answer_end'] = first['answer_start'] + len(first['text'])

            paragraph['qas'] = answered + unanswered + impossible
|
|
||||||
|
|
||||||
|
|
||||||
def transform_squad(squad):
    """Run ``add_special_chars_to_paragraph`` on every paragraph of every article.

    Args:
        squad: Dict with a ``data`` list of articles, each holding a
            ``paragraphs`` list. Paragraphs are visited article by article,
            in list order.
    """
    every_paragraph = (
        paragraph
        for article in squad['data']
        for paragraph in article['paragraphs']
    )
    for paragraph in every_paragraph:
        add_special_chars_to_paragraph(paragraph)
|
|
||||||
|
|
||||||
|
|
||||||
def add_special_chars_to_paragraph(paragraph):
    """Wrap single-answer spans of the paragraph's context in ``[n]`` markers.

    For the question at list position ``n`` the answer text inside
    ``paragraph['context']`` becomes ``[n] <answer> [n]``. Impossible
    questions and questions that do not have exactly one answer get no
    marker.

    Every first answer of an answerable question must already carry
    ``answer_start`` and ``answer_end`` (end exclusive), and the questions
    are expected to be ordered by ``answer_start``; that is the state
    ``sort_qas_by_answer_index`` leaves a paragraph in.

    After each insertion the first answer of every *later* question is
    re-indexed so it still points at its text in the modified context. The
    processed answer itself is moved to the text between its two markers.
    Answers of *earlier* questions are not re-indexed when later markers
    land inside them.

    Args:
        paragraph: Dict with ``context`` (str) and ``qas`` (list); modified
            in place.
    """
    qas_list = paragraph['qas']
    for counter, qas in enumerate(qas_list):
        # Only answerable questions with exactly one answer get a marker.
        if qas['is_impossible'] or len(qas['answers']) != 1:
            continue

        special_char = f"[{counter}]"
        # Each insertion is the marker plus one separating space.
        shift = len(special_char) + 1

        current = qas['answers'][0]
        start = current['answer_start']
        end = current['answer_end']

        context = paragraph['context']
        paragraph['context'] = (
            f"{context[:start]}{special_char} {context[start:end]} "
            f"{special_char}{context[end:]}"
        )

        # Re-index the answers that have not been processed yet. Because of
        # the ordering, none of them starts before `start`, so every offset
        # lies behind the opening marker; it additionally lies behind the
        # closing marker when it is at or past the old `end`.
        for q in qas_list[counter + 1:]:
            if q['is_impossible'] or not q['answers']:
                continue

            other = q['answers'][0]
            other['answer_start'] += shift if other['answer_start'] < end else 2 * shift
            other['answer_end'] += shift if other['answer_end'] < end else 2 * shift

        # The current answer now sits right behind its opening marker.
        current['answer_start'] = start + shift
        current['answer_end'] = end + shift
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # python-dotenv hook; presumably loads the Google credentials path
    # from a local .env file -- confirm against the deployment setup.
    load_dotenv()

    source_path = './squad-v2-dev.json'
    output_path = "./squad-v2-dev-test-out.json"

    dataset = load(source_path)
    # Sorting must come first: it adds the 'answer_end' keys that the
    # marker insertion reads.
    sort_qas_by_answer_index(dataset)
    transform_squad(dataset)
    print_squad(dataset)
    save(output_path, dataset)
|
|
||||||
|
|
||||||
# translate_text("my name is tomas")
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user