dp2022/squad_translate_google.py

61 lines
1.7 KiB
Python
Raw Normal View History

import json
from dotenv import load_dotenv
import six
from google.cloud import translate_v2 as translate
def load_squad(filename):
    """Read a SQuAD-format JSON file and return its parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The deserialized JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8: without it, open() uses the locale's
    # preferred encoding, which mis-decodes or rejects non-ASCII text on
    # platforms whose default is not UTF-8.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)
def save_squad(filename, squad):
    """Write ``squad`` to ``filename`` as JSON, replacing any existing file.

    Args:
        filename: Destination path; the file is opened in text write mode.
        squad: JSON-serializable object to store.
    """
    # json.dump is called with its defaults, so non-ASCII characters are
    # emitted as \uXXXX escapes and the file content is plain ASCII.
    with open(filename, mode="w") as out_file:
        json.dump(squad, out_file)
def print_squad(squad, article_limit=2, paragraph_limit=3, qas_limit=5):
    """Print a truncated, human-readable preview of a SQuAD-style dataset.

    Only the first ``article_limit`` articles are shown; within each article
    the first ``paragraph_limit`` paragraphs, and within each paragraph the
    first ``qas_limit`` questions. Every answer of a shown question is printed.

    Args:
        squad: Mapping with a ``'data'`` list of articles. Each article has
            ``'title'`` and ``'paragraphs'``; each paragraph has ``'context'``
            and ``'qas'``; each qas entry has ``'question'`` and ``'answers'``,
            and each answer has ``'text'``.
        article_limit: Maximum number of articles to print.
        paragraph_limit: Maximum number of paragraphs per article.
        qas_limit: Maximum number of questions per paragraph.
    """
    separator = "=" * 40
    shown_articles = squad['data'][:article_limit]
    for entry in shown_articles:
        print(separator)
        print(f"Article title: {entry['title']}\n\n")
        shown_paragraphs = entry['paragraphs'][:paragraph_limit]
        for para in shown_paragraphs:
            print(f"{para['context']}\n")
            shown_questions = para['qas'][:qas_limit]
            for item in shown_questions:
                print(f"Question: {item['question']}")
                print("Answers:")
                for ans in item['answers']:
                    print(f"\t{ans['text']}")
                # Blank gap after each question's answer list.
                print("\n")
def translate_text(target, text):
    """Translate text into the target language and print the outcome.

    Target must be an ISO 639-1 language code.
    See https://g.co/cloud/translate/v2/translate-reference#supported_languages

    Args:
        target: ISO 639-1 code of the language to translate into.
        text: A string, UTF-8 encoded bytes, or a sequence of strings.

    Returns:
        Whatever ``Client.translate`` returned: a single result dict when
        ``text`` is one string, or a list of result dicts when ``text`` is a
        sequence of strings.
    """
    translate_client = translate.Client()

    if isinstance(text, bytes):
        text = text.decode("utf-8")

    # Text can also be a sequence of strings, in which case this method
    # will return a sequence of results for each text.
    result = translate_client.translate(text, target_language=target)

    # Normalize to a list so the single-string and sequence cases share the
    # same reporting code; indexing a list result with string keys would
    # raise TypeError.
    entries = [result] if isinstance(result, dict) else result
    for entry in entries:
        print(f"Text: {entry['input']}")
        print(f"Translation: {entry['translatedText']}")
        print(f"Detected source language: {entry['detectedSourceLanguage']}")

    # Hand the translation back so callers can use it, not just read stdout.
    return result
if __name__ == "__main__":
    # Script entry point: runs only when the file is executed directly.
    # NOTE(review): load_dotenv() presumably pulls settings (e.g. the Google
    # Cloud credentials used by translate.Client()) from a .env file into the
    # environment — confirm.
    load_dotenv()
    # Parse squad-v2-dev.json, resolved relative to the current working
    # directory.
    orig_squad = load_squad('./squad-v2-dev.json')
    # print_squad(orig_squad)