From ffc2e6fc51403592198ebb82e5bdd0670c302909 Mon Sep 17 00:00:00 2001
From: Tomas Kucharik
Date: Thu, 4 Nov 2021 21:07:23 +0100
Subject: [PATCH] added initial script with squad loading, printing, saving and
 with example function for google translation, added markdown file with basic
 setup instructions

---
 .env                      |  1 +
 .gitignore                |  1 +
 google_translate_howto.md |  7 ++++
 requirements.txt          |  2 ++
 squad_translate_google.py | 76 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 87 insertions(+)
 create mode 100644 .env
 create mode 100644 .gitignore
 create mode 100644 google_translate_howto.md
 create mode 100644 requirements.txt
 create mode 100644 squad_translate_google.py

diff --git a/.env b/.env
new file mode 100644
index 0000000..3533db2
--- /dev/null
+++ b/.env
@@ -0,0 +1 @@
+GOOGLE_APPLICATION_CREDENTIALS="./google_api_key.json"
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6dcf8d6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+google_api_key.json
diff --git a/google_translate_howto.md b/google_translate_howto.md
new file mode 100644
index 0000000..c23fc1e
--- /dev/null
+++ b/google_translate_howto.md
@@ -0,0 +1,7 @@
+# Prerequisites
+Install the Google Cloud SDK from: [https://cloud.google.com/sdk/docs/install](https://cloud.google.com/sdk/docs/install)
+
+1. Set up a project in the Google Cloud console (console.cloud.google.com) and enable the Cloud Translation API
+2. Create a service account
+3. Create and download a key in JSON format
+4. Set the GOOGLE_APPLICATION_CREDENTIALS variable (e.g. in `.env`) to the path of the key
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1eeb3e2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+python-dotenv==0.19.1
+google-cloud-translate==2.0.1
diff --git a/squad_translate_google.py b/squad_translate_google.py
new file mode 100644
index 0000000..e46ae57
--- /dev/null
+++ b/squad_translate_google.py
@@ -0,0 +1,76 @@
+"""Load, print, save and translate SQuAD datasets with Google Translate."""
+import json
+from dotenv import load_dotenv
+import six
+from google.cloud import translate_v2 as translate
+
+
+def load_squad(filename):
+    """Read a SQuAD JSON file and return the parsed data."""
+    # SQuAD text is not pure ASCII; do not depend on the platform default.
+    with open(filename, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def save_squad(filename, squad):
+    """Write ``squad`` to ``filename`` as UTF-8 encoded JSON."""
+    with open(filename, "w", encoding="utf-8") as f:
+        # Keep translated non-ASCII text readable instead of \uXXXX escapes.
+        json.dump(squad, f, ensure_ascii=False)
+
+
+def print_squad(squad, article_limit=2, paragraph_limit=3, qas_limit=5):
+    """Print the leading articles, paragraphs and questions of a dataset.
+
+    Each ``*_limit`` caps how many items are printed at that nesting level.
+    """
+    for article in squad['data'][:article_limit]:
+        print("=" * 40)
+        print(f"Article title: {article['title']}\n\n")
+
+        for paragraph in article['paragraphs'][:paragraph_limit]:
+            print(f"{paragraph['context']}\n")
+
+            for qas in paragraph['qas'][:qas_limit]:
+                print(f"Question: {qas['question']}")
+
+                print("Answers:")
+                for answer in qas['answers']:
+                    print(f"\t{answer['text']}")
+
+                print("\n")
+
+
+def translate_text(target, text):
+    """Translate text into the target language, print and return the result.
+
+    Target must be an ISO 639-1 language code.
+    See https://g.co/cloud/translate/v2/translate-reference#supported_languages
+
+    Text may be a single string or a sequence of strings. The value returned
+    by the client is passed back unchanged: one result dict for a single
+    string, a list of result dicts for a sequence.
+    """
+
+    translate_client = translate.Client()
+
+    if isinstance(text, six.binary_type):
+        text = text.decode("utf-8")
+
+    result = translate_client.translate(text, target_language=target)
+
+    # A sequence input yields a list of results, so normalise before
+    # printing; indexing that list with "input" would raise TypeError.
+    for item in (result if isinstance(result, list) else [result]):
+        print(f"Text: {item['input']}")
+        print(f"Translation: {item['translatedText']}")
+        print(f"Detected source language: {item['detectedSourceLanguage']}")
+
+    return result
+
+
+if __name__ == "__main__":
+    load_dotenv()
+
+    orig_squad = load_squad('./squad-v2-dev.json')
+    # print_squad(orig_squad)