Add initial script with SQuAD loading, printing, and saving, plus an example function for Google translation; add a Markdown file with basic setup instructions

This commit is contained in:
Tomas Kucharik 2021-11-04 21:07:23 +01:00
parent 9a9f30f5f6
commit ffc2e6fc51
5 changed files with 71 additions and 0 deletions

1
.env Normal file
View File

@ -0,0 +1 @@
GOOGLE_APPLICATION_CREDENTIALS="./google_api_key.json"

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
google_api_key.json

View File

@ -0,0 +1,7 @@
# Prerequisites
1. Install the Google Cloud SDK from: [https://cloud.google.com/sdk/docs/install](https://cloud.google.com/sdk/docs/install)
2. Set up a project in the Google Cloud console (console.cloud.google.com).
3. Create a service account.
4. Create and download a key in JSON format.
5. Export the GOOGLE_APPLICATION_CREDENTIALS variable with the path to the key.

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
python-dotenv==0.19.1
google-cloud-translate==2.0.1

60
squad_translate_google.py Normal file
View File

@ -0,0 +1,60 @@
import json
from dotenv import load_dotenv
import six
from google.cloud import translate_v2 as translate
def load_squad(filename):
    """Load a SQuAD dataset from a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The deserialized JSON document, exactly as ``json.load`` produces it.
    """
    # JSON files are UTF-8; being explicit avoids depending on the platform's
    # default encoding, which would break on non-ASCII text on some systems.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)
def save_squad(filename, squad):
    """Write a SQuAD dataset to a JSON file.

    Args:
        filename: Path of the file to create or overwrite.
        squad: JSON-serializable object to store.
    """
    # Write real UTF-8 characters instead of \uXXXX escapes so that non-ASCII
    # (e.g. translated) text stays readable; the parsed JSON is unchanged.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(squad, f, ensure_ascii=False)
def print_squad(squad, article_limit=2, paragraph_limit=3, qas_limit=5):
    """Print a preview of a SQuAD dataset to standard output.

    Args:
        squad: Dataset dict with a ``'data'`` list of articles; each article
            has ``'title'`` and ``'paragraphs'``, each paragraph has
            ``'context'`` and ``'qas'``, and each qas entry has ``'question'``
            and ``'answers'`` (a list of dicts with ``'text'``).
        article_limit: Maximum number of articles to print.
        paragraph_limit: Maximum number of paragraphs printed per article.
        qas_limit: Maximum number of questions printed per paragraph.
    """
    separator = "=" * 40

    for entry in squad['data'][:article_limit]:
        title = entry['title']
        print(separator)
        print(f"Article title: {title}\n\n")

        for section in entry['paragraphs'][:paragraph_limit]:
            context = section['context']
            print(f"{context}\n")

            for item in section['qas'][:qas_limit]:
                question = item['question']
                print(f"Question: {question}")
                print("Answers:")
                for candidate in item['answers']:
                    answer_text = candidate['text']
                    print(f"\t{answer_text}")
                # Blank gap after each question's answers.
                print("\n")
def translate_text(target, text):
    """Translate text into the target language and print the outcome.

    Target must be an ISO 639-1 language code.
    See https://g.co/cloud/translate/v2/translate-reference#supported_languages

    Args:
        target: ISO 639-1 code of the language to translate into.
        text: Text to translate: a ``str``, UTF-8 encoded ``bytes``, or a
            sequence of strings.

    Returns:
        The value returned by the client's ``translate`` call: one result
        dict for a single text, or a sequence of result dicts when ``text``
        is a sequence.
    """
    translate_client = translate.Client()

    if isinstance(text, bytes):
        text = text.decode("utf-8")

    # Text can also be a sequence of strings, in which case this method
    # will return a sequence of results for each text.
    result = translate_client.translate(text, target_language=target)

    # Normalize to a sequence so single and batched inputs print the same way
    # (indexing a list of results with a string key would raise TypeError).
    entries = [result] if isinstance(result, dict) else result
    for entry in entries:
        print(f"Text: {entry['input']}")
        print(f"Translation: {entry['translatedText']}")
        print(f"Detected source language: {entry['detectedSourceLanguage']}")

    return result
if __name__ == "__main__":
    # Read variables from the .env file (e.g. GOOGLE_APPLICATION_CREDENTIALS)
    # into the environment before anything needs them.
    load_dotenv()

    squad_path = './squad-v2-dev.json'
    orig_squad = load_squad(squad_path)
    # print_squad(orig_squad)