Reputation: 515
I have a CSV file with several thousand rows of text in multiple languages, and I am thinking of using the Google Cloud Translation API to translate the foreign-language text into English. I wrote a simple script to check that everything works properly, and it runs smoothly.
from google.cloud import translate_v2 as translate
from time import sleep
from tqdm.notebook import tqdm
import multiprocessing as mp
import os
# Tell the Google client libraries where to find the credentials file.
# NOTE(review): "file path.py" looks like a placeholder; presumably this
# should point at the service-account key file -- confirm.
os.environ.update({"GOOGLE_APPLICATION_CREDENTIALS": "file path.py"})
translate_client = translate.Client()

# Smoke test: translate one English sentence into Japanese and show the
# raw response returned by the client.
text, target = "Good Morning, My Name is X.", "ja"
output = translate_client.translate(text, target_language=target)
print(output)
I now want to import a CSV file (using pandas), translate the text, and save the output as a CSV file, but I don't know how to do that. Most of the examples I found stop at translating a sample text, just like the one above.
Can anyone suggest how I can do this?
Upvotes: 1
Views: 4267
Reputation: 1820
To translate the text in a CSV file and save the output in the same CSV file using the Google Cloud Translation API, you can use the code below:
import csv
from pathlib import Path
# Lazily created Translation API client, shared by every translate_text call
# so that translating many lines does not build a new client per line.
_translate_client = None


def _get_translate_client():
    """Return the shared ``translate.Client``, creating it on first use."""
    global _translate_client
    if _translate_client is None:
        # Imported lazily so the dependency is only needed once a
        # translation is actually requested.
        from google.cloud import translate_v2 as translate

        _translate_client = translate.Client()
    return _translate_client


def translate_text(target, text):
    """Translate text into the target language.

    Target must be an ISO 639-1 language code.
    See https://g.co/cloud/translate/v2/translate-reference#supported_languages

    Args:
        target: Language code to translate into (for example ``"es"``).
        text: The text to translate. May be a ``str``, UTF-8 encoded
            ``bytes``, or a sequence of strings.

    Returns:
        The translated string for a single input, or a list of translated
        strings (one per input, in order) when ``text`` is a sequence.
    """
    if isinstance(text, bytes):
        text = text.decode("utf-8")

    # For a single string the client returns one result dict; for a sequence
    # of strings it returns a list with one result dict per input.
    result = _get_translate_client().translate(text, target_language=target)

    if isinstance(result, list):
        return [item["translatedText"] for item in result]
    return result["translatedText"]
def main(input_file, translate_to):
    """Translate a text file line by line and save the result as a CSV file.

    Each line of ``input_file`` is sent to the Google Cloud Translation API
    through ``translate_text`` and written as one row with the columns
    ``id``, ``original_text`` and ``translated_text``.

    The output path is the input path with its suffix replaced by ``.csv``.
    If the input already ends in ``.csv`` the two paths are the same file,
    so the input is overwritten (it is read completely before the output
    is opened).

    Args:
        input_file: Path of the text file to translate, one text per line.
        translate_to: Target language code passed on to ``translate_text``.
    """
    input_file_path = Path(input_file)
    output_file_path = input_file_path.with_suffix('.csv')

    # Read as UTF-8 explicitly (tolerating a leading BOM) so multilingual
    # text does not depend on the platform's default encoding.
    with open(input_file_path, encoding='utf-8-sig') as f:
        lines = [raw_line.strip('\n') for raw_line in f]
    total_lines = len(lines)

    # newline='' lets the csv module control line endings itself; without it
    # extra blank rows can appear on platforms that translate '\n'.
    with open(output_file_path, 'w', encoding='utf-8', newline='') as csvfile:
        my_writer = csv.writer(csvfile, delimiter=',', quotechar='"')
        my_writer.writerow(['id', 'original_text', 'translated_text'])
        for i, original_text in enumerate(lines, start=1):
            line_id = f'{i:04}'
            if original_text.strip():
                # Send the stripped text so the trailing newline is not part
                # of the translation request.
                translated_text = translate_text(
                    target=translate_to,
                    text=original_text)
            else:
                # Nothing to translate; keep the row but skip the API call.
                translated_text = ''
            my_writer.writerow([line_id, original_text, translated_text])
            # Progress monitor, non-essential.
            print(
                f"\n{line_id}/{total_lines:04}"
                f"\n{original_text}"
                f"\n{translated_text}")
if __name__ == '__main__':
    # Script entry point: prompt for the source file first, then for the
    # target language code, and hand both straight to main().
    main(
        input_file=input('Input text file? >> '),
        translate_to=input('Output language? >> '),
    )
Example:
The text in the input file was translated into the target language “es”, and the output was stored in the same CSV file.
Input:
new.csv
How are you doing,Is everything fine there
Do it today
Output:
new.csv
id,original_text,translated_text
0001,"How are you doing,Is everything fine there",¿Cómo estás? ¿Está todo bien allí?
0002,Do it today,Hazlo hoy
Upvotes: 1