Reputation: 12502
I'm trying to use google translate from a python script:
#!/usr/bin/env python
# Python 2 script: ask Google's page-translation service to translate a URL.
from urllib2 import urlopen
from urllib import urlencode
# Endpoint of the (now defunct) Google page translator.
base_url = "http://www.google.com/translate?"
# langpair=en|es: translate English -> Spanish; u: the page to translate.
params = (('langpair','en|es'), ('u','http://asdf.com'),)
url = base_url+urlencode(params)
print "Encoded URL: %s" % url
# This request is the one that fails with HTTP 403 (see traceback below).
print urlopen(url).read()
I'm getting the error 403 when I use it.
# ./1.py
Encoded URL: http://www.google.com/translate?langpair=en%7Ces&u=http%3A%2F%2Fasdf.com
Traceback (most recent call last):
...
urllib2.HTTPError: HTTP Error 403: Forbidden
However, the same URL works fine when accessed from browser. Could anyone spot the error? Or is it that google does not allow this type of usage?
Upvotes: 4
Views: 10028
Reputation: 1
# Route all outgoing connections through a local SOCKS5 proxy
# (requires the third-party PySocks package; tested on Python 3.11).
import socket

import socks

socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", 10808)
# Monkey-patch the socket module so every library (urllib, requests, ...)
# transparently connects via the SOCKS proxy.
socket.socket = socks.socksocket
Upvotes: 0
Reputation: 1053
You can use the following, much better Python code for translating with Google:
from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
import requests
import sys
import os
class UnsortedAttributes(HTMLFormatter):
    """HTML formatter that emits tag attributes in their original
    insertion order instead of BeautifulSoup's default sorted order."""

    def attributes(self, tag):
        # Yield (name, value) pairs exactly as stored on the tag.
        yield from tag.attrs.items()
# Folder containing the .html files to translate.
files_from_folder = r"c:\Folder2"
# If True, write results into a "translated" subfolder; otherwise alongside the originals.
use_translate_folder = True
destination_language = 'vi'  # here you change the language you want to translate into
# Only files with this extension are processed.
extension_file = ".html"
directory = os.fsencode(files_from_folder)
def translate(text, target_language):
    """Translate *text* from English to *target_language*.

    Uses Google's unofficial ``translate_a/single`` endpoint while
    impersonating the iOS Google Translate app (see User-Agent below).
    Returns the translated sentences concatenated into one string.
    Raises ``requests.HTTPError`` on a non-2xx response and ``KeyError``
    if the JSON payload lacks the expected ``sentences`` structure.
    """
    url = "https://translate.google.com/translate_a/single"
    headers = {
        "Host": "translate.google.com",
        "Accept": "*/*",
        "Cookie": "",
        # The mobile-app UA is what makes the unauthenticated endpoint answer.
        "User-Agent": "GoogleTranslate/5.9.59004 (iPhone; iOS 10.2; ja; iPhone9,1)",
        "Accept-Language": "fr",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
    }
    params = {
        "client": "it",
        "dt": ["t", "rmt", "bd", "rms", "qca", "ss", "md", "ld", "ex"],
        "otf": "2",
        "dj": "1",
        "q": text,
        "hl": "ja",
        "ie": "UTF-8",
        "oe": "UTF-8",
        "sl": "en",
        "tl": target_language,
    }
    # timeout prevents a hung connection from stalling the whole batch run.
    res = requests.get(url=url, headers=headers, params=params, timeout=30)
    res.raise_for_status()  # fail loudly on 403/429 instead of a JSON error
    payload = res.json()
    # Join all translated sentence fragments into a single paragraph.
    return ''.join(sentence["trans"] for sentence in payload["sentences"])
def recursively_translate(node):
    """Translate every non-blank text node under *node*, in place.

    Child tags are recursed into; text nodes are replaced with their
    translation. A failed translation (network error, quota, bad JSON)
    leaves the original text untouched.
    """
    for i in range(len(node.contents)):
        child = node.contents[i]
        if isinstance(child, str):
            if child.strip() != '':
                try:
                    # replace_with() is the modern name for replaceWith().
                    child.replace_with(
                        translate(text=child, target_language=destination_language))
                except Exception:
                    # Best-effort: keep the untranslated text on any failure,
                    # but never swallow KeyboardInterrupt/SystemExit (bare
                    # except did).
                    pass
        elif child is not None:
            recursively_translate(child)
# Elements matching these (tag, attrs, class_) selectors are translated,
# but only when they appear between the ARTICOL START/FINAL markers.
# Order matters: it matches the original sequence of per-selector loops.
_SELECTORS = [
    ('h1', {'itemprop': 'name'}, 'den_articol'),
    ('p', None, 'text_obisnuit'),
    ('p', None, 'text_obisnuit2'),
    ('span', None, 'text_obisnuit2'),
    ('li', None, 'text_obisnuit'),
    ('a', None, 'linkMare'),
    ('h4', None, 'text_obisnuit2'),
    ('h5', None, 'text_obisnuit2'),
    ('h1', {'itemprop': 'name'}, 'den_webinar'),
]


def _translate_marked(soup, tag, attrs, class_):
    """Translate matching elements that lie between the ARTICOL markers."""
    for el in soup.findAll(tag, attrs or {}, class_=class_):
        # Recompute offsets every iteration: each translation mutates the
        # tree, shifting all subsequent string positions.
        page = str(soup)
        begin = page.index('<!-- ARTICOL START -->')
        end = page.index('<!-- ARTICOL FINAL -->')
        if begin < page.index(str(el)) < end:
            recursively_translate(el)


for entry in os.listdir(directory):
    filename = os.fsdecode(entry)
    print(filename)
    if filename in ('y_key_e479323ce281e459.html', 'directory.html'):  # skip these two files
        continue
    if not filename.endswith(extension_file):
        continue
    # Read and close the file before the (slow) network round-trips start;
    # the <pre> wrapper preserves whitespace and is stripped again on output.
    with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
        soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')
    # <title> is translated unconditionally (it sits outside the markers).
    for title in soup.findAll('title'):
        recursively_translate(title)
    # The meta description is plain attribute text, not a text node.
    for meta in soup.findAll('meta', {'name': 'description'}):
        try:
            meta['content'] = translate(text=meta['content'],
                                        target_language=destination_language)
        except Exception:
            pass  # best-effort: keep the original description on failure
    # One data-driven pass replaces nine copy-pasted selector loops.
    for tag, attrs, class_ in _SELECTORS:
        _translate_marked(soup, tag, attrs, class_)
    # NOTE(review): the original printed a '(unknown)' placeholder here;
    # the file name is almost certainly what was intended.
    print(f'{filename} translated')
    out = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
    new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
    if use_translate_folder:
        # makedirs(exist_ok=True) replaces the try/mkdir/retry dance and is
        # portable (the original hard-coded a Windows backslash).
        out_dir = os.path.join(files_from_folder, 'translated')
        os.makedirs(out_dir, exist_ok=True)
        out_path = os.path.join(out_dir, new_filename)
    else:
        out_path = os.path.join(files_from_folder, new_filename)
    with open(out_path, 'w', encoding='utf-8') as new_html:
        # Strip the synthetic '<pre>' prefix and '</pre>' suffix.
        new_html.write(out[5:-6])
Upvotes: 0
Reputation: 1092
Your problem is that you send no headers
(which tell Google what your browser is and what it is compatible with).
I had this error before when I made my google translate api
you can find it here: https://github.com/mouuff/Google-Translate-API
Upvotes: 0
Reputation: 3059
You should be using the google API. I found and tested this code, it works:
#!/usr/bin/env python
from urllib2 import urlopen
from urllib import urlencode
import sys
lang1=sys.argv[1]
lang2=sys.argv[2]
langpair='%s|%s'%(lang1,lang2)
text=' '.join(sys.argv[3:])
base_url='http://ajax.googleapis.com/ajax/services/language/translate?'
params=urlencode( (('v',1.0),
('q',text),
('langpair',langpair),) )
url=base_url+params
content=urlopen(url).read()
start_idx=content.find('"translatedText":"')+18
translation=content[start_idx:]
end_idx=translation.find('"}, "')
translation=translation[:end_idx]
print translation
Upvotes: 4
Reputation: 298146
If Google doesn't let you do this, you could programmatically translate the normal website's source via Google's APIs.
I wrote a function for this a little while back:
def translate(text, src='', to='en'):
    """Translate *text* via the (retired) Google AJAX language API.

    The text is POSTed in 4500-character chunks (the API's size limit)
    and the translated pieces are concatenated. Chunks the API fails to
    translate are silently skipped, matching the original best-effort
    behaviour.

    src -- source language code ('' lets the API auto-detect)
    to  -- target language code (default 'en')
    """
    parameters = {'langpair': '{0}|{1}'.format(src, to), 'v': '1.0'}
    translated = ''
    # Iterate over 4500-char slices; don't shadow the `text` parameter.
    for chunk in (text[i:i + 4500] for i in range(0, len(text), 4500)):
        parameters['q'] = chunk
        body = urllib.parse.urlencode(parameters).encode('utf-8')
        response = json.loads(
            urllib.request.urlopen(
                'http://ajax.googleapis.com/ajax/services/language/translate',
                data=body,
            ).read().decode('utf-8'))
        try:
            translated += response['responseData']['translatedText']
        except (KeyError, TypeError):
            # KeyError: key missing; TypeError: responseData is null.
            pass
    return translated
Upvotes: 4
Reputation:
You want to use the offical Google Translate APIs:
http://code.google.com/intl/de-DE/apis/language/translate/overview.html
Apart from that:
http://www.catonmat.net/blog/python-library-for-google-search/
Upvotes: 1