# Python爬虫视频教程:零基础小白到scrapy爬虫高手,轻松入门
# 中文翻译为英语时,爬虫抓取容易出错;相比之下,使用官方提供的API接口更方便,也不容易出错。
# -*- coding: utf-8 -*-
"""Youdao dictionary scraper (English to Chinese).

Created on Tue Apr 19 09:05:56 2016

@author: Administrator
"""
import time
from urllib.parse import quote

# The original author noted that Chinese input fails for an unknown reason,
# which is why the mixed-language list below is disabled.
# words_list = ["python", "job", "hello world", " amoxicillin", "阿莫西林", "clarithromycin", "克拉霉素"]
words_list = [
    "metformin hydrochloride",
    "amoxicillin",
    "clarithromycin",
    "Viagra",
    "sildenafil",
]
# Shared accumulator: the batch functions append every translation here.
translation_list = []
word = "python"
word2 = 'n. 巨蟒;大蟒n. (法)皮东(人名)'

# Page template for a single dictionary entry; %s receives the quoted word.
_YOUDAO_URL = "http://dict.youdao.com/w/%s/#keyfrom=dict.index"
# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 10


def word_format(word):
    """Strip whitespace, newlines, the noun marker "n." and spaces from *word*.

    >>> word_format('n. 巨蟒;大蟒n. (法)皮东(人名)')
    '巨蟒;大蟒(法)皮东(人名)'
    """
    cleaned = word.strip()
    # Order matters: dropping newlines first can join "n" and "." into "n.".
    # NOTE(review): the last step removes every space, whereas the author's
    # recorded output kept an inner space -- the original pattern may have
    # been altered when the source was pasted; confirm.
    for token in ("\n", "n.", " "):
        cleaned = cleaned.replace(token, "")
    return cleaned


def _fetch_translation_text(word):
    """Download the entry page for *word* and return its first translation block.

    Raises:
        IndexError: the page contains no ``.trans-container`` element.
        requests.RequestException: the request failed or timed out.
    """
    # Imported here so the pure helpers of this module remain importable
    # when the scraping dependencies are not installed.
    import bs4
    import requests

    # Quote the word so spaces, "/", "?" or "#" cannot alter the URL structure.
    url = _YOUDAO_URL % quote(word, safe="")
    res = requests.get(url, timeout=_REQUEST_TIMEOUT)
    soup = bs4.BeautifulSoup(res.text, "lxml")
    elems = soup.select(".trans-container")
    if not elems:
        raise IndexError("no translation found for %r" % (word,))
    return elems[0].text


# Translate one word (full version).
def Get_full_translation(word):
    """Return the whole first translation block for *word*, newlines removed."""
    return _fetch_translation_text(word).replace("\n", "")


# Batch-translate every word (full version).
def Get_all_full_translation(words_list):
    """Append the full translation of each word to ``translation_list``.

    A word whose lookup fails is reported on stdout and skipped.
    """
    for word in words_list:
        try:
            translation = Get_full_translation(word)
        except Exception:
            # Best effort per word, but Ctrl-C and SystemExit still propagate.
            print("exception:", word)
            continue
        translation_list.append(translation)


# Translate one word (simple version).
def Get_simple_translation(word):
    """Return only the first meaning listed for *word*.

    The translation block is cleaned with ``word_format`` and split on the
    full-width semicolon that separates the meanings.

    Session recorded by the original author (live results depend on the page)::

        Get_simple_translation("python")  ->  '巨蟒'
    """
    cleaned = word_format(_fetch_translation_text(word))
    return cleaned.split(";")[0]
# Batch-translate every word (simple version).
def Get_all_simple_translation(words_list, delay=3):
    """Append the first meaning of each word to ``translation_list``.

    Args:
        words_list: iterable of words to look up.
        delay: seconds to pause after each successful lookup (default 3).

    A word whose lookup fails is reported on stdout and skipped without
    pausing. A closing message is printed once every word has been handled.
    """
    # NOTE(review): the source lost its indentation; the pause is assumed to
    # run once per word and the final message once per call, which matches
    # the single message in the recorded session below -- confirm.
    for word in words_list:
        try:
            translation = Get_simple_translation(word)
        except Exception:
            # Best effort per word, but Ctrl-C and SystemExit still propagate.
            print("exception:", word)
            continue
        translation_list.append(translation)
        time.sleep(delay)
    print("congradulation!")


# Session recorded by the original author (live results depend on the page):
#   Get_all_simple_translation(words_list)
#   congradulation!
#   translation_list
#   Out[126]: ['盐酸二甲双胍', '阿莫西林', '克拉霉素', '万艾可', '西地那非']