Selenium+PhantomJS系列教程:
Selenium+PhantomJS(系列六:元素区域截图与 OCR):先对页面中指定元素所在的区域进行截图,然后可以利用 OCR 做验证码识别,或对截图做其他图形图像分析等。
import base64
import sys

import pyocr
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    # Python 3: the decoded screenshot is bytes, so a bytes buffer is needed.
    from io import BytesIO as StringIO
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from PIL import Image

# Script purpose: screenshot the page, crop the region occupied by one
# element, and run OCR on the cropped image.

# Open a session on the remote WebDriver server with PhantomJS capabilities.
driver = webdriver.Remote(
    command_executor='http://10.10.10.140:8910',
    desired_capabilities=DesiredCapabilities.PHANTOMJS)
try:
    driver.get('目标网址')

    # Look up the element and remember where it sits and how big it is.
    element = driver.find_element_by_class_name('conttxt')
    location = element.location
    size = element.size

    # Grab the screenshot while the session is still alive.
    # b64decode replaces the deprecated base64.decodestring.
    screenshot = base64.b64decode(driver.get_screenshot_as_base64())
finally:
    # Always release the remote browser session, even if a step above failed.
    driver.quit()

img = Image.open(StringIO(screenshot))

# Convert the element's position and size into a crop box in image
# coordinates: (left, top, right, bottom).
left = int(location['x'])
top = int(location['y'])
right = int(location['x'] + size['width'])
bottom = int(location['y'] + size['height'])
img = img.crop((left, top, right, bottom))
# img.save('screenshot.png')  # uncomment to keep the cropped image on disk

# Recognise the text with pyocr (tesseract is the recommended engine).
tools = pyocr.get_available_tools()
if not tools:
    # Fail with a readable message instead of an IndexError on tools[0].
    sys.exit('No OCR tool found; install tesseract or another engine '
             'supported by pyocr.')
print(tools[0].image_to_string(img, lang='chi_sim'))