用 Python 获取图片验证码中的文字

根据现成的开源项目 pytesser（http://code.google.com/p/pytesser/ ）改写。

在 Windows 上用 easy_install 安装不上，看了下源码发现代码很少，于是就想自己改写一下。

 

添加了对网络图片直接解析的支持。

 

#coding:utf-8 
#import sys 
#reload(sys) 
#sys.setdefaultencoding('utf-8')
import subprocess,urllib2,tempfile
import os,time

tesseract_exe_name = 'tesseract' # Name of executable to be called at command line
temp_dir_name = tempfile.gettempdir() # Directory in which the temporary input/output files are created

def call_tesseract(input_filename, output_filename):
    """Run the tesseract executable on an image and wait for it to finish.

    Args:
        input_filename: Path of the image file passed to tesseract.
        output_filename: Output path passed to tesseract as its second
            argument.

    Returns:
        The exit status of the tesseract process (previously computed but
        discarded), so callers can detect a failed run.

    Raises:
        OSError: If the executable cannot be started.
    """
    args = [tesseract_exe_name, input_filename, output_filename]
    # subprocess.call starts the process and blocks until it exits.
    return subprocess.call(args)
        
def image_file_to_string(filename):
    """Run OCR on a local image file and return the recognized text.

    Args:
        filename: Path of the image file to recognize.

    Returns:
        The contents of the text file produced by the tesseract run, or
        None if those contents contain the substring "Error".

    Raises:
        IOError: If the result file cannot be opened.
    """
    # Output base name handed to tesseract; the result is read back from
    # this base name plus '.txt'.
    text_file_path = os.path.join(temp_dir_name, '%sout' % time.time())
    result_path = text_file_path + '.txt'
    try:
        call_tesseract(filename, text_file_path)
        # Use a context manager so the handle is closed deterministically
        # before the cleanup in the finally clause removes the file.
        with open(result_path) as result_file:
            text = result_file.read()
        if "Error" in text:
            return None
        return text
    finally:
        # Always remove the temporary result file, even on failure.
        if os.path.exists(result_path):
            os.remove(result_path)

def image_url_to_string(url):
    """Download an image from a URL and return the text recognized in it.

    Args:
        url: Address of the image to fetch (3 second timeout).

    Returns:
        The result of image_file_to_string() on the downloaded file, or
        None if the response's Content-Type does not start with "image"
        (a missing Content-Type header is treated the same way).
    """
    image_file_path = os.path.join(temp_dir_name, '%sin' % time.time())
    try:
        urlinfo = urllib2.urlopen(url, None, 3)
        try:
            # A missing header yields None; fall back to '' so that it is
            # rejected as "not an image" instead of raising.
            content_type = urlinfo.headers.get('Content-Type') or ''
            if not content_type.startswith('image'):
                return None
            image_data = urlinfo.read()
        finally:
            # Release the connection whether or not the download succeeded.
            urlinfo.close()
        # The context manager closes the file even if the write fails.
        with open(image_file_path, 'wb') as image_file:
            image_file.write(image_data)
        return image_file_to_string(image_file_path)
    finally:
        # Always remove the temporary download, even on failure.
        if os.path.exists(image_file_path):
            os.remove(image_file_path)
            
# Demo call: run OCR on a hard-coded local image path and print the result.
# NOTE(review): this statement is at module level, so it also executes
# whenever the module is imported.
print image_file_to_string('D:\\pytesser_v0.0.1\\fonts_test.png')
 

你可能感兴趣的