H.P 琥珀
Free Software, Python, Linux etc.
python实现一个简单的web browser - [python]
canri62 发表于 2008-12-09 23:19:20
版权声明:转载时请以超链接形式标明文章原始出处和作者信息及本声明
http://blozer.blogbus.com/logs/32328603.html
超级简单的browser,能干什么呢?能获取HTML,有cookie的处理,还能处理纯HTML的forms。
用到ClientForm.py
参考了:http://wwwsearch.sourceforge.net/ (在这里学了很多,主要是关于python的web client-side programming)
以及Python Module References
Python语言: 临时自用代码@代码发芽网
from urllib2 import build_opener, HTTPCookieProcessor
import copy
from cookielib import CookieJar
from ClientForm import ParseFile
class Browser:
    """A simple browser supporting filling forms and handling cookies.

    A page is fetched with go(); its HTML is then available through show()
    and its forms through the ``forms`` attribute, which setForm() and
    submitForm() operate on.
    """

    # Defined on the class, so every Browser instance shares the same cookie
    # jar and the same opener.
    cj = CookieJar()
    opener = build_opener(HTTPCookieProcessor(cj))

    def __init__(self):
        self.page = u'no page'
        self.url = u'blank'
        self.forms = None

    def go(self, url='blank'):
        """Load ``url`` and make it the current page.

        url should be a str or urllib2.Request object.  The default value
        'blank' shows a built-in welcome page without touching the network.
        """
        if url == 'blank':
            # 'blank' is a placeholder, not a real URL: handing it to the
            # opener would fail, so stop here with a clean, form-less state.
            self.page = u'welcome to Browser'
            self.url = u'blank'
            self.forms = None
            return

        response = self.opener.open(url)
        try:
            final_url = response.geturl()
            raw_page = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()

        self.url = final_url
        # getForms() parses the page from this dump file.
        self._dump_file(raw_page, 'feedform')
        # 'replace' keeps a page that is not valid UTF-8 from aborting the
        # navigation after the URL and the dump file were already updated.
        self.page = raw_page.decode('utf-8', 'replace')
        # Same text as the old two-argument print statement, written so
        # that it parses on both Python 2 and Python 3.
        print("=>  %s" % self.url)
        self.forms = self.getForms()

    def show(self):
        """Return the current page's HTML source."""
        return self.page

    def getForms(self):
        """Parse and return all forms of the current page.

        The forms are read from the 'feedform' file written by go() and are
        also stored in ``self.forms``.
        """
        # Binary mode: the dump holds the raw bytes of the response.
        with open('feedform', 'rb') as fp:
            self.forms = ParseFile(fp, self.url, backwards_compat=False)
        return self.forms

    def setForm(self, position=0, **kargs):
        """Fill in controls of the form at index ``position``.

        Each keyword argument names a control and gives its new value.

        Example:
            browser.setForm(0, user='jason', password='secret')
        """
        form = self.forms[position]
        for key, value in kargs.items():
            form[key] = value

    def submitForm(self, position=0):
        """Submit the form at index ``position`` and load the result."""
        # NOTE(review): click() presumably returns a urllib2.Request, which
        # go() accepts -- confirm against the ClientForm documentation.
        self.go(self.forms[position].click())

    def _copy_response(self, response):
        """Return a shallow copy (copy.copy) of a response-like object."""
        # The copy used to be created and then dropped, so callers always
        # got None back.
        return copy.copy(response)

    def _dump_file(self, html, filename='temp'):
        """Write ``html`` to the file ``filename``, replacing its contents."""
        # Binary mode stores the raw page bytes without newline translation.
        with open(filename, 'wb') as fp:
            fp.write(html)
import copy
from cookielib import CookieJar
from ClientForm import ParseFile
class Browser:
    """A simple browser supporting filling forms and handling cookies.

    A page is fetched with go(); its HTML is then available through show()
    and its forms through the ``forms`` attribute, which setForm() and
    submitForm() operate on.
    """

    # Defined on the class, so every Browser instance shares the same cookie
    # jar and the same opener.
    cj = CookieJar()
    opener = build_opener(HTTPCookieProcessor(cj))

    def __init__(self):
        self.page = u'no page'
        self.url = u'blank'
        self.forms = None

    def go(self, url='blank'):
        """Load ``url`` and make it the current page.

        url should be a str or urllib2.Request object.  The default value
        'blank' shows a built-in welcome page without touching the network.
        """
        if url == 'blank':
            # 'blank' is a placeholder, not a real URL: handing it to the
            # opener would fail, so stop here with a clean, form-less state.
            self.page = u'welcome to Browser'
            self.url = u'blank'
            self.forms = None
            return

        response = self.opener.open(url)
        try:
            final_url = response.geturl()
            raw_page = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()

        self.url = final_url
        # getForms() parses the page from this dump file.
        self._dump_file(raw_page, 'feedform')
        # 'replace' keeps a page that is not valid UTF-8 from aborting the
        # navigation after the URL and the dump file were already updated.
        self.page = raw_page.decode('utf-8', 'replace')
        # Same text as the old two-argument print statement, written so
        # that it parses on both Python 2 and Python 3.
        print("=>  %s" % self.url)
        self.forms = self.getForms()

    def show(self):
        """Return the current page's HTML source."""
        return self.page

    def getForms(self):
        """Parse and return all forms of the current page.

        The forms are read from the 'feedform' file written by go() and are
        also stored in ``self.forms``.
        """
        # Binary mode: the dump holds the raw bytes of the response.
        with open('feedform', 'rb') as fp:
            self.forms = ParseFile(fp, self.url, backwards_compat=False)
        return self.forms

    def setForm(self, position=0, **kargs):
        """Fill in controls of the form at index ``position``.

        Each keyword argument names a control and gives its new value.

        Example:
            browser.setForm(0, user='jason', password='secret')
        """
        form = self.forms[position]
        for key, value in kargs.items():
            form[key] = value

    def submitForm(self, position=0):
        """Submit the form at index ``position`` and load the result."""
        # NOTE(review): click() presumably returns a urllib2.Request, which
        # go() accepts -- confirm against the ClientForm documentation.
        self.go(self.forms[position].click())

    def _copy_response(self, response):
        """Return a shallow copy (copy.copy) of a response-like object."""
        # The copy used to be created and then dropped, so callers always
        # got None back.
        return copy.copy(response)

    def _dump_file(self, html, filename='temp'):
        """Write ``html`` to the file ``filename``, replacing its contents."""
        # Binary mode stores the raw page bytes without newline translation.
        with open(filename, 'wb') as fp:
            fp.write(html)
随机文章:
校内爬虫 0点1 2008-12-15
BeautifulSoup解析实例,获取校内个人页面的好友列表 2008-12-09
urllib2的urlopen 2008-12-05
urllib2初尝 2008-12-05
校内好友漫游 2008-10-25
收藏到:Del.icio.us
博客大巴 提供程序服务器托管支持,GimpStyle theme design by Horacio Bella
版权声明:本站文章使用《署名 3.0 Unported》授权,转载时请注意标明文章原始出处和作者信息及本声明。