Tag Archive for 'python'

Python类例子

1
2
3
4
5
6
7
8
9
10
11
12
class Message(object):
    """Hold a piece of text and print it on demand."""

    def __init__(self, aString):
        """Store ``aString`` as the message text."""
        self.text = aString

    def printIt(self):
        """Write the stored text to standard output, followed by a newline."""
        # The single-argument parenthesised form behaves the same under the
        # Python 2 print statement and the Python 3 print function.
        print(self.text)

# Demo: build two Message objects, collect them, and print each one.
m1 = Message("Hello world")
m2 = Message("So long, it was short but sweet")

note = [m1, m2] # collect both objects in a list
for msg in note:
    msg.printIt() # print each message in turn, in list order

详细指南

极速 Twisted Web 60秒(6): 定制返回代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
from twisted.web.server import Site  
from twisted.web.resource import Resource  
from twisted.internet import reactor  

class PaymentRequired(Resource):
    """Resource that answers GET requests with response code 402."""

    def render_GET(self, request):
        """Set the response code to 402 and return the HTML body.

        The body is a bytes literal: on Python 2 it is the same object type
        as the plain string it replaces, and on Python 3 Twisted Web expects
        render methods to return bytes.
        """
        request.setResponseCode(402)
        return b"<html><body>Please swipe your credit card.</body></html>"

# Serve PaymentRequired as the "buy" child of an empty root on TCP port 8880.
root = Resource()
# Child path segments are bytes in Twisted Web: b"buy" equals "buy" on
# Python 2 and is the type required on Python 3.
root.putChild(b"buy", PaymentRequired())
factory = Site(root)
reactor.listenTCP(8880, factory)
# Blocks here, running the reactor's event loop.
reactor.run()

极速 Twisted Web 60秒(5): 错误处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from twisted.web.server import Site
from twisted.web.resource import Resource
from twisted.internet import reactor
from twisted.web.error import NoResource

from calendar import calendar

class YearPage(Resource):
    """Resource that renders the text calendar of a single year."""

    def __init__(self, year):
        """Initialise the base Resource and remember ``year``.

        ``year`` is handed unchanged to ``calendar()`` when the page is
        rendered.
        """
        Resource.__init__(self)
        self.year = year

    def render_GET(self, request):
        """Return an HTML page with ``calendar(self.year)`` inside ``<pre>``.

        The page is returned as bytes. On Python 2 the formatted string
        already is bytes and is returned untouched; on Python 3 it is text
        and is encoded as UTF-8, because Twisted Web expects render methods
        to return bytes.
        """
        page = "<html><body><pre>%s</pre></body></html>" % (calendar(self.year),)
        if not isinstance(page, bytes):
            page = page.encode("utf-8")
        return page

class Calendar(Resource):
    """Resource whose children are YearPage instances keyed by year."""

    def getChild(self, name, request):
        """Return the YearPage for ``name``, or NoResource if it is no integer.

        ``name`` is converted with ``int()``; a ValueError from that
        conversion yields a ``NoResource()`` instead of propagating.
        """
        try:
            requested_year = int(name)
        except ValueError:
            # The path segment is not a valid integer.
            return NoResource()
        return YearPage(requested_year)

# Use a Calendar resource as the site root and listen on TCP port 8880.
root = Calendar()
factory = Site(root)
reactor.listenTCP(8880, factory)
# Blocks here, running the reactor's event loop.
reactor.run()

极速 Twisted Web 60秒(4): 动态URL处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from twisted.web.server import Site
from twisted.web.resource import Resource
from twisted.internet import reactor

from calendar import calendar

class YearPage(Resource):
    """Resource that renders the text calendar of a single year."""

    def __init__(self, year):
        """Initialise the base Resource and remember ``year``.

        ``year`` is handed unchanged to ``calendar()`` when the page is
        rendered.
        """
        Resource.__init__(self)
        self.year = year

    def render_GET(self, request):
        """Return an HTML page with ``calendar(self.year)`` inside ``<pre>``.

        The page is returned as bytes. On Python 2 the formatted string
        already is bytes and is returned untouched; on Python 3 it is text
        and is encoded as UTF-8, because Twisted Web expects render methods
        to return bytes.
        """
        page = "<html><body><pre>%s</pre></body></html>" % (calendar(self.year),)
        if not isinstance(page, bytes):
            page = page.encode("utf-8")
        return page

class Calendar(Resource):
    """Resource whose children are YearPage instances keyed by year."""

    def getChild(self, name, request):
        """Return the YearPage for the year given by ``name``.

        ``name`` is converted with ``int()``; the ValueError raised for a
        non-integer segment is not caught here.
        """
        requested_year = int(name)
        return YearPage(requested_year)

# Use a Calendar resource as the site root and listen on TCP port 8880.
root = Calendar()
factory = Site(root)
reactor.listenTCP(8880, factory)
# Blocks here, running the reactor's event loop.
reactor.run()

极速 Twisted Web 60秒(3): 静态URL处理

1
2
3
4
5
6
7
8
9
10
11
12
13
from twisted.web.server import Site
from twisted.web.resource import Resource
from twisted.internet import reactor
from twisted.web.static import File

# Attach three File resources under fixed child names of an empty root.
# Child path segments are bytes in Twisted Web: the b"" literals equal the
# plain strings on Python 2 and are the type required on Python 3.
root = Resource()
root.putChild(b"foo", File("/tmp"))
root.putChild(b"bar", File("/lost+found"))
root.putChild(b"baz", File("/opt"))

# Listen on TCP port 8880; reactor.run() blocks running the event loop.
factory = Site(root)
reactor.listenTCP(8880, factory)
reactor.run()

极速 Twisted Web 60秒(2): 生成动态页面

1
2
3
4
5
6
7
8
9
10
11
12
13
14
from twisted.internet import reactor
from twisted.web.server import Site
from twisted.web.resource import Resource
import time

class ClockPage(Resource):
    """Resource whose GET response is the current ``time.ctime()`` string."""

    # Twisted's leaf flag: this resource is marked as having no children.
    isLeaf = True

    def render_GET(self, request):
        """Return an HTML page containing ``time.ctime()`` at request time.

        The page is returned as bytes. On Python 2 the formatted string
        already is bytes and is returned untouched; on Python 3 it is text
        and is encoded as UTF-8, because Twisted Web expects render methods
        to return bytes.
        """
        page = "<html><body>%s</body></html>" % (time.ctime(),)
        if not isinstance(page, bytes):
            page = page.encode("utf-8")
        return page

# Use a ClockPage as the site's root resource and listen on TCP port 8880.
resource = ClockPage()
factory = Site(resource)
reactor.listenTCP(8880, factory)
# Blocks here, running the reactor's event loop.
reactor.run()

极速 Twisted Web 60秒(1): 访问静态目录文件

1
2
3
4
5
6
7
8
from twisted.web.server import Site
from twisted.web.static import File
from twisted.internet import reactor

# Use a File resource for /tmp as the site root and listen on TCP port 8888.
resource = File('/tmp')
factory = Site(resource)
reactor.listenTCP(8888, factory)
# Blocks here, running the reactor's event loop.
reactor.run()

使用python的lxml库解析html

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import urllib2
import lxml.html as H

def getjarinfo(url):
    """Download ``url``, parse it with lxml and print the matched entries.

    For each entry, three lines are printed: the ``href`` of the first link
    in the second row of the ``download`` table, the text of the
    ``td#music`` cell, and the text of the ``div#game`` element.

    Arguments:
    - `url`: address of the HTML page to fetch

    Returns None; all results go to standard output.
    """
    c = urllib2.urlopen(url)
    try:
        f = c.read()
    finally:
        # Release the connection even when reading fails.
        c.close()

    doc = H.document_fromstring(f)
    pinpais = doc.xpath("//td[@id='music']")
    jixings = doc.xpath("//div[@id='game']")
    jars = doc.xpath("//table[@id='download']//tr[2]/td[1]/a[1]")

    # zip() stops at the shortest list, so a page where the three queries
    # match different numbers of nodes no longer raises IndexError.
    for jar, pinpai, jixing in zip(jars, pinpais, jixings):
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print(jar.get('href'))
        print(pinpai.text_content())
        print(jixing.text_content())

    # Text of the element following the div whose text is "游戏".
    # NOTE(review): the value is computed but neither printed nor returned,
    # exactly as in the original; the lookup is now skipped when nothing
    # matches instead of raising IndexError.
    e = doc.xpath(u"//div[text()='%s']" % u"游戏")
    if e:
        following = e[0].getnext()
        if following is not None:
            describe = following.text_content()
   
# Script entry point: run getjarinfo against a fixed sample URL.
if __name__ == '__main__':
    url='http://google.com/'
    getjarinfo(url)

1ting.com音乐下载程序

类型: 脚本
语言: Python
测试环境: Linux, Windows.

使用方法:

  • 先确认已经安装了 PycURL 与 BeautifulSoup 这两个模块
  • 1
    python down-1ting.python http://www.1ting.com/album/17/album_43525.html
一听音乐下载脚本 download
版本: 1.3.0
大小: 2.36 KB
下载次数: 583
文件类型: zip
发布日期: 2010-03-23

pyCurl获取网页问题

终于解决了这个问题：原来是我的代码在构造 HTTP header 时多声明了可以接受 gzip 压缩，于是支持 gzip 压缩的网页即使下载下来也不能用 BeautifulSoup 分析。原来 1ting.com 现在支持 gzip 压缩了，还换了一个 nProxy，多半是把 nginx 的代码改了配置重新编译的～ 真是很～～

1
2
3
4
5
6
7
8
9
10
11
12
13
# Use Pycurl
def buildHeaders(browser, referer=""):
    """
    Build HTTP Headers, So we can download wma files.

    Arguments:
    - `browser`: Which browser will use (sent as the User-Agent value)
    - `referer`: Referer url; the Referer header is left out when this is
      empty or None

    Returns a new list of "Name: value" header strings.
    """
    # The common headers are built once, so both cases share the same
    # spelling of every header name.
    headers = [
        'User-Agent: ' + browser,
        'Accept: text/html, application/xml;q=0.9, audio/x-ms-wma, application/xhtml+xml, image/png, gzip, x-gzip, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1',
        'Accept-Language: en-us',
        # gzip is not listed as an accepted content encoding here.
        'Accept-Encoding: deflate, identity, *;q=0',
        'Accept-Charset: iso-8859-1, utf-8, utf-16, *;q=0.1',
        'Cookie: PIN=G39J3kmH2AU0SBieDgavAg==',
    ]
    if referer:
        headers.append('Referer: ' + referer)
    return headers