# -*- coding: utf-8 -*-
"""Fetch an HTML page and print the download links and labels found in it."""
from contextlib import closing

try:  # Python 2
    import urllib2
except ImportError:  # Python 3: same ``urlopen`` API under a new module name
    import urllib.request as urllib2

import lxml.html as H


def getjarinfo(url):
    """Print link/label triples scraped from *url* and return a description.

    The page is downloaded and parsed with lxml, then three element lists
    are collected:

    * ``<a>`` elements at ``tr[2]/td[1]/a[1]`` inside ``table#download``
    * ``<td id="music">`` elements
    * ``<div id="game">`` elements

    The lists are walked in lockstep; for each position the link's ``href``
    and the text content of the two label elements are printed, one per
    line.  Iteration stops at the shortest list, so pages where the lists
    differ in length no longer raise ``IndexError``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text content of the element that follows the first ``<div>``
        whose text is exactly u"游戏" ("game"), or ``None`` when no such
        div exists or it has no following sibling.

    Raises:
        Whatever ``urlopen`` raises for network failures and whatever lxml
        raises for unparseable content; neither is caught here.
    """
    # ``closing`` releases the connection on both Python 2 and 3.
    with closing(urllib2.urlopen(url)) as response:
        page = response.read()
    doc = H.document_fromstring(page)

    jars = doc.xpath("//table[@id='download']//tr[2]/td[1]/a[1]")
    pinpais = doc.xpath("//td[@id='music']")
    jixings = doc.xpath("//div[@id='game']")

    # Single-argument print(...) behaves the same on Python 2 and 3.
    for jar, pinpai, jixing in zip(jars, pinpais, jixings):
        print(jar.get('href'))
        print(pinpai.text_content())
        print(jixing.text_content())

    labels = doc.xpath(u"//div[text()='%s']" % u"游戏")
    if not labels:
        return None
    # getnext() returns None when the label is the last child of its parent.
    following = labels[0].getnext()
    if following is None:
        return None
    return following.text_content()


if __name__ == '__main__':
    url = 'http://google.com/'
    getjarinfo(url)
Recent Comments