# Python爬虫利器三之Xpath语法与lxml库的用法 (source: http://cuiqingcai.com/2621.html)
#! /usr/bin/python3
# Demonstrates basic XPath queries against an HTML fragment using lxml.
#
# The fragment is parsed once with the HTML parser, serialized back out so the
# parsed tree can be inspected, and then queried with several XPath
# expressions. Each query result is printed as a list of attribute values.

from lxml import etree

text = '''
<html>
<body><div>
    <ul>
         <li class="item-0"><a href="link1.html">first item</a></li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-inactive"><a href="link3.html"><span class="bold">third item</span></a></li>
         <li class="item-3"><a href="link4.html">fourth item</a></li>
         <li class="item-4"><a href="link5.html">fifth item</a>
     </li></ul>
 </div>
</body>
</html>
'''

# Parse the markup into an element tree.
html = etree.HTML(text)

# tostring() returns bytes; decode so print() shows readable markup instead of
# a bytes repr.
result = etree.tostring(html, pretty_print=True).decode("utf-8")
print(result)

# Absolute path: the class attribute of every <li> under /html/body/div/ul.
# Expected: ['item-0', 'item-1', 'item-inactive', 'item-3', 'item-4']
result = html.xpath('/html/body/div/ul/li/@class')
print(result)

# '//' matches descendants at any depth: the class of any <span> nested
# anywhere inside any <li>.
# Expected: ['bold']
result = html.xpath('//li//span/@class')
print(result)

# XPath positions are 1-based: [1] selects the first <li> among its siblings.
# Expected: ['item-0']
result = html.xpath('/html//li[1]/@class')
print(result)

# last() is the position of the final sibling, so last()-1 is the
# second-to-last <li>.
# Expected: ['item-3']
result = html.xpath('//li[last()-1]/@class')
print(result)

# position() filter: the first three <li> elements.
# Expected: ['item-0', 'item-1', 'item-inactive']
result = html.xpath('//li[position()<=3]/@class')
print(result)