# Fetch the home page of www.lctvu.sd.cn and print the text nodes of the
# list items selected by the XPath expression below.
import urllib.parse
import urllib.request

from lxml import etree

# Browser-like User-Agent sent with the request.
header = {'User-Agent':' Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.50'}

# data=None means no request body is sent, i.e. a plain read-only GET.
# The headers are attached here; the original script built `header` but
# never passed it to the request.
request = urllib.request.Request(
    "http://www.lctvu.sd.cn",
    data=None,
    headers=header,
)

# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(request) as response:
    # Read the response body, decode it as UTF-8 and keep it in `html`.
    html = response.read().decode("UTF-8")

# Parse the markup into an element tree that can be queried with XPath.
selector = etree.HTML(html)

# NOTE(review): xpath() presumably already returns a list of text nodes, so
# the surrounding brackets produce a one-element list holding that list.
# Kept as-is so the printed output is unchanged -- confirm whether the
# nesting is intentional.
content = [selector.xpath('//*[@id="conter"]/div[3]/div[2]/div[2]/div[2]/*/li//text()')]
print(content)
# Project download (项目下载)