看到群友一个美图站,于是整了个爬虫爬了下来
运行环境: python3
#!/usr/bin/env python3
"""Scrape the image posts of a7a7.net and save every picture to disk.

Posts are fetched by numeric archive id.  The images of one post are written
below the current working directory, in a sub-folder of ``阿七美图馆`` named
after text taken from the post page, one ``.jpg`` file per image title.
"""
import os
import sys
from urllib.parse import urljoin

import requests
from lxml import etree

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/71.0.3578.98 Safari/537.36"
    ),
}

# Seconds to wait for the server before giving up on a request; without a
# timeout a single stalled connection would block the whole crawl.
REQUEST_TIMEOUT = 30


def _safe_component(text):
    """Return *text* made safe to use as a single path component.

    The value comes from the remote page, so path separators are replaced
    and the special names ``.`` and ``..`` are neutralised; otherwise a
    title could make ``open`` fail or point outside the download folder.
    """
    cleaned = text
    for separator in ("/", "\\"):
        cleaned = cleaned.replace(separator, "_")
    if cleaned in (".", ".."):
        cleaned = cleaned.replace(".", "_")
    return cleaned


def get_requests(url):
    """Download every image referenced by the post page at *url*.

    Args:
        url: Address of one post page.

    Raises:
        requests.RequestException: The post page could not be fetched or
            answered with an HTTP error status.
        UnicodeDecodeError: The page body is not valid UTF-8.
        OSError: An image file could not be written.

    A failure to download one individual image is reported on stderr and
    that image is skipped; the remaining images are still processed.
    """
    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    page = etree.HTML(response.content.decode())
    if page is None:
        # Nothing parseable came back, so there are no images to fetch.
        return

    img_list = page.xpath("//div[@class='post row']/div/img/@data-original")
    name_list = page.xpath("//div[@class='post row']/div/img/@title")
    folder = ''.join(
        page.xpath("//div[@class='post-info']/div[1]/span[3]/text()")
    )
    path = os.path.join(os.getcwd(), '阿七美图馆', _safe_component(folder))

    for img, name in zip(img_list, name_list):
        # Resolve protocol-relative ("//host/x.jpg") and relative addresses
        # against the page URL; absolute addresses pass through unchanged.
        img_url = urljoin(url, img)
        try:
            img_response = requests.get(
                url=img_url, headers=HEADERS, timeout=REQUEST_TIMEOUT
            )
            img_response.raise_for_status()
        except requests.RequestException as exc:
            print("skipped image %s: %s" % (img_url, exc), file=sys.stderr)
            continue

        print("下载的图像: %s 路径: %s" % (name, img_url))
        # Created lazily so that a post without any downloadable image does
        # not leave an empty folder behind.
        os.makedirs(path, exist_ok=True)
        file_path = os.path.join(path, _safe_component(name) + '.jpg')
        with open(file_path, 'wb') as file:
            file.write(img_response.content)


def main():
    """Walk archive ids 3 to 999 and download the images of each post."""
    for item in range(3, 1000):
        url = "http://a7a7.net/index.php/archives/{}/".format(item)
        try:
            get_requests(url)
        except Exception as exc:
            # Best-effort crawl: a missing or broken post must not stop the
            # run.  Unlike a bare ``except`` this lets Ctrl-C through and
            # reports what was skipped.
            print("skipped page %s: %s" % (url, exc), file=sys.stderr)


if __name__ == '__main__':
    main()
本文作者为远梦,转载请注明。