标签:links 批量 Python self url each images response 下载
"""Batch-download every image referenced by ``<img src=...>`` on a web page.

Usage::

    python <program> -u https://example.com/page.html

Images are saved into the ``images/`` directory, which is created on demand.
"""
from email import header  # noqa: F401  (unused; retained from the original listing)
import optparse
import os
import sys
import time
from urllib import parse

import requests
from lxml import etree


class ImagesDownloader:
    """Fetch one page, collect its ``<img>`` sources and save each to disk."""

    # Seconds to wait on a single HTTP request before giving up, so a stalled
    # server cannot hang the whole run.
    REQUEST_TIMEOUT = 15
    # Pause between consecutive downloads to avoid hammering the server.
    DOWNLOAD_DELAY = 2

    def __init__(self) -> None:
        self.url = self.get_params()
        # NOTE: the attribute name is misspelled ("iamges"); it is kept so
        # that code already reading this attribute continues to work.
        self.iamges_store = 'images/'
        self.header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0'}

    def get_params(self):
        """Return the URL given with ``-u/--url``.

        Prints the usage string and exits with status 1 when it is missing.
        """
        parser = optparse.OptionParser("Usage: <Program> -u url")
        parser.add_option('-u', '--url', dest='url', type='string', help='Specify url')
        options, args = parser.parse_args()
        if options.url is None:
            print(parser.usage)
            # A missing required option is an error, so do not report success.
            sys.exit(1)
        return options.url

    def get_webpage(self, url):
        """Return the decoded body of ``url``.

        Without the page there is nothing to download, so any request
        failure (including an HTTP error status) is printed and the program
        exits with status 1.
        """
        try:
            response = requests.get(url=url, headers=self.header, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as e:
            print(e)
            sys.exit(1)
        return response.text

    def download_image(self, url, filename):
        """Download ``url`` and write its bytes to ``filename``.

        The parent directory of ``filename`` is created if it does not exist.

        Returns:
            True when the file was written, False when the request or the
            write failed. A failure is printed but does not stop the program,
            so one broken link cannot abort the rest of the batch.
        """
        try:
            response = requests.get(url=url, headers=self.header, timeout=self.REQUEST_TIMEOUT)
            # Do not save an HTML error page under an image file name.
            response.raise_for_status()
            directory = os.path.dirname(filename)
            if directory:
                os.makedirs(directory, exist_ok=True)
            with open(filename, 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as e:
            print(e)
            return False
        print("Downloaded image successfully: %s" % os.path.basename(filename))
        return True

    def extract_images_links(self, response):
        """Return the absolute http(s) URLs of all ``<img src>`` in the page.

        Args:
            response: HTML text of the page located at ``self.url``.

        Returns:
            A list of unique URLs in document order. Relative and
            protocol-relative sources are resolved against ``self.url``;
            sources with any other scheme (for example ``data:``) are skipped
            because they cannot be fetched over HTTP.
        """
        try:
            html = etree.HTML(response)
        except (etree.LxmlError, ValueError) as e:
            print(e)
            sys.exit(1)
        if html is None:
            # The parser produced no document (e.g. an empty page).
            return []

        images_links = []
        for each in html.xpath('//img/@src'):
            # urljoin leaves absolute URLs untouched, gives protocol-relative
            # ones ("//host/x.png") the page's own scheme, and resolves
            # relative paths against the page URL.
            absolute = parse.urljoin(self.url, each.strip())
            if parse.urlsplit(absolute).scheme in ('http', 'https'):
                images_links.append(absolute)
        # dict.fromkeys drops duplicates while preserving first-seen order.
        return list(dict.fromkeys(images_links))

    @staticmethod
    def _filename_from_link(link, index):
        """Derive a local file name from the path component of ``link``.

        The query string and fragment are ignored. ``image_<index>`` is used
        when the path has no usable final segment.
        """
        path = parse.unquote(parse.urlsplit(link).path)
        name = os.path.basename(path)
        if name in ('', '.', '..'):
            return "image_%d" % index
        return name

    def run(self):
        """Fetch the page, then download each image it references in turn."""
        response = self.get_webpage(self.url)
        images_links = self.extract_images_links(response)
        total = len(images_links)
        for index, link in enumerate(images_links, start=1):
            print("Download image from : %s" % link)
            filepath = os.path.join(self.iamges_store, self._filename_from_link(link, index))
            self.download_image(link, filepath)
            if index < total:
                # No need to wait after the final download.
                time.sleep(self.DOWNLOAD_DELAY)


if __name__ == "__main__":
    imagedownload = ImagesDownloader()
    imagedownload.run()
标签:links,批量,Python,self,url,each,images,response,下载 来源: https://www.cnblogs.com/jason-huawen/p/16307656.html
本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享; 2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关; 3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关; 4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除; 5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。