代码
import re
from urllib.parse import urljoin

import requests
# Scrape the "lol" wallpaper listing pages on netbian.com and save every
# referenced .jpg/.png image into the current working directory.

# URL of the first listing page; later pages are derived from it by page_url().
http_url = 'http://www.netbian.com/s/lol/index.htm'

# Request headers sent with every HTTP call (a desktop Firefox User-Agent).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0'
}

# Seconds to wait on a request before giving up, so a stalled server cannot
# hang the script forever.
REQUEST_TIMEOUT = 10

# File extensions (text after the last '.') that are treated as images.
IMAGE_EXTENSIONS = ('jpg', 'png')

# Matches the value of every src="..." attribute in a page.
_SRC_ATTR_RE = re.compile(r'src="(.*?)"')


def page_url(page, base_url=http_url):
    """Return the URL of listing page *page* (1-based).

    Page 1 is *base_url* itself; page N (N >= 2) is obtained by replacing
    ``index`` with ``index_N`` in *base_url*.

    Raises:
        ValueError: if *page* is less than 1.
    """
    if page < 1:
        raise ValueError('page must be >= 1, got %r' % (page,))
    if page == 1:
        return base_url
    return base_url.replace('index', 'index_%d' % page)


def extract_image_urls(html, base_url=http_url):
    """Return absolute URLs of all .jpg/.png ``src`` attributes found in *html*.

    Order of appearance is preserved and duplicates are not removed.
    Relative and protocol-relative ``src`` values are resolved against
    *base_url* so that they can be fetched directly.
    """
    return [
        urljoin(base_url, src)
        for src in _SRC_ATTR_RE.findall(html)
        if src.split('.')[-1] in IMAGE_EXTENSIONS
    ]


def _fetch(url):
    """GET *url*; return the response, or None if the request failed.

    Failures (connection errors, timeouts, HTTP error statuses) are reported
    on stdout and swallowed so that one bad URL does not abort the whole run.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('skipping %s: %s' % (url, exc))
        return None
    return response


def _save_image(url):
    """Download *url* and write it to a file named after its last path segment."""
    file_name = url.split('/')[-1]
    print(file_name)
    response = _fetch(url)
    if response is None:
        return
    with open(file_name, 'wb') as f:
        f.write(response.content)


def main(first_page=1, last_page=5):
    """Download every image linked from listing pages first_page..last_page.

    Both bounds are inclusive. Each distinct image URL is downloaded once,
    even if it appears on several pages.
    """
    seen = set()
    for page in range(first_page, last_page + 1):
        listing_url = page_url(page)
        listing = _fetch(listing_url)
        if listing is None:
            continue
        for image_url in extract_image_urls(listing.text, listing_url):
            if image_url in seen:
                continue
            seen.add(image_url)
            _save_image(image_url)


if __name__ == '__main__':
    main()  # crawl pages 1-5
视频演示