import requests
import re
from urllib import parse
import time
import random
import xlwt
# Browser-like User-Agent sent with every search request.
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"}
# Search URL template; the placeholders are the URL-quoted keyword and the page number.
SEARCH_URL = 'https://search.jd.com/Search?keyword={}&page={}'
# Search keyword. The same text is hard-coded inside ITEM_PATTERN below.
KEYWORD = '手机'
# xlwt only writes the legacy binary .xls format, so the file must not be named
# .xlsx (Excel refuses to open a BIFF workbook carrying an .xlsx extension).
OUTPUT_FILE = '手机.xls'
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10
# Titles longer than this are skipped (the lazy regex over-matched).
MAX_TITLE_LENGTH = 100

# Captures (price text, title text preceding the highlighted keyword) for each
# product entry. Compiled once instead of on every page.
ITEM_PATTERN = re.compile(
    r'<div class="p-price">.*?<em>¥</em><i>(.*?)</i>.*?<em>(.*?)<font class="skcolor_ljg">手机</font>.*?</em>.*?<i class="promo-words"',
    re.S,
)


def extract_items(html):
    """Extract ``(price_label, title)`` rows from a search result page.

    Args:
        html: Text of one search result page.

    Returns:
        A list of ``(price_label, title)`` tuples in page order, where
        ``price_label`` is the price parsed as a float followed by ``' 元'``
        and ``title`` is the captured title text with surrounding whitespace
        removed. Entries whose price is not numeric, or whose title is longer
        than ``MAX_TITLE_LENGTH`` characters, are skipped.
    """
    rows = []
    for raw_price, raw_title in ITEM_PATTERN.findall(html):
        try:
            price = float(raw_price)
        except ValueError:
            # A non-numeric price used to abort the whole run; skip the row instead.
            continue
        title = raw_title.strip()
        if len(title) > MAX_TITLE_LENGTH:
            continue
        rows.append((f'{price} 元', title))
    return rows


def main():
    """Prompt for a page limit, scrape the search pages and save a workbook.

    Column 0 of the sheet receives the price label and column 1 the title.
    The workbook is saved even when a request fails part-way through, so rows
    collected so far are not lost; the error itself still propagates.

    Raises:
        ValueError: If the entered page limit is not an integer.
        requests.RequestException: If a page cannot be fetched.
    """
    page_end = int(input('抓取到多少页: '))
    quoted_keyword = parse.quote(KEYWORD)

    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet('test_sheet', cell_overwrite_ok=True)
    row = 0
    try:
        # NOTE(review): the upper bound is exclusive and the step is 2, so an
        # input of N requests page=1, 3, ... < N. Confirm this matches what the
        # prompt is meant to ask for.
        for page in range(1, page_end, 2):
            page_url = SEARCH_URL.format(quoted_keyword, page)
            # Without a timeout a stalled connection would block forever.
            html = requests.get(page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT).text
            for price_label, title in extract_items(html):
                sheet.write(row, 0, price_label)
                sheet.write(row, 1, title)
                row += 1
            # Random 1-3 second pause between page requests.
            time.sleep(random.randint(1, 3))
    finally:
        book.save(OUTPUT_FILE)


if __name__ == "__main__":
    main()