import requests
import re
from urllib import parse
import time
import random
import xlwt

# Scrape JD search results for the keyword "手机" and store one row per
# matched listing (column 0: price text, column 1: model/title text) in an
# xlwt workbook that is saved when all pages have been processed.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('test_sheet', cell_overwrite_ok=True)

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"}
url = 'https://search.jd.com/Search?keyword={}&page={}'

# Seconds to wait for the server before giving up on a page; without a
# timeout a stalled connection would block the script indefinitely.
request_timeout = 10

# Captures (price, title prefix) for each listing. Compiled once here instead
# of being rebuilt from the pattern string for every downloaded page.
item_pattern = re.compile(
    r'<div class="p-price">.*?<em>¥</em><i>(.*?)</i>.*?<em>(.*?)<font class="skcolor_ljg">手机</font>.*?</em>.*?<i class="promo-words"',
    re.S,
)

j = 0  # next spreadsheet row to write
num = 0  # NOTE(review): not used anywhere in the visible code; kept in case other code reads it
sj = parse.quote('手机')
page_end = int(input('抓取到多少页: '))

# The prompt asks "fetch up to which page", so the bound is inclusive
# (the previous exclusive bound fetched nothing when the user entered 1).
# The step of 2 is preserved from the original; presumably the site uses odd
# `page` values for successive result pages -- TODO confirm.
for page in range(1, page_end + 1, 2):
    page_url = url.format(sj, page)
    try:
        response = requests.get(page_url, headers=headers, timeout=request_timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: report the failed page and keep going so the rows
        # collected so far are still written to disk at the end.
        print('page %s failed: %s' % (page, exc))
        html = ''
    else:
        html = response.text

    for price_text, title_text in item_pattern.findall(html):
        try:
            jiage1 = float(price_text)
        except ValueError:
            # The lazy pattern can capture an empty or non-numeric price;
            # skip that listing rather than abort the whole run.
            continue
        jiage2 = '%s 元' % jiage1
        xinghao = title_text.strip()
        # Over-long captures are treated as regex mismatches and dropped.
        if len(xinghao) > 100:
            continue
        sheet.write(j, 0, jiage2)
        sheet.write(j, 1, xinghao)
        j += 1

    # Random pause between pages to avoid hammering the server.
    time.sleep(random.randint(1, 3))

# xlwt produces the legacy binary .xls format, so the file must carry the
# .xls extension; naming it .xlsx yields a file whose extension does not
# match its contents.
book.save('手机.xls')