# Scraper for Guazi used-car listings (瓜子二手车): walks the Beijing ('bj')
# listing pages and writes model, registration date, mileage, and price to a CSV.
import requests
import time
import re
import csv
import random
from fake_useragent import UserAgent

# Open the output CSV up front; the writer is shared by the spider below.
# Header columns: model, registration date, displayed mileage, price (10k CNY).
f = open('瓜子二手车.csv', 'w', encoding='utf-8', newline='')
writer = csv.writer(f)
writer.writerow(['车型', '上牌时间', '表显里程', '价格/万'])

class CarSpider:
    def __init__(self):
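        # Listing URL template: '{}' takes the page number; the trailing 'r8'
        # appears to be a fixed filter segment carried over from the original URL.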
        self.url = 'http://www.guazi.com/bj/buy/o{}r8'
        self.ua = UserAgent()  # build once and reuse; creating it per request is slow

    def get_url(self, choose):
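        # Compose the listing-page URLs for pages 1..choose; no requests are made here.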
        urls = []
        for page in range(1, choose + 1):
            urls.append(self.url.format(page))
        return urls

    def save_one_car_info(self, urls):
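        # Fetch each listing page and pull (model, registration date, mileage,
        # price) out of the HTML with the regex below, appending a CSV row per car.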
        for url in urls:
            headers = {"User-Agent": self.ua.random}  # rotate the UA per request
            time.sleep(random.uniform(1, 2))  # randomized delay to avoid hammering the site
            html = requests.get(url, headers=headers, timeout=10).content.decode('utf-8')
            car_infos = re.findall('<li data-scroll-track=.*?<a title="(.*?)".*?<div class="t-i">(.*?)<span class="icon-pad">.*?</span>(.*?)<span.*?>.*?<p>(.*?)<span>万</span>.*?</p>', html, re.S)
            for car_name, car_year, car_km, car_money in car_infos:
                print(car_name, car_year, car_km, car_money)  # progress echo
                writer.writerow([car_name, car_year, car_km, car_money])

    def run_main(self):
        choose = int(input('Scrape through which page (starting from page 1): '))
        urls = self.get_url(choose)
        self.save_one_car_info(urls)

if __name__ == '__main__':
    spider = CarSpider()
    spider.run_main()
    f.close()
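
# A quick offline sanity check for the extraction pattern, run against a
# hypothetical snippet shaped the way the regex expects (the live guazi.com
# markup may differ), with `pattern` set to the regex string used in
# save_one_car_info:
#
#   sample = ('<li data-scroll-track=1><a title="大众 朗逸 2017款">'
#             '<div class="t-i">2017-05<span class="icon-pad">|</span>'
#             '3.2万公里<span class="icon-pad">|</span></div>'
#             '<p>8.50<span>万</span></p></li>')
#   re.findall(pattern, sample, re.S)
#   # -> [('大众 朗逸 2017款', '2017-05', '3.2万公里', '8.50')]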