麻豆小视频在线观看_中文黄色一级片_久久久成人精品_成片免费观看视频大全_午夜精品久久久久久久99热浪潮_成人一区二区三区四区

首頁 > 編程 > Python > 正文

Python實現爬取千萬淘寶商品的方法

2019-11-02 14:15:26
字體:
來源:轉載
供稿:網友

   本文實例講述了python實現爬取千萬淘寶商品的方法。分享給大家供大家參考。具體實現方法如下:

  ?

import time
import leveldb
from urllib.parse import quote_plus
import re
import json
import itertools
import sys
import requests
from queue import Queue
from threading import Thread

# Search API template: first slot is the URL-quoted query, second the page number.
URL_BASE = 'http://s.m.taobao.com/search?q={}&n=200&m=api4h5&style=list&page={}'

# Number of attempts made for each result page before the error is propagated.
MAX_FETCH_TRIES = 5


def url_get(url):
    """Fetch *url* with browser-like request headers and return the body as text.

    The request uses a 5 second timeout; any exception raised by
    ``requests.get`` propagates to the caller.
    """
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive',
        'DNT': '1',
        'User-Agent': 'Mozilla/12.0 (compatible; MSIE 8.0; Windows NT)',
    }
    return requests.get(url, timeout=5, headers=header).text


def _get_state(db, key):
    """Return the value stored under *key* in *db*, or None when the key is absent."""
    try:
        return db.Get(key)
    except KeyError:
        # py-leveldb reports a missing key with KeyError; any other error
        # (I/O failure, corruption) is deliberately left to propagate.
        return None


def _fetch_page(url):
    """Download *url* and parse it as JSON, retrying up to MAX_FETCH_TRIES times.

    The exception of the final failed attempt is re-raised unchanged.
    """
    for attempt in range(MAX_FETCH_TRIES):
        try:
            return json.loads(url_get(url))
        except Exception:
            if attempt == MAX_FETCH_TRIES - 1:
                raise


def item_thread(cate_queue, db_cate, db_item):
    """Worker loop: crawl every result page of each category taken from the queue.

    Args:
        cate_queue: queue of category names (str) to crawl.
        db_cate: LevelDB mapping a UTF-8 category name to its state
            (b'waiting', b'crawling' or b'OK').
        db_item: LevelDB mapping an item id (as UTF-8 text) to the item's
            JSON-encoded record.

    A category whose state is already b'OK' is skipped.  Any error while
    crawling a category is printed and the worker moves on; the category keeps
    its b'crawling' state, so cate_thread will queue it again later.
    """
    while True:
        try:
            cate = cate_queue.get()
            cate_key = cate.encode('utf-8')

            if _get_state(db_cate, cate_key) == b'OK':
                # The original message referenced an undefined name ``title``,
                # which raised NameError instead of printing this line.
                print('cate-{}: already exists ... Ignore'.format(cate))
                continue

            db_cate.Put(cate_key, b'crawling')
            for item_page in itertools.count(1):
                url = URL_BASE.format(quote_plus(cate), item_page)
                items_obj = _fetch_page(url)
                items = items_obj['listItem']
                if not items:
                    # An empty page marks the end of the results.
                    break

                for item in items:
                    item_obj = {
                        '_id': int(item['itemNumId']),
                        'name': item['name'],
                        'price': float(item['price']),
                        'query': cate,
                        # An empty category string is stored as 0.
                        'category': int(item['category']) if item['category'] != '' else 0,
                        'nick': item['nick'],
                        'area': item['area'],
                    }
                    db_item.Put(
                        str(item_obj['_id']).encode('utf-8'),
                        json.dumps(item_obj, ensure_ascii=False).encode('utf-8'),
                    )
                print('Get {} items from {}: {}'.format(len(items), cate, item_page))

                # Register newly discovered categories without overwriting the
                # state of ones that are already known.
                if 'nav' in items_obj:
                    for na in items_obj['nav']['navCatList']:
                        nav_key = na['name'].encode('utf-8')
                        if _get_state(db_cate, nav_key) is None:
                            db_cate.Put(nav_key, b'waiting')

            db_cate.Put(cate_key, b'OK')
            print(cate, 'OK')
        except KeyboardInterrupt:
            break
        except Exception as e:
            print('An {} exception occured'.format(e))


def cate_thread(cate_queue, db_cate):
    """Producer loop: every 10 seconds, queue each category whose state is not b'OK'.

    Args:
        cate_queue: queue that receives category names (str).
        db_cate: LevelDB mapping a UTF-8 category name to its state.

    NOTE(review): categories still marked b'crawling' are queued again on every
    pass, so a slow category can be picked up by several workers — confirm
    whether that is the intended retry mechanism.
    """
    while True:
        try:
            for key, value in db_cate.RangeIter():
                if value != b'OK':
                    name = key.decode('utf-8')
                    print('CateThread: put {} into queue'.format(name))
                    cate_queue.put(name)
            time.sleep(10)
        except KeyboardInterrupt:
            break
        except Exception as e:
            print('CateThread: {}'.format(e))


def main():
    """Open both databases, seed the first category and start the crawler threads."""
    db_cate = leveldb.LevelDB('./taobao-cate')
    db_item = leveldb.LevelDB('./taobao-item')

    # Seed the crawl only on the first run; an existing state is left untouched.
    orig_cate = '正裝'
    seed_key = orig_cate.encode('utf-8')
    if _get_state(db_cate, seed_key) is None:
        db_cate.Put(seed_key, b'waiting')

    cate_queue = Queue(maxsize=1000)
    # Daemon threads let Ctrl+C in the main thread end the process; the
    # KeyboardInterrupt handlers inside the workers never fire because Python
    # delivers that exception to the main thread only.
    cate_th = Thread(target=cate_thread, args=(cate_queue, db_cate), daemon=True)
    cate_th.start()
    item_th = [
        Thread(target=item_thread, args=(cate_queue, db_cate, db_item), daemon=True)
        for _ in range(5)
    ]
    for item_t in item_th:
        item_t.start()

    try:
        cate_th.join()
    except KeyboardInterrupt:
        print('Interrupted, exiting')


if __name__ == '__main__':
    main()
發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 看毛片免费 | 视频在线91 | 久久久www视频 | 爱草在线| 精品久久久久久久久久久久久久久久久久久 | 欧美性猛交xxxxx按摩国内 | 97久色 | 色妞欧美 | 中文字幕在线观看亚洲 | 欧美成人免费一级 | 日本黄色免费播放 | 国产色视频免费 | 精品国产一区二区三区四区在线 | av在线免费看片 | 午夜视频成人 | 热re91久久精品国产99热 | 久草视频福利在线观看 | 国产日韩线路一线路二 | 一本一道久久久a久久久精品91 | 亚洲欧美成aⅴ人在线观看 av免费在线播放 | 一本精品999爽爽久久久 | 91成人一区二区三区 | 作爱在线观看 | 黄色电影免费提供 | 精品xxxx户外露出视频 | 久久综合久久美利坚合众国 | 国产精品视频1区 | 黄色大片网 | 免费a级毛片大学生免费观看 | 日本欧美一区 | 亚洲网在线| 亚洲成年人免费网站 | 成人店女老板视频在线看 | 性少妇videosexfreexx入片 | 91精品国产综合久久久动漫日韩 | 国产成人精品二区 | mmmwww| 欧美日韩手机在线观看 | 久久成人视屏 | 国产91九色在线播放 | 亚洲人成免费 |