#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2015-01-04 03:35:01
# Project: tutorial_pinterest

from pyspider.libs.base_handler import *


class Handler(BaseHandler):
    """
    This is a sample scrape script for:
    Level 3: Render with PhantomJS
    http://docs.pyspider.org/en/latest/tutorial/Render-with-PhantomJS/#running-javascript-on-page
    """

    def on_start(self):
        """Schedule the crawl of the Pinterest "popular" category page.

        The request uses ``fetch_type='js'`` and passes a JavaScript snippet
        that scrolls the window to the bottom of the document. The fetched
        response is handed to :meth:`index_page`.
        """
        self.crawl('http://www.pinterest.com/categories/popular/',
                   fetch_type='js',
                   js_script=""" function() { window.scrollTo(0,document.body.scrollHeight); } """,
                   callback=self.index_page)

    def index_page(self, response):
        """Build the result record for the fetched page.

        Iterates over every element matching ``.item`` in ``response.doc``
        and keeps only those that contain a ``.pinImg`` element.

        Returns:
            A dict with the page ``url`` and a list of per-item dicts, each
            holding the item's ``title`` text, ``img`` ``src`` attribute and
            ``author`` text.
        """
        return {
            "url": response.url,
            # NOTE(review): this key name is kept verbatim from the original
            # script; it looks unusual -- confirm it is what consumers expect.
            "p_w_picpaths": [{
                "title": x('.richPinGridTitle').text(),
                "img": x('.pinImg').attr('src'),
                "author": x('.creditName').text(),
            } for x in response.doc('.item').items() if x('.pinImg')]
        }
pyspider采集例子(js)
转载上一篇:pyspider采集例子
下一篇:pyspider采集例子
![](https://ucenter.51cto.com/images/noavatar_middle.gif)
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
python协程(asyncio)实现爬虫例子
使用python协程实现异步爬取网站。
python 协程 爬虫