环境使用:
Python 3.8 解释器
PyCharm 编辑器
模块使用:
第三方模块 需要安装
requests —> 发送 HTTP请求
内置模块 不需要安装
csv —> 数据处理中经常会用到的一种文件格式
第三方模块安装:
按 Win + R, 输入 cmd 打开命令行, 然后输入安装命令: pip install 模块名 (如果你觉得安装速度比较慢, 可以切换国内镜像源, 例如: pip install 模块名 -i https://pypi.tuna.tsinghua.edu.cn/simple)
基本流程思路:
一. 数据来源分析
明确需求
明确采集网站以及数据
数据: 商品信息
抓包分析 --> 通过浏览器自带工具: 开发者工具
打开开发者工具: 按 F12, 或者在页面上右键点击选择"检查", 然后切换到 Network (网络) 面板
刷新网页: 让网页数据重新加载一遍
搜索关键字: 搜索数据在哪里
找到数据包: 50条商品数据信息
整页数据内容: 120条 --> 分成三个数据包
前50条数据 --> 前50个商品ID
中50条数据 --> 中50个商品ID
后20条数据 --> 后20个商品ID
已知: 数据分为三组 --> 对比三组数据包请求参数变化规律
请求参数变化规律: 商品ID
分析找一下 是否存在一个数据包, 包含所有商品ID
如果想要获取商品信息 --> 需要先获取所有商品ID --> 所有商品ID保存在另一个数据包中, 需要先找到并请求它
二. 代码实现步骤: 发送请求 -> 获取数据 -> 解析数据 -> 保存数据
第一次请求 --> 获取商品ID
发送请求, 模拟浏览器对于url地址发送请求
请求链接: 商品ID数据
获取数据, 获取服务器返回响应数据
开发者工具: response
解析数据, 提取我们想要的数据内容
商品ID
第二次请求 --> 获取商品信息
发送请求, 模拟浏览器对于url地址发送请求
请求链接: 商品信息数据包
获取数据, 获取服务器返回响应数据
开发者工具: response
解析数据, 提取我们想要的数据内容
商品信息
保存数据, 把信息保存到本地的 csv 表格文件中
多页数据采集
数据展示: 点击此处跳转获取测试链接
{
"item": {
"num_iid": "652874751412",
"title": "奶油风布艺沙发现代简约轻奢小户型客厅直排可拆洗沙发原木可定制",
"desc_short": "",
"price": 480,
"total_price": "",
"suggestive_price": "",
"orginal_price": 480,
"nick": "惜情yqq1127",
"num": 1600,
"detail_url": "https://item.taobao.com/item.htm?id=652874751412",
"pic_url": "//gd1.alicdn.com/imgextra/i4/2568161054/O1CN01aYBriY1Jem9UDtt9e_!!2568161054.jpg",
"brand": "#0 工厂",
"brandId": "",
"rootCatId": "",
"cid": 50020632,
"desc": "<div > \n <div >\n <img src=\"http://img.alicdn.com/imgextra/i3/2568161054/O1CN01LFmSOU1Jem9QOjMPb_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i3/2568161054/O1CN014vyOOT1Jem9DpHz3Y_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i1/2568161054/O1CN01B3PpsA1Jem9N8V7uf_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i2/2568161054/O1CN015JbyeY1Jem9MZshUt_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i1/2568161054/O1CN01HXSoxx1Jem9RvgzHN_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i3/2568161054/O1CN01IEultA1Jem9MdEx8R_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i3/2568161054/O1CN0176K98O1Jem9QOjE69_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i4/2568161054/O1CN013Pxp1O1Jem9RvgeTv_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i1/2568161054/O1CN01SfyZ8M1Jem9QOi1Gx_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i4/2568161054/O1CN01bb1POa1Jem9Sdgve2_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i3/2568161054/O1CN018Eo9dV1Jem9KV0y79_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i1/2568161054/O1CN01vuEofr1Jem9Nzy9xY_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i4/2568161054/O1CN01qw9sAi1Jem8wkNKpy_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i1/2568161054/O1CN01HeFhFw1Jem8rLnjBY_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i2/2568161054/O1CN01SNgjoi1Jem9QOil15_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i2/2568161054/O1CN01RXf3RA1Jem9DpHVwj_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i4/2568161054/O1CN01gZmZjt1Jem9ISThgm_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i2/2568161054/O1CN01YL0FHM1Jem9PQTjX9_!!2568161054.jpg\" />\n <img src=\"http://img.alicdn.com/imgextra/i4/2568161054/O1CN01UhsEhZ1Jem8yvJIhZ_!!2568161054.jpg\" 
/>\n </div> \n </div><img src=\"https://www.o0b.cn/i.php?t.png&rid=gw-1.65dec45204283&p=1778787400&k=81159&t=1709098066\" style=\"display:none\" />",
"item_imgs": [
{
"url": "//gd1.alicdn.com/imgextra/i4/2568161054/O1CN01aYBriY1Jem9UDtt9e_!!2568161054.jpg"
},
{
"url": "//gd3.alicdn.com/imgextra/i3/2568161054/O1CN01kjOfNb1Jem9DmWn8Y_!!2568161054.jpg"
},
{
"url": "//gd1.alicdn.com/imgextra/i1/2568161054/O1CN01HoB9ha1Jem9DmWn8r_!!2568161054.jpg"
},
{
"url": "//gd4.alicdn.com/imgextra/i4/2568161054/O1CN011PjP2P1Jem9MXEUFT_!!2568161054.jpg"
},
{
"url": "//gd3.alicdn.com/imgextra/i3/2568161054/O1CN01KUfBFL1Jem9KTTMn1_!!2568161054.jpg"
}
],
"item_weight": "",
"post_fee": "",
"freight": "",
"express_fee": "",
"ems_fee": "",
"shipping_to": "",
"video": {
"url": "http://cloud.video.taobao.com/play/u/p/1/e/6/t/1/428224913062.mp4"
},
"sample_id": "",
"props_name": "31480:14306495906:几人坐:脚踏90*60*48cm;31480:14306495907:几人坐:双人165*95*67cm;31480:14306495908:几人坐:三人210*95*67cm;31480:14306495909:几人坐:单人100*95*67cm;31480:21480914361:几人坐:四人位240*95*67cm;31480:21480914362:几人坐:大四人320*95*76cm;31480:1387571900:几人坐:3米贵妃沙发;31480:32527954:几人坐:定制尺寸;1627207:28321:颜色分类:乳白色 尺寸颜色可定制;1627207:28321:颜色分类:乳白色 尺寸颜色可定制;1627207:28321:颜色分类:乳白色 尺寸颜色可定制;1627207:28321:颜色分类:乳白色 尺寸颜色可定制;1627207:28321:颜色分类:乳白色 尺寸颜色可定制;1627207:28321:颜色分类:乳白色 尺寸颜色可定制;1627207:28321:颜色分类:乳白色 尺寸颜色可定制;1627207:28321:颜色分类:乳白色 尺寸颜色可定制",
"prop_imgs": {
"prop_img": [
{
"properties": "1627207:28321",
"url": "//gd4.alicdn.com/imgextra/i1/2568161054/O1CN017GTZ4h1Jem9Qra1ap_!!2568161054.jpg"
}
]
},
"props_imgs": {
"prop_img": [
{
"properties": "1627207:28321",
"url": "//gd4.alicdn.com/imgextra/i1/2568161054/O1CN017GTZ4h1Jem9Qra1ap_!!2568161054.jpg"
}
]
},
"property_alias": "",
"props": [
{
"name": "品牌",
"value": "#0 工厂"
},
{
"name": "型号",
"value": "520"
},
{
"name": "材质",
"value": "木"
},
{
"name": "木质材质",
"value": "松木"
},
{
"name": "面料",
"value": "绒布"
},
{
"name": "风格",
"value": "北欧"
},
{
"name": "几人坐",
"value": "脚踏90*60*48cm 双人165*95*67cm 三人210*95*67cm 单人100*95*67cm 四人位240*95*67cm 大四人320*95*76cm 3米贵妃沙发 定制尺寸"
},
{
"name": "颜色分类",
"value": "乳白色"
},
{
"name": "填充物",
"value": "海绵"
},
{
"name": "结构工艺",
"value": "木质工艺"
},
{
"name": "是否可定制",
"value": "是"
},
{
"name": "沙发组合形式",
"value": "U形"
},
{
"name": "是否可拆洗",
"value": "是"
},
{
"name": "适用对象",
"value": "成年人"
},
{
"name": "是否带储物空间",
"value": "否"
},
{
"name": "产地",
"value": "上海"
},
{
"name": "地市",
"value": "上海市"
},
{
"name": "区县",
"value": "奉贤区"
},
{
"name": "是否组装",
"value": "否"
},
{
"name": "出租车是否可运输",
"value": "否"
},
{
"name": "填充物硬度",
"value": "软"
},
{
"name": "款式定位",
"value": "经济型"
}
],
"total_sold": "-1",
"skus": {
"sku": [
{
"price": 480,
"total_price": 0,
"orginal_price": 480,
"properties": "31480:14306495906;1627207:28321",
"properties_name": "31480:14306495906:几人坐:脚踏90*60*48cm;1627207:28321:颜色分类:乳白色 尺寸颜色可定制",
"quantity": 200,
"sku_id": "4881047531343"
},
{
"price": 1688,
"total_price": 0,
"orginal_price": 1688,
"properties": "31480:14306495907;1627207:28321",
"properties_name": "31480:14306495907:几人坐:双人165*95*67cm;1627207:28321:颜色分类:乳白色 尺寸颜色可定制",
"quantity": 200,
"sku_id": "4881047531344"
},
{
"price": 2088,
"total_price": 0,
"orginal_price": 2088,
"properties": "31480:14306495908;1627207:28321",
"properties_name": "31480:14306495908:几人坐:三人210*95*67cm;1627207:28321:颜色分类:乳白色 尺寸颜色可定制",
"quantity": 200,
"sku_id": "4881047531345"
},
{
"price": 968,
"total_price": 0,
"orginal_price": 968,
"properties": "31480:14306495909;1627207:28321",
"properties_name": "31480:14306495909:几人坐:单人100*95*67cm;1627207:28321:颜色分类:乳白色 尺寸颜色可定制",
"quantity": 200,
"sku_id": "4881047531346"
},
{
"price": 2388,
"total_price": 0,
"orginal_price": 2388,
"properties": "31480:21480914361;1627207:28321",
"properties_name": "31480:21480914361:几人坐:四人位240*95*67cm;1627207:28321:颜色分类:乳白色 尺寸颜色可定制",
"quantity": 200,
"sku_id": "5039985183001"
},
{
"price": 3188,
"total_price": 0,
"orginal_price": 3188,
"properties": "31480:21480914362;1627207:28321",
"properties_name": "31480:21480914362:几人坐:大四人320*95*76cm;1627207:28321:颜色分类:乳白色 尺寸颜色可定制",
"quantity": 200,
"sku_id": "5039985183002"
},
{
"price": 3400,
"total_price": 0,
"orginal_price": 3400,
"properties": "31480:1387571900;1627207:28321",
"properties_name": "31480:1387571900:几人坐:3米贵妃沙发;1627207:28321:颜色分类:乳白色 尺寸颜色可定制",
"quantity": 200,
"sku_id": "5039984824000"
},
{
"price": 3000,
"total_price": 0,
"orginal_price": 3000,
"properties": "31480:32527954;1627207:28321",
"properties_name": "31480:32527954:几人坐:定制尺寸;1627207:28321:颜色分类:乳白色 尺寸颜色可定制",
"quantity": 200,
"sku_id": "5039985183003"
}
]
},
"seller_id": "2568161054",
"sales": 0,
"shop_id": "567158267",
"props_list": {
"31480:14306495906": "几人坐:脚踏90*60*48cm",
"31480:14306495907": "几人坐:双人165*95*67cm",
"31480:14306495908": "几人坐:三人210*95*67cm",
"31480:14306495909": "几人坐:单人100*95*67cm",
"31480:21480914361": "几人坐:四人位240*95*67cm",
"31480:21480914362": "几人坐:大四人320*95*76cm",
"31480:1387571900": "几人坐:3米贵妃沙发",
"31480:32527954": "几人坐:定制尺寸",
"1627207:28321": "颜色分类:乳白色 尺寸颜色可定制"
},
"seller_info": {
"nick": "惜情yqq1127",
"item_score": 5,
"score_p": 5,
"delivery_score": 5,
"shop_type": "",
"user_num_id": "2568161054",
"sid": null,
"title": "",
"zhuy": "https://shop567158267.taobao.com",
"cert": null,
"open_time": "",
"credit_score": "tb-rank-blue:4",
"shop_name": "现代布艺沙发"
},
"tmall": false,
"error": "",
"location": null,
"data_from": "ha",
"has_discount": "false",
"is_promotion": "false",
"promo_type": null,
"props_img": {
"1627207:28321": "//gd4.alicdn.com/imgextra/i1/2568161054/O1CN017GTZ4h1Jem9Qra1ap_!!2568161054.jpg"
},
"format_check": "ok",
"desc_img": [
"http://img.alicdn.com/imgextra/i3/2568161054/O1CN01LFmSOU1Jem9QOjMPb_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i3/2568161054/O1CN014vyOOT1Jem9DpHz3Y_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i1/2568161054/O1CN01B3PpsA1Jem9N8V7uf_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i2/2568161054/O1CN015JbyeY1Jem9MZshUt_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i1/2568161054/O1CN01HXSoxx1Jem9RvgzHN_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i3/2568161054/O1CN01IEultA1Jem9MdEx8R_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i3/2568161054/O1CN0176K98O1Jem9QOjE69_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i4/2568161054/O1CN013Pxp1O1Jem9RvgeTv_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i1/2568161054/O1CN01SfyZ8M1Jem9QOi1Gx_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i4/2568161054/O1CN01bb1POa1Jem9Sdgve2_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i3/2568161054/O1CN018Eo9dV1Jem9KV0y79_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i1/2568161054/O1CN01vuEofr1Jem9Nzy9xY_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i4/2568161054/O1CN01qw9sAi1Jem8wkNKpy_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i1/2568161054/O1CN01HeFhFw1Jem8rLnjBY_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i2/2568161054/O1CN01SNgjoi1Jem9QOil15_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i2/2568161054/O1CN01RXf3RA1Jem9DpHVwj_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i4/2568161054/O1CN01gZmZjt1Jem9ISThgm_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i2/2568161054/O1CN01YL0FHM1Jem9PQTjX9_!!2568161054.jpg",
"http://img.alicdn.com/imgextra/i4/2568161054/O1CN01UhsEhZ1Jem8yvJIhZ_!!2568161054.jpg"
],
"shop_item": [],
"relate_items": []
},
"error": "",
"secache": "4ad7ad2480af253fec9c2fd4daa266bb",
"secache_time": 1709098066,
"secache_date": "2024-02-28 13:27:46",
"translate_status": "",
"translate_time": 0,
"language": {
"default_lang": "cn",
"current_lang": "cn"
},