最近对网络爬虫非常感兴趣,想学习下Python,笔者本身是做C++开发的,故需要在VS平台下,配置Python开发环境,用C++调用Python。
安装Python
- 去官网下载最新Python安装包,默认安装即可。
- 将python.exe所在路径,添加到系统环境变量path中,我的路径:D:\Program Files (x86)\Python38。
- 找到pip所在目录,一般为Scripts,添加到系统环境变量path中,我的路径:D:\Program Files (x86)\Python38\Scripts。
配置开发环境
1. 打开Python安装目录,拷贝include和libs到项目中,即代码目录
2. 设置头文件路径及库路径
3. 添加Lib文件
4. 拷贝dll到Debug(或Release)目录
运行Hello, World!
环境配置好了,运行一下最简单的python代码:print('hello word')
#include <python.h>
// Minimal embedding example: start the interpreter, run a single Python
// statement, then shut the interpreter down.
// Returns 0 on success and -1 if the interpreter failed to initialize.
int main(int argc, char** argv)
{
    // Start the embedded interpreter.
    Py_Initialize();

    // Bail out if initialization did not succeed.
    const bool interpreterReady = Py_IsInitialized() != 0;
    if (!interpreterReady) {
        return -1;
    }

    // Execute one line of Python source.
    const char* script = "print('hello word')";
    PyRun_SimpleString(script);

    // Shut down Python.
    Py_Finalize();
    return 0;
}
第一个爬虫程序
学习Python的目的,源于一篇博文,附上链接:用Python爬取历年基金数据。现用C++调用其Python代码实现其功能。
C++代码
#include <python.h>
int main(int argc, char** argv)
{
Py_Initialize();
// 检查初始化是否成功
if (!Py_IsInitialized()) {
return -1;
}
PyRun_SimpleString("import sys");
PyRun_SimpleString("print('---import sys---')");
PyRun_SimpleString("sys.path.append('./')");
PyObject *pModule, *pDict, *pFunc, *pArgs;
// 载入脚本
pModule = PyImport_ImportModule("getts");
if (!pModule) {
printf("can't find gett.py");
getchar();
return -1;
}
pDict = PyModule_GetDict(pModule);
if (!pDict) {
return -1;
}
pFunc = PyDict_GetItemString(pDict, "getAllCode");
if (!pFunc || !PyCallable_Check(pFunc)) {
printf("can't find function [update]");
getchar();
return -1;
}
pArgs = PyTuple_New(0);
PyTuple_SetItem(pArgs, 0, Py_BuildValue(""));
PyObject_CallObject(pFunc, pArgs);
// 关闭Python
Py_Finalize();
return 0;
}
Python代码
import requests
import time
import execjs
'''
更多Python学习资料以及源码教程资料,可以在群821460695 免费获取
'''
def getUrl(fscode: str) -> str:
    """Build the URL of the per-fund data script for *fscode*.

    The current local time, formatted as ``YYYYmmddHHMMSS``, is appended as
    the ``v`` query parameter.

    Args:
        fscode: Fund code as a string; it is concatenated into the URL as-is.

    Returns:
        The full URL string.
    """
    head = 'http://fund.eastmoney.com/pingzhongdata/'
    tail = '.js?v=' + time.strftime("%Y%m%d%H%M%S", time.localtime())
    return head + fscode + tail
# Fetch the net-worth history for a fund code.
def getWorth(fscode: str):
    """Download one fund's data script and extract its net-worth series.

    The script at ``getUrl(fscode)`` is compiled with ``execjs`` and the
    variables ``fS_name``, ``fS_code``, ``Data_netWorthTrend`` and
    ``Data_ACWorthTrend`` are evaluated from it. The fund name and code are
    printed as a progress indication.

    Args:
        fscode: Fund code string used to build the request URL.

    Returns:
        A ``(netWorth, ACWorth)`` tuple of lists holding the unit net worth
        values and the accumulated net worth values, each in the reverse of
        the order in which the script lists them.
    """
    content = requests.get(getUrl(fscode))
    jsContent = execjs.compile(content.text)
    name = jsContent.eval('fS_name')
    code = jsContent.eval('fS_code')
    # Unit net worth trend; each entry is read by its 'y' key.
    netWorthTrend = jsContent.eval('Data_netWorthTrend')
    # Accumulated net worth trend; each entry is read by index 1.
    ACWorthTrend = jsContent.eval('Data_ACWorthTrend')
    netWorth = [dayWorth['y'] for dayWorth in netWorthTrend[::-1]]
    ACWorth = [dayACWorth[1] for dayACWorth in ACWorthTrend[::-1]]
    print(name, code)
    return netWorth, ACWorth
def getAllCode() -> list:
    """Return the codes of all funds listed in the site's search index.

    Downloads ``fundcode_search.js``, compiles it with ``execjs``, evaluates
    its variable ``r`` and collects the first field of every entry.

    Returns:
        A list with the first field of each entry of ``r``.
    """
    url = 'http://fund.eastmoney.com/js/fundcode_search.js'
    content = requests.get(url)
    jsContent = execjs.compile(content.text)
    rawData = jsContent.eval('r')
    return [code[0] for code in rawData]
# Crawl every fund and write one CSV row per fund to netWorth.csv and
# ACWorth.csv: the quoted fund code followed by its values.
# NOTE: this runs at module level, i.e. also whenever the module is imported.
allCode = getAllCode()
# The with-statement closes both files even if an error interrupts the loop.
with open('./netWorth.csv', 'w') as netWorthFile, \
        open('./ACWorth.csv', 'w') as ACWorthFile:
    for code in allCode:
        try:
            netWorth, ACWorth = getWorth(code)
        except Exception as err:
            # Best effort: skip funds that fail to download or parse, but
            # say so, and let KeyboardInterrupt/SystemExit propagate.
            print("fetch " + code + "'s data failed: " + str(err))
            continue
        # Skip the fund when either series is empty.
        if not netWorth or not ACWorth:
            print(code+"'s' data is empty.")
            continue
        netWorthFile.write("\'"+code+"\',")
        netWorthFile.write(",".join(map(str, netWorth)))
        netWorthFile.write("\n")
        ACWorthFile.write("\'"+code+"\',")
        ACWorthFile.write(",".join(map(str, ACWorth)))
        ACWorthFile.write("\n")
        print("write "+code+"'s data success.")
运行结果
注:若运行过程提示:PyImport_ImportModule返回NULL,请参考:路径对了,PyImport_ImportModule为啥还是返回NULL呢?