python 成交量数据下载

转载

mob64ca1409d8ea 2024-12-22 17:11:07

文章标签 python 成交量数据下载 python 获取数据字段数据 文章分类 Python 后端开发

背景

今日我的Python心路历程是股票实战之数据可视化（散点图）。

分析

上代码：

# Risk-analysis exercise: scatter plot of daily returns
def operationSNS(comName):
    """Plot a joint scatter of the daily returns of the first two companies.

    For every name in ``comName`` the file ``data/<name>.csv`` (located next
    to this source file) is read and its ``Adj Close`` column is collected;
    the ``Date`` column is taken from the first file only.  The combined
    table is written to ``data/top.csv``, read back with ``Date`` as the
    index, converted to day-over-day percentage changes and displayed with
    seaborn.

    Args:
        comName: list of company/ticker names.  Each needs a matching
            ``<name>.csv`` containing ``Date`` and ``Adj Close`` columns.
            At least two names are required because the plot uses
            ``comName[0]`` and ``comName[1]``.

    Returns:
        None.  The results are the ``top.csv`` file, two printed previews
        and the plot window.
    """
    # The stock CSV files live in the "data" folder beside this file.
    curpath = os.path.join(os.path.dirname(__file__), 'data')

    # Collect one Series per output column and name each price Series after
    # its company up front.  Merging several identically named "Adj Close"
    # columns makes pandas generate clashing "_x"/"_y" suffixes once four or
    # more companies are combined.
    # NOTE(review): rows are matched by position (row index), not by Date;
    # this presumably relies on all files covering the same trading days.
    series = []
    for position, ticker in enumerate(comName):
        # Expected columns: Date, Open, High, Low, Close, Adj Close, Volume
        prices = pd.read_csv(os.path.join(curpath, ticker + '.csv'))
        if position == 0:
            # Only the first company contributes the Date column.
            series.append(prices['Date'])
        series.append(prices['Adj Close'].rename(ticker))
    top_tech_dfAdj = pd.concat(series, axis=1)

    # Write the combined table; this version also writes the row index as
    # the first CSV column.
    topfile_path = os.path.join(curpath, 'top.csv')
    top_tech_dfAdj.to_csv(topfile_path)

    # Because the row index occupies column 0 of the file, Date is column 1.
    # Making Date the index keeps its strings out of pct_change below.  The
    # row-number column stays in the table as an extra data column; it does
    # not affect the plot, which selects the company columns by name.
    top_tech_df = pd.read_csv(topfile_path, index_col=1)
    print(top_tech_df.head())

    # Day-over-day percentage change of every remaining column.
    top_tech_dr = top_tech_df.pct_change()
    print(top_tech_dr.head())

    # x and y must be single column names; keyword arguments work on both
    # old and current seaborn releases.
    sns.jointplot(x=comName[0], y=comName[1], data=top_tech_dr, kind='scatter')

    plt.show()

def main():
    """Run the scatter-plot exercise for a fixed pair of stock tickers."""
    # Other ticker sets that were tried:
    #   ['GOOG', 'AAPL', 'MSFT', 'AMZN', 'FB']
    #   ['600000.SS', '002238.SZ']
    # Author's note: Yahoo's API is international and also covers the
    # Shanghai and Shenzhen markets with slightly different codes, e.g.
    # Shanghai Pudong Development Bank is 600000.SS.  The rule is to append
    # .ss for the Shanghai market and .sz for the Shenzhen market.
    operationSNS(['GOOG', 'AAPL'])

初步效果

运行效果图如下：

python 成交量数据下载_数据

需要说明的一点是，读取数据时要注意参与计算的有效字段从哪一列开始，避免把 Date 等字符串字段读入数据列，导致 pct_change 操作失败。报错信息为：TypeError: unsupported operand type(s) for /: 'str' and 'float'

运行时报错截图如下：

python 成交量数据下载_python 成交量数据下载_02

方案

通过 index_col 把 Date 列设为索引，使参与计算的数据从 Adj Close 列开始就可以了（如果 CSV 中还写入了序号列，需要把它也算进去：序号列是第 0 列，Date 列是第 1 列，因此使用 index_col=1），代码如下：

# Read the merged data back from the CSV file [Adj Close]
    top_tech_df = pd.read_csv(topfile_path, index_col=1)# index_col picks the column used as the index, so the Date column is kept out of the data and the pct_change step below does not fail on it
    print top_tech_df.head()

后来去掉了序号列之后的完整代码为：

# Risk-analysis exercise: scatter plot of daily returns
def operationSNS(comName):
    """Plot a joint scatter of the daily returns of the first two companies.

    For every name in ``comName`` the file ``data/<name>.csv`` (located next
    to this source file) is read and its ``Adj Close`` column is collected;
    the ``Date`` column is taken from the first file only.  The combined
    table is written to ``data/top.csv`` without a row-index column, read
    back with ``Date`` as the index, converted to day-over-day percentage
    changes and displayed with seaborn.

    Args:
        comName: list of company/ticker names.  Each needs a matching
            ``<name>.csv`` containing ``Date`` and ``Adj Close`` columns.
            At least two names are required because the plot uses
            ``comName[0]`` and ``comName[1]``.

    Returns:
        None.  The results are the ``top.csv`` file, three printed previews
        and the plot window.
    """
    # The stock CSV files live in the "data" folder beside this file.
    curpath = os.path.join(os.path.dirname(__file__), 'data')

    # Collect one Series per output column and name each price Series after
    # its company up front.  Merging several identically named "Adj Close"
    # columns makes pandas generate clashing "_x"/"_y" suffixes once four or
    # more companies are combined.
    # NOTE(review): rows are matched by position (row index), not by Date;
    # this presumably relies on all files covering the same trading days.
    series = []
    for position, ticker in enumerate(comName):
        # Expected columns: Date, Open, High, Low, Close, Adj Close, Volume
        prices = pd.read_csv(os.path.join(curpath, ticker + '.csv'))
        if position == 0:
            # Only the first company contributes the Date column.
            series.append(prices['Date'])
        series.append(prices['Adj Close'].rename(ticker))
    top_tech_dfAdj = pd.concat(series, axis=1)

    print(top_tech_dfAdj.head())

    # index=False keeps the row index out of the file, so Date is the first
    # CSV column (header=False would additionally drop the column names).
    topfile_path = os.path.join(curpath, 'top.csv')
    top_tech_dfAdj.to_csv(topfile_path, index=False)

    # Making Date (column 0) the index keeps its strings out of pct_change.
    top_tech_df = pd.read_csv(topfile_path, index_col=0)
    print(top_tech_df.head())

    # Day-over-day percentage change of every company column.
    top_tech_dr = top_tech_df.pct_change()
    print(top_tech_dr.head())

    # Keyword arguments work on both old and current seaborn releases;
    # current releases no longer accept x, y and data positionally.
    sns.jointplot(x=comName[0], y=comName[1], data=top_tech_dr, kind='scatter')

    plt.show()