准备:

import openpyxl
import xlrd
import pandas as pd
import numpy as np

readbook = xlrd.open_workbook(r'E:\Users\lxr\Documents\公司\钉钉\工资模板替换财务工资表.xlsx')
sheet = readbook.sheet_by_index(0)
nrows = sheet.nrows#行
ncols = sheet.ncols#列

Pathresult = r'E:\Users\lxr\Documents\公司\钉钉\工资整理结果.xlsx'
Path2 = r'E:\Users\lxr\Documents\公司\钉钉\merge结果.xlsx'

xlrd读取数据源表(工资模板替换财务工资表.xlsx)的相关数据

columnheads = ["姓名", "应发工资合计", "养老保险", "医疗保险", "失业保险", "住房公积金"]
dfsalasy = pd.DataFrame(columns = columnheads)

for i in range(0, nrows):#
    if sheet.cell(i,0).ctype == 2:
        name = sheet.cell(i,4).value
        salary = sheet.cell(i,15).value
        endowment = sheet.cell(i,19).value
        medical = sheet.cell(i,21).value
        unemployment = sheet.cell(i,20).value
        housing = sheet.cell(i,23).value
        s2 = pd.Series([name, salary, endowment, medical, unemployment, housing],index = columnheads)
        dfsalasy = dfsalasy.append(s2, ignore_index=True)

#dfsalasy.set_index(["姓名"], inplace=True)
dfsalasy.to_excel(Pathresult)

将上步读取到的数据按表2(工资导出模板.xls)格式写入目标表

#df=pd.read_excel(r'E:\Users\lxr\Documents\公司\钉钉\工资导出模板.xls',index_col=u'姓名')
df=pd.read_excel(r'E:\Users\lxr\Documents\公司\钉钉\工资导出模板.xls')
#dfmg = pd.merge(df,dfsalasy,how='outer',left_index=True,right_index=True)
# 参数on='姓名',表示已两张表中共有的“姓名”列为合并条件。
dfmg = pd.merge(df,dfsalasy,on='姓名',how='outer')
colname = ["本期收入","基本养老保险费","基本医疗保险费","失业保险费","住房公积金_x"]
order = ["工号","姓名","证照类型","证照号码","所得期间起","所得期间止","应发工资合计", "本期免税收入","养老保险","医疗保险","失业保险","住房公积金_y","累计子女教育","累计继续教育","累计住房贷款利息","累计住房租金","累计赡养老人","企业(职业)年金","商业健康保险","税延养老保险","其他","准予扣除的捐赠额","税前扣除项目合计","减免税额","减除费用标准","已缴税额","备注"]
dfmg = dfmg[order]
df["证照号码"] = df["证照号码"].astype(str)
dfmg.columns=["工号","姓名","证照类型","证照号码","所得期间起","所得期间止","本期收入","本期免税收入","基本养老保险费","基本医疗保险费","失业保险费","住房公积金","累计子女教育","累计继续教育","累计住房贷款利息","累计住房租金","累计赡养老人","企业(职业)年金","商业健康保险","税延养老保险","其他","准予扣除的捐赠额","税前扣除项目合计","减免税额","减除费用标准","已缴税额","备注"]
dfmg.to_excel(Path2)

其中使用的操作:

1、pd.merge(df,dfsalasy,on='姓名',how='outer')

参数on='姓名',表示已两张表中共有的“姓名”列为合并条件。

参数how='outer'表示外连接,取交集

pandas数据合并之一文弄懂pd.merge() - 知乎


2、通过表的index索引合并

dfsalasy.set_index(["姓名"], inplace=True)  # 将“姓名”列设为索引

df=pd.read_excel(r'E:\Users\lxr\Documents\公司\钉钉\工资导出模板.xls',index_col=u'姓名')

# 读取excel表时,直接将“姓名”列作为索引

dfmg = pd.merge(df,dfsalasy,how='outer',left_index=True,right_index=True)

参数left_index=True,right_index=True表示以左右表的索引为合并条件

 

3.dfmg.columns=["工号","姓名","证照类型","证照号码","所得期间起","所得期间止","本期收入","本期免税收入","基本养老保险费","基本医疗保险费","失业保险费","住房公积金","累计子女教育","累计继续教育","累计住房贷款利息","累计住房租金","累计赡养老人","企业(职业)年金","商业健康保险","税延养老保险","其他","准予扣除的捐赠额","税前扣除项目合计","减免税额","减除费用标准","已缴税额","备注"]

pandas 对列进行重命名 - 简书 

4.order = ["工号","姓名","证照类型","证照号码","所得期间起","所得期间止","应发工资合计", "本期免税收入","养老保险","医疗保险","失业保险","住房公积金_y","累计子女教育","累计继续教育","累计住房贷款利息","累计住房租金","累计赡养老人","企业(职业)年金","商业健康保险","税延养老保险","其他","准予扣除的捐赠额","税前扣除项目合计","减免税额","减除费用标准","已缴税额","备注"]
dfmg = dfmg[order]

 5.df.index列出表的index属性

python 钉钉表格 钉钉编辑excel_Python

 

6.查询列的数据类型

dfmg["证照号码"].dtypes

7.修改列的数据类型为字符型

dfmg["证照号码"] = dfmg["证照号码"].astype(str)

dfmg["证照号码"] = dfmg["证照号码"].astype("str")