准备:
import openpyxl
import xlrd
import pandas as pd
import numpy as np
readbook = xlrd.open_workbook(r'E:\Users\lxr\Documents\公司\钉钉\工资模板替换财务工资表.xlsx')
sheet = readbook.sheet_by_index(0)
nrows = sheet.nrows#行
ncols = sheet.ncols#列
Pathresult = r'E:\Users\lxr\Documents\公司\钉钉\工资整理结果.xlsx'
Path2 = r'E:\Users\lxr\Documents\公司\钉钉\merge结果.xlsx'
xlrd读取数据源表(工资模板替换财务工资表.xlsx)的相关数据
columnheads = ["姓名", "应发工资合计", "养老保险", "医疗保险", "失业保险", "住房公积金"]
dfsalasy = pd.DataFrame(columns = columnheads)
for i in range(0, nrows):#
if sheet.cell(i,0).ctype == 2:
name = sheet.cell(i,4).value
salary = sheet.cell(i,15).value
endowment = sheet.cell(i,19).value
medical = sheet.cell(i,21).value
unemployment = sheet.cell(i,20).value
housing = sheet.cell(i,23).value
s2 = pd.Series([name, salary, endowment, medical, unemployment, housing],index = columnheads)
dfsalasy = dfsalasy.append(s2, ignore_index=True)
#dfsalasy.set_index(["姓名"], inplace=True)
dfsalasy.to_excel(Pathresult)
将上步读取到的数据按表2(工资导出模板.xls)格式写入目标表
#df=pd.read_excel(r'E:\Users\lxr\Documents\公司\钉钉\工资导出模板.xls',index_col=u'姓名')
df=pd.read_excel(r'E:\Users\lxr\Documents\公司\钉钉\工资导出模板.xls')
#dfmg = pd.merge(df,dfsalasy,how='outer',left_index=True,right_index=True)
# 参数on='姓名',表示已两张表中共有的“姓名”列为合并条件。
dfmg = pd.merge(df,dfsalasy,on='姓名',how='outer')
colname = ["本期收入","基本养老保险费","基本医疗保险费","失业保险费","住房公积金_x"]
order = ["工号","姓名","证照类型","证照号码","所得期间起","所得期间止","应发工资合计", "本期免税收入","养老保险","医疗保险","失业保险","住房公积金_y","累计子女教育","累计继续教育","累计住房贷款利息","累计住房租金","累计赡养老人","企业(职业)年金","商业健康保险","税延养老保险","其他","准予扣除的捐赠额","税前扣除项目合计","减免税额","减除费用标准","已缴税额","备注"]
dfmg = dfmg[order]
df["证照号码"] = df["证照号码"].astype(str)
dfmg.columns=["工号","姓名","证照类型","证照号码","所得期间起","所得期间止","本期收入","本期免税收入","基本养老保险费","基本医疗保险费","失业保险费","住房公积金","累计子女教育","累计继续教育","累计住房贷款利息","累计住房租金","累计赡养老人","企业(职业)年金","商业健康保险","税延养老保险","其他","准予扣除的捐赠额","税前扣除项目合计","减免税额","减除费用标准","已缴税额","备注"]
dfmg.to_excel(Path2)
其中使用的操作:
1、pd.merge(df,dfsalasy,on='姓名',how='outer')
参数on='姓名',表示已两张表中共有的“姓名”列为合并条件。
参数how='outer'表示外连接,取交集
pandas数据合并之一文弄懂pd.merge() - 知乎
2、通过表的index索引合并
dfsalasy.set_index(["姓名"], inplace=True) # 将“姓名”列设为索引
df=pd.read_excel(r'E:\Users\lxr\Documents\公司\钉钉\工资导出模板.xls',index_col=u'姓名')
# 读取excel表时,直接将“姓名”列作为索引
dfmg = pd.merge(df,dfsalasy,how='outer',left_index=True,right_index=True)
参数left_index=True,right_index=True表示以左右表的索引为合并条件
3.dfmg.columns=["工号","姓名","证照类型","证照号码","所得期间起","所得期间止","本期收入","本期免税收入","基本养老保险费","基本医疗保险费","失业保险费","住房公积金","累计子女教育","累计继续教育","累计住房贷款利息","累计住房租金","累计赡养老人","企业(职业)年金","商业健康保险","税延养老保险","其他","准予扣除的捐赠额","税前扣除项目合计","减免税额","减除费用标准","已缴税额","备注"]
4.order = ["工号","姓名","证照类型","证照号码","所得期间起","所得期间止","应发工资合计", "本期免税收入","养老保险","医疗保险","失业保险","住房公积金_y","累计子女教育","累计继续教育","累计住房贷款利息","累计住房租金","累计赡养老人","企业(职业)年金","商业健康保险","税延养老保险","其他","准予扣除的捐赠额","税前扣除项目合计","减免税额","减除费用标准","已缴税额","备注"]
dfmg = dfmg[order]
5.df.index列出表的index属性
6.查询列的数据类型
dfmg["证照号码"].dtypes
7.修改列的数据类型为字符型
dfmg["证照号码"] = dfmg["证照号码"].astype(str)
或
dfmg["证照号码"] = dfmg["证照号码"].astype("str")