上篇文章简单介绍了一下加法,乘法,除法贡献度的计算方法,并手动实现了一下过程,文章链接:加法、乘法、除法贡献度计算实现
这篇文章想要把贡献度计算方法封装成一个函数,在传入json输入的时候,自动计算输出结果,其中乘法贡献度的计算方法有做更改,具体如下
一、加载需要的包
import pandas as pd
import numpy as np
import json
二、读取数据(加法,其中A001+A002+A003 = A01)
df_add = pd.read_excel("C:/Users/supaur/Desktop/value.xlsx")
df_add
index_num | index_name | time | period | value | |
0 | 1 | A01 | 2022-08-22 | day | 1000 |
1 | 2 | A001 | 2022-08-22 | day | 300 |
2 | 3 | A002 | 2022-08-22 | day | 200 |
3 | 4 | A003 | 2022-08-22 | day | 500 |
4 | 1 | A01 | 2022-08-21 | day | 1900 |
5 | 2 | A001 | 2022-08-21 | day | 1000 |
6 | 3 | A002 | 2022-08-21 | day | 500 |
7 | 4 | A003 | 2022-08-21 | day | 400 |
三、查看传入的加法json并解析出相应的参数
json_file = "C:/Users/supaur/Desktop/add.json"
with open(json_file, "r") as fp:
data = json.load(fp)
print(data)
date = data["date"]
target1 = data["target1"]
target2 = data["target2"]
target2_1 = data["target2_1"]
target2_2 = data["target2_2"]
type = data["type"]
timetype = data["timetype"]
{'date': ['20220821', '20220822'], 'target1': 'A01', 'target2': ['A001', 'A002', 'A003'], 'target2_1': [], 'target2_2': [], 'type': 'add', 'timetype': 'day'}
四、编写加法贡献度函数
# 生成两个df,分别为参数date里的两个日期,打印出来看看结果
df1_add = df_add[(df_add["time"] == date[0]) & (df_add["period"] == timetype)]
df2_add = df_add[(df_add["time"] == date[1]) & (df_add["period"] == timetype)]
print(df1_add)
print(df2_add)
index_num index_name time period value
4 1 A01 2022-08-21 day 1900
5 2 A001 2022-08-21 day 1000
6 3 A002 2022-08-21 day 500
7 4 A003 2022-08-21 day 400
index_num index_name time period value
0 1 A01 2022-08-22 day 1000
1 2 A001 2022-08-22 day 300
2 3 A002 2022-08-22 day 200
3 4 A003 2022-08-22 day 500
# df1与df2 按照字段名称拼接成下面格式
df3_add = pd.merge(df1_add, df2_add, how="inner", left_on=["index_name"], right_on=["index_name"],suffixes=("_1", "_2"))
df3_add
index_num_1 | index_name | time_1 | period_1 | value_1 | index_num_2 | time_2 | period_2 | value_2 | |
0 | 1 | A01 | 2022-08-21 | day | 1900 | 1 | 2022-08-22 | day | 1000 |
1 | 2 | A001 | 2022-08-21 | day | 1000 | 2 | 2022-08-22 | day | 300 |
2 | 3 | A002 | 2022-08-21 | day | 500 | 3 | 2022-08-22 | day | 200 |
3 | 4 | A003 | 2022-08-21 | day | 400 | 4 | 2022-08-22 | day | 500 |
# 添加一列字段,得到对应字段两个日期的差值
df3_add["D_value"] = df3_add.loc[:, "value_2"] - df3_add.loc[:, "value_1"]
df3_add
index_num_1 | index_name | time_1 | period_1 | value_1 | index_num_2 | time_2 | period_2 | value_2 | D_value | |
0 | 1 | A01 | 2022-08-21 | day | 1900 | 1 | 2022-08-22 | day | 1000 | -900 |
1 | 2 | A001 | 2022-08-21 | day | 1000 | 2 | 2022-08-22 | day | 300 | -700 |
2 | 3 | A002 | 2022-08-21 | day | 500 | 3 | 2022-08-22 | day | 200 | -300 |
3 | 4 | A003 | 2022-08-21 | day | 400 | 4 | 2022-08-22 | day | 500 | 100 |
# 利用for循环,计算每个target2的贡献度,其中计算方法为target2所在的行,D_value所在列 /target1所在的行,D_value所在列,得到每个target2的贡献度
for i in target2:
df3_add["%s_contribution" % i] = round(df3_add.loc[df3_add["index_name"] == i, "D_value"].tolist()[0] / df3_add.loc[df3_add["index_name"] == target1, "D_value"].tolist()[0] *100,2)
df3_add
index_num_1 | index_name | time_1 | period_1 | value_1 | index_num_2 | time_2 | period_2 | value_2 | D_value | A001_contribution | A002_contribution | A003_contribution | |
0 | 1 | A01 | 2022-08-21 | day | 1900 | 1 | 2022-08-22 | day | 1000 | -900 | 77.78 | 33.33 | -11.11 |
1 | 2 | A001 | 2022-08-21 | day | 1000 | 2 | 2022-08-22 | day | 300 | -700 | 77.78 | 33.33 | -11.11 |
2 | 3 | A002 | 2022-08-21 | day | 500 | 3 | 2022-08-22 | day | 200 | -300 | 77.78 | 33.33 | -11.11 |
3 | 4 | A003 | 2022-08-21 | day | 400 | 4 | 2022-08-22 | day | 500 | 100 | 77.78 | 33.33 | -11.11 |
# 把以上代码组合封装成functionadd函数,创建一个空列表,把贡献度值添加到列表里,并调用该函数打印结果如下:
def functionadd(date,target1,target2,timetype):
df1_add = df_add[(df_add["time"] == date[0]) & (df_add["period"] == timetype)]
df2_add = df_add[(df_add["time"] == date[1]) & (df_add["period"] == timetype)]
df3_add = pd.merge(df1, df2, how="inner", left_on=["index_name"], right_on=["index_name"],suffixes=("_1", "_2"))
df3_add["D_value"] = df3_add.loc[:, "value_2"] - df3_add.loc[:, "value_1"]
re = []
for i in target2:
df3_add["%s_contribution" % i] = round(df3_add.loc[df3_add["index_name"] == i, "D_value"].tolist()[0] / df3_add.loc[df3_add["index_name"] == target1, "D_value"].tolist()[0] *100,2)
re.append(("%s_contribution" % i, df3_add["%s_contribution" % i].tolist()[0]))
return re
print(functionadd(date,target1,target2,timetype))
[('A001_contribution', 29.94), ('A002_contribution', 19.97), ('A003_contribution', 49.99)]
五、乘法跟加法类似,再开始加法之前先对各指标求log,变成加法计算
# 查看传入的乘法json并解析出相应的参数
json_file = "C:/Users/supaur/Desktop/times.json"
with open(json_file, "r") as fp:
data = json.load(fp)
print(data)
date = data["date"]
target1 = data["target1"]
target2 = data["target2"]
target2_1 = data["target2_1"]
target2_2 = data["target2_2"]
type = data["type"]
timetype = data["timetype"]
{'date': ['20220801', '20220901'], 'target1': 'B01', 'target2': ['B001', 'B002'], 'target2_1': [], 'target2_2': [], 'type': 'times', 'timetype': 'month'}
# 查看乘法数据,其中A001*A002*A003 = A01 , B001*B002 = B01
df_times = pd.read_excel("C:/Users/supaur/Desktop/times.xlsx")
df_times
index_num | index_name | time | period | value | |
0 | 1 | A01 | 2022-08-22 | day | 30000.00 |
1 | 2 | A001 | 2022-08-22 | day | 0.30 |
2 | 3 | A002 | 2022-08-22 | day | 200.00 |
3 | 4 | A003 | 2022-08-22 | day | 500.00 |
4 | 1 | A01 | 2022-08-21 | day | 20000.00 |
5 | 2 | A001 | 2022-08-21 | day | 0.10 |
6 | 3 | A002 | 2022-08-21 | day | 500.00 |
7 | 4 | A003 | 2022-08-21 | day | 400.00 |
8 | 1 | A01 | 2022-08-20 | day | 32000.00 |
9 | 2 | A001 | 2022-08-20 | day | 0.20 |
10 | 3 | A002 | 2022-08-20 | day | 400.00 |
11 | 4 | A003 | 2022-08-20 | day | 400.00 |
12 | 5 | B01 | 2022-08-01 | month | 150000.00 |
13 | 6 | B001 | 2022-08-01 | month | 0.30 |
14 | 7 | B002 | 2022-08-01 | month | 500000.00 |
15 | 5 | B01 | 2022-09-01 | month | 107500.00 |
16 | 6 | B001 | 2022-09-01 | month | 0.25 |
17 | 7 | B002 | 2022-09-01 | month | 430000.00 |
18 | 5 | B01 | 2022-10-01 | month | 185500.00 |
19 | 6 | B001 | 2022-10-01 | month | 0.35 |
20 | 7 | B002 | 2022-10-01 | month | 530000.00 |
# 编写乘法函数,加法是直接求两日期差值,乘法是先取对数log,再变成加法计算
def functiontimes(date,target1,target2,timetype):
df1_times = df_times[(df_times["time"] == date[0]) & (df_times["period"] == timetype)]
df2_times = df_times[(df_times["time"] == date[1]) & (df_times["period"] == timetype)]
df3_times = pd.merge(df1_times, df2_times, how="inner", left_on=["index_name"], right_on=["index_name"],suffixes=("_1", "_2"))
df3_times["D_value"] = np.log10(df3_times.loc[:, "value_2"]) - np.log10(df3_times.loc[:, "value_1"])
re = []
for i in target2:
df3_times["%s_contribution" % i] = round(df3_times.loc[df3_times["index_name"] == i, "D_value"].tolist()[0] / df3_times.loc[df3_times["index_name"] == target1, "D_value"].tolist()[0] *100,2)
re.append(("%s_contribution" % i, df3_times["%s_contribution" % i].tolist()[0]))
return re
print(functiontimes(date,target1,target2,timetype))
[('B001_contribution', 54.73), ('B002_contribution', 45.27)]
六、除法贡献度函数
# 查看传入的除法json并解析出相应的参数,除法要比加法和乘法多两个参数target2_1,target2_2
json_file = "C:/Users/supaur/Desktop/div.json"
with open(json_file, "r") as fp:
data = json.load(fp)
print(data)
date = data["date"]
target1 = data["target1"]
target2 = data["target2"]
target2_1 = data["target2_1"]
target2_2 = data["target2_2"]
type = data["type"]
timetype = data["timetype"]
{'date': ['20220822', '20220823'], 'target1': 'A01', 'target2': ['A001', 'A002', 'A003'], 'target2_1': ['A001_1', 'A002_1', 'A003_1'], 'target2_2': ['A001_2', 'A002_2', 'A003_2'], 'type': 'div', 'timetype': 'day'}
# 查看除法数据,其中A001 = A001_1/A001_2 , A002 = A002_1/A002_2 , 以此类推,A01 = (A001_1 + A002_1 + A003_1) / (A001_2 + A002_2 + A003_2)
df_div = pd.read_excel("C:/Users/supaur/Desktop/div.xlsx")
df_div
index_num | index_name | time | period | value | |
0 | 1 | A01 | 2022-08-22 | day | 0.840546 |
1 | 2 | A001 | 2022-08-22 | day | 0.882581 |
2 | 2 | A001_1 | 2022-08-22 | day | 684.000000 |
3 | 2 | A001_2 | 2022-08-22 | day | 775.000000 |
4 | 3 | A002 | 2022-08-22 | day | 0.732308 |
5 | 3 | A002_1 | 2022-08-22 | day | 714.000000 |
6 | 3 | A002_2 | 2022-08-22 | day | 975.000000 |
7 | 4 | A003 | 2022-08-22 | day | 0.930061 |
8 | 4 | A003_1 | 2022-08-22 | day | 758.000000 |
9 | 4 | A003_2 | 2022-08-22 | day | 815.000000 |
10 | 1 | A01 | 2022-08-23 | day | 0.448011 |
11 | 2 | A001 | 2022-08-23 | day | 0.696589 |
12 | 2 | A001_1 | 2022-08-23 | day | 388.000000 |
13 | 2 | A001_2 | 2022-08-23 | day | 557.000000 |
14 | 3 | A002 | 2022-08-23 | day | 0.415882 |
15 | 3 | A002_1 | 2022-08-23 | day | 309.000000 |
16 | 3 | A002_2 | 2022-08-23 | day | 743.000000 |
17 | 4 | A003 | 2022-08-23 | day | 0.322368 |
18 | 4 | A003_1 | 2022-08-23 | day | 294.000000 |
19 | 4 | A003_2 | 2022-08-23 | day | 912.000000 |
# 与加法和乘法一样 生成两个df,分别为参数date里的两个日期
df1_div = df_div[(df_div["time"] == date[0]) & (df_div["period"] == timetype)]
df2_div = df_div[(df_div["time"] == date[1]) & (df_div["period"] == timetype)]
print(df1_div)
print(df2_div)
index_num index_name time period value
0 1 A01 2022-08-22 day 0.840546
1 2 A001 2022-08-22 day 0.882581
2 2 A001_1 2022-08-22 day 684.000000
3 2 A001_2 2022-08-22 day 775.000000
4 3 A002 2022-08-22 day 0.732308
5 3 A002_1 2022-08-22 day 714.000000
6 3 A002_2 2022-08-22 day 975.000000
7 4 A003 2022-08-22 day 0.930061
8 4 A003_1 2022-08-22 day 758.000000
9 4 A003_2 2022-08-22 day 815.000000
index_num index_name time period value
10 1 A01 2022-08-23 day 0.448011
11 2 A001 2022-08-23 day 0.696589
12 2 A001_1 2022-08-23 day 388.000000
13 2 A001_2 2022-08-23 day 557.000000
14 3 A002 2022-08-23 day 0.415882
15 3 A002_1 2022-08-23 day 309.000000
16 3 A002_2 2022-08-23 day 743.000000
17 4 A003 2022-08-23 day 0.322368
18 4 A003_1 2022-08-23 day 294.000000
19 4 A003_2 2022-08-23 day 912.000000
# 求所有target2_1 的和,新增列为sum_1
sum_1 = 0
for i in target2_1:
sum_1 = sum_1 + df1_div.loc[df1_div["index_name"] == i , "value"].tolist()[0]
df1_div["sum_1"] = sum_1
df1_div
index_num | index_name | time | period | value | sum_1 | |
0 | 1 | A01 | 2022-08-22 | day | 0.840546 | 2156.0 |
1 | 2 | A001 | 2022-08-22 | day | 0.882581 | 2156.0 |
2 | 2 | A001_1 | 2022-08-22 | day | 684.000000 | 2156.0 |
3 | 2 | A001_2 | 2022-08-22 | day | 775.000000 | 2156.0 |
4 | 3 | A002 | 2022-08-22 | day | 0.732308 | 2156.0 |
5 | 3 | A002_1 | 2022-08-22 | day | 714.000000 | 2156.0 |
6 | 3 | A002_2 | 2022-08-22 | day | 975.000000 | 2156.0 |
7 | 4 | A003 | 2022-08-22 | day | 0.930061 | 2156.0 |
8 | 4 | A003_1 | 2022-08-22 | day | 758.000000 | 2156.0 |
9 | 4 | A003_2 | 2022-08-22 | day | 815.000000 | 2156.0 |
# 求所有target2_2 的和,新增列为sum_2
sum_2 = 0
for i in target2_2:
sum_2 = sum_2 + df1_div.loc[df1_div["index_name"] == i , "value"].tolist()[0]
df1_div["sum_2"] = sum_2
df1_div
index_num | index_name | time | period | value | sum_1 | sum_2 | |
0 | 1 | A01 | 2022-08-22 | day | 0.840546 | 2156.0 | 2565.0 |
1 | 2 | A001 | 2022-08-22 | day | 0.882581 | 2156.0 | 2565.0 |
2 | 2 | A001_1 | 2022-08-22 | day | 684.000000 | 2156.0 | 2565.0 |
3 | 2 | A001_2 | 2022-08-22 | day | 775.000000 | 2156.0 | 2565.0 |
4 | 3 | A002 | 2022-08-22 | day | 0.732308 | 2156.0 | 2565.0 |
5 | 3 | A002_1 | 2022-08-22 | day | 714.000000 | 2156.0 | 2565.0 |
6 | 3 | A002_2 | 2022-08-22 | day | 975.000000 | 2156.0 | 2565.0 |
7 | 4 | A003 | 2022-08-22 | day | 0.930061 | 2156.0 | 2565.0 |
8 | 4 | A003_1 | 2022-08-22 | day | 758.000000 | 2156.0 | 2565.0 |
9 | 4 | A003_2 | 2022-08-22 | day | 815.000000 | 2156.0 | 2565.0 |
# df1_div, df2_div 按照字段名称拼接成下面格式
df3_div = pd.merge(df1_div, df2_div, how="inner", left_on=["index_name"], right_on=["index_name"],suffixes=("_1", "_2"))
df3_div
index_num_1 | index_name | time_1 | period_1 | value_1 | sum_1 | sum_2 | index_num_2 | time_2 | period_2 | value_2 | |
0 | 1 | A01 | 2022-08-22 | day | 0.840546 | 2156.0 | 2565.0 | 1 | 2022-08-23 | day | 0.448011 |
1 | 2 | A001 | 2022-08-22 | day | 0.882581 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 0.696589 |
2 | 2 | A001_1 | 2022-08-22 | day | 684.000000 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 388.000000 |
3 | 2 | A001_2 | 2022-08-22 | day | 775.000000 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 557.000000 |
4 | 3 | A002 | 2022-08-22 | day | 0.732308 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 0.415882 |
5 | 3 | A002_1 | 2022-08-22 | day | 714.000000 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 309.000000 |
6 | 3 | A002_2 | 2022-08-22 | day | 975.000000 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 743.000000 |
7 | 4 | A003 | 2022-08-22 | day | 0.930061 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 0.322368 |
8 | 4 | A003_1 | 2022-08-22 | day | 758.000000 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 294.000000 |
9 | 4 | A003_2 | 2022-08-22 | day | 815.000000 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 912.000000 |
# 添加一列字段,得到对应字段两个日期的差值
df3_div["D_value"] = df3_div.loc[:, "value_2"] - df3_div.loc[:, "value_1"]
df3_div
index_num_1 | index_name | time_1 | period_1 | value_1 | sum_1 | sum_2 | index_num_2 | time_2 | period_2 | value_2 | D_value | |
0 | 1 | A01 | 2022-08-22 | day | 0.840546 | 2156.0 | 2565.0 | 1 | 2022-08-23 | day | 0.448011 | -0.392535 |
1 | 2 | A001 | 2022-08-22 | day | 0.882581 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 0.696589 | -0.185992 |
2 | 2 | A001_1 | 2022-08-22 | day | 684.000000 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 388.000000 | -296.000000 |
3 | 2 | A001_2 | 2022-08-22 | day | 775.000000 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 557.000000 | -218.000000 |
4 | 3 | A002 | 2022-08-22 | day | 0.732308 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 0.415882 | -0.316426 |
5 | 3 | A002_1 | 2022-08-22 | day | 714.000000 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 309.000000 | -405.000000 |
6 | 3 | A002_2 | 2022-08-22 | day | 975.000000 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 743.000000 | -232.000000 |
7 | 4 | A003 | 2022-08-22 | day | 0.930061 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 0.322368 | -0.607693 |
8 | 4 | A003_1 | 2022-08-22 | day | 758.000000 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 294.000000 | -464.000000 |
9 | 4 | A003_2 | 2022-08-22 | day | 815.000000 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 912.000000 | 97.000000 |
# 求每个target2 的贡献度,计算方法为每个 target2_1 的 D_value + sum_1 的和 / 每个 target2_2 的 D_value + sum_2 的和 - target1 的 value_1
# 再对target2 的贡献度求和
sum = 0
for (a ,i ,j) in zip(target2,target2_1,target2_2):
df3_div["%s_contribution" % a] = (df3_div.loc[df3_div["index_name"] == i, "sum_1"].tolist()[0] + df3_div.loc[df3_div["index_name"] == i,"D_value"].tolist()[0]) / (df3_div.loc[df3_div["index_name"] == j, "sum_2"].tolist()[0] + df3_div.loc[df3_div["index_name"] == j,"D_value"].tolist()[0]) - df3_div.loc[df3_div["index_name"] == target1, "value_1"].tolist()[0]
sum = sum + df3_div["%s_contribution" % a]
df3_div["sum"] = sum
df3_div
index_num_1 | index_name | time_1 | period_1 | value_1 | sum_1 | sum_2 | index_num_2 | time_2 | period_2 | value_2 | D_value | A001_contribution | A002_contribution | A003_contribution | sum | |
0 | 1 | A01 | 2022-08-22 | day | 0.840546 | 2156.0 | 2565.0 | 1 | 2022-08-23 | day | 0.448011 | -0.392535 | -0.048045 | -0.09001 | -0.204933 | -0.342988 |
1 | 2 | A001 | 2022-08-22 | day | 0.882581 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 0.696589 | -0.185992 | -0.048045 | -0.09001 | -0.204933 | -0.342988 |
2 | 2 | A001_1 | 2022-08-22 | day | 684.000000 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 388.000000 | -296.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 |
3 | 2 | A001_2 | 2022-08-22 | day | 775.000000 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 557.000000 | -218.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 |
4 | 3 | A002 | 2022-08-22 | day | 0.732308 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 0.415882 | -0.316426 | -0.048045 | -0.09001 | -0.204933 | -0.342988 |
5 | 3 | A002_1 | 2022-08-22 | day | 714.000000 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 309.000000 | -405.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 |
6 | 3 | A002_2 | 2022-08-22 | day | 975.000000 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 743.000000 | -232.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 |
7 | 4 | A003 | 2022-08-22 | day | 0.930061 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 0.322368 | -0.607693 | -0.048045 | -0.09001 | -0.204933 | -0.342988 |
8 | 4 | A003_1 | 2022-08-22 | day | 758.000000 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 294.000000 | -464.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 |
9 | 4 | A003_2 | 2022-08-22 | day | 815.000000 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 912.000000 | 97.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 |
# 标准化,每个target2贡献度值/ 所有target2 总和 得到每个 target2 标准化后的贡献度
for b in target2:
df3_div["%s_nor_contribution" % b] = round(df3_div.loc[df3_div["index_name"] == b, "%s_contribution" % b].tolist()[0] / df3_div.loc[df3_div["index_name"] == b, "sum"].tolist()[0] * 100, 2)
df3_div
index_num_1 | index_name | time_1 | period_1 | value_1 | sum_1 | sum_2 | index_num_2 | time_2 | period_2 | value_2 | D_value | A001_contribution | A002_contribution | A003_contribution | sum | A001_nor_contribution | A002_nor_contribution | A003_nor_contribution | |
0 | 1 | A01 | 2022-08-22 | day | 0.840546 | 2156.0 | 2565.0 | 1 | 2022-08-23 | day | 0.448011 | -0.392535 | -0.048045 | -0.09001 | -0.204933 | -0.342988 | 14.01 | 26.24 | 59.75 |
1 | 2 | A001 | 2022-08-22 | day | 0.882581 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 0.696589 | -0.185992 | -0.048045 | -0.09001 | -0.204933 | -0.342988 | 14.01 | 26.24 | 59.75 |
2 | 2 | A001_1 | 2022-08-22 | day | 684.000000 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 388.000000 | -296.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 | 14.01 | 26.24 | 59.75 |
3 | 2 | A001_2 | 2022-08-22 | day | 775.000000 | 2156.0 | 2565.0 | 2 | 2022-08-23 | day | 557.000000 | -218.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 | 14.01 | 26.24 | 59.75 |
4 | 3 | A002 | 2022-08-22 | day | 0.732308 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 0.415882 | -0.316426 | -0.048045 | -0.09001 | -0.204933 | -0.342988 | 14.01 | 26.24 | 59.75 |
5 | 3 | A002_1 | 2022-08-22 | day | 714.000000 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 309.000000 | -405.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 | 14.01 | 26.24 | 59.75 |
6 | 3 | A002_2 | 2022-08-22 | day | 975.000000 | 2156.0 | 2565.0 | 3 | 2022-08-23 | day | 743.000000 | -232.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 | 14.01 | 26.24 | 59.75 |
7 | 4 | A003 | 2022-08-22 | day | 0.930061 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 0.322368 | -0.607693 | -0.048045 | -0.09001 | -0.204933 | -0.342988 | 14.01 | 26.24 | 59.75 |
8 | 4 | A003_1 | 2022-08-22 | day | 758.000000 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 294.000000 | -464.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 | 14.01 | 26.24 | 59.75 |
9 | 4 | A003_2 | 2022-08-22 | day | 815.000000 | 2156.0 | 2565.0 | 4 | 2022-08-23 | day | 912.000000 | 97.000000 | -0.048045 | -0.09001 | -0.204933 | -0.342988 | 14.01 | 26.24 | 59.75 |
# 对上述代码封装成除法贡献度函数
def functiondiv(date,target1,target2,target2_1,target2_2,timetype):
df1_div = df_div[(df_div["time"] == date[0]) & (df_div["period"] == timetype)]
df2_div = df_div[(df_div["time"] == date[1]) & (df_div["period"] == timetype)]
sum_1 = 0
for i in target2_1:
sum_1 = sum_1 + df1_div.loc[df1_div["index_name"] == i , "value"].tolist()[0]
df1_div["sum_1"] = sum_1
sum_2 = 0
for i in target2_2:
sum_2 = sum_2 + df1_div.loc[df1_div["index_name"] == i , "value"].tolist()[0]
df1_div["sum_2"] = sum_2
df3_div = pd.merge(df1_div, df2_div, how="inner", left_on=["index_name"], right_on=["index_name"],suffixes=("_1", "_2"))
df4_div = df3_div.loc[:, ["index_num_1", "index_name", "time_1", "time_2", "period_1", "value_1", "value_2","sum_1","sum_2"]]
df4_div["D_value"] = df4_div.loc[:, "value_2"] - df4_div.loc[:, "value_1"]
sum = 0
for (a ,i ,j) in zip(target2,target2_1,target2_2):
df4_div["%s_contribution" % a] = (df4_div.loc[df4_div["index_name"] == i, "sum_1"].tolist()[0] + df4_div.loc[df4_div["index_name"] == i,"D_value"].tolist()[0]) / (df4_div.loc[df4_div["index_name"] == j, "sum_2"].tolist()[0] + df4_div.loc[df4_div["index_name"] == j,"D_value"].tolist()[0]) - df4_div.loc[df4_div["index_name"] == target1, "value_1"].tolist()[0]
sum = sum + df4_div["%s_contribution" % a]
df4_div["sum"] = sum
re = []
for b in target2:
df4_div["%s_nor_contribution" % b] = round(df4_div.loc[df4_div["index_name"] == b, "%s_contribution" % b].tolist()[0] / df4_div.loc[df4_div["index_name"] == b, "sum"].tolist()[0] * 100, 2)
re.append(("%s_nor_contribution" % b, df4_div["%s_nor_contribution" % b].tolist()[0]))
return re
print(functiondiv(date, target1, target2,target2_1,target2_2, timetype))
[('A001_nor_contribution', 14.01), ('A002_nor_contribution', 26.24), ('A003_nor_contribution', 59.75)]