import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
"""
熵值法是根据指标所含信息有序程度的差异性来确定指标权重的客观赋权方法'
熵用于度量不确定性,仅依赖于数据本身的离散程度;
指标的离散程度越大则熵值越大,表明指标值提供的信息量越多,则该指标的权重也应越大

step:
0.指标同向化(本文指标均为极大型指标,因此忽略本步骤)
1.归一化(权重计算前的归一化不能是同向化后的矩阵,会损失原始信息)
2.计算各指标的熵值
3.计算各指标的权系数
4.计算加权后数据集
5.得到正负理想解
6.得到样本与正负理想解间的距离
7.计算评分
"""
###Read the data
def read_data(path,code = "gb18030",is_train = True):
	"""
	Read a csv file and min-max normalize it; the last column is the label and is dropped.
	:param path: file path
	:param code: file encoding
	:param is_train: if True, fit the scaler on this data, otherwise reuse the fitted scaler
	:return: ndarray, normalized data set
	"""
	#read the data set
	data = pd.read_csv(path, encoding=code)
	data = data[data.columns.tolist()[:-1]]
	data = np.array(data)
	#normalize
	if is_train:
		dataP = scaler.fit_transform(data)
	else:
		dataP = scaler.transform(data)
	return dataP

###Define the weight calculation function
def entropy(data):
	"""
	Compute the entropy-based weight of each indicator from the normalized data set.
	:param data: ndarray, normalized data (samples x indicators)
	:return: ndarray, 1D, weight of each indicator
	"""
	#convert each column to proportions so that the entropy is well defined (bounded by 1)
	p = data / np.sum(data, axis=0)
	#entropy of each indicator; the 0*log(0) terms become nan and are dropped by nansum
	with np.errstate(divide='ignore', invalid='ignore'):
		E = np.nansum(-p * np.log(p) / np.log(len(data)), axis=0)
	#weight coefficients: the smaller the entropy, the larger the weight
	return (1 - E) / (1 - E).sum()
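
# A minimal illustration of how entropy() distributes the weights; the toy matrix and the
# helper name _demo_entropy are made up for this example and are not part of the pipeline.
# A constant column carries no information, so its weight collapses to zero.
def _demo_entropy():
	toy = np.array([[0.0, 0.5], [0.3, 0.5], [0.7, 0.5], [1.0, 0.5]])
	print(entropy(toy))  # approximately [1. 0.]: all weight goes to the varying first column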



###Build the weighted data set
def weight_data(weight,data):
	"""
	Multiply the data set by the indicator weights, column by column.
	:param weight: ndarray, 1D, indicator weights
	:param data: ndarray, normalized data set
	:return: ndarray, weighted data set
	"""
	R = data * weight
	return R

###Determine the positive and negative ideal solutions
def ideal_data(R):
	r_max = np.max(R, axis=0)  # the maximum of each indicator is the positive ideal solution
	r_min = np.min(R, axis=0)  # the minimum of each indicator is the negative ideal solution
	return r_max,r_min


###Compute each sample's distance to the positive and negative ideal solutions
def ideal_distance(R,r_max,r_min):
	d_z = np.sqrt(np.sum(np.square(R - np.tile(r_max, (R.shape[0], 1))), axis=1))  # d+ vector
	d_f = np.sqrt(np.sum(np.square(R - np.tile(r_min, (R.shape[0], 1))), axis=1))  # d- vector
	return d_z,d_f

###Compute the scores
def figure_score(d_z,d_f):
	s = d_f / (d_z + d_f)  # relative closeness to the positive ideal solution
	Score = 100 * s / max(s)  # rescale so that the best sample scores 100
	return Score
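
# Quick sanity check with made-up distances: a sample that sits exactly on the positive
# ideal solution has d+ = 0, so its closeness is 1 and it receives the maximum score of 100.
#   figure_score(np.array([0.0, 0.2]), np.array([0.5, 0.3]))  # -> [100.  60.]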

def train(path,code = "gb18030"):
	"""
	Fit the model: save the indicator weights and the positive and negative ideal
	solutions as parameters to topsis_params.csv.
	:param path: training csv path
	:param code: file encoding
	:return: Score, weight, r_max, r_min
	"""
	data = read_data(path,code = code)
	weight = entropy(data)
	R = weight_data(weight,data)
	r_max, r_min = ideal_data(R)
	d_z,d_f = ideal_distance(R,r_max, r_min)
	Score = figure_score(d_z, d_f)
	# for j in range(0, len(Score)):
	# 	print(f"Score of sample {j + 1}: {Score[j]}")
	print("weight:",weight) #1D np.array, one entry per indicator
	print("r_max:",r_max.tolist())
	print("r_min:",r_min.tolist())
	df_paras = pd.DataFrame()
	df_paras["weight"] = weight
	df_paras["r_max"] = r_max.tolist()
	df_paras["r_min"] = r_min.tolist()
	os.makedirs(os.path.join(os.getcwd(), "params"), exist_ok=True)  # make sure the output folder exists
	df_paras.to_csv(os.path.join(os.getcwd(), "params", "topsis_params.csv"),index=False)
	return Score,weight,r_max, r_min
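
# Note: train() fits the module-level MinMaxScaler, but only the weights and ideal solutions
# are written to disk, so a later run cannot call read_data(..., is_train=False) with the same
# scaling. A minimal sketch of persisting the scaler as well, assuming joblib is available
# (the file name "params/scaler.joblib" is made up for this example):
#
#   from joblib import dump, load
#   dump(scaler, os.path.join("params", "scaler.joblib"))    # at the end of train()
#   fitted = load(os.path.join("params", "scaler.joblib"))   # before calling predict()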

def predict(path,weight,r_max, r_min,code ="utf-8"):
	"""
	Score new data using the saved indicator weights and positive/negative ideal solutions.
	:param path: csv path of the data to score
	:param weight: indicator weights
	:param r_max: positive ideal solution
	:param r_min: negative ideal solution
	:param code: file encoding
	:return: Score
	"""
	data = read_data(path,code = code)  # is_train defaults to True, so the scaler is (re)fitted on this file
	weight = np.array(weight)
	R = weight_data(weight,data)
	d_z,d_f = ideal_distance(R,r_max, r_min)
	Score = figure_score(d_z, d_f)
	for j in range(0, len(Score)):
		print(f"Score of sample {j + 1}: {Score[j]}")
	return Score

if __name__ =="__main__":
	#训练、测试集路径
	train_path = r"D:\baseline\datas\data_Train.csv"
	test_path = r"D:\baseline\datas\data_Test.csv"
	#训练-保存参数
	# Scoretr,weight,r_max, r_min = train(train_path, code="gb18030")
	#预测-读取参数
	df_params = pd.read_csv(os.getcwd()+r"\params\topsis_params.csv")
	Scorete = predict(test_path,df_params['weight'],df_params['r_max'], df_params['r_min'], code="gb18030")
