基于 networkx 的路径查找算法和中心性算法
- 1.工具的形成过程
- 1.1工具的简单理解
- 1.2工具的封装方法
- 2.工具的功能展示
- 2.1单源节点关系查询和关系图绘制
- 2.2单目标节点关系查询和关系图绘制
- 2.3两节点间关系查找
- 3.完整版Python脚本执行
- 3.1Python执行环境
- 3.2数据格式样例
- 3.3完整Python脚本
- 3.4exe封装
1.工具的形成过程
1.1工具的简单理解
业务场景、设计思路和简单实现,见本人另一篇文章 Python图形用户界面展示客户关联关系
1.2工具的封装方法
python脚本封装成exe文件,具体方法见本人另一篇文章 windows下python的pip安装和虚拟环境使用
2.工具的功能展示
关系图均为有向图
2.1单源节点关系查询和关系图绘制
"单源节点"即"起始端名称",输入后点击"查询"即可展示其可达节点个数。
点击"图形"按钮即可展示10层内(具体层数自己可在代码中设置)可达节点的具体节点名称。如果图形有遮挡可关闭图形界面,点击"图形"重新生成。
2.2单目标节点关系查询和关系图绘制
"结束端名称"就是"目标节点",其他操作同2.1,不再赘述。
2.3两节点间关系查找
输入起始和结束端名称,即可查找两节点的最短路径长度、是否关联、是否反向关联 3种信息。
图形绘制同2.1,不赘述,需要反向关系,将"起始端名称"和"结束端名称"调换即可
3.完整版Python脚本执行
3.1Python执行环境
pip install pandas==1.2.4 networkx==2.5 matplotlib==3.3.4 decorator==4.4.2 pyinstaller openpyxl -i https://pypi.tuna.tsinghua.edu.cn/simple
3.2数据格式样例
样例数据就是2.工具功能展示 的数据,瞎编乱造的。数据可以是3列可以4列,边也可以展示更多的信息,这些全都来源于业务需求,根据需求调整代码即可。
3.3完整Python脚本
import tkinter as tk
import tkinter.messagebox as tm
import pandas as pd
import networkx as nx
import os
import matplotlib.pyplot as plt
import numpy as np
import warnings
import operator
warnings.filterwarnings('ignore')
class TkSet:
    """Tkinter front end of the relationship-analysis tool.

    Builds a small window with a status label, two entry boxes (start node
    and end node) and three buttons (query / draw / clear).  Button handlers
    read the entries and dispatch to the module-level graph helpers, using
    the module globals ``graph`` and ``data``.
    """

    def __init__(self, init_window_name):
        """Store the root window and compute the window size and position.

        Args:
            init_window_name: the Tk root window that will host the widgets.
        """
        self.init_window_name = init_window_name
        # Screen width and height.
        self.sw = init_window_name.winfo_screenwidth()
        self.sh = init_window_name.winfo_screenheight()
        # Window width and height.
        self.ww = 320
        self.wh = 120
        # Offset of the window on the screen's x and y axes.
        self.x = int((self.sw - self.ww) / 2.5)
        self.y = int((self.sh - self.wh) / 3.5)

    def set_window(self):
        """Create all widgets, lay them out and enter the Tk main loop."""
        self.init_window_name.title("关联关系分析")
        # Geometry string: width x height + x offset + y offset.
        self.init_window_name.geometry(
            '{}x{}+{}+{}'.format(self.ww, self.wh, self.x, self.y))

        # Row 0: status line for the last action, spanning both columns.
        self.query_result = tk.Label(self.init_window_name, text='')
        self.query_result.grid(row=0, columnspan=2)

        # Row 1: start-node entry.
        self.source_tips = tk.Label(self.init_window_name, text='起始端名称:')
        self.source_tips.grid(row=1, sticky=tk.W)
        self.source = tk.Entry(self.init_window_name)
        self.source.grid(row=1, column=1, sticky=tk.E, padx=3)

        # Row 2: end-node entry.
        self.target_tips = tk.Label(self.init_window_name, text='结束端名称:')
        self.target_tips.grid(row=2, sticky=tk.E)
        self.target = tk.Entry(self.init_window_name)
        self.target.grid(row=2, column=1, sticky=tk.E, padx=3)

        # Row 3: a frame used as a container for the three buttons.
        self.f_btn = tk.Frame(self.init_window_name)
        self.b_query = tk.Button(self.f_btn, text='查询', width=6,
                                 command=self.button_query)
        self.b_query.grid(row=0, column=0)
        self.b_draw = tk.Button(self.f_btn, text='图形', width=6,
                                command=self.button_draw)
        self.b_draw.grid(row=0, column=1)
        self.b_clearn = tk.Button(self.f_btn, text='清空', width=6,
                                  command=self.button_clearn)
        self.b_clearn.grid(row=0, column=2)
        self.f_btn.grid(row=3, columnspan=2, pady=10)

        self.init_window_name.mainloop()

    def _read_inputs(self):
        """Refresh the cached entry texts and their lengths from the widgets."""
        self.source_node = self.source.get()
        self.target_node = self.target.get()
        self.len_source = len(self.source_node)
        self.len_target = len(self.target_node)

    def button_query(self):
        """Handle the query button: run the text query and report the outcome."""
        self._read_inputs()
        if self.len_source == 0 and self.len_target == 0:
            self.query_result.configure(text='名称输入错误!', fg='red')
            return
        try:
            self.query_show()
        except Exception:
            # Any failure of the query is reported to the user as an
            # input error instead of crashing the GUI callback.
            self.query_result.configure(text='名称输入错误!', fg='red')
        else:
            self.query_result.configure(text='查询成功!', fg='green')

    def button_draw(self):
        """Handle the draw button: render the graph and report the outcome."""
        self._read_inputs()
        if self.len_source == 0 and self.len_target == 0:
            self.query_result.configure(text='无相关关系,无法展示图形!', fg='red')
            return
        try:
            # The success text is set before drawing, as in the original flow;
            # it is overwritten below if drawing fails.
            self.query_result.configure(text='图形展示成功!', fg='green')
            self.draw_show()
        except Exception:
            self.query_result.configure(text='无相关关系,无法展示图形!', fg='red')

    def button_clearn(self):
        """Handle the clear button: empty both entry boxes."""
        self._read_inputs()
        self.source.delete(0, tk.END)
        self.target.delete(0, tk.END)
        self.query_result.configure(text='清理成功!', fg='green')

    def query_show(self):
        """Show the textual query result in a message box.

        Which helper is used depends on which entries are filled in:
        both -> node pair, start only -> single source, end only -> single
        target.
        """
        if self.len_source != 0 and self.len_target != 0:
            # Node pair.
            print_information = get_relation(graph, self.source_node, self.target_node)
            tm.showinfo("查询结果显示", print_information)
        elif self.len_source != 0 and self.len_target == 0:
            # Single source node.
            _, _, source_information = source_shortest_paths(graph, data, self.source_node)
            tm.showinfo("查询结果显示", source_information)
        elif self.len_source == 0 and self.len_target != 0:
            # Single target node.
            _, _, target_information = target_shortest_paths(graph, data, self.target_node)
            tm.showinfo("查询结果显示", target_information)

    def draw_show(self):
        """Draw the relationship graph for the current inputs.

        Uses the same three-way dispatch as ``query_show``.
        """
        if self.len_source != 0 and self.len_target != 0:
            # Node pair.
            draw_graph_sub(graph, data, self.source_node, self.target_node)
        elif self.len_source != 0 and self.len_target == 0:
            # Single source node.
            gs, edge_labels, _ = source_shortest_paths(graph, data, self.source_node)
            draw_graph_sub_single_node(gs, edge_labels, self.source_node)
        elif self.len_source == 0 and self.len_target != 0:
            # Single target node.
            gs, edge_labels, _ = target_shortest_paths(graph, data, self.target_node)
            draw_graph_sub_single_node(gs, edge_labels, self.target_node)
def get_data():
    """Load the edge list from an Excel workbook and build the directed graph.

    The first ``.xlsx`` file found in the current working directory is used
    (``图关键信息.xlsx`` and Excel ``~$`` lock files are skipped).  Columns 0
    and 1 are read as the two endpoints of an edge, column 2 as the edge
    weight / frequency and column 3 as the amount.

    Returns:
        tuple: ``(gh, relation_labels)`` where ``gh`` is an
        ``nx.MultiDiGraph`` whose edges carry a ``weight`` attribute, and
        ``relation_labels`` maps ``(source, target)`` to
        ``{'频次': ..., '金额': ...}``.

    Raises:
        FileNotFoundError: if no usable ``.xlsx`` file is present.
    """
    candidates = [
        name for name in os.listdir()
        if name.endswith('.xlsx')
        and not name.startswith('~$')
        and name != '图关键信息.xlsx'
    ]
    if not candidates:
        raise FileNotFoundError(
            'no input .xlsx workbook found in the current working directory')
    data_str = pd.read_excel(candidates[0])

    # Split the frame into edge keys, full value rows and the weight column.
    data_keys = data_str.iloc[:, 0:2].values.tolist()
    data_values = data_str.iloc[:, 2:].values.tolist()
    data_weight = data_str.iloc[:, 2].values.tolist()

    # Keyed by (source, target); a repeated pair keeps its last row.
    relation_weight = {tuple(k): v for k, v in zip(data_keys, data_weight)}
    relation_dict = {tuple(k): v for k, v in zip(data_keys, data_values)}
    relation_labels = {
        pair: {'频次': row[0], '金额': row[1]}
        for pair, row in relation_dict.items()
    }

    # Build the graph.
    gh = nx.MultiDiGraph()
    for (start, end), weight in relation_weight.items():
        gh.add_edge(start, end, weight=weight)
    return gh, relation_labels
def draw_graph(g, relation):
    """Draw the whole graph and save it as ``可疑资金往来.png``.

    Args:
        g: the networkx graph to draw.
        relation: mapping whose values set the edge widths (each value is
            divided by 3, so the values must be numeric) and whose length
            sets the edge colour range.
    """
    plt.figure(figsize=(100, 60))
    d = dict(g.degree)
    # Preferred distance between nodes for the spring layout.
    dist = 1.4 / np.sqrt(len(d))
    nx.draw(g,
            pos=nx.spring_layout(g, k=dist, iterations=30),  # spring layout
            node_size=[v * 200 for v in d.values()],
            node_color=range(len(d)),
            # Fixed: the colormap is named "Paired"; the misspelt name
            # raised AttributeError.
            cmap=plt.cm.Paired,
            with_labels=True,
            edge_cmap=plt.cm.Dark2,
            edge_color=range(len(relation.values())),
            alpha=0.9,
            width=[v / 3 for v in relation.values()]
            )
    # A CJK-capable font so the Chinese caption renders.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.text(0.05, 0.95,
             '可疑资金往来(点越大表示与之交易的人越多,线越宽表示交易次数越多)',
             fontsize=60,
             transform=plt.gca().transAxes
             )
    plt.savefig('可疑资金往来.png', dpi=120)
def get_relation(g, source, target):
    """Build the text report for a (source, target) node pair.

    The report states whether ``target`` is reachable from ``source``,
    whether ``source`` is reachable from ``target``, and, when the forward
    path exists, the shortest path length minus one (shown as '直连' when
    that is zero).

    Args:
        g: the networkx graph to query.
        source: start node.
        target: end node.

    Returns:
        str: the multi-line report.
    """
    yes_no = {True: '是', False: '否'}
    forward = yes_no[nx.has_path(g, source, target) is True]
    backward = yes_no[nx.has_path(g, target, source) is True]

    report = [
        '结果打印'.center(30, '='),
        '{}和{}是否有关联:{}'.format(source, target, forward),
    ]
    if forward == '是':
        # Number of intermediate nodes on the shortest path.
        between = nx.shortest_path_length(g, source, target) - 1
        between_text = str(between) if between != 0 else '直连'
        report.append('{}和{}关联最少经过几个人:{}'.format(source, target, between_text))
        report.append('{}和{}是否双向关联:{}'.format(source, target, backward))
    else:
        report.append('{}和{}是否反向关联:{}'.format(source, target, backward))
    report.append('打印完毕'.center(30, '='))
    return '\n'.join(report)
def paths_encode_edges(g, data_dict, source, target):
    """Build the sub-graph made of all shortest paths between two nodes.

    Shortest paths from ``source`` to ``target`` are always included; those
    from ``target`` back to ``source`` are added when such a path exists.
    Intermediate nodes are anonymised with sequential numbers from 101.

    Args:
        g: the full networkx graph.
        data_dict: mapping ``(u, v) -> edge label`` for the full graph.
        source: start node.
        target: end node.

    Returns:
        tuple: ``(gs, edge_labels, node_labels, node_color)``.
    """
    shortest_paths = list(nx.all_shortest_paths(g, source, target))
    try:
        # Reverse direction is optional: absent paths are simply skipped.
        shortest_paths += list(nx.all_shortest_paths(g, target, source))
    except nx.NetworkXNoPath:
        pass

    # Collect the label of every edge that lies on one of the paths.
    edge_labels = {}
    for path in shortest_paths:
        for edge in zip(path[:-1], path[1:]):
            if edge in data_dict:
                edge_labels[edge] = data_dict[edge]

    gs = nx.MultiDiGraph()
    gs.add_edges_from(edge_labels)

    # Endpoints keep their names; every other node gets a running number.
    next_id = 101
    node_labels = {}
    node_color = []
    for node in gs.nodes:
        if node == source or node == target:
            node_labels[node] = node
            node_color.append('#1E90FF')
        else:
            node_labels[node] = str(next_id)
            node_color.append('#FFA500')
            next_id += 1
    return gs, edge_labels, node_labels, node_color
def special_edges_select(edge_labels, simple_node_paths):
    """Pick the edges that deserve special highlighting.

    For every path, its "throughput" is the smallest '频次' among its edges
    and its "quota" is the largest '金额' among its edges.  The paths with
    the highest throughput and the paths with the highest quota are kept.

    Args:
        edge_labels: mapping ``(u, v) -> {'频次': ..., '金额': ...}``.
        simple_node_paths: iterable of paths, each a list of nodes.

    Returns:
        tuple: ``(max_quota, max_adopt, min_freq_key, max_quota_key)`` where
        ``max_quota`` / ``max_adopt`` are de-duplicated lists of edges on the
        highest-quota / highest-throughput paths, ``min_freq_key`` is the
        bottleneck edge of the first path that reached the best throughput,
        and ``max_quota_key`` is the edge carrying the largest amount.  The
        two keys are ``()`` when no path beat the initial value 0.

    Raises:
        ValueError: if a path contributes no labelled edge.
    """
    best_adopt_paths = []   # paths with the highest bottleneck frequency
    best_quota_paths = []   # paths with the highest single-edge amount
    best_freq = 0
    best_amt = 0
    min_freq_key = ()
    max_quota_key = ()

    for path in simple_node_paths:
        # Consecutive node pairs, i.e. the edges of this path.
        hops = list(zip(path[:-1], path[1:]))
        # Direct key lookup instead of scanning every label for each edge.
        freq_by_edge = {}
        amt_by_edge = {}
        for hop in hops:
            if hop in edge_labels:
                freq_by_edge[hop] = edge_labels[hop]['频次']
                amt_by_edge[hop] = edge_labels[hop]['金额']

        # The bottleneck (minimum) frequency is the path's throughput.
        path_freq = min(freq_by_edge.values())
        if path_freq > best_freq:
            # Strictly better than everything so far: restart the list.
            best_freq = path_freq
            best_adopt_paths = [hops]
            min_freq_key = min(freq_by_edge.items(), key=operator.itemgetter(1))[0]
        elif path_freq == best_freq:
            best_adopt_paths.append(hops)

        # The largest single-edge amount is the path's quota.
        path_amt = max(amt_by_edge.values())
        if path_amt > best_amt:
            # Strictly better than everything so far: restart the list.
            best_amt = path_amt
            best_quota_paths = [hops]
            max_quota_key = max(amt_by_edge.items(), key=operator.itemgetter(1))[0]
        elif path_amt == best_amt:
            best_quota_paths.append(hops)

    # Flatten to unique edges.
    max_adopt = list({hop for hops in best_adopt_paths for hop in hops})
    max_quota = list({hop for hops in best_quota_paths for hop in hops})
    return max_quota, max_adopt, min_freq_key, max_quota_key
def simple_paths_encode_edges(g, data_dict, source, target):
    """Build the sub-graph of all simple paths (cutoff 10) between two nodes.

    Args:
        g: the full networkx graph.
        data_dict: mapping ``(u, v) -> {'频次': ..., '金额': ...}``.
        source: start node.
        target: end node.

    Returns:
        tuple: ``(gs, edge_labels, node_labels, node_color,
        edge_width_color, min_freq_key, max_quota_key)``.
        ``edge_width_color`` is a DataFrame with one row per edge and the
        columns ``width`` and ``color``; it is empty when no path exists.
    """
    # Step 1: all simple paths found with cutoff=10.
    simple_paths = list(nx.all_simple_paths(g, source, target, cutoff=10))

    # Step 2: labels of every edge lying on one of those paths
    # (direct key lookup instead of scanning the whole mapping per edge).
    edge_labels = {}
    for path in simple_paths:
        for edge in zip(path[:-1], path[1:]):
            if edge in data_dict:
                edge_labels[edge] = data_dict[edge]

    # Step 3: build the sub-graph.
    gs = nx.MultiDiGraph()
    gs.add_edges_from(edge_labels)

    # Step 4: width and colour per edge.
    max_quota_path, max_adopt_path, min_freq_key, max_quota_key = \
        special_edges_select(edge_labels, simple_paths)
    adopt_edges = set(max_adopt_path)
    quota_edges = set(max_quota_path)
    edge_styles = {}
    for edge in gs.edges:
        edge = edge[:2]  # drop the multigraph key, keep (u, v)
        # Wide line: edge on a highest-frequency path.
        style = {'width': 30 if edge in adopt_edges else 6, 'color': '#BC8F8F'}
        # Green line: edge on a highest-amount path.
        if edge in quota_edges:
            style['color'] = '#00FA9A'
        edge_styles[edge] = style
    edge_width_color = pd.DataFrame(list(edge_styles.values()))

    # Step 5: node labels carry the degree; endpoints get their own colour.
    # Labels and colours stay separate because draw_networkx_labels needs a
    # dict for its ``labels`` argument.
    node_labels = {}
    node_color = []
    for node, degree in dict(gs.degree).items():
        # format() instead of ``+`` so non-string node ids do not raise.
        node_labels[node] = '{} {}'.format(node, degree)
        if node == source or node == target:
            node_color.append('#1E90FF')
        else:
            node_color.append('#FFA500')
    return gs, edge_labels, node_labels, node_color, edge_width_color, min_freq_key, max_quota_key
def draw_graph_sub(g, data_dict, source, target):
    """Draw the relationship sub-graph between two nodes and show it.

    Args:
        g: the full networkx graph.
        data_dict: mapping ``(u, v) -> edge label`` for the full graph.
        source: start node.
        target: end node.
    """
    # Sub-graph of all simple paths found with cutoff=10.
    (sub_graph, edge_labels, node_labels, node_color,
     edge_style, min_freq_key, max_quota_key) = simple_paths_encode_edges(
        g, data_dict, source, target)

    plt.figure(figsize=(20, 10))

    degrees = dict(sub_graph.degree)
    # Preferred distance between nodes for the spring layout.
    spacing = 1.8 / np.sqrt(len(degrees))
    # One shared layout keeps nodes, edges and edge labels aligned.
    layout = nx.spring_layout(sub_graph, k=spacing, iterations=30)

    node_sizes = [deg * 600 for deg in degrees.values()]
    edge_colors = list(edge_style['color'])
    edge_widths = [w / 6 for w in edge_style['width']]
    nx.draw(sub_graph,
            pos=layout,
            node_color=node_color,
            node_size=node_sizes,
            edge_color=edge_colors,
            font_size=9,
            alpha=0.65,
            width=edge_widths,
            )
    # Node labels.
    nx.draw_networkx_labels(sub_graph, layout, labels=node_labels, font_size=9, alpha=0.9)
    # Edge labels.
    nx.draw_networkx_edge_labels(sub_graph, layout, edge_labels=edge_labels, font_size=8)

    plt.rcParams['font.sans-serif'] = ['SimHei']
    legend = (
        '节点:蓝色节点为起始和中止节点,节点后数字和节点大小代表与该节点直接相连的节点数量\n'
        '边:边线宽的代表通过频次最高路径,绿色边线代表金额最大路径\n'
        '最大通过频次路径的最小通过次数对应的节点对%s, 最大金额对应的节点对%s' % (min_freq_key, max_quota_key)
    )
    plt.text(0.05, 0.93, legend, fontsize=10, transform=plt.gca().transAxes)
    plt.show()
def source_shortest_paths(g, data_dict, source):
    """Collect the shortest paths from ``source`` to every reachable node.

    Args:
        g: the full networkx graph.
        data_dict: mapping ``(u, v) -> edge label`` for the full graph.
        source: start node.

    Returns:
        tuple: ``(gs, edge_labels, source_information)`` where ``gs`` is the
        sub-graph built from the path edges, ``edge_labels`` maps those
        edges to their labels, and ``source_information`` is the text
        report (its count is the number of collected edges).
    """
    # One shortest path per reachable node.
    shortest_path = nx.shortest_path(g, source)

    # Direct key lookup instead of scanning the whole mapping per edge.
    edge_labels = {}
    for path in shortest_path.values():
        for edge in zip(path[:-1], path[1:]):
            if edge in data_dict:
                edge_labels[edge] = data_dict[edge]

    gs = nx.MultiDiGraph()
    gs.add_edges_from(edge_labels)

    len_source_node = len(edge_labels)
    source_information = '结果打印'.center(30, '=') \
        + '\n' + '[{}]的可到节点个数:'.format(source) + str(len_source_node) \
        + '\n' + '打印完毕'.center(30, '=')
    return gs, edge_labels, source_information
def target_shortest_paths(g, data_dict, target):
    """Collect the shortest paths from every node that can reach ``target``.

    Args:
        g: the full networkx graph.
        data_dict: mapping ``(u, v) -> edge label`` for the full graph.
        target: end node.

    Returns:
        tuple: ``(gs, edge_labels, target_information)`` where ``gs`` is the
        sub-graph built from the path edges, ``edge_labels`` maps those
        edges to their labels, and ``target_information`` is the text
        report (its count is the number of collected edges).
    """
    # A single target-only query replaces the per-node has_path +
    # shortest_path pair; it returns one path per node that reaches target.
    shortest_path = nx.shortest_path(g, target=target)

    # Direct key lookup instead of scanning the whole mapping per edge.
    edge_labels = {}
    for path in shortest_path.values():
        for edge in zip(path[:-1], path[1:]):
            if edge in data_dict:
                edge_labels[edge] = data_dict[edge]

    gs = nx.MultiDiGraph()
    gs.add_edges_from(edge_labels)

    len_target_node = len(edge_labels)
    target_information = '结果打印'.center(30, '=') \
        + '\n' + '可到达[{}]的节点个数:'.format(target) + str(len_target_node) \
        + '\n' + '打印完毕'.center(30, '=')
    return gs, edge_labels, target_information
def draw_graph_sub_single_node(gs, edge_labels, st_node):
    """Draw the sub-graph around one chosen node and show it.

    Args:
        gs: the sub-graph to draw.
        edge_labels: accepted for symmetry with the callers; not used here.
        st_node: the chosen node, drawn in blue; all others are orange.
    """
    highlight, normal = '#1E90FF', '#FFA500'
    colours = [highlight if node == st_node else normal for node in gs.nodes]

    # Compute the layout once and hand it to the drawing call.
    layout = nx.spring_layout(gs)
    draw_options = dict(
        pos=layout,
        node_color=colours,
        node_size=800,
        edge_color='#BC8F8F',
        font_size=9,
        alpha=0.65,
        with_labels=True,
    )
    nx.draw(gs, **draw_options)

    plt.rcParams['font.sans-serif'] = ['SimHei']
    caption = '指定节点的所有可达节点(蓝色节点为指定节点),线条方向代表关系方向'
    plt.text(0.05, 0.95, caption, fontsize=10, transform=plt.gca().transAxes)
    plt.show()
def relation_main():
    """Entry point: load the graph into module globals and start the GUI."""
    global graph, data
    # Build the graph and the edge-label mapping used by the GUI callbacks.
    graph, data = get_data()
    # Create the root window, wrap it and hand control to the window loop.
    root = tk.Tk()
    TkSet(root).set_window()


if __name__ == '__main__':
    relation_main()
3.4exe封装
windows下python的pip安装和虚拟环境使用 (二.构建轻量级虚拟环境,封装exe文件使用),不再赘述。
声明:本文所载信息不保证准确性和完整性。文中所述内容和意见仅供参考,不构成实际商业建议,如有雷同纯属巧合。