如果本文帮助到了你,欢迎[点赞、收藏、关注]哦~

代码流程

  1. 先检查所有GPU之间是否支持P2P通信;
  2. 然后尝试启用GPU之间的P2P通信;
  3. 再次检查所有GPU之间是否支持P2P通信。

test.cu:

#include <stdio.h>
#include <cuda_runtime.h>

/*
 * Print the P2P capability of every ordered pair of distinct devices.
 *
 * For each (i, j) with 0 <= i, j < deviceCount and i != j, asks
 * cudaDeviceCanAccessPeer whether device i can access device j and prints
 * "Supported" or "Not Supported".
 *
 * A query that itself fails is reported with its CUDA error string rather
 * than being shown as "Not Supported".
 *
 * deviceCount: number of devices to enumerate (device ordinals 0..deviceCount-1).
 */
void checkP2P(int deviceCount) {
    printf("\nChecking initial P2P support:\n");
    for (int i = 0; i < deviceCount; i++) {
        for (int j = 0; j < deviceCount; j++) {
            if (i == j) {
                continue;  // a device is never its own peer
            }
            int canAccessPeer = 0;
            cudaError_t err = cudaDeviceCanAccessPeer(&canAccessPeer, i, j);
            if (err != cudaSuccess) {
                // Distinguish "the query failed" from "the pair is not P2P capable".
                printf("GPU %d -> GPU %d: query failed: %s\n", i, j, cudaGetErrorString(err));
                continue;
            }
            printf("GPU %d -> GPU %d: %s\n", i, j, canAccessPeer ? "Supported" : "Not Supported");
        }
    }
}

/*
 * Try to enable peer access for every ordered pair of distinct devices that
 * cudaDeviceCanAccessPeer reports as capable.
 *
 * For each device i the function makes i the current device and then calls
 * cudaDeviceEnablePeerAccess(j, 0) for every capable peer j, printing the
 * outcome of each attempt. Pairs that are not P2P capable are skipped
 * silently.
 *
 * The current device is left set to the last device that was selected
 * successfully.
 *
 * deviceCount: number of devices to enumerate (device ordinals 0..deviceCount-1).
 */
void enableP2P(int deviceCount) {
    printf("\nEnabling P2P:\n");
    for (int i = 0; i < deviceCount; i++) {
        cudaError_t err = cudaSetDevice(i);
        if (err != cudaSuccess) {
            // cudaDeviceEnablePeerAccess acts on the current device, so if
            // device i could not be selected we must not attempt its peers.
            printf("Failed to select GPU %d: %s\n", i, cudaGetErrorString(err));
            continue;
        }
        for (int j = 0; j < deviceCount; j++) {
            if (i == j) {
                continue;
            }
            int canAccessPeer = 0;
            err = cudaDeviceCanAccessPeer(&canAccessPeer, i, j);
            if (err != cudaSuccess) {
                printf("Failed to query P2P capability between GPU %d and GPU %d: %s\n", i, j, cudaGetErrorString(err));
                continue;
            }
            if (!canAccessPeer) {
                continue;
            }
            err = cudaDeviceEnablePeerAccess(j, 0);
            if (err == cudaSuccess) {
                printf("P2P enabled between GPU %d and GPU %d.\n", i, j);
            } else if (err == cudaErrorPeerAccessAlreadyEnabled) {
                // Already enabled is the desired end state, not a failure.
                // Reset the recorded last error so it is not picked up later.
                (void)cudaGetLastError();
                printf("P2P already enabled between GPU %d and GPU %d.\n", i, j);
            } else {
                printf("Failed to enable P2P between GPU %d and GPU %d: %s\n", i, j, cudaGetErrorString(err));
            }
        }
    }
}

/*
 * Report, for every ordered pair of distinct devices, whether peer access
 * from device i to device j is currently enabled.
 *
 * cudaDeviceCanAccessPeer only reports whether a pair is P2P *capable*; its
 * answer does not change after cudaDeviceEnablePeerAccess. To observe the
 * enabled state this function probes with cudaDeviceEnablePeerAccess:
 *   - cudaErrorPeerAccessAlreadyEnabled -> access was already enabled;
 *   - cudaSuccess                       -> access was NOT enabled (the probe
 *                                          just enabled it, so it is disabled
 *                                          again to leave state unchanged);
 *   - any other error                   -> access is not enabled.
 *
 * The device that was current on entry is made current again before
 * returning (when it could be determined).
 *
 * deviceCount: number of devices to enumerate (device ordinals 0..deviceCount-1).
 */
void recheckP2P(int deviceCount) {
    printf("\nRechecking P2P support after enabling:\n");

    // Remember the caller's current device; the probe below has to switch devices.
    int previousDevice = 0;
    int havePreviousDevice = (cudaGetDevice(&previousDevice) == cudaSuccess);

    for (int i = 0; i < deviceCount; i++) {
        cudaError_t err = cudaSetDevice(i);
        if (err != cudaSuccess) {
            printf("Failed to select GPU %d: %s\n", i, cudaGetErrorString(err));
            continue;
        }
        for (int j = 0; j < deviceCount; j++) {
            if (i == j) {
                continue;
            }
            int enabled = 0;
            err = cudaDeviceEnablePeerAccess(j, 0);
            if (err == cudaErrorPeerAccessAlreadyEnabled) {
                enabled = 1;
            } else if (err == cudaSuccess) {
                // The probe itself turned access on; undo it so this
                // function only observes state.
                (void)cudaDeviceDisablePeerAccess(j);
            }
            // The probe is expected to produce an error in the common case;
            // reset the recorded last error so later calls do not see it.
            (void)cudaGetLastError();
            printf("GPU %d -> GPU %d: %s\n", i, j, enabled ? "Enabled" : "Disabled");
        }
    }

    if (havePreviousDevice) {
        (void)cudaSetDevice(previousDevice);
    }
}

/*
 * Entry point: counts the CUDA devices and, when at least two are present,
 * runs the three P2P steps in order (check, enable, recheck).
 *
 * Returns 1 if the device count cannot be obtained, 0 otherwise (including
 * the case where fewer than two GPUs are present).
 */
int main() {
    // Initialised so the value is well defined even if the query fails.
    int deviceCount = 0;
    cudaError_t err = cudaGetDeviceCount(&deviceCount);
    if (err != cudaSuccess) {
        fprintf(stderr, "Failed to get CUDA device count: %s\n", cudaGetErrorString(err));
        return 1;
    }

    if (deviceCount < 2) {
        printf("At least two GPUs are required for P2P testing.\n");
        return 0;
    }

    printf("Detected %d GPUs.\n", deviceCount);

    // Step 1: Check initial P2P support
    checkP2P(deviceCount);

    // Step 2: Enable P2P
    enableP2P(deviceCount);

    // Step 3: Recheck P2P support
    recheckP2P(deviceCount);

    return 0;
}

编译:

nvcc -o test test.cu

执行:

./test

注意事项

1、官方 CUDA Samples 中提供的 simpleP2P、p2pBandwidthLatencyTest 示例程序也都可以用来检测是否支持P2P。

2、Bridges间不支持P2P通信:deploying-managing-gpu-clusters

【知识】cuda检测GPU是否支持P2P通信及一些注意事项_p2p

3、位于同一 PCIe root complex 下的GPU之间是支持P2P的:ParallelComputing

【知识】cuda检测GPU是否支持P2P通信及一些注意事项_p2p_02


部分不支持P2P的类型

Nvidia Confirms GeForce Cards Lack P2P

【知识】cuda检测GPU是否支持P2P通信及一些注意事项_asp.net_03