# Patch summary (review notes; this text precedes the first "diff --git" header and is not part of any hunk):
#  - JsonOutput::recordWarning appends each warning to m_root[NVB_TITLE][NVB_WARNING] instead of
#    overwriting the previous value.
#  - printGPUsMultinode() becomes the non-static exchangeDeviceInfo(), which fills caller-supplied
#    buffers via MPI_Allgather; Output::recordDevices and JsonOutput::recordDevices format the result.
#  - printGPUs() is inlined into Output::recordDevices.
# NOTE(review): exchangeDeviceInfo() is now defined and declared outside `#ifdef MULTINODE` while
#   still calling MPI_Allgather -- confirm that non-MULTINODE builds still compile.
# NOTE(review): this copy was recovered from a whitespace-collapsed extraction. Line breaks, indentation
#   and blank context lines were re-derived from the hunk line counts, and the std::vector element
#   types (<char>, <int>) were inferred from how the buffers are used; verify against the repository
#   before applying. The recordError hunk-header context is left as found because its element type
#   cannot be determined from this patch.
diff --git a/json_output.cpp b/json_output.cpp
index efe10e1..5bedbf6 100644
--- a/json_output.cpp
+++ b/json_output.cpp
@@ -159,7 +159,7 @@ void JsonOutput::recordError(const std::vector &errorParts) {
 }
 
 void JsonOutput::recordWarning(const std::string &warning) {
-    m_root[NVB_TITLE][NVB_WARNING] = warning;
+    m_root[NVB_TITLE][NVB_WARNING].append(warning);
 }
 
 void JsonOutput::addVersionInfo() {
@@ -174,12 +174,24 @@ void JsonOutput::addCudaAndDriverInfo(int cudaVersion, const std::string &driver
 
 void JsonOutput::recordDevices(int deviceCount) {
     Json::Value deviceList;
-
+#ifdef MULTINODE
+    std::vector<char> hostnameExchange(worldSize * STRING_LENGTH);
+    std::vector<char> deviceNameExchange(worldSize * STRING_LENGTH, 0);
+    std::vector<int> localDeviceIdExchange(worldSize, -1);
+    exchangeDeviceInfo(deviceCount, hostnameExchange, deviceNameExchange, localDeviceIdExchange);
+    for (int i = 0; i < worldSize; i++) {
+        char *deviceName = &deviceNameExchange[i * STRING_LENGTH];
+        std::stringstream buf;
+        buf << "Process " << getPaddedProcessId(i) << " (" << &hostnameExchange[i * STRING_LENGTH] << "): device " << localDeviceIdExchange[i] << ": " << deviceName;
+        deviceList.append(buf.str());
+    }
+#else
     for (int iDev = 0; iDev < deviceCount; iDev++) {
         std::stringstream buf;
         buf << iDev << ": " << getDeviceDisplayInfo(iDev) << ": (" << localHostname << ")";
         deviceList.append(buf.str());
     }
+#endif
     m_root[NVB_TITLE][NVB_DEVICE_LIST] = deviceList;
 }
 
diff --git a/output.cpp b/output.cpp
index f5c79e5..ed11388 100644
--- a/output.cpp
+++ b/output.cpp
@@ -96,17 +96,8 @@ std::string getDeviceDisplayInfo(int deviceOrdinal) {
     return sstream.str();
 }
 
-#ifdef MULTINODE
-// Exchange and print information about all devices in MPI world
-// Through this process each process learns about GPUs of other processes, as well as,
-// determines its own GPU index
-// Each process is allocated a dedicated GPU. It is advisable to initiate NUM_GPU processes per system,
-// with each process autonomously selecting a GPU to utilize. To determine this selection,
-// processes exchange their hostnames, and look for duplicates of own hostname among processes with lower value of worldRank.
-// localRank is equal to number of processes with the same hostname, but lower worldRank.
-static void printGPUsMultinode(int deviceCount) {
+void exchangeDeviceInfo(int deviceCount, std::vector<char> &hostnameExchange, std::vector<char> &deviceNameExchange, std::vector<int> &localDeviceIdExchange) {
     // Exchange hostnames
-    std::vector<char> hostnameExchange(worldSize * STRING_LENGTH);
     MPI_Allgather(localHostname, STRING_LENGTH, MPI_BYTE, &hostnameExchange[0], STRING_LENGTH, MPI_BYTE, MPI_COMM_WORLD);
 
     // Find local rank based on hostnames
@@ -140,36 +131,38 @@ static void printGPUsMultinode(int deviceCount) {
     ASSERT(localDeviceName.size() < STRING_LENGTH);
     localDeviceName.resize(STRING_LENGTH);
 
-    std::vector<char> deviceNameExchange(worldSize * STRING_LENGTH, 0);
     MPI_Allgather(&localDeviceName[0], STRING_LENGTH, MPI_BYTE, &deviceNameExchange[0], STRING_LENGTH, MPI_BYTE, MPI_COMM_WORLD);
 
     // Exchange device ids
-    std::vector<int> localDeviceIdExchange(worldSize, -1);
     MPI_Allgather(&localDevice, 1, MPI_INT, &localDeviceIdExchange[0], 1, MPI_INT, MPI_COMM_WORLD);
+}
 
+void Output::recordDevices(int deviceCount) {
+#ifdef MULTINODE
+    // Exchange and print information about all devices in MPI world
+    // Through this process each process learns about GPUs of other processes, as well as,
+    // determines its own GPU index
+    // Each process is allocated a dedicated GPU. It is advisable to initiate NUM_GPU processes per system,
+    // with each process autonomously selecting a GPU to utilize. To determine this selection,
+    // processes exchange their hostnames, and look for duplicates of own hostname among processes with lower value of worldRank.
+    // localRank is equal to number of processes with the same hostname, but lower worldRank.
+    std::vector<char> hostnameExchange(worldSize * STRING_LENGTH);
+    std::vector<char> deviceNameExchange(worldSize * STRING_LENGTH, 0);
+    std::vector<int> localDeviceIdExchange(worldSize, -1);
+    exchangeDeviceInfo(deviceCount, hostnameExchange, deviceNameExchange, localDeviceIdExchange);
     // Print gathered info
     for (int i = 0; i < worldSize; i++) {
         char *deviceName = &deviceNameExchange[i * STRING_LENGTH];
         OUTPUT << "Process " << getPaddedProcessId(i) << " (" << &hostnameExchange[i * STRING_LENGTH] << "): device " << localDeviceIdExchange[i] << ": " << deviceName << std::endl;
     }
-    OUTPUT << std::endl;
-}
-#endif
+#else
 
-static void printGPUs() {
     OUTPUT << localHostname << std::endl;
     for (int iDev = 0; iDev < deviceCount; iDev++) {
         OUTPUT << "Device " << iDev << ": " << getDeviceDisplayInfo(iDev) << std::endl;
     }
-    OUTPUT << std::endl;
-}
-
-void Output::recordDevices(int deviceCount) {
-#ifdef MULTINODE
-    printGPUsMultinode(deviceCount);
-#else
-    printGPUs();
 #endif
+    OUTPUT << std::endl;
 }
 
 void Output::addTestcase(const std::string &name, const std::string &status, const std::string &msg) {
diff --git a/output.h b/output.h
index 1b54410..7e59384 100644
--- a/output.h
+++ b/output.h
@@ -99,5 +99,6 @@ class Output {
 extern Output *output;
 
 std::string getDeviceDisplayInfo(int deviceOrdinal);
+void exchangeDeviceInfo(int deviceCount, std::vector<char> &hostnameExchange, std::vector<char> &deviceNameExchange, std::vector<int> &localDeviceIdExchange);
 
 #endif // OUTPUT_H_