port from perforce

This commit is contained in:
2026-04-18 22:31:51 +02:00
commit 8d0ab5b7cc
8409 changed files with 3972376 additions and 0 deletions

View File

@@ -0,0 +1,107 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <stdint.h>
#include <vector>
#include "NvPerfInit.h"
#include "NvPerfMetricsConfigBuilder.h"
namespace nv { namespace perf {
/// Result of CreateConfiguration(): the serialized config image, the counter
/// data prefix and the number of passes reported for that config image.
struct CounterConfiguration
{
    std::vector<uint8_t> configImage;        // bytes copied out of the MetricsConfigBuilder
    std::vector<uint8_t> counterDataPrefix;  // bytes copied out of the MetricsConfigBuilder
    // Zero-initialized so a default-constructed instance never carries
    // indeterminate pass counts when CreateConfiguration() fails early.
    size_t numPipelinedPasses = 0;
    size_t numIsolatedPasses = 0;
};
/// Serializes configBuilder into configuration.
/// Fills configImage and counterDataPrefix from the builder, then queries the
/// pass counts for the resulting config image.
/// Returns false as soon as any step fails (or reports a zero size); in that
/// case configuration may be partially written.
inline bool CreateConfiguration(
    MetricsConfigBuilder& configBuilder,
    CounterConfiguration& configuration)
{
    if (!configBuilder.PrepareConfigImage())
    {
        return false;
    }

    // Config image: query the size, allocate, then copy the bytes out.
    const size_t imageBytes = configBuilder.GetConfigImageSize();
    if (imageBytes == 0)
    {
        return false;
    }
    std::vector<uint8_t>& image = configuration.configImage;
    image.resize(imageBytes);
    if (!configBuilder.GetConfigImage(image.size(), &image[0]))
    {
        return false;
    }

    // Counter data prefix: same size/allocate/copy sequence.
    const size_t prefixBytes = configBuilder.GetCounterDataPrefixSize();
    if (prefixBytes == 0)
    {
        return false;
    }
    std::vector<uint8_t>& prefix = configuration.counterDataPrefix;
    prefix.resize(prefixBytes);
    if (!configBuilder.GetCounterDataPrefix(prefix.size(), &prefix[0]))
    {
        return false;
    }

    // Pass counts are read back from the freshly written config image.
    NVPW_Config_GetNumPasses_Params passParams = { NVPW_Config_GetNumPasses_Params_STRUCT_SIZE };
    passParams.pConfig = &image[0];
    const NVPA_Status status = NVPW_Config_GetNumPasses(&passParams);
    if (status)
    {
        return false;
    }
    configuration.numPipelinedPasses = passParams.numPipelinedPasses;
    configuration.numIsolatedPasses = passParams.numIsolatedPasses;
    return true;
}
/// Adds pMetricNames[0..numMetrics-1] to configBuilder and then serializes the
/// builder into configuration. Returns false if either step fails.
inline bool CreateConfiguration(
    MetricsConfigBuilder& configBuilder,
    size_t numMetrics,
    const char* const pMetricNames[],
    CounterConfiguration& configuration)
{
    const bool metricsAdded = configBuilder.AddMetrics(pMetricNames, numMetrics);
    if (!metricsAdded)
    {
        return false;
    }
    const bool configurationCreated = CreateConfiguration(configBuilder, configuration);
    return configurationCreated ? true : false;
}
}}

View File

@@ -0,0 +1,80 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "nvperf_host.h"
#include "nvperf_target.h"
#include <string>
#include <vector>
namespace nv { namespace perf {
/// Returns the number of ranges stored in pCounterDataImage, or 0 when the
/// NVPW_CounterData_GetNumRanges query fails.
inline size_t CounterDataGetNumRanges(const uint8_t* pCounterDataImage)
{
    // The struct size must be the one belonging to this params type; the
    // previous code passed the GetRangeDescriptions size by mistake.
    NVPW_CounterData_GetNumRanges_Params getNumRangeParams = { NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE };
    getNumRangeParams.pCounterDataImage = pCounterDataImage;
    NVPA_Status nvpaStatus = NVPW_CounterData_GetNumRanges(&getNumRangeParams);
    if (nvpaStatus)
    {
        return 0;
    }
    return getNumRangeParams.numRanges;
}
// TODO: this function performs dynamic allocations; either need a non-malloc'ing variant, or move this to an appropriate place
inline std::string CounterDataGetRangeName(const uint8_t* pCounterDataImage, size_t rangeIndex, char delimiter, const char** ppLeafName = nullptr)
{
std::string rangeName;
NVPW_CounterData_GetRangeDescriptions_Params params = { NVPW_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE };
params.pCounterDataImage = pCounterDataImage;
params.rangeIndex = rangeIndex;
NVPA_Status nvpaStatus = NVPW_CounterData_GetRangeDescriptions(&params);
if (nvpaStatus)
{
return "";
}
if (!params.numDescriptions)
{
return "";
}
std::vector<const char*> descriptions;
descriptions.resize(params.numDescriptions);
params.ppDescriptions = descriptions.data();
nvpaStatus = NVPW_CounterData_GetRangeDescriptions(&params);
if (nvpaStatus)
{
return "";
}
rangeName += descriptions[0];
for (size_t descriptionIdx = 1; descriptionIdx < params.numDescriptions; ++descriptionIdx)
{
const char* pDescription = params.ppDescriptions[descriptionIdx];
rangeName += delimiter;
rangeName += pDescription;
}
if (ppLeafName)
{
*ppLeafName = descriptions.back();
}
return rangeName;
}
}}

View File

@@ -0,0 +1,82 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "NvPerfInit.h"
#include "NvPerfDeviceProperties.h"
#include <dxgi.h>
namespace nv { namespace perf {
/// Returns true when the adapter description could be read and its VendorId
/// is 0x10de; returns false otherwise (including when GetDesc fails).
inline bool DxgiIsNvidiaDevice(IDXGIAdapter* pAdapter)
{
    DXGI_ADAPTER_DESC desc = {};
    if (FAILED(pAdapter->GetDesc(&desc)))
    {
        return false;
    }
    return desc.VendorId == 0x10de;
}
/// Looks up the NvPerf device index for a DXGI adapter and SLI index.
/// Returns ~size_t(0) when NVPW_Adapter_GetDeviceIndex fails.
inline size_t D3DGetNvperfDeviceIndex(IDXGIAdapter* pDXGIAdapter, size_t sliIndex = 0)
{
    NVPW_Adapter_GetDeviceIndex_Params indexParams = { NVPW_Adapter_GetDeviceIndex_Params_STRUCT_SIZE };
    indexParams.pAdapter = pDXGIAdapter;
    indexParams.sliIndex = sliIndex;
    const NVPA_Status status = NVPW_Adapter_GetDeviceIndex(&indexParams);
    return status ? ~size_t(0) : indexParams.deviceIndex;
}
/// Resolves the adapter to its NvPerf device index and returns that device's
/// identifiers. The index is forwarded as-is, even if the lookup failed.
inline DeviceIdentifiers D3DGetDeviceIdentifiers(IDXGIAdapter* pDXGIAdapter, size_t sliIndex = 0)
{
    return GetDeviceIdentifiers(D3DGetNvperfDeviceIndex(pDXGIAdapter, sliIndex));
}
/// Returns the clock status of the device behind pDXGIAdapter (SLI index 0).
inline NVPW_Device_ClockStatus D3DGetDeviceClockState(IDXGIAdapter* pDXGIAdapter)
{
    const size_t deviceIndex = D3DGetNvperfDeviceIndex(pDXGIAdapter);
    const NVPW_Device_ClockStatus clockStatus = GetDeviceClockState(deviceIndex);
    return clockStatus;
}
/// Applies clockSetting to the device behind pDXGIAdapter (SLI index 0).
/// Returns the result of the underlying SetDeviceClockState call.
inline bool D3DSetDeviceClockState(IDXGIAdapter* pDXGIAdapter, NVPW_Device_ClockSetting clockSetting)
{
    const size_t deviceIndex = D3DGetNvperfDeviceIndex(pDXGIAdapter);
    const bool applied = SetDeviceClockState(deviceIndex, clockSetting);
    return applied;
}
/// Applies the setting corresponding to clockStatus to the device behind
/// pDXGIAdapter (SLI index 0). Returns the result of SetDeviceClockState.
inline bool D3DSetDeviceClockState(IDXGIAdapter* pDXGIAdapter, NVPW_Device_ClockStatus clockStatus)
{
    const size_t deviceIndex = D3DGetNvperfDeviceIndex(pDXGIAdapter);
    const bool applied = SetDeviceClockState(deviceIndex, clockStatus);
    return applied;
}
}}

View File

@@ -0,0 +1,252 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "NvPerfD3D.h"
#include "nvperf_d3d11_host.h"
#include "nvperf_d3d11_target.h"
#include <D3D11.h>
#include <atlbase.h>
namespace nv { namespace perf {
//
// D3D11 Only Utilities
//
/// Retrieves the DXGI adapter that pDevice was created on.
/// On success *ppDXGIAdapter holds a reference the caller must release, and
/// *pAdapterDesc (when requested) holds the adapter description.
/// Returns false if the device does not expose IDXGIDevice, the adapter
/// cannot be obtained, or the description cannot be read.
inline bool D3D11FindAdapterForDevice(ID3D11Device* pDevice, IDXGIAdapter** ppDXGIAdapter, DXGI_ADAPTER_DESC* pAdapterDesc = nullptr)
{
    CComPtr<IDXGIDevice> pDXGIDevice;
    HRESULT hr = pDevice->QueryInterface(IID_PPV_ARGS(&pDXGIDevice));
    if (FAILED(hr))
    {
        return false;
    }
    hr = pDXGIDevice->GetAdapter(ppDXGIAdapter);
    if (FAILED(hr))
    {
        return false;
    }
    if (pAdapterDesc)
    {
        hr = (*ppDXGIAdapter)->GetDesc(pAdapterDesc);
        if (FAILED(hr))
        {
            // Do not hand back a referenced adapter together with a failure
            // result: a caller using a raw pointer would leak it.
            (*ppDXGIAdapter)->Release();
            *ppDXGIAdapter = nullptr;
            return false;
        }
    }
    return true;
}
/// Returns the Description string of the adapter behind pDevice, or an empty
/// string when the adapter cannot be found.
inline std::wstring D3D11GetDeviceName(ID3D11Device* pDevice)
{
    std::wstring deviceName;
    DXGI_ADAPTER_DESC desc = {};
    CComPtr<IDXGIAdapter> pAdapter;
    if (D3D11FindAdapterForDevice(pDevice, &pAdapter, &desc))
    {
        deviceName = desc.Description;
    }
    return deviceName;
}
/// Returns true when the adapter behind pDevice can be found and
/// DxgiIsNvidiaDevice accepts it.
inline bool D3D11IsNvidiaDevice(ID3D11Device* pDevice)
{
    CComPtr<IDXGIAdapter> pAdapter;
    const bool adapterFound = D3D11FindAdapterForDevice(pDevice, &pAdapter);
    return adapterFound ? DxgiIsNvidiaDevice(pAdapter) : false;
}
/// Context overload: fetches the context's device and forwards to the
/// ID3D11Device overload. Returns false when no device is returned.
inline bool D3D11IsNvidiaDevice(ID3D11DeviceContext* pDeviceContext)
{
    CComPtr<ID3D11Device> pOwningDevice;
    pDeviceContext->GetDevice(&pOwningDevice);
    return !pOwningDevice ? false : D3D11IsNvidiaDevice(pOwningDevice);
}
//
// D3D11 NvPerf Utilities
//
inline bool D3D11LoadDriver()
{
NVPW_D3D11_LoadDriver_Params loadDriverParams = { NVPW_D3D11_LoadDriver_Params_STRUCT_SIZE };
NVPA_Status nvpaStatus = NVPW_D3D11_LoadDriver(&loadDriverParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(10, "NVPW_D3D11_LoadDriver failed\n");
return false;
}
return true;
}
/// Looks up the NvPerf device index for a D3D11 device and SLI index.
/// Returns ~size_t(0) when NVPW_D3D11_Device_GetDeviceIndex fails.
inline size_t D3D11GetNvperfDeviceIndex(ID3D11Device* pDevice, size_t sliIndex = 0)
{
    NVPW_D3D11_Device_GetDeviceIndex_Params indexParams = { NVPW_D3D11_Device_GetDeviceIndex_Params_STRUCT_SIZE };
    indexParams.pDevice = pDevice;
    indexParams.sliIndex = sliIndex;
    const NVPA_Status status = NVPW_D3D11_Device_GetDeviceIndex(&indexParams);
    return status ? ~size_t(0) : indexParams.deviceIndex;
}
/// Returns the identifiers of the device behind pDevice, resolved through its
/// DXGI adapter. Yields a value-initialized DeviceIdentifiers when the adapter
/// cannot be found.
inline DeviceIdentifiers D3D11GetDeviceIdentifiers(ID3D11Device* pDevice, size_t sliIndex = 0)
{
    DeviceIdentifiers identifiers = {};
    CComPtr<IDXGIAdapter> pAdapter;
    if (D3D11FindAdapterForDevice(pDevice, &pAdapter))
    {
        identifiers = D3DGetDeviceIdentifiers(pAdapter, sliIndex);
    }
    return identifiers;
}
/// Returns the clock status of the NvPerf device matching pDevice (SLI index 0).
inline NVPW_Device_ClockStatus D3D11GetDeviceClockState(ID3D11Device* pDevice)
{
    const size_t deviceIndex = D3D11GetNvperfDeviceIndex(pDevice);
    const NVPW_Device_ClockStatus clockStatus = GetDeviceClockState(deviceIndex);
    return clockStatus;
}
/// Applies clockSetting to the NvPerf device matching pDevice (SLI index 0).
inline bool D3D11SetDeviceClockState(ID3D11Device* pDevice, NVPW_Device_ClockSetting clockSetting)
{
    const size_t deviceIndex = D3D11GetNvperfDeviceIndex(pDevice);
    const bool applied = SetDeviceClockState(deviceIndex, clockSetting);
    return applied;
}
/// Applies the setting corresponding to clockStatus to the NvPerf device
/// matching pDevice (SLI index 0).
inline bool D3D11SetDeviceClockState(ID3D11Device* pDevice, NVPW_Device_ClockStatus clockStatus)
{
    const size_t deviceIndex = D3D11GetNvperfDeviceIndex(pDevice);
    const bool applied = SetDeviceClockState(deviceIndex, clockStatus);
    return applied;
}
inline size_t D3D11CalculateMetricsEvaluatorScratchBufferSize(const char* pChipName)
{
NVPW_D3D11_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParams = { NVPW_D3D11_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE };
calculateScratchBufferSizeParams.pChipName = pChipName;
NVPA_Status nvpaStatus = NVPW_D3D11_MetricsEvaluator_CalculateScratchBufferSize(&calculateScratchBufferSizeParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(20, "NVPW_D3D11_MetricsEvaluator_CalculateScratchBufferSize failed\n");
return 0;
}
return calculateScratchBufferSizeParams.scratchBufferSize;
}
/// Initializes a metrics evaluator for pChipName in the caller-provided
/// scratch buffer. Logs an error and returns nullptr on failure.
inline NVPW_MetricsEvaluator* D3D11CreateMetricsEvaluator(uint8_t* pScratchBuffer, size_t scratchBufferSize, const char* pChipName)
{
    NVPW_D3D11_MetricsEvaluator_Initialize_Params initParams = { NVPW_D3D11_MetricsEvaluator_Initialize_Params_STRUCT_SIZE };
    initParams.pScratchBuffer = pScratchBuffer;
    initParams.scratchBufferSize = scratchBufferSize;
    initParams.pChipName = pChipName;
    const NVPA_Status status = NVPW_D3D11_MetricsEvaluator_Initialize(&initParams);
    if (!status)
    {
        return initParams.pMetricsEvaluator;
    }
    NV_PERF_LOG_ERR(20, "NVPW_D3D11_MetricsEvaluator_Initialize failed\n");
    return nullptr;
}
}}
namespace nv { namespace perf { namespace profiler {
/// Creates a raw metrics config for pChipName with the profiler activity kind.
/// Logs an error and returns nullptr on failure.
inline NVPA_RawMetricsConfig* D3D11CreateRawMetricsConfig(const char* pChipName)
{
    NVPW_D3D11_RawMetricsConfig_Create_Params createParams = { NVPW_D3D11_RawMetricsConfig_Create_Params_STRUCT_SIZE };
    createParams.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
    createParams.pChipName = pChipName;
    const NVPA_Status status = NVPW_D3D11_RawMetricsConfig_Create(&createParams);
    if (!status)
    {
        return createParams.pRawMetricsConfig;
    }
    NV_PERF_LOG_ERR(20, "NVPW_D3D11_RawMetricsConfig_Create failed\n");
    return nullptr;
}
/// Returns true when the profiler reports the GPU behind pDevice as supported.
/// Every failure path logs the reason (device index lookup failure, query
/// failure, unsupported architecture and/or unsupported SLI configuration)
/// and returns false.
inline bool D3D11IsGpuSupported(ID3D11Device* pDevice, size_t sliIndex = 0)
{
    const size_t deviceIndex = D3D11GetNvperfDeviceIndex(pDevice, sliIndex);
    if (deviceIndex == ~size_t(0))
    {
        NV_PERF_LOG_ERR(10, "D3D11GetNvperfDeviceIndex failed on %ls\n", D3D11GetDeviceName(pDevice).c_str());
        return false;
    }
    NVPW_D3D11_Profiler_IsGpuSupported_Params params = { NVPW_D3D11_Profiler_IsGpuSupported_Params_STRUCT_SIZE };
    params.deviceIndex = deviceIndex;
    NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_IsGpuSupported(&params);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_D3D11_Profiler_IsGpuSupported failed on %ls\n", D3D11GetDeviceName(pDevice).c_str());
        return false;
    }
    if (!params.isSupported)
    {
        NV_PERF_LOG_ERR(10, "%ls is not supported\n", D3D11GetDeviceName(pDevice).c_str());
        if (params.gpuArchitectureSupportLevel != NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_SUPPORTED)
        {
            const DeviceIdentifiers deviceIdentifiers = D3D11GetDeviceIdentifiers(pDevice, sliIndex);
            // The identifier lookup yields null names when it fails; passing
            // a null pointer to %s is undefined behavior, so substitute a
            // placeholder.
            const char* const pChipName = deviceIdentifiers.pChipName ? deviceIdentifiers.pChipName : "Unknown";
            NV_PERF_LOG_ERR(10, "Unsupported GPU architecture %s\n", pChipName);
        }
        if (params.sliSupportLevel == NVPW_SLI_SUPPORT_LEVEL_UNSUPPORTED)
        {
            NV_PERF_LOG_ERR(10, "Devices in SLI configuration are not supported.\n");
        }
        return false;
    }
    return true;
}
/// Context overload: fetches the context's device and forwards to the
/// ID3D11Device overload. Returns false when no device is returned.
inline bool D3D11IsGpuSupported(ID3D11DeviceContext* pDeviceContext, size_t sliIndex = 0)
{
    CComPtr<ID3D11Device> pOwningDevice;
    pDeviceContext->GetDevice(&pOwningDevice);
    return !pOwningDevice ? false : D3D11IsGpuSupported(pOwningDevice, sliIndex);
}
}}}

View File

@@ -0,0 +1,351 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "NvPerfInit.h"
#include "NvPerfDeviceProperties.h"
#include "NvPerfD3D.h"
#include "nvperf_d3d12_host.h"
#include "nvperf_d3d12_target.h"
#include <D3D12.h>
#include <atlbase.h>
namespace nv { namespace perf {
//
// D3D Only Utilities
//
/// Finds the DXGI adapter whose LUID matches pDevice's adapter LUID by
/// enumerating all adapters of a freshly created DXGI factory.
/// On success *ppDXGIAdapter receives a reference the caller must release and
/// *pAdapterDesc (when requested) receives the adapter description.
/// Returns false if the factory cannot be created or no adapter matches.
inline bool D3D12FindAdapterForDevice(ID3D12Device* pDevice, IDXGIAdapter1** ppDXGIAdapter, DXGI_ADAPTER_DESC1* pAdapterDesc = nullptr)
{
    const LUID deviceLuid = pDevice->GetAdapterLuid();
    CComPtr<IDXGIFactory1> pDXGIFactory;
    HRESULT hr = CreateDXGIFactory1(IID_PPV_ARGS(&pDXGIFactory));
    if (FAILED(hr))
    {
        return false;
    }
    for (UINT adapterIndex = 0; ; ++adapterIndex)
    {
        CComPtr<IDXGIAdapter1> pDXGIAdapter;
        hr = pDXGIFactory->EnumAdapters1(adapterIndex, &pDXGIAdapter);
        if (FAILED(hr))
        {
            break; // the intended loop termination
        }
        DXGI_ADAPTER_DESC1 adapterDesc = {};
        // Reuse the outer hr instead of declaring a second one that shadows it.
        hr = pDXGIAdapter->GetDesc1(&adapterDesc);
        if (FAILED(hr))
        {
            continue; // skip adapters whose description cannot be read
        }
        // Field-wise LUID comparison; avoids depending on memcmp being declared.
        const bool sameLuid = adapterDesc.AdapterLuid.LowPart == deviceLuid.LowPart
                           && adapterDesc.AdapterLuid.HighPart == deviceLuid.HighPart;
        if (sameLuid)
        {
            *ppDXGIAdapter = pDXGIAdapter.Detach(); // ownership moves to the caller
            if (pAdapterDesc)
            {
                *pAdapterDesc = adapterDesc;
            }
            return true;
        }
    }
    return false;
}
/// Returns the Description string of the adapter behind pDevice, or an empty
/// string when the adapter cannot be found.
inline std::wstring D3D12GetDeviceName(ID3D12Device* pDevice)
{
    std::wstring deviceName;
    DXGI_ADAPTER_DESC1 desc = {};
    CComPtr<IDXGIAdapter1> pAdapter;
    if (D3D12FindAdapterForDevice(pDevice, &pAdapter, &desc))
    {
        deviceName = desc.Description;
    }
    return deviceName;
}
/// Returns true when the adapter behind pDevice can be found and
/// DxgiIsNvidiaDevice accepts it.
inline bool D3D12IsNvidiaDevice(ID3D12Device* pDevice)
{
    CComPtr<IDXGIAdapter1> pAdapter;
    const bool adapterFound = D3D12FindAdapterForDevice(pDevice, &pAdapter);
    return adapterFound ? DxgiIsNvidiaDevice(pAdapter) : false;
}
/// Command-queue overload: fetches the queue's device and forwards to the
/// ID3D12Device overload. Returns false when the device cannot be obtained.
inline bool D3D12IsNvidiaDevice(ID3D12CommandQueue* pCommandQueue)
{
    CComPtr<ID3D12Device> pOwningDevice;
    if (FAILED(pCommandQueue->GetDevice(IID_PPV_ARGS(&pOwningDevice))))
    {
        return false;
    }
    return D3D12IsNvidiaDevice(pOwningDevice);
}
//
// D3D12 NvPerf Utilities
//
inline bool D3D12LoadDriver()
{
NVPW_D3D12_LoadDriver_Params loadDriverParams = { NVPW_D3D12_LoadDriver_Params_STRUCT_SIZE };
NVPA_Status nvpaStatus = NVPW_D3D12_LoadDriver(&loadDriverParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(10, "NVPW_D3D12_LoadDriver failed\n");
return false;
}
return true;
}
/// Looks up the NvPerf device index for a D3D12 device and SLI index.
/// Returns ~size_t(0) when NVPW_D3D12_Device_GetDeviceIndex fails.
inline size_t D3D12GetNvperfDeviceIndex(ID3D12Device* pDevice, size_t sliIndex = 0)
{
    NVPW_D3D12_Device_GetDeviceIndex_Params indexParams = { NVPW_D3D12_Device_GetDeviceIndex_Params_STRUCT_SIZE };
    indexParams.pDevice = pDevice;
    indexParams.sliIndex = sliIndex;
    const NVPA_Status status = NVPW_D3D12_Device_GetDeviceIndex(&indexParams);
    return status ? ~size_t(0) : indexParams.deviceIndex;
}
/// Returns the identifiers of the device behind pDevice, resolved through its
/// DXGI adapter. Yields a value-initialized DeviceIdentifiers when the adapter
/// cannot be found.
inline DeviceIdentifiers D3D12GetDeviceIdentifiers(ID3D12Device* pDevice, size_t sliIndex = 0)
{
    DeviceIdentifiers identifiers = {};
    CComPtr<IDXGIAdapter1> pAdapter;
    if (D3D12FindAdapterForDevice(pDevice, &pAdapter))
    {
        identifiers = D3DGetDeviceIdentifiers(pAdapter, sliIndex);
    }
    return identifiers;
}
/// Returns the clock status of the NvPerf device matching pDevice (SLI index 0).
inline NVPW_Device_ClockStatus D3D12GetDeviceClockState(ID3D12Device* pDevice)
{
    const size_t deviceIndex = D3D12GetNvperfDeviceIndex(pDevice);
    const NVPW_Device_ClockStatus clockStatus = GetDeviceClockState(deviceIndex);
    return clockStatus;
}
/// Applies clockSetting to the NvPerf device matching pDevice (SLI index 0).
inline bool D3D12SetDeviceClockState(ID3D12Device* pDevice, NVPW_Device_ClockSetting clockSetting)
{
    const size_t deviceIndex = D3D12GetNvperfDeviceIndex(pDevice);
    const bool applied = SetDeviceClockState(deviceIndex, clockSetting);
    return applied;
}
/// Applies the setting corresponding to clockStatus to the NvPerf device
/// matching pDevice (SLI index 0).
inline bool D3D12SetDeviceClockState(ID3D12Device* pDevice, NVPW_Device_ClockStatus clockStatus)
{
    const size_t deviceIndex = D3D12GetNvperfDeviceIndex(pDevice);
    const bool applied = SetDeviceClockState(deviceIndex, clockStatus);
    return applied;
}
inline size_t D3D12CalculateMetricsEvaluatorScratchBufferSize(const char* pChipName)
{
NVPW_D3D12_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParams = { NVPW_D3D12_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE };
calculateScratchBufferSizeParams.pChipName = pChipName;
NVPA_Status nvpaStatus = NVPW_D3D12_MetricsEvaluator_CalculateScratchBufferSize(&calculateScratchBufferSizeParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(20, "NVPW_D3D12_MetricsEvaluator_CalculateScratchBufferSize failed\n");
return 0;
}
return calculateScratchBufferSizeParams.scratchBufferSize;
}
/// Initializes a metrics evaluator for pChipName in the caller-provided
/// scratch buffer. Logs an error and returns nullptr on failure.
inline NVPW_MetricsEvaluator* D3D12CreateMetricsEvaluator(uint8_t* pScratchBuffer, size_t scratchBufferSize, const char* pChipName)
{
    NVPW_D3D12_MetricsEvaluator_Initialize_Params initParams = { NVPW_D3D12_MetricsEvaluator_Initialize_Params_STRUCT_SIZE };
    initParams.pScratchBuffer = pScratchBuffer;
    initParams.scratchBufferSize = scratchBufferSize;
    initParams.pChipName = pChipName;
    const NVPA_Status status = NVPW_D3D12_MetricsEvaluator_Initialize(&initParams);
    if (!status)
    {
        return initParams.pMetricsEvaluator;
    }
    NV_PERF_LOG_ERR(20, "NVPW_D3D12_MetricsEvaluator_Initialize failed\n");
    return nullptr;
}
}}
namespace nv { namespace perf { namespace profiler {
/// Creates a raw metrics config for pChipName with the profiler activity kind.
/// Logs an error and returns nullptr on failure.
inline NVPA_RawMetricsConfig* D3D12CreateRawMetricsConfig(const char* pChipName)
{
    NVPW_D3D12_RawMetricsConfig_Create_Params createParams = { NVPW_D3D12_RawMetricsConfig_Create_Params_STRUCT_SIZE };
    createParams.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
    createParams.pChipName = pChipName;
    const NVPA_Status status = NVPW_D3D12_RawMetricsConfig_Create(&createParams);
    if (!status)
    {
        return createParams.pRawMetricsConfig;
    }
    NV_PERF_LOG_ERR(20, "NVPW_D3D12_RawMetricsConfig_Create failed\n");
    return nullptr;
}
/// Returns true when the profiler reports the GPU behind pDevice as supported.
/// Every failure path logs the reason (device index lookup failure, query
/// failure, unsupported architecture and/or unsupported SLI configuration)
/// and returns false.
inline bool D3D12IsGpuSupported(ID3D12Device* pDevice, size_t sliIndex = 0)
{
    const size_t deviceIndex = D3D12GetNvperfDeviceIndex(pDevice, sliIndex);
    if (deviceIndex == ~size_t(0))
    {
        NV_PERF_LOG_ERR(10, "D3D12GetNvperfDeviceIndex failed on %ls\n", D3D12GetDeviceName(pDevice).c_str());
        return false;
    }
    NVPW_D3D12_Profiler_IsGpuSupported_Params params = { NVPW_D3D12_Profiler_IsGpuSupported_Params_STRUCT_SIZE };
    params.deviceIndex = deviceIndex;
    NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_IsGpuSupported(&params);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_D3D12_Profiler_IsGpuSupported failed on %ls\n", D3D12GetDeviceName(pDevice).c_str());
        return false;
    }
    if (!params.isSupported)
    {
        NV_PERF_LOG_ERR(10, "%ls is not supported\n", D3D12GetDeviceName(pDevice).c_str());
        if (params.gpuArchitectureSupportLevel != NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_SUPPORTED)
        {
            const DeviceIdentifiers deviceIdentifiers = D3D12GetDeviceIdentifiers(pDevice, sliIndex);
            // The identifier lookup yields null names when it fails; passing
            // a null pointer to %s is undefined behavior, so substitute a
            // placeholder.
            const char* const pChipName = deviceIdentifiers.pChipName ? deviceIdentifiers.pChipName : "Unknown";
            NV_PERF_LOG_ERR(10, "Unsupported GPU architecture %s\n", pChipName);
        }
        if (params.sliSupportLevel == NVPW_SLI_SUPPORT_LEVEL_UNSUPPORTED)
        {
            NV_PERF_LOG_ERR(10, "Devices in SLI configuration are not supported.\n");
        }
        return false;
    }
    return true;
}
/// Command-queue overload: fetches the queue's device and forwards to the
/// ID3D12Device overload. Returns false when the device cannot be obtained.
inline bool D3D12IsGpuSupported(ID3D12CommandQueue* pCommandQueue, size_t sliIndex = 0)
{
    CComPtr<ID3D12Device> pOwningDevice;
    if (FAILED(pCommandQueue->GetDevice(IID_PPV_ARGS(&pOwningDevice))))
    {
        return false;
    }
    return D3D12IsGpuSupported(pOwningDevice, sliIndex);
}
/// Records a profiler push-range named pRangeName on pCommandList.
/// Logs an error and returns false when the NvPerf call fails.
inline bool D3D12PushRange(ID3D12GraphicsCommandList* pCommandList, const char* pRangeName)
{
    NVPW_D3D12_Profiler_CommandList_PushRange_Params params = { NVPW_D3D12_Profiler_CommandList_PushRange_Params_STRUCT_SIZE };
    params.pRangeName = pRangeName;
    params.rangeNameLength = 0; // NOTE(review): presumably 0 lets the library measure the string itself - confirm
    params.pCommandList = pCommandList;
    const NVPA_Status status = NVPW_D3D12_Profiler_CommandList_PushRange(&params);
    if (!status)
    {
        return true;
    }
    NV_PERF_LOG_ERR(50, "NVPW_D3D12_Profiler_CommandList_PushRange failed\n");
    return false;
}
/// Records a profiler pop-range on pCommandList.
/// Logs an error and returns false when the NvPerf call fails.
inline bool D3D12PopRange(ID3D12GraphicsCommandList* pCommandList)
{
    NVPW_D3D12_Profiler_CommandList_PopRange_Params params = { NVPW_D3D12_Profiler_CommandList_PopRange_Params_STRUCT_SIZE };
    params.pCommandList = pCommandList;
    const NVPA_Status status = NVPW_D3D12_Profiler_CommandList_PopRange(&params);
    if (!status)
    {
        return true;
    }
    NV_PERF_LOG_ERR(50, "NVPW_D3D12_Profiler_CommandList_PopRange failed\n");
    return false;
}
/// No-op stand-in with the same signature as D3D12PushRange: ignores its
/// arguments and always returns false.
inline bool D3D12PushRange_Nop(ID3D12GraphicsCommandList* pCommandList, const char* pRangeName)
{
    (void)pCommandList;
    (void)pRangeName;
    return false;
}
/// No-op stand-in with the same signature as D3D12PopRange: ignores its
/// argument and always returns false.
inline bool D3D12PopRange_Nop(ID3D12GraphicsCommandList* pCommandList)
{
    (void)pCommandList;
    return false;
}
//
/// Pair of push/pop range entry points selected at runtime: the real NvPerf
/// implementations on NVIDIA devices, the no-op variants otherwise.
/// A default-constructed instance uses the no-op variants.
struct D3D12RangeCommands
{
    bool isNvidiaDevice;
    bool(*PushRange)(ID3D12GraphicsCommandList* pCommandList, const char* pRangeName);
    bool(*PopRange)(ID3D12GraphicsCommandList* pCommandList);

    D3D12RangeCommands()
        : isNvidiaDevice(false)
        , PushRange(&D3D12PushRange_Nop)
        , PopRange(&D3D12PopRange_Nop)
    {
    }

    /// Selects the function pointers from an already known vendor check result.
    void Initialize(bool isNvidiaDevice_)
    {
        isNvidiaDevice = isNvidiaDevice_;
        PushRange = isNvidiaDevice_ ? &D3D12PushRange : &D3D12PushRange_Nop;
        PopRange = isNvidiaDevice_ ? &D3D12PopRange : &D3D12PopRange_Nop;
    }

    /// Runs the vendor check on a DXGI adapter, then selects the function pointers.
    void Initialize(IDXGIAdapter* pDXGIAdapter)
    {
        Initialize(DxgiIsNvidiaDevice(pDXGIAdapter));
    }

    /// Runs the vendor check on a D3D12 device, then selects the function pointers.
    void Initialize(ID3D12Device* pDevice)
    {
        Initialize(D3D12IsNvidiaDevice(pDevice));
    }
};
}}}

View File

@@ -0,0 +1,125 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "nvperf_host.h"
#include "nvperf_target.h"
#include "NvPerfInit.h"
#include <vector>
namespace nv { namespace perf {
/// Vendor id constant used to recognize NVIDIA adapters.
enum { NVIDIA_VENDOR_ID = 0x10de };
/// Device and chip name strings as reported by NVPW_Device_GetNames.
/// Both pointers are null when the names are unavailable; they default to
/// null so a default-constructed instance is never indeterminate.
struct DeviceIdentifiers
{
    const char* pDeviceName = nullptr;
    const char* pChipName = nullptr;
};
/// Returns the device and chip names of NvPerf device deviceIndex.
/// Logs an error and returns a value-initialized DeviceIdentifiers (null
/// names) when NVPW_Device_GetNames fails.
inline DeviceIdentifiers GetDeviceIdentifiers(size_t deviceIndex)
{
    DeviceIdentifiers identifiers = {};
    NVPW_Device_GetNames_Params namesParams = { NVPW_Device_GetNames_Params_STRUCT_SIZE };
    namesParams.deviceIndex = deviceIndex;
    const NVPA_Status status = NVPW_Device_GetNames(&namesParams);
    if (status)
    {
        NV_PERF_LOG_ERR(10, "NVPW_Device_GetNames failed\n");
        return identifiers;
    }
    identifiers.pDeviceName = namesParams.pDeviceName;
    identifiers.pChipName = namesParams.pChipName;
    return identifiers;
}
/// Returns the clock status of NvPerf device nvperfDeviceIndex.
/// Logs an error and returns NVPW_DEVICE_CLOCK_STATUS_UNKNOWN when the query
/// fails.
inline NVPW_Device_ClockStatus GetDeviceClockState(size_t nvperfDeviceIndex)
{
    NVPW_Device_GetClockStatus_Params getClockStatusParams = { NVPW_Device_GetClockStatus_Params_STRUCT_SIZE };
    getClockStatusParams.deviceIndex = nvperfDeviceIndex;
    NVPA_Status nvpaStatus = NVPW_Device_GetClockStatus(&getClockStatusParams);
    if (nvpaStatus)
    {
        // The name lookup returns a null name when it fails (e.g. for an
        // invalid device index); never pass a null pointer to %s.
        const char* const pDeviceName = GetDeviceIdentifiers(nvperfDeviceIndex).pDeviceName;
        NV_PERF_LOG_ERR(10, "NVPW_Device_GetClockStatus() failed on %s\n", pDeviceName ? pDeviceName : "Unknown");
        return NVPW_DEVICE_CLOCK_STATUS_UNKNOWN;
    }
    return getClockStatusParams.clockStatus;
}
/// Maps a clock setting to the name of its enumerator; values without a case
/// below map to "Unknown NVPW_Device_ClockSetting".
inline const char* ToCString(NVPW_Device_ClockSetting clockSetting)
{
    const char* pText = "Unknown NVPW_Device_ClockSetting";
    switch (clockSetting)
    {
        case NVPW_DEVICE_CLOCK_SETTING_INVALID:
            pText = "NVPW_DEVICE_CLOCK_SETTING_INVALID";
            break;
        case NVPW_DEVICE_CLOCK_SETTING_DEFAULT:
            pText = "NVPW_DEVICE_CLOCK_SETTING_DEFAULT";
            break;
        case NVPW_DEVICE_CLOCK_SETTING_LOCK_TO_RATED_TDP:
            pText = "NVPW_DEVICE_CLOCK_SETTING_LOCK_TO_RATED_TDP";
            break;
        default:
            break;
    }
    return pText;
}
/// Applies clockSetting to NvPerf device nvperfDeviceIndex.
/// Logs an error and returns false when NVPW_Device_SetClockSetting fails.
inline bool SetDeviceClockState(size_t nvperfDeviceIndex, NVPW_Device_ClockSetting clockSetting)
{
    NVPW_Device_SetClockSetting_Params setClockSettingParams = { NVPW_Device_SetClockSetting_Params_STRUCT_SIZE };
    setClockSettingParams.deviceIndex = nvperfDeviceIndex;
    setClockSettingParams.clockSetting = clockSetting;
    NVPA_Status nvpaStatus = NVPW_Device_SetClockSetting(&setClockSettingParams);
    if (nvpaStatus)
    {
        // The name lookup returns a null name when it fails (e.g. for an
        // invalid device index); never pass a null pointer to %s.
        const char* const pDeviceName = GetDeviceIdentifiers(nvperfDeviceIndex).pDeviceName;
        NV_PERF_LOG_ERR(10, "NVPW_Device_SetClockSetting( %s ) failed on %s\n", ToCString(clockSetting), pDeviceName ? pDeviceName : "Unknown");
        return false;
    }
    return true;
}
/// Maps a clock status to the name of its enumerator; values without a case
/// below map to "Unknown NVPW_Device_ClockStatus".
inline const char* ToCString(NVPW_Device_ClockStatus clockStatus)
{
    const char* pText = "Unknown NVPW_Device_ClockStatus";
    switch (clockStatus)
    {
        case NVPW_DEVICE_CLOCK_STATUS_UNKNOWN:
            pText = "NVPW_DEVICE_CLOCK_STATUS_UNKNOWN";
            break;
        case NVPW_DEVICE_CLOCK_STATUS_LOCKED_TO_RATED_TDP:
            pText = "NVPW_DEVICE_CLOCK_STATUS_LOCKED_TO_RATED_TDP";
            break;
        case NVPW_DEVICE_CLOCK_STATUS_BOOST_ENABLED:
            pText = "NVPW_DEVICE_CLOCK_STATUS_BOOST_ENABLED";
            break;
        case NVPW_DEVICE_CLOCK_STATUS_BOOST_DISABLED:
            pText = "NVPW_DEVICE_CLOCK_STATUS_BOOST_DISABLED";
            break;
        case NVPW_DEVICE_CLOCK_STATUS__COUNT:
            pText = "NVPW_DEVICE_CLOCK_STATUS__COUNT";
            break;
        default:
            break;
    }
    return pText;
}
/// Restores a previously observed clock status by translating it to the
/// matching clock setting and applying that setting.
/// Statuses without a translation are logged and rejected with false.
inline bool SetDeviceClockState(size_t nvperfDeviceIndex, NVPW_Device_ClockStatus clockStatus)
{
    switch (clockStatus)
    {
        case NVPW_DEVICE_CLOCK_STATUS_LOCKED_TO_RATED_TDP:
            return SetDeviceClockState(nvperfDeviceIndex, NVPW_DEVICE_CLOCK_SETTING_LOCK_TO_RATED_TDP);

        case NVPW_DEVICE_CLOCK_STATUS_UNKNOWN:
        case NVPW_DEVICE_CLOCK_STATUS_BOOST_ENABLED:
        case NVPW_DEVICE_CLOCK_STATUS_BOOST_DISABLED:
            // default driver setting (normally unlocked and not boosted, but could be unlocked boosted, or locked to rated TDP)
            return SetDeviceClockState(nvperfDeviceIndex, NVPW_DEVICE_CLOCK_SETTING_DEFAULT);

        default:
            break;
    }
    NV_PERF_LOG_ERR(10, "Invalid clockStatus: %s\n", ToCString(clockStatus));
    return false;
}
}}

View File

@@ -0,0 +1,432 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <stdio.h>
#include <stdarg.h>
#include <string>
#include <cassert>
#include "nvperf_host.h"
#include "nvperf_target.h"
#if defined(_WIN32)
#include <Windows.h>
#else
#include <sys/time.h>
#endif
namespace nv { namespace perf {
/// Writes the time as "HH:MM:SS:mmm" into pBuf (at most size bytes including
/// the terminator) and returns snprintf's result unchanged.
inline int FormatTimeCommon(char* pBuf, size_t size, uint32_t hour, uint32_t minute, uint32_t second, uint32_t milliSecond)
{
    return snprintf(pBuf, size, "%02u:%02u:%02u:%03u", hour, minute, second, milliSecond);
}
/// Writes the date as "YYYY-Mon-DD" into pBuf (at most size bytes including
/// the terminator) and returns snprintf's result unchanged.
/// month is 1-based; anything outside 1..12 is printed as "???".
inline int FormatDateCommon(char* pBuf, size_t size, uint32_t year, uint32_t month, uint32_t day)
{
    static const char* s_monthNames[12] = {
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
    };
    const bool monthInRange = (month >= 1) && (month <= 12);
    const char* const pMonthText = monthInRange ? s_monthNames[month - 1] : "???";
    return snprintf(pBuf, size, "%4u-%s-%02u", year, pMonthText, day);
}
#if defined(_WIN32)
typedef struct _FILETIME LogTimeStamp;
inline void UserLogImplPlatform(const char* pMessage)
{
OutputDebugStringA(pMessage);
}
/// Win32: stores the current system time (as a FILETIME) in *pTimestamp.
inline void GetTimeStamp(LogTimeStamp* pTimestamp)
{
    ::GetSystemTimeAsFileTime(pTimestamp);
}
/// Win32: converts *pTimestamp to local time and formats its time-of-day part
/// through FormatTimeCommon. Returns FormatTimeCommon's result.
inline size_t FormatTime(LogTimeStamp* pTimestamp, char* pBuf, size_t size)
{
    SYSTEMTIME utcTime;
    SYSTEMTIME localTime;
    FileTimeToSystemTime(pTimestamp, &utcTime);
    SystemTimeToTzSpecificLocalTime(nullptr, &utcTime, &localTime);

    const uint32_t hour = static_cast<uint32_t>(localTime.wHour);
    const uint32_t minute = static_cast<uint32_t>(localTime.wMinute);
    const uint32_t second = static_cast<uint32_t>(localTime.wSecond);
    const uint32_t milliSecond = static_cast<uint32_t>(localTime.wMilliseconds);
    return FormatTimeCommon(pBuf, size, hour, minute, second, milliSecond);
}
/// Win32: converts *pTimestamp to local time and formats its calendar date
/// through FormatDateCommon. Returns FormatDateCommon's result.
inline size_t FormatDate(LogTimeStamp* pTimestamp, char* pBuf, size_t size)
{
    SYSTEMTIME utcTime;
    SYSTEMTIME localTime;
    FileTimeToSystemTime(pTimestamp, &utcTime);
    SystemTimeToTzSpecificLocalTime(nullptr, &utcTime, &localTime);

    const uint32_t year = static_cast<uint32_t>(localTime.wYear);
    const uint32_t month = static_cast<uint32_t>(localTime.wMonth);
    const uint32_t day = static_cast<uint32_t>(localTime.wDay);
    return FormatDateCommon(pBuf, size, year, month, day);
}
#else // !defined(_WIN32)
typedef struct timeval LogTimeStamp;
/// Non-Win32 platform sink: intentionally does nothing.
inline void UserLogImplPlatform(const char* pMessage)
{
    // Cast to void (not void*) to mark the parameter as deliberately unused
    // without producing a discarded pointer-cast expression.
    (void)pMessage;
}
/// POSIX: stores the current time of day (struct timeval) in *pTimestamp.
inline void GetTimeStamp(LogTimeStamp* pTimestamp)
{
    gettimeofday(pTimestamp, nullptr);
}
/// POSIX: converts *pTimestamp to local time and formats its time-of-day part
/// through FormatTimeCommon. Returns FormatTimeCommon's result, or 0 (with an
/// empty string in pBuf when size > 0) if the local-time conversion fails.
inline size_t FormatTime(LogTimeStamp* pTimestamp, char* pBuf, size_t size)
{
    // localtime_r writes into caller-owned storage; localtime() returns a
    // shared static buffer that concurrent loggers would overwrite, and may
    // return null.
    struct tm localTime = {};
    const time_t seconds = pTimestamp->tv_sec;
    if (!localtime_r(&seconds, &localTime))
    {
        if (pBuf && size)
        {
            pBuf[0] = '\0';
        }
        return 0;
    }
    const int milliseconds = pTimestamp->tv_usec / 1000;
    return FormatTimeCommon(pBuf, size, (uint32_t)localTime.tm_hour, (uint32_t)localTime.tm_min, (uint32_t)localTime.tm_sec, (uint32_t)milliseconds);
}
// Converts *pTimestamp to local time and writes it as "YYYY-Mon-DD" into pBuf.
// Returns the result of FormatDateCommon converted to size_t.
inline size_t FormatDate(LogTimeStamp* pTimestamp, char* pBuf, size_t size)
{
    // localtime_r writes into a caller-provided buffer, so concurrent log calls
    // do not race on the static storage used by localtime().
    const time_t seconds = pTimestamp->tv_sec;
    struct tm localTime;
    if (!localtime_r(&seconds, &localTime))
    {
        // Conversion failed: fall back to an all-zero broken-down time rather than
        // reading an indeterminate buffer.
        const struct tm zeroTime = {};
        localTime = zeroTime;
    }
    // struct tm counts years from 1900 and months from 0.
    return FormatDateCommon(pBuf, size, (uint32_t)localTime.tm_year + 1900, (uint32_t)localTime.tm_mon + 1, (uint32_t)localTime.tm_mday);
}
#endif // defined(_WIN32)
}}
// Logging macros. Each forwards to ::nv::perf::UserLog with a fixed severity, the
// caller-supplied level, the calling function's name and a printf-style format + arguments.
// A client may pre-define any of them before including this header to redirect logging.
// The severity is fully qualified so the macros also expand correctly outside namespace nv::perf.
#ifndef NV_PERF_LOG_INF
#define NV_PERF_LOG_INF(level_, ...) ::nv::perf::UserLog(::nv::perf::LogSeverity::Inf, level_, __FUNCTION__, __VA_ARGS__)
#endif
#ifndef NV_PERF_LOG_WRN
#define NV_PERF_LOG_WRN(level_, ...) ::nv::perf::UserLog(::nv::perf::LogSeverity::Wrn, level_, __FUNCTION__, __VA_ARGS__)
#endif
#ifndef NV_PERF_LOG_ERR
#define NV_PERF_LOG_ERR(level_, ...) ::nv::perf::UserLog(::nv::perf::LogSeverity::Err, level_, __FUNCTION__, __VA_ARGS__)
#endif
namespace nv { namespace perf {
// Severity of a log message, in increasing order. COUNT is the number of
// severities and is not a valid severity itself.
enum class LogSeverity
{
    Inf,
    Wrn,
    Err,
    COUNT
};

// Process-wide logging configuration.
// Defaults can be overridden at construction time through environment variables:
//   NV_PERF_LOG_ENABLE_PLATFORM     (Windows only) integer; non-zero enables the platform sink
//   NV_PERF_LOG_ENABLE_STDERR       integer; non-zero enables the stderr sink
//   NV_PERF_LOG_ENABLE_FILE         path of a log file to open
//   NV_PERF_LOG_FILE_FLUSH_SEVERITY integer LogSeverity value; out-of-range values are ignored
struct LogSettings
{
    // Per-severity volume level; UserLog drops messages whose level exceeds it.
    uint32_t volumeLevels[(unsigned)LogSeverity::COUNT] = { 50, 50, 50 };
#if defined(_WIN32)
    bool writePlatform = true;
#else
    bool writePlatform = false;
#endif
    bool writeStderr = true;
    FILE* writeFileFD = nullptr;   // owned; closed by the destructor
    bool appendToFile = true;      // open log files with "a" (true) or "w" (false)
    LogSeverity flushFileSeverity = LogSeverity::Err;  // flush the file for messages at or above this severity
    bool logDate = true;
    bool logTime = true;

    LogSettings()
    {
#if defined(_WIN32)
        {
            const char* const pEnvValue = getenv("NV_PERF_LOG_ENABLE_PLATFORM");
            if (pEnvValue)
            {
                writePlatform = !!strtol(pEnvValue, nullptr, 0);
            }
        }
#endif
        {
            const char* const pEnvValue = getenv("NV_PERF_LOG_ENABLE_STDERR");
            if (pEnvValue)
            {
                writeStderr = !!strtol(pEnvValue, nullptr, 0);
            }
        }
        {
            const char* const pEnvValue = getenv("NV_PERF_LOG_ENABLE_FILE");
            if (pEnvValue)
            {
                // The path comes from the environment, so failing to open it is a
                // runtime condition, not a programming error: leave file logging
                // disabled instead of asserting.
                FILE* fp = fopen(pEnvValue, appendToFile ? "a" : "w");
                if (!fp && writeStderr)
                {
                    fprintf(stderr, "NVPERF|WRN|failed to open log file '%s'\n", pEnvValue);
                }
                writeFileFD = fp;
            }
        }
        {
            const char* const pEnvValue = getenv("NV_PERF_LOG_FILE_FLUSH_SEVERITY");
            if (pEnvValue)
            {
                // Keep the full width of strtol's result so huge values cannot wrap into range.
                const long severity = strtol(pEnvValue, nullptr, 0);
                if (0 <= severity && severity < (long)LogSeverity::COUNT)
                {
                    flushFileSeverity = (LogSeverity)severity;
                }
            }
        }
    }

    // The struct owns writeFileFD; copying it would close the same FILE twice.
    LogSettings(const LogSettings&) = delete;
    LogSettings& operator=(const LogSettings&) = delete;

    ~LogSettings()
    {
        if (writeFileFD)
        {
            fclose(writeFileFD);
        }
    }
};
// Returns the single LogSettings instance; it is constructed on the first call.
inline LogSettings* GetLogSettingsStorage_()
{
    static LogSettings s_logSettings;
    return &s_logSettings;
}
// Returns the volume level configured for `severity`, or 0 when `severity` is not a valid severity.
inline uint32_t GetLogVolumeLevel(LogSeverity severity)
{
    const LogSettings& settings = *GetLogSettingsStorage_();
    const uint32_t severityIndex = static_cast<uint32_t>(severity);
    const bool isValidSeverity = severityIndex < static_cast<uint32_t>(LogSeverity::COUNT);
    return isValidSeverity ? settings.volumeLevels[severityIndex] : 0;
}
// Sets the volume level for `severity`. Higher values produce more log output; 0 <= volumeLevel <= 100.
// It acts as a threshold: UserLog discards any message whose level is above the configured value.
// An invalid `severity` is ignored.
inline void SetLogVolumeLevel(LogSeverity severity, uint32_t volumeLevel)
{
    LogSettings& settings = *GetLogSettingsStorage_();
    const uint32_t severityIndex = static_cast<uint32_t>(severity);
    if (severityIndex >= static_cast<uint32_t>(LogSeverity::COUNT))
    {
        return;
    }
    settings.volumeLevels[severityIndex] = volumeLevel;
}
// Selects whether log files opened afterwards are appended to (true) or truncated (false).
inline void SetLogAppendToFile(bool enable)
{
    GetLogSettingsStorage_()->appendToFile = enable;
}
// Sets the minimum severity at which the log file is flushed after each message.
// A value outside the valid severity range is ignored.
inline void SetLogFlushSeverity(LogSeverity severity)
{
    LogSettings& settings = *GetLogSettingsStorage_();
    const int severityValue = (int)severity;
    const bool outOfRange = (severityValue < 0) || (severityValue >= (int)LogSeverity::COUNT);
    if (outOfRange)
    {
        return;
    }
    settings.flushFileSeverity = severity;
}
// Enables or disables the date field in each log line.
inline void SetLogDate(bool enable)
{
    GetLogSettingsStorage_()->logDate = enable;
}
// Enables or disables the time field in each log line.
inline void SetLogTime(bool enable)
{
    GetLogSettingsStorage_()->logTime = enable;
}
// Enables or disables the platform sink (UserLogImplPlatform). Always returns true.
inline bool UserLogEnablePlatform(bool enable)
{
    GetLogSettingsStorage_()->writePlatform = enable;
    return true;
}
// Enables or disables the stderr sink. Always returns true.
inline bool UserLogEnableStderr(bool enable)
{
    GetLogSettingsStorage_()->writeStderr = enable;
    return true;
}
// Opens `filename` as the log file, in append or truncate mode according to the
// appendToFile setting. Returns false if the file cannot be opened, in which
// case the current log file (if any) stays in use.
// A null `filename` changes nothing and returns true.
inline bool UserLogEnableFile(const char* filename)
{
    LogSettings* pSettings = GetLogSettingsStorage_();
    if (!filename)
    {
        return true;
    }
    FILE* fp = fopen(filename, pSettings->appendToFile ? "a" : "w");
    if (!fp)
    {
        return false;
    }
    // Close the previously opened log file before replacing it; otherwise its
    // handle is leaked and its buffered output may never be written.
    if (pSettings->writeFileFD)
    {
        fclose(pSettings->writeFileFD);
    }
    pSettings->writeFileFD = fp;
    return true;
}
// stderr sink: writes the message to stderr verbatim (no newline is appended).
inline void UserLogImplStderr(const char* pMessage)
{
fprintf(stderr, "%s", pMessage);
}
// File sink: writes the message to `fd` verbatim (no newline is appended, no flush).
inline void UserLogImplFile(const char* pMessage, FILE* fd)
{
fprintf(fd, "%s", pMessage);
}
// Flushes buffered output of the log file `fd`.
inline void UserLogImplFileFlush(FILE* fd)
{
fflush(fd);
}
// Formats a log message and writes it to every enabled sink (platform, stderr, file).
//   severity      - message severity; selects the "NVPERF|INF|" / "|WRN|" / "|ERR|" prefix
//   level         - message level; the message is dropped when it exceeds the volume level
//                   configured for `severity`
//   pFunctionName - name of the calling function, printed before the message
//   pFormat, ...  - printf-style format string and arguments
// Line layout: <prefix>[<date>|][<time>|]<function> || <message>
// The whole line is assembled first and handed to each sink in one call, so lines
// produced by concurrent callers are less likely to be interleaved piecewise.
inline void UserLog(LogSeverity severity, uint32_t level, const char* pFunctionName, const char* pFormat, ...)
{
    const uint32_t volumeLevel = GetLogVolumeLevel(severity);
    if (volumeLevel < level)
    {
        return;
    }
    LogSettings& settings = *GetLogSettingsStorage_();

    // First pass: measure the formatted message.
    va_list args;
    va_start(args, pFormat);
    const int length = vsnprintf(nullptr, 0, pFormat, args);
    va_end(args);

    std::string message;
    if (length >= 0)
    {
        // Second pass: format into a buffer with room for the terminating NUL.
        message.assign(static_cast<size_t>(length) + 1, '\0');
        va_start(args, pFormat);
        vsnprintf(&message[0], message.size(), pFormat, args);
        va_end(args);
        message.back() = '\0'; // ensure NULL terminated
    }
    else
    {
        // vsnprintf reported an error; log the raw format string rather than losing the message.
        message = pFormat;
    }

    const char* const pPrefix = [&]() {
        switch (severity)
        {
            case (LogSeverity::Inf): return "NVPERF|INF|";
            case (LogSeverity::Wrn): return "NVPERF|WRN|";
            case (LogSeverity::Err): return "NVPERF|ERR|";
            default:                 return "NVPERF|???|";
        }
    }();

    // Read the flags once so the formatted fields and the assembled line agree.
    const bool logDate = settings.logDate;
    const bool logTime = settings.logTime;
    char datebuf[16] = {};
    char timebuf[16] = {};
    if (logDate || logTime)
    {
        LogTimeStamp time;
        GetTimeStamp(&time);
        if (logDate)
        {
            FormatDate(&time, datebuf, sizeof(datebuf));
        }
        if (logTime)
        {
            FormatTime(&time, timebuf, sizeof(timebuf));
        }
    }

    std::string line(pPrefix);
    if (logDate)
    {
        line += datebuf;
        line += '|';
    }
    if (logTime)
    {
        line += timebuf;
        line += '|';
    }
    line += pFunctionName ? pFunctionName : "(null)";
    line += " || ";
    line += message.c_str(); // c_str(): stop at the first NUL, as a "%s" print of the buffer would

    if (settings.writePlatform)
    {
        UserLogImplPlatform(line.c_str());
    }
    if (settings.writeStderr)
    {
        UserLogImplStderr(line.c_str());
    }
    if (settings.writeFileFD)
    {
        UserLogImplFile(line.c_str(), settings.writeFileFD);
        if (severity >= settings.flushFileSeverity)
        {
            UserLogImplFileFlush(settings.writeFileFD);
        }
    }
}
// Initializes the NvPerf host and target layers, in that order.
// Returns false (after logging an error) as soon as either call reports a non-zero status.
inline bool InitializeNvPerf()
{
    NVPW_InitializeHost_Params initializeHostParams = { NVPW_InitializeHost_Params_STRUCT_SIZE };
    if (NVPW_InitializeHost(&initializeHostParams))
    {
        // Message previously misspelled the API name ("NVPW_InitalizeHost").
        NV_PERF_LOG_ERR(10, "NVPW_InitializeHost failed\n");
        return false;
    }

    NVPW_InitializeTarget_Params initializeTargetParams = { NVPW_InitializeTarget_Params_STRUCT_SIZE };
    if (NVPW_InitializeTarget(&initializeTargetParams))
    {
        NV_PERF_LOG_ERR(10, "NVPW_InitializeTarget failed\n");
        return false;
    }
    return true;
}
}}

View File

@@ -0,0 +1,299 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <utility>
#include "NvPerfMetricsEvaluator.h"
namespace nv { namespace perf {
// Collects metrics and produces the ConfigImage and CounterDataPrefix for them.
// Owns an NVPA_RawMetricsConfig and an NVPA_CounterDataBuilder; borrows the NVPW_MetricsEvaluator.
// Movable, not copyable (the copy operations are suppressed by the user-declared move operations).
class MetricsConfigBuilder
{
protected:
    NVPW_MetricsEvaluator* m_pMetricsEvaluator;      // not owned
    NVPA_RawMetricsConfig* m_pRawMetricsConfig;      // owned
    NVPA_CounterDataBuilder* m_pCounterDataBuilder;  // owned
    bool m_configuring;                              // set while a pass group has been begun on m_pRawMetricsConfig

protected:
    // Releases this object's resources and takes over those of rhs, leaving rhs empty.
    void MoveAssign(MetricsConfigBuilder&& rhs)
    {
        if (this == &rhs)
        {
            // Self-move: Reset() below would destroy the very handles being moved.
            return;
        }
        Reset();
        m_pMetricsEvaluator = rhs.m_pMetricsEvaluator;
        m_pRawMetricsConfig = rhs.m_pRawMetricsConfig;
        m_pCounterDataBuilder = rhs.m_pCounterDataBuilder;
        m_configuring = rhs.m_configuring;
        rhs.m_pMetricsEvaluator = nullptr;
        rhs.m_pRawMetricsConfig = nullptr;
        rhs.m_pCounterDataBuilder = nullptr;
        rhs.m_configuring = false;
    }

public:
    ~MetricsConfigBuilder()
    {
        Reset();
    }

    MetricsConfigBuilder() : m_pMetricsEvaluator(nullptr), m_pRawMetricsConfig(nullptr), m_pCounterDataBuilder(nullptr), m_configuring(false)
    {
    }

    MetricsConfigBuilder(MetricsConfigBuilder&& rhs) : m_pMetricsEvaluator(nullptr), m_pRawMetricsConfig(nullptr), m_pCounterDataBuilder(nullptr), m_configuring(false)
    {
        MoveAssign(std::move(rhs));
    }

    MetricsConfigBuilder& operator=(MetricsConfigBuilder&& rhs)
    {
        MoveAssign(std::move(rhs));
        return *this;
    }

    // Destroys the owned objects (if any) and returns the builder to its default-constructed state.
    void Reset()
    {
        if (m_pRawMetricsConfig)
        {
            NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigParams = { NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE };
            rawMetricsConfigParams.pRawMetricsConfig = m_pRawMetricsConfig;
            NVPW_RawMetricsConfig_Destroy(&rawMetricsConfigParams);
        }
        if (m_pCounterDataBuilder)
        {
            NVPW_CounterDataBuilder_Destroy_Params counterDataBuilderParams = { NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE };
            counterDataBuilderParams.pCounterDataBuilder = m_pCounterDataBuilder;
            NVPW_CounterDataBuilder_Destroy(&counterDataBuilderParams);
        }

        m_pMetricsEvaluator = nullptr;
        m_pRawMetricsConfig = nullptr;
        m_pCounterDataBuilder = nullptr;
        m_configuring = false;
    }

    // Takes ownership of pRawMetricsConfig, borrows pMetricsEvaluator, creates a
    // CounterDataBuilder for chipName and begins the first pass group.
    // Returns false if any NVPW call fails.
    bool Initialize(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPA_RawMetricsConfig* pRawMetricsConfig, const char* chipName)
    {
        NVPA_Status nvpaStatus;

        Reset(); // destroy any existing objects

        m_pMetricsEvaluator = pMetricsEvaluator;
        m_pRawMetricsConfig = pRawMetricsConfig;

        NVPW_CounterDataBuilder_Create_Params counterDataBuilderParams = { NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE };
        counterDataBuilderParams.pChipName = chipName;
        nvpaStatus = NVPW_CounterDataBuilder_Create(&counterDataBuilderParams);
        if (nvpaStatus)
        {
            return false;
        }
        m_pCounterDataBuilder = counterDataBuilderParams.pCounterDataBuilder;

        NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = { NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE };
        beginPassGroupParams.pRawMetricsConfig = m_pRawMetricsConfig;
        nvpaStatus = NVPW_RawMetricsConfig_BeginPassGroup(&beginPassGroupParams);
        if (nvpaStatus)
        {
            return false;
        }
        m_configuring = true;

        return true;
    }

    // Resolves the raw dependencies of the given eval requests and adds each of them
    // to both the CounterDataBuilder and the RawMetricsConfig.
    // Returns false (after logging) on the first failing NVPW call.
    bool AddMetrics(const NVPW_MetricEvalRequest* pMetricEvalRequests, size_t numMetricEvalRequests)
    {
        NVPA_Status nvpaStatus;

        NVPW_MetricsEvaluator_GetMetricRawDependencies_Params getMetricRawDependenciesParams = { NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE };
        getMetricRawDependenciesParams.pMetricsEvaluator = m_pMetricsEvaluator;
        getMetricRawDependenciesParams.pMetricEvalRequests = pMetricEvalRequests;
        getMetricRawDependenciesParams.numMetricEvalRequests = numMetricEvalRequests;
        getMetricRawDependenciesParams.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
        getMetricRawDependenciesParams.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest);
        // First call: no output buffer supplied; numRawDependencies is then used to size the buffer.
        nvpaStatus = NVPW_MetricsEvaluator_GetMetricRawDependencies(&getMetricRawDependenciesParams);
        if (nvpaStatus)
        {
            NV_PERF_LOG_ERR(50, "NVPW_MetricsEvaluator_GetMetricRawDependencies failed\n");
            return false;
        }

        // Second call: fetch the dependency names into the sized buffer.
        std::vector<const char*> rawDependencies(getMetricRawDependenciesParams.numRawDependencies);
        getMetricRawDependenciesParams.ppRawDependencies = rawDependencies.data();
        nvpaStatus = NVPW_MetricsEvaluator_GetMetricRawDependencies(&getMetricRawDependenciesParams);
        if (nvpaStatus)
        {
            NV_PERF_LOG_ERR(50, "NVPW_MetricsEvaluator_GetMetricRawDependencies failed\n");
            return false;
        }

        for (const char* const pRawMetricName : rawDependencies)
        {
            NVPA_RawMetricRequest rawMetricRequest = { NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE };
            rawMetricRequest.pMetricName = pRawMetricName;
            rawMetricRequest.isolated = true;
            rawMetricRequest.keepInstances = true;

            NVPW_CounterDataBuilder_AddMetrics_Params addMetricParams = { NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE };
            addMetricParams.numMetricRequests = 1;
            addMetricParams.pCounterDataBuilder = m_pCounterDataBuilder;
            addMetricParams.pRawMetricRequests = &rawMetricRequest;
            nvpaStatus = NVPW_CounterDataBuilder_AddMetrics(&addMetricParams);
            if (nvpaStatus)
            {
                NV_PERF_LOG_ERR(50, "NVPW_CounterDataBuilder_AddMetrics failed\n");
                return false;
            }

            NVPW_RawMetricsConfig_AddMetrics_Params configAddMetricParams = { NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE };
            configAddMetricParams.numMetricRequests = 1;
            configAddMetricParams.pRawMetricRequests = &rawMetricRequest;
            configAddMetricParams.pRawMetricsConfig = m_pRawMetricsConfig;
            nvpaStatus = NVPW_RawMetricsConfig_AddMetrics(&configAddMetricParams);
            if (nvpaStatus)
            {
                NV_PERF_LOG_ERR(50, "NVPW_RawMetricsConfig_AddMetrics failed\n");
                return false;
            }
        }

        return true;
    }

    // Converts pMetricName to an eval request and adds it. Returns false (after logging) on failure.
    bool AddMetric(const char* pMetricName)
    {
        NVPW_MetricEvalRequest metricEvalRequest{};
        bool success = ToMetricEvalRequest(m_pMetricsEvaluator, pMetricName, metricEvalRequest);
        if (!success)
        {
            NV_PERF_LOG_ERR(50, "ToMetricEvalRequest failed for metric: %s\n", pMetricName);
            return false;
        }
        success = AddMetrics(&metricEvalRequest, 1);
        if (!success)
        {
            NV_PERF_LOG_ERR(50, "AddMetrics failed for metric: %s\n", pMetricName);
            return false;
        }
        return true;
    }

    // Adds every named metric. All names are attempted even after a failure;
    // returns true only if every one of them was added.
    bool AddMetrics(const char* const pMetricNames[], size_t numMetrics)
    {
        bool success = true;
        for (size_t metricIdx = 0; metricIdx < numMetrics; ++metricIdx)
        {
            if (!AddMetric(pMetricNames[metricIdx]))
            {
                success = false;
            }
        }
        return success;
    }

    // Ends the current pass group, generates the ConfigImage and begins a new pass group.
    // Returns false if any NVPW call fails.
    bool PrepareConfigImage()
    {
        NVPA_Status nvpaStatus;

        m_configuring = false;
        NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParam = { NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE };
        endPassGroupParam.pRawMetricsConfig = m_pRawMetricsConfig;
        nvpaStatus = NVPW_RawMetricsConfig_EndPassGroup(&endPassGroupParam);
        if (nvpaStatus)
        {
            return false;
        }

        NVPW_RawMetricsConfig_GenerateConfigImage_Params generateConfigImageParam = { NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE };
        generateConfigImageParam.pRawMetricsConfig = m_pRawMetricsConfig;
        nvpaStatus = NVPW_RawMetricsConfig_GenerateConfigImage(&generateConfigImageParam);
        if (nvpaStatus)
        {
            return false;
        }

        // Start a new PassGroup so that subsequent AddMetrics() calls will succeed.
        // This will not result in optimal scheduling, but it obeys the principle of least surprise.
        NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = { NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE };
        beginPassGroupParams.pRawMetricsConfig = m_pRawMetricsConfig;
        nvpaStatus = NVPW_RawMetricsConfig_BeginPassGroup(&beginPassGroupParams);
        if (nvpaStatus)
        {
            return false;
        }
        m_configuring = true;

        return true;
    }

    // Returns the buffer size needed for the ConfigImage, or zero on error.
    size_t GetConfigImageSize() const
    {
        NVPW_RawMetricsConfig_GetConfigImage_Params getConfigImageParam = { NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE };
        getConfigImageParam.pBuffer = nullptr;
        getConfigImageParam.bytesAllocated = 0;
        getConfigImageParam.pRawMetricsConfig = m_pRawMetricsConfig;
        NVPA_Status nvpaStatus = NVPW_RawMetricsConfig_GetConfigImage(&getConfigImageParam);
        if (nvpaStatus)
        {
            return 0;
        }
        return getConfigImageParam.bytesCopied;
    }

    // Copies the generated ConfigImage into pBuffer.
    bool GetConfigImage(size_t bufferSize, uint8_t* pBuffer) const
    {
        NVPW_RawMetricsConfig_GetConfigImage_Params getConfigImageParam = { NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE };
        getConfigImageParam.pRawMetricsConfig = m_pRawMetricsConfig;
        getConfigImageParam.bytesAllocated = bufferSize;
        getConfigImageParam.pBuffer = pBuffer;
        NVPA_Status nvpaStatus = NVPW_RawMetricsConfig_GetConfigImage(&getConfigImageParam);
        if (nvpaStatus)
        {
            return false;
        }
        return true;
    }

    // Returns the buffer size needed for the CounterDataPrefix, or zero on error.
    size_t GetCounterDataPrefixSize() const
    {
        NVPW_CounterDataBuilder_GetCounterDataPrefix_Params getCounterDataPrefixParams = { NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE };
        getCounterDataPrefixParams.bytesAllocated = 0;
        getCounterDataPrefixParams.pBuffer = nullptr;
        getCounterDataPrefixParams.pCounterDataBuilder = m_pCounterDataBuilder;
        NVPA_Status nvpaStatus = NVPW_CounterDataBuilder_GetCounterDataPrefix(&getCounterDataPrefixParams);
        if (nvpaStatus)
        {
            return 0;
        }
        return getCounterDataPrefixParams.bytesCopied;
    }

    // Copies the generated CounterDataPrefix into pBuffer.
    bool GetCounterDataPrefix(size_t bufferSize, uint8_t* pBuffer) const
    {
        NVPW_CounterDataBuilder_GetCounterDataPrefix_Params getCounterDataPrefixParams = { NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE };
        getCounterDataPrefixParams.bytesAllocated = bufferSize;
        getCounterDataPrefixParams.pBuffer = pBuffer;
        getCounterDataPrefixParams.pCounterDataBuilder = m_pCounterDataBuilder;
        NVPA_Status nvpaStatus = NVPW_CounterDataBuilder_GetCounterDataPrefix(&getCounterDataPrefixParams);
        if (nvpaStatus)
        {
            return false;
        }
        return true;
    }
};
}}

View File

@@ -0,0 +1,766 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <sstream>
#include <utility>
#include <vector>
#include <string>
#include "NvPerfInit.h"
namespace nv { namespace perf {
// Smart Pointer for NVPW_MetricsEvaluator.
// Owns the evaluator handle together with the scratch buffer handed over at construction;
// the handle is destroyed by Reset() / the destructor. Movable, not copyable.
class MetricsEvaluator
{
protected:
    NVPW_MetricsEvaluator* m_pMetricsEvaluator;
    std::vector<uint8_t> m_scratchBuffer;

private:
    // Prevent accidental use of "delete" keyword on this class' implicit conversions.
    // Introducing a second 'operator CompileErrorOnOperatorDelete*()' triggers an 'ambiguous conversion to void*'
    // on the 'delete', which catches the usage error at compile time. c.f. http://stackoverflow.com/a/3312507
    struct CompileErrorOnOperatorDelete;
    operator CompileErrorOnOperatorDelete*() const;

private:
    // non-copyable
    MetricsEvaluator(const MetricsEvaluator& rhs) = delete;
    MetricsEvaluator& operator=(const MetricsEvaluator& rhs) = delete;

public:
    ~MetricsEvaluator()
    {
        Reset();
    }

    MetricsEvaluator()
        : m_pMetricsEvaluator(nullptr)
    {
    }

    // takes the ownership
    MetricsEvaluator(NVPW_MetricsEvaluator* pMetricsEvaluator, std::vector<uint8_t>&& scratchBuffer)
        : m_pMetricsEvaluator(pMetricsEvaluator)
        , m_scratchBuffer(std::move(scratchBuffer))
    {
        scratchBuffer.clear();
    }

    MetricsEvaluator(MetricsEvaluator&& evaluator)
        : m_pMetricsEvaluator(evaluator.m_pMetricsEvaluator)
        , m_scratchBuffer(std::move(evaluator.m_scratchBuffer))
    {
        evaluator.m_pMetricsEvaluator = nullptr;
        evaluator.m_scratchBuffer.clear();
    }

    MetricsEvaluator& operator=(MetricsEvaluator&& evaluator)
    {
        if (this != &evaluator)
        {
            // Without the self-check, Reset() would destroy the handle being moved in.
            Reset();
            m_pMetricsEvaluator = evaluator.m_pMetricsEvaluator;
            m_scratchBuffer = std::move(evaluator.m_scratchBuffer);
            evaluator.m_pMetricsEvaluator = nullptr;
            evaluator.m_scratchBuffer.clear();
        }
        return *this;
    }

    // Implicit access to the underlying handle; ownership is not transferred.
    operator NVPW_MetricsEvaluator*() const
    {
        return m_pMetricsEvaluator;
    }

    // Destroys the evaluator (logging an error if the destroy call fails) and clears the scratch buffer.
    void Reset()
    {
        if (m_pMetricsEvaluator != nullptr)
        {
            NVPW_MetricsEvaluator_Destroy_Params destroyParams = { NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE };
            destroyParams.pMetricsEvaluator = m_pMetricsEvaluator;
            NVPA_Status status = NVPW_MetricsEvaluator_Destroy(&destroyParams);
            if (status != NVPA_STATUS_SUCCESS)
            {
                NV_PERF_LOG_ERR(80, "NVPW_MetricsEvaluator_Destroy failed\n");
            }
            m_pMetricsEvaluator = nullptr;
        }
        m_scratchBuffer.clear();
    }
};
// Read-only view over a packed table of metric names.
// `pMetricNames` is one character buffer holding all names; `pMetricNameBeginIndices[i]`
// is the offset of the i-th name inside it. The view does not own either array.
// Copying is a plain member-wise copy (compiler-generated).
class MetricsEnumerator
{
public:
    // Forward iterator over the names of a MetricsEnumerator; dereferences to `const char*`.
    class Iterator
    {
    private:
        // note these are pointing to the .RO section of the library, so their lifetime are not bound to any particular metrics enumerator or metrics evaluator instance
        const char* m_pMetricNames;
        const size_t* m_pMetricNameBeginIndices;
        size_t m_numMetrics;
        size_t m_metricIndex;

    public:
        Iterator()
            : m_pMetricNames(nullptr)
            , m_pMetricNameBeginIndices(nullptr)
            , m_numMetrics(0)
            , m_metricIndex(0)
        {
        }

        Iterator(const char* pMetricNames, const size_t* pMetricNameBeginIndices, size_t numMetrics, size_t metricIndex)
            : m_pMetricNames(pMetricNames)
            , m_pMetricNameBeginIndices(pMetricNameBeginIndices)
            , m_numMetrics(numMetrics)
            , m_metricIndex(metricIndex)
        {
        }

        bool operator!=(const Iterator& rhs) const
        {
            return !(*this == rhs);
        }

        bool operator==(const Iterator& rhs) const
        {
            return m_pMetricNames == rhs.m_pMetricNames
                && m_pMetricNameBeginIndices == rhs.m_pMetricNameBeginIndices
                && m_numMetrics == rhs.m_numMetrics
                && m_metricIndex == rhs.m_metricIndex;
        }

        // Pre-increment. Saturates at the end position. Returns a reference to this
        // iterator, as the built-in prefix operator does, instead of a copy.
        Iterator& operator++()
        {
            if (m_metricIndex < m_numMetrics)
            {
                ++m_metricIndex;
            }
            return *this;
        }

        // Post-increment. Returns the iterator's value before the increment.
        Iterator operator++(int)
        {
            Iterator prev = *this;
            ++*this;
            return prev;
        }

        // no validity check
        const char* operator*() const
        {
            return &m_pMetricNames[m_pMetricNameBeginIndices[m_metricIndex]];
        }
    };

private:
    // note these are pointing to the .RO section of the library, so their lifetime are not bound to any particular metrics evaluator instance
    const char* m_pMetricNames;
    const size_t* m_pMetricNameBeginIndices;
    size_t m_numMetrics;

public:
    MetricsEnumerator()
        : m_pMetricNames(nullptr)
        , m_pMetricNameBeginIndices(nullptr)
        , m_numMetrics(0)
    {
    }

    MetricsEnumerator(const char* pMetricNames, const size_t* pMetricNameBeginIndices, size_t numMetrics)
        : m_pMetricNames(pMetricNames)
        , m_pMetricNameBeginIndices(pMetricNameBeginIndices)
        , m_numMetrics(numMetrics)
    {
    }

    // no bounds check
    const char* operator[](size_t index) const
    {
        return &m_pMetricNames[m_pMetricNameBeginIndices[index]];
    }

    Iterator begin() const
    {
        return Iterator(m_pMetricNames, m_pMetricNameBeginIndices, m_numMetrics, 0);
    }

    Iterator end() const
    {
        return Iterator(m_pMetricNames, m_pMetricNameBeginIndices, m_numMetrics, m_numMetrics);
    }

    size_t size() const
    {
        return m_numMetrics;
    }

    bool empty() const
    {
        return !m_numMetrics;
    }
};
// Queries the evaluator for the names of all metrics of `metricType` and wraps them in a MetricsEnumerator.
// Returns an empty enumerator if the query fails.
inline MetricsEnumerator EnumerateMetrics(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType)
{
    NVPW_MetricsEvaluator_GetMetricNames_Params params = { NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.metricType = static_cast<uint8_t>(metricType);

    if (NVPW_MetricsEvaluator_GetMetricNames(&params) == NVPA_STATUS_SUCCESS)
    {
        return MetricsEnumerator(params.pMetricNames, params.pMetricNameBeginIndices, params.numMetrics);
    }
    return MetricsEnumerator();
}
// Convenience wrapper: enumerates the metrics of type NVPW_METRIC_TYPE_COUNTER.
inline MetricsEnumerator EnumerateCounters(NVPW_MetricsEvaluator* pMetricsEvaluator)
{
return EnumerateMetrics(pMetricsEvaluator, NVPW_METRIC_TYPE_COUNTER);
}
// Convenience wrapper: enumerates the metrics of type NVPW_METRIC_TYPE_RATIO.
inline MetricsEnumerator EnumerateRatios(NVPW_MetricsEvaluator* pMetricsEvaluator)
{
return EnumerateMetrics(pMetricsEvaluator, NVPW_METRIC_TYPE_RATIO);
}
// Convenience wrapper: enumerates the metrics of type NVPW_METRIC_TYPE_THROUGHPUT.
inline MetricsEnumerator EnumerateThroughputs(NVPW_MetricsEvaluator* pMetricsEvaluator)
{
return EnumerateMetrics(pMetricsEvaluator, NVPW_METRIC_TYPE_THROUGHPUT);
}
// Returns a display name for a metric type, or "" for an unrecognized value.
inline const char* ToCString(NVPW_MetricType metricType)
{
    const char* pName = "";
    switch (metricType)
    {
        case NVPW_METRIC_TYPE_COUNTER:      pName = "Counter";      break;
        case NVPW_METRIC_TYPE_RATIO:        pName = "Ratio";        break;
        case NVPW_METRIC_TYPE_THROUGHPUT:   pName = "Throughput";   break;
        default:                                                    break;
    }
    return pName;
}
// Returns the metric-name suffix of a rollup operation (including the leading '.'),
// or "" for an unrecognized value.
inline const char* ToCString(NVPW_RollupOp rollupOp)
{
    const char* pSuffix = "";
    switch (rollupOp)
    {
        case NVPW_ROLLUP_OP_AVG:    pSuffix = ".avg";   break;
        case NVPW_ROLLUP_OP_MAX:    pSuffix = ".max";   break;
        case NVPW_ROLLUP_OP_MIN:    pSuffix = ".min";   break;
        case NVPW_ROLLUP_OP_SUM:    pSuffix = ".sum";   break;
        default:                                        break;
    }
    return pSuffix;
}
// Returns the metric-name suffix of a submetric (including the leading '.').
// NVPW_SUBMETRIC_NONE and unrecognized values both map to "".
inline const char* ToCString(NVPW_Submetric submetric)
{
    const char* pSuffix = "";
    switch (submetric)
    {
        case NVPW_SUBMETRIC_NONE:
            pSuffix = "";
            break;
        case NVPW_SUBMETRIC_PEAK_SUSTAINED:
            pSuffix = ".peak_sustained";
            break;
        case NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE:
            pSuffix = ".peak_sustained_active";
            break;
        case NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE_PER_SECOND:
            pSuffix = ".peak_sustained_active.per_second";
            break;
        case NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED:
            pSuffix = ".peak_sustained_elapsed";
            break;
        case NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED_PER_SECOND:
            pSuffix = ".peak_sustained_elapsed.per_second";
            break;
        case NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME:
            pSuffix = ".peak_sustained_frame";
            break;
        case NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME_PER_SECOND:
            pSuffix = ".peak_sustained_frame.per_second";
            break;
        case NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION:
            pSuffix = ".peak_sustained_region";
            break;
        case NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION_PER_SECOND:
            pSuffix = ".peak_sustained_region.per_second";
            break;
        case NVPW_SUBMETRIC_PER_CYCLE_ACTIVE:
            pSuffix = ".per_cycle_active";
            break;
        case NVPW_SUBMETRIC_PER_CYCLE_ELAPSED:
            pSuffix = ".per_cycle_elapsed";
            break;
        case NVPW_SUBMETRIC_PER_CYCLE_IN_FRAME:
            pSuffix = ".per_cycle_in_frame";
            break;
        case NVPW_SUBMETRIC_PER_CYCLE_IN_REGION:
            pSuffix = ".per_cycle_in_region";
            break;
        case NVPW_SUBMETRIC_PER_SECOND:
            pSuffix = ".per_second";
            break;
        case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ACTIVE:
            pSuffix = ".pct_of_peak_sustained_active";
            break;
        case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ELAPSED:
            pSuffix = ".pct_of_peak_sustained_elapsed";
            break;
        case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_FRAME:
            pSuffix = ".pct_of_peak_sustained_frame";
            break;
        case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_REGION:
            pSuffix = ".pct_of_peak_sustained_region";
            break;
        case NVPW_SUBMETRIC_MAX_RATE:
            pSuffix = ".max_rate";
            break;
        case NVPW_SUBMETRIC_PCT:
            pSuffix = ".pct";
            break;
        case NVPW_SUBMETRIC_RATIO:
            pSuffix = ".ratio";
            break;
        default:
            break;
    }
    return pSuffix;
}
inline const char* ToCString(const MetricsEnumerator& countersEnumerator, const MetricsEnumerator& ratiosEnumerator, const MetricsEnumerator& throughputsEnumerator, NVPW_MetricType metricType, size_t metricIndex)
{
if (metricType == NVPW_METRIC_TYPE_COUNTER)
{
if (metricIndex < countersEnumerator.size())
{
return countersEnumerator[metricIndex];
}
}
else if (metricType == NVPW_METRIC_TYPE_RATIO)
{
if (metricIndex < ratiosEnumerator.size())
{
return ratiosEnumerator[metricIndex];
}
}
else if (metricType == NVPW_METRIC_TYPE_THROUGHPUT)
{
if (metricIndex < throughputsEnumerator.size())
{
return throughputsEnumerator[metricIndex];
}
}
NV_PERF_LOG_WRN(50, "ToCString failed\n");
return "";
}
// Looks up the base name of metric (metricType, metricIndex) by enumerating the evaluator's
// metrics of that type. Logs a warning and returns "" when the type is unknown or the index
// is out of range.
inline const char* ToCString(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType, size_t metricIndex)
{
    // Stays empty (size 0) for an unknown metric type, so the range check below fails.
    MetricsEnumerator enumerator;
    switch (metricType)
    {
        case NVPW_METRIC_TYPE_COUNTER:
            enumerator = EnumerateCounters(pMetricsEvaluator);
            break;
        case NVPW_METRIC_TYPE_RATIO:
            enumerator = EnumerateRatios(pMetricsEvaluator);
            break;
        case NVPW_METRIC_TYPE_THROUGHPUT:
            enumerator = EnumerateThroughputs(pMetricsEvaluator);
            break;
        default:
            break;
    }

    if (metricIndex < enumerator.size())
    {
        return enumerator[metricIndex];
    }

    NV_PERF_LOG_WRN(50, "ToCString failed\n");
    return "";
}
// Builds the full metric name for an eval request from pre-built enumerators:
// <base name>[<rollup suffix>]<submetric suffix>. The rollup suffix is only
// appended for counter and throughput metrics.
inline std::string ToString(const MetricsEnumerator& countersEnumerator, const MetricsEnumerator& ratiosEnumerator, const MetricsEnumerator& throughputsEnumerator, const NVPW_MetricEvalRequest& metricEvalRequest)
{
    const char* const pBaseName = ToCString(countersEnumerator, ratiosEnumerator, throughputsEnumerator, static_cast<NVPW_MetricType>(metricEvalRequest.metricType), metricEvalRequest.metricIndex);
    std::string fullName(pBaseName);

    const bool hasRollup = (metricEvalRequest.metricType == NVPW_METRIC_TYPE_COUNTER)
                        || (metricEvalRequest.metricType == NVPW_METRIC_TYPE_THROUGHPUT);
    if (hasRollup)
    {
        fullName += ToCString(static_cast<NVPW_RollupOp>(metricEvalRequest.rollupOp));
    }
    fullName += ToCString(static_cast<NVPW_Submetric>(metricEvalRequest.submetric));
    return fullName;
}
// Builds the full metric name for an eval request by querying the evaluator:
// <base name>[<rollup suffix>]<submetric suffix>. The rollup suffix is only
// appended for counter and throughput metrics.
inline std::string ToString(NVPW_MetricsEvaluator* pMetricsEvaluator, const NVPW_MetricEvalRequest& metricEvalRequest)
{
    const char* const pBaseName = ToCString(pMetricsEvaluator, static_cast<NVPW_MetricType>(metricEvalRequest.metricType), metricEvalRequest.metricIndex);
    std::string fullName(pBaseName);

    const bool hasRollup = (metricEvalRequest.metricType == NVPW_METRIC_TYPE_COUNTER)
                        || (metricEvalRequest.metricType == NVPW_METRIC_TYPE_THROUGHPUT);
    if (hasRollup)
    {
        fullName += ToCString(static_cast<NVPW_RollupOp>(metricEvalRequest.rollupOp));
    }
    fullName += ToCString(static_cast<NVPW_Submetric>(metricEvalRequest.submetric));
    return fullName;
}
// Converts a metric name into an NVPW_MetricEvalRequest, written to `metricEvalRequest`.
// Returns false (after logging a warning) if the conversion call fails.
inline bool ToMetricEvalRequest(NVPW_MetricsEvaluator* pMetricsEvaluator, const char* pMetricName, NVPW_MetricEvalRequest& metricEvalRequest)
{
    NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params params = { NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.pMetricName = pMetricName;
    params.pMetricEvalRequest = &metricEvalRequest;
    params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;

    if (NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(&params) == NVPA_STATUS_SUCCESS)
    {
        return true;
    }
    NV_PERF_LOG_WRN(80, "NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest failed\n");
    return false;
}
// Resolves a metric name to its type and index.
// On success writes both outputs and returns true; on failure logs a warning,
// leaves the outputs untouched and returns false.
inline bool GetMetricTypeAndIndex(NVPW_MetricsEvaluator* pMetricsEvaluator, const char* pMetricName, NVPW_MetricType& metricType, size_t& metricIndex)
{
    NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params params = { NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.pMetricName = pMetricName;

    if (NVPW_MetricsEvaluator_GetMetricTypeAndIndex(&params) == NVPA_STATUS_SUCCESS)
    {
        metricType = static_cast<NVPW_MetricType>(params.metricType);
        metricIndex = params.metricIndex;
        return true;
    }
    NV_PERF_LOG_WRN(80, "NVPW_MetricsEvaluator_GetMetricTypeAndIndex failed\n");
    return false;
}
// Appends the submetrics supported for `metricType` to `submetrics` (existing elements are kept).
// Returns false (after logging an error) if the query fails; `submetrics` is then left unchanged.
inline bool GetSupportedSubmetrics(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType, std::vector<NVPW_Submetric>& submetrics)
{
    NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params params = { NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.metricType = static_cast<uint8_t>(metricType);

    if (NVPW_MetricsEvaluator_GetSupportedSubmetrics(&params) != NVPA_STATUS_SUCCESS)
    {
        NV_PERF_LOG_ERR(80, "NVPW_MetricsEvaluator_GetSupportedSubmetrics failed for metric type: %u\n", params.metricType);
        return false;
    }

    const size_t numSubmetrics = params.numSupportedSubmetrics;
    submetrics.reserve(numSubmetrics);
    size_t submetricIdx = 0;
    while (submetricIdx < numSubmetrics)
    {
        submetrics.push_back(static_cast<NVPW_Submetric>(params.pSupportedSubmetrics[submetricIdx]));
        ++submetricIdx;
    }
    return true;
}
// Passes the given counter data image to NVPW_MetricsEvaluator_SetDeviceAttributes.
// Returns false (after logging an error) if the call fails.
inline bool MetricsEvaluatorSetDeviceAttributes(NVPW_MetricsEvaluator* pMetricsEvaluator, const uint8_t* pCounterDataImage, size_t counterDataImageSize)
{
    NVPW_MetricsEvaluator_SetDeviceAttributes_Params params = { NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.pCounterDataImage = pCounterDataImage;
    params.counterDataImageSize = counterDataImageSize;

    if (NVPW_MetricsEvaluator_SetDeviceAttributes(&params) == NVPA_STATUS_SUCCESS)
    {
        return true;
    }
    NV_PERF_LOG_ERR(50, "NVPW_MetricsEvaluator_SetDeviceAttributes failed\n");
    return false;
}
/// Evaluates `numMetricEvalRequests` metric requests against (pCounterDataImage, rangeIndex)
/// and writes the results through `pMetricValues`.
/// The `isolated` flag of the underlying call is always set to true.
/// Returns false (after logging an error) when NVPW_MetricsEvaluator_EvaluateToGpuValues fails.
/// NOTE(review): presumably `pMetricValues` must have room for one double per request - confirm against the NVPW docs.
inline bool EvaluateToGpuValues(
    NVPW_MetricsEvaluator* pMetricsEvaluator,
    const uint8_t* pCounterDataImage,
    size_t counterDataImageSize,
    size_t rangeIndex,
    size_t numMetricEvalRequests,
    const NVPW_MetricEvalRequest* pMetricEvalRequests,
    double* pMetricValues)
{
    NVPW_MetricsEvaluator_EvaluateToGpuValues_Params params = { NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;

    // what to evaluate
    params.numMetricEvalRequests = numMetricEvalRequests;
    params.pMetricEvalRequests = pMetricEvalRequests;
    params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
    params.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest);

    // where the counter values come from
    params.pCounterDataImage = pCounterDataImage;
    params.counterDataImageSize = counterDataImageSize;
    params.rangeIndex = rangeIndex;
    params.isolated = static_cast<NVPA_Bool>(true);

    // where the results go
    params.pMetricValues = pMetricValues;

    const NVPA_Status evalStatus = NVPW_MetricsEvaluator_EvaluateToGpuValues(&params);
    if (evalStatus == NVPA_STATUS_SUCCESS)
    {
        return true;
    }
    NV_PERF_LOG_ERR(80, "NVPW_MetricsEvaluator_EvaluateToGpuValues failed\n");
    return false;
}
/// Two dim-unit factors are equal when both the unit and the exponent match.
inline bool operator==(const NVPW_DimUnitFactor& lhs, const NVPW_DimUnitFactor& rhs)
{
    if (lhs.dimUnit != rhs.dimUnit)
    {
        return false;
    }
    return lhs.exponent == rhs.exponent;
}
/// Lexicographic ordering of dim-unit factors: compare by unit first, then by exponent.
inline bool operator<(const NVPW_DimUnitFactor& lhs, const NVPW_DimUnitFactor& rhs)
{
    if (lhs.dimUnit == rhs.dimUnit)
    {
        // Same unit: the exponent decides (equal exponents yield false, i.e. "not less").
        return lhs.exponent < rhs.exponent;
    }
    return lhs.dimUnit < rhs.dimUnit;
}
/// Retrieves the dimensional-unit factors of `metricRequest` into `dimUnits`.
/// The NVPW call is issued twice: once without an output buffer to read back `numDimUnits`,
/// then again with `dimUnits` resized to that count.
/// Returns false (after logging a warning) when either call fails or when the first call reports zero units.
inline bool GetMetricDimUnits(NVPW_MetricsEvaluator* pMetricsEvaluator, const NVPW_MetricEvalRequest& metricRequest, std::vector<NVPW_DimUnitFactor>& dimUnits)
{
    NVPW_MetricsEvaluator_GetMetricDimUnits_Params params = { NVPW_MetricsEvaluator_GetMetricDimUnits_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.pMetricEvalRequest = &metricRequest;
    params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
    params.dimUnitFactorStructSize = NVPW_DimUnitFactor_STRUCT_SIZE;

    // First call: pDimUnits is still unset, only the count is consumed.
    const NVPA_Status countStatus = NVPW_MetricsEvaluator_GetMetricDimUnits(&params);
    const bool haveCount = (countStatus == NVPA_STATUS_SUCCESS) && params.numDimUnits;
    if (!haveCount)
    {
        NV_PERF_LOG_WRN(80, "NVPW_MetricsEvaluator_GetMetricDimUnits failed for metric = %s\n", ToString(pMetricsEvaluator, metricRequest).c_str());
        return false;
    }

    // Second call: fill the caller's vector.
    dimUnits.resize(params.numDimUnits);
    params.pDimUnits = dimUnits.data();
    const NVPA_Status fillStatus = NVPW_MetricsEvaluator_GetMetricDimUnits(&params);
    if (fillStatus == NVPA_STATUS_SUCCESS)
    {
        return true;
    }
    NV_PERF_LOG_WRN(80, "NVPW_MetricsEvaluator_GetMetricDimUnits failed for metric = %s\n", ToString(pMetricsEvaluator, metricRequest).c_str());
    return false;
}
/// Looks up the description string of the metric identified by (metricType, metricIndex).
/// Counter, ratio and throughput metrics are each served by their own NVPW "GetProperties" call.
/// Returns nullptr (after logging a warning) for any other metric type or when the lookup fails.
inline const char* GetMetricDescription(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType, size_t metricIndex)
{
    switch (metricType)
    {
        case NVPW_METRIC_TYPE_COUNTER:
        {
            NVPW_MetricsEvaluator_GetCounterProperties_Params counterParams{ NVPW_MetricsEvaluator_GetCounterProperties_Params_STRUCT_SIZE };
            counterParams.pMetricsEvaluator = pMetricsEvaluator;
            counterParams.counterIndex = metricIndex;
            if (NVPW_MetricsEvaluator_GetCounterProperties(&counterParams) == NVPA_STATUS_SUCCESS)
            {
                return counterParams.pDescription;
            }
            break;
        }
        case NVPW_METRIC_TYPE_RATIO:
        {
            NVPW_MetricsEvaluator_GetRatioMetricProperties_Params ratioParams{ NVPW_MetricsEvaluator_GetRatioMetricProperties_Params_STRUCT_SIZE };
            ratioParams.pMetricsEvaluator = pMetricsEvaluator;
            ratioParams.ratioMetricIndex = metricIndex;
            if (NVPW_MetricsEvaluator_GetRatioMetricProperties(&ratioParams) == NVPA_STATUS_SUCCESS)
            {
                return ratioParams.pDescription;
            }
            break;
        }
        case NVPW_METRIC_TYPE_THROUGHPUT:
        {
            NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params throughputParams{ NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params_STRUCT_SIZE };
            throughputParams.pMetricsEvaluator = pMetricsEvaluator;
            throughputParams.throughputMetricIndex = metricIndex;
            if (NVPW_MetricsEvaluator_GetThroughputMetricProperties(&throughputParams) == NVPA_STATUS_SUCCESS)
            {
                return throughputParams.pDescription;
            }
            break;
        }
        default:
            break;
    }

    // Unknown metric type, or the matching properties query failed.
    NV_PERF_LOG_WRN(50, "GetMetricDescription failed for metricType = %u, metricIndex = %u\n", static_cast<uint32_t>(metricType), static_cast<uint32_t>(metricIndex));
    return nullptr;
}
/// Returns the name string reported by NVPW_MetricsEvaluator_HwUnitToString for `hwUnit`,
/// or nullptr (after logging a warning) when that call fails.
inline const char* ToCString(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_HwUnit hwUnit)
{
    NVPW_MetricsEvaluator_HwUnitToString_Params toStringParams{ NVPW_MetricsEvaluator_HwUnitToString_Params_STRUCT_SIZE };
    toStringParams.hwUnit = hwUnit;
    toStringParams.pMetricsEvaluator = pMetricsEvaluator;

    if (NVPW_MetricsEvaluator_HwUnitToString(&toStringParams) == NVPA_STATUS_SUCCESS)
    {
        return toStringParams.pHwUnitName;
    }
    NV_PERF_LOG_WRN(50, "NVPW_MetricsEvaluator_HwUnitToString failed for hwUnit: %u\n", hwUnit);
    return nullptr;
}
/// Looks up the hardware unit of the metric identified by (metricType, metricIndex).
/// Counter, ratio and throughput metrics are each served by their own NVPW "GetProperties" call.
/// Returns NVPW_HW_UNIT_INVALID (after logging a warning) for any other metric type or when the lookup fails.
inline NVPW_HwUnit GetMetricHwUnit(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType, size_t metricIndex)
{
    switch (metricType)
    {
        case NVPW_METRIC_TYPE_COUNTER:
        {
            NVPW_MetricsEvaluator_GetCounterProperties_Params counterParams{ NVPW_MetricsEvaluator_GetCounterProperties_Params_STRUCT_SIZE };
            counterParams.pMetricsEvaluator = pMetricsEvaluator;
            counterParams.counterIndex = metricIndex;
            if (NVPW_MetricsEvaluator_GetCounterProperties(&counterParams) == NVPA_STATUS_SUCCESS)
            {
                return static_cast<NVPW_HwUnit>(counterParams.hwUnit);
            }
            break;
        }
        case NVPW_METRIC_TYPE_RATIO:
        {
            NVPW_MetricsEvaluator_GetRatioMetricProperties_Params ratioParams{ NVPW_MetricsEvaluator_GetRatioMetricProperties_Params_STRUCT_SIZE };
            ratioParams.pMetricsEvaluator = pMetricsEvaluator;
            ratioParams.ratioMetricIndex = metricIndex;
            if (NVPW_MetricsEvaluator_GetRatioMetricProperties(&ratioParams) == NVPA_STATUS_SUCCESS)
            {
                return static_cast<NVPW_HwUnit>(ratioParams.hwUnit);
            }
            break;
        }
        case NVPW_METRIC_TYPE_THROUGHPUT:
        {
            NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params throughputParams{ NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params_STRUCT_SIZE };
            throughputParams.pMetricsEvaluator = pMetricsEvaluator;
            throughputParams.throughputMetricIndex = metricIndex;
            if (NVPW_MetricsEvaluator_GetThroughputMetricProperties(&throughputParams) == NVPA_STATUS_SUCCESS)
            {
                return static_cast<NVPW_HwUnit>(throughputParams.hwUnit);
            }
            break;
        }
        default:
            break;
    }

    // Unknown metric type, or the matching properties query failed.
    NV_PERF_LOG_WRN(50, "GetMetricHwUnit failed for metricType = %u, metricIndex = %u\n", static_cast<uint32_t>(metricType), static_cast<uint32_t>(metricIndex));
    return NVPW_HW_UNIT_INVALID;
}
/// Convenience wrapper: resolves the metric's hardware unit via GetMetricHwUnit and converts it
/// to its name via ToCString. The result is whatever ToCString returns for that unit.
inline const char* GetMetricHwUnitStr(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType, size_t metricIndex)
{
    const NVPW_HwUnit resolvedUnit = GetMetricHwUnit(pMetricsEvaluator, metricType, metricIndex);
    return ToCString(pMetricsEvaluator, resolvedUnit);
}
/// Returns the plural or singular name reported by NVPW_MetricsEvaluator_DimUnitToString for `dimUnit`.
/// Returns an empty string (after logging a warning) when that call fails.
inline const char* ToCString(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_DimUnitName dimUnit, bool plural)
{
    NVPW_MetricsEvaluator_DimUnitToString_Params toStringParams = { NVPW_MetricsEvaluator_DimUnitToString_Params_STRUCT_SIZE };
    toStringParams.dimUnit = static_cast<uint32_t>(dimUnit);
    toStringParams.pMetricsEvaluator = pMetricsEvaluator;

    const NVPA_Status toStringStatus = NVPW_MetricsEvaluator_DimUnitToString(&toStringParams);
    if (toStringStatus != NVPA_STATUS_SUCCESS)
    {
        NV_PERF_LOG_WRN(80, "NVPW_MetricsEvaluator_DimUnitToString failed for dimUnit = %u\n", dimUnit);
        return "";
    }

    if (plural)
    {
        return toStringParams.pPluralName;
    }
    return toStringParams.pSingularName;
}
// `getDimUnitStrFunctor` must be in the form of const char*(NVPW_DimUnitName dimUnit, bool plural)
template <typename GetDimUnitStrFunctor>
inline std::string ToString(const std::vector<NVPW_DimUnitFactor>& dimUnitFactors, GetDimUnitStrFunctor&& getDimUnitStrFunctor)
{
if (dimUnitFactors.empty())
{
return "<unitless>";
}
std::stringstream sstream;
size_t numeratorCount = 0;
size_t denominatorCount = 0;
auto isNumerator = [](const NVPW_DimUnitFactor& dimUnitFactor) {
return dimUnitFactor.exponent > 0;
};
// if printNumerator == false, print the denominator
auto printFormattedDimUnits = [&](size_t count, bool printNumerator) {
if (count > 1)
{
sstream << "(";
}
bool isFirst = true;
for (const NVPW_DimUnitFactor& dimUnitFactor : dimUnitFactors)
{
if (printNumerator != isNumerator(dimUnitFactor))
{
continue;
}
if (!isFirst)
{
sstream << " * ";
}
const bool plural = printNumerator;
sstream << getDimUnitStrFunctor(static_cast<NVPW_DimUnitName>(dimUnitFactor.dimUnit), plural);
if (std::abs(dimUnitFactor.exponent) != 1)
{
sstream << "^" << (uint32_t)std::abs(dimUnitFactor.exponent);
}
isFirst = false;
}
if (count > 1)
{
sstream << ")";
}
};
for (const NVPW_DimUnitFactor& dimUnitFactor : dimUnitFactors)
{
isNumerator(dimUnitFactor) ? ++numeratorCount : ++denominatorCount;
}
if (numeratorCount)
{
const bool printNumerator = true;
printFormattedDimUnits(numeratorCount, printNumerator);
}
else
{
sstream << "1";
}
if (denominatorCount)
{
sstream << " / ";
const bool printNumerator = false;
printFormattedDimUnits(denominatorCount, printNumerator);
}
return sstream.str();
}
}}

View File

@@ -0,0 +1,185 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "NvPerfInit.h"
#include "NvPerfDeviceProperties.h"
#include "nvperf_opengl_host.h"
#include "nvperf_opengl_target.h"
#include "GL/gl.h"
#include <string.h>
namespace nv { namespace perf {
// OpenGL Only Utilities
//
inline std::string OpenGLGetDeviceName()
{
const GLubyte* pRenderer = glGetString(GL_RENDERER);
if (!pRenderer)
{
return "";
}
return (const char*) pRenderer;
}
inline bool OpenGLIsNvidiaDevice()
{
const GLubyte* pVendor = glGetString(GL_VENDOR);
if (!pVendor)
{
return false;
}
if (strstr((const char*)pVendor, "NVIDIA"))
{
return true;
}
return false;
}
inline bool OpenGLLoadDriver()
{
NVPW_OpenGL_LoadDriver_Params loadDriverParams = { NVPW_OpenGL_LoadDriver_Params_STRUCT_SIZE };
NVPA_Status nvpaStatus = NVPW_OpenGL_LoadDriver(&loadDriverParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(10, "NVPW_OpenGL_LoadDriver failed\n");
return false;
}
return true;
}
inline size_t OpenGLGetNvperfDeviceIndex(size_t sliIndex = 0)
{
NVPW_OpenGL_GraphicsContext_GetDeviceIndex_Params getDeviceIndexParams = { NVPW_OpenGL_GraphicsContext_GetDeviceIndex_Params_STRUCT_SIZE };
getDeviceIndexParams.sliIndex = sliIndex;
NVPA_Status nvpaStatus = NVPW_OpenGL_GraphicsContext_GetDeviceIndex(&getDeviceIndexParams);
if (nvpaStatus)
{
return ~size_t(0);
}
return getDeviceIndexParams.deviceIndex;
}
/// Resolves the nvperf device index for `sliIndex` and returns GetDeviceIdentifiers() for that index.
inline DeviceIdentifiers OpenGLGetDeviceIdentifiers(size_t sliIndex = 0)
{
    const size_t nvperfDeviceIndex = OpenGLGetNvperfDeviceIndex(sliIndex);
    return GetDeviceIdentifiers(nvperfDeviceIndex);
}
/// Returns GetDeviceClockState() for the nvperf device index resolved with the default SLI index.
inline NVPW_Device_ClockStatus OpenGLGetDeviceClockState()
{
    size_t deviceIndex = OpenGLGetNvperfDeviceIndex();
    const NVPW_Device_ClockStatus currentStatus = GetDeviceClockState(deviceIndex);
    return currentStatus;
}
/// Applies a clock *setting* to the nvperf device resolved with the default SLI index.
/// Returns the result of SetDeviceClockState().
inline bool OpenGLSetDeviceClockState(NVPW_Device_ClockSetting clockStatus)
{
    size_t deviceIndex = OpenGLGetNvperfDeviceIndex();
    const bool applied = SetDeviceClockState(deviceIndex, clockStatus);
    return applied;
}
/// Overload taking a clock *status* value (e.g. one previously read back with OpenGLGetDeviceClockState).
/// Forwards to the SetDeviceClockState() overload for that type and returns its result.
inline bool OpenGLSetDeviceClockState(NVPW_Device_ClockStatus clockStatus)
{
    size_t deviceIndex = OpenGLGetNvperfDeviceIndex();
    const bool applied = SetDeviceClockState(deviceIndex, clockStatus);
    return applied;
}
inline size_t OpenGLCalculateMetricsEvaluatorScratchBufferSize(const char* pChipName)
{
NVPW_OpenGL_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParams = { NVPW_OpenGL_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE };
calculateScratchBufferSizeParams.pChipName = pChipName;
NVPA_Status nvpaStatus = NVPW_OpenGL_MetricsEvaluator_CalculateScratchBufferSize(&calculateScratchBufferSizeParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(20, "NVPW_OpenGL_MetricsEvaluator_CalculateScratchBufferSize failed\n");
return 0;
}
return calculateScratchBufferSizeParams.scratchBufferSize;
}
/// Initializes a metrics evaluator for `pChipName` using the caller-provided scratch buffer.
/// Returns the evaluator reported by NVPW_OpenGL_MetricsEvaluator_Initialize,
/// or nullptr (after logging an error) when that call fails.
/// NOTE(review): the scratch buffer presumably has to outlive the returned evaluator - confirm against the NVPW docs.
inline NVPW_MetricsEvaluator* OpenGLCreateMetricsEvaluator(uint8_t* pScratchBuffer, size_t scratchBufferSize, const char* pChipName)
{
    NVPW_OpenGL_MetricsEvaluator_Initialize_Params params = { NVPW_OpenGL_MetricsEvaluator_Initialize_Params_STRUCT_SIZE };
    params.pChipName = pChipName;
    params.pScratchBuffer = pScratchBuffer;
    params.scratchBufferSize = scratchBufferSize;
    const NVPA_Status initStatus = NVPW_OpenGL_MetricsEvaluator_Initialize(&params);
    if (!initStatus)
    {
        return params.pMetricsEvaluator;
    }
    NV_PERF_LOG_ERR(20, "NVPW_OpenGL_MetricsEvaluator_Initialize failed\n");
    return nullptr;
}
}}
namespace nv { namespace perf { namespace profiler {
/// Creates a raw metrics config for `pChipName` with activity kind NVPA_ACTIVITY_KIND_PROFILER.
/// Returns nullptr when NVPW_OpenGL_RawMetricsConfig_Create fails; no message is logged.
inline NVPA_RawMetricsConfig* OpenGLCreateRawMetricsConfig(const char* pChipName)
{
    NVPW_OpenGL_RawMetricsConfig_Create_Params createParams = { NVPW_OpenGL_RawMetricsConfig_Create_Params_STRUCT_SIZE };
    createParams.pChipName = pChipName;
    createParams.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
    const NVPA_Status createStatus = NVPW_OpenGL_RawMetricsConfig_Create(&createParams);
    if (!createStatus)
    {
        return createParams.pRawMetricsConfig;
    }
    return nullptr;
}
/// Reports whether the GPU behind the OpenGL device for `sliIndex` can be profiled.
///
/// Returns false and logs the reason when:
///  - the nvperf device index cannot be resolved,
///  - NVPW_OpenGL_Profiler_IsGpuSupported fails, or
///  - the call succeeds but reports the GPU as unsupported (architecture and/or SLI reasons are logged).
inline bool OpenGLIsGpuSupported(size_t sliIndex = 0)
{
    const size_t deviceIndex = OpenGLGetNvperfDeviceIndex(sliIndex);
    // OpenGLGetNvperfDeviceIndex signals failure with ~size_t(0); do not hand that sentinel to the NVPW call.
    if (deviceIndex == ~size_t(0))
    {
        NV_PERF_LOG_ERR(10, "OpenGLGetNvperfDeviceIndex failed on %s\n", OpenGLGetDeviceName().c_str());
        return false;
    }

    NVPW_OpenGL_Profiler_IsGpuSupported_Params params = { NVPW_OpenGL_Profiler_IsGpuSupported_Params_STRUCT_SIZE };
    params.deviceIndex = deviceIndex;
    NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_IsGpuSupported(&params);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_OpenGL_Profiler_IsGpuSupported failed on %s\n", OpenGLGetDeviceName().c_str());
        return false;
    }

    if (!params.isSupported)
    {
        NV_PERF_LOG_ERR(10, "%s is not supported\n", OpenGLGetDeviceName().c_str());
        if (params.gpuArchitectureSupportLevel != NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_SUPPORTED)
        {
            const DeviceIdentifiers deviceIdentifiers = OpenGLGetDeviceIdentifiers(sliIndex);
            // Passing a null pointer to %s is undefined behaviour; fall back to a placeholder
            // in case the identifiers could not be retrieved.
            const char* const pChipName = deviceIdentifiers.pChipName ? deviceIdentifiers.pChipName : "Unknown";
            NV_PERF_LOG_ERR(10, "Unsupported GPU architecture %s\n", pChipName);
        }
        if (params.sliSupportLevel == NVPW_SLI_SUPPORT_LEVEL_UNSUPPORTED)
        {
            NV_PERF_LOG_ERR(10, "Devices in SLI configuration are not supported.\n");
        }
        return false;
    }
    return true;
}
}}}

View File

@@ -0,0 +1,336 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <list>
#include <utility>
#include <vector>
#ifdef __linux__
#include <sys/stat.h>
#endif
#include "NvPerfCounterData.h"
#include "NvPerfCounterConfiguration.h"
namespace nv { namespace perf { namespace profiler {
/// Sizing options for a profiler session.
/// The default values are the "safe defaults for realtime" chosen by the original author.
struct SessionOptions
{
    size_t maxNumRanges{16};        ///< maximum number of ranges
    size_t avgRangeNameLength{128}; ///< average length of a range name
    size_t numTraceBuffers{5};      ///< recommended: SwapChainDepth + 2
};
struct SetConfigParams
{
const uint8_t* pConfigImage;
size_t configImageSize;
const uint8_t* pCounterDataPrefix;
size_t counterDataPrefixSize;
size_t numPipelinedPasses;
size_t numIsolatedPasses;
uint16_t numNestingLevels;
size_t numStatisticalSamples;
SetConfigParams()
: pConfigImage()
, configImageSize()
, pCounterDataPrefix()
, counterDataPrefixSize()
, numPipelinedPasses()
, numIsolatedPasses()
, numNestingLevels()
, numStatisticalSamples()
{
}
SetConfigParams(const CounterConfiguration& configuration, uint16_t numNestingLevels = 1, size_t numStatisticalSamples = 1)
: pConfigImage(configuration.configImage.data())
, configImageSize(configuration.configImage.size())
, pCounterDataPrefix(configuration.counterDataPrefix.data())
, counterDataPrefixSize(configuration.counterDataPrefix.size())
, numPipelinedPasses(configuration.numPipelinedPasses)
, numIsolatedPasses(configuration.numIsolatedPasses)
, numNestingLevels(numNestingLevels)
, numStatisticalSamples(numStatisticalSamples)
{
}
};
/// Out-param from DecodeCounters.
/// All flags default to false so that a DecodeResult which was never filled in
/// (e.g. because DecodeCounters bailed out early) reads as "nothing decoded" instead of holding indeterminate values.
struct DecodeResult
{
    bool onePassDecoded = false;                 ///< set by the profiler API's DecodeCounters
    bool allPassesDecoded = false;               ///< set by the profiler API's DecodeCounters
    bool allStatisticalSamplesCollected = false; ///< true once the required number of statistical samples has been decoded
    std::vector<uint8_t> counterDataImage;       ///< receives the counter data image when allStatisticalSamplesCollected is true; empty otherwise
};
/// API-independent bookkeeping for range profiling.
///
/// Counter collections are enqueued as (config, counter data) pairs. The config queue tracks what
/// still has passes to submit; the counters queue tracks what still has to be decoded. All actual
/// profiler work is delegated to an IProfilerApi implementation, which is held by reference and
/// must therefore outlive this object.
class RangeProfilerStateMachine
{
public: // types
    /// Operations the state machine delegates to the graphics-API-specific backend.
    /// Every function returns true on success.
    struct IProfilerApi
    {
        virtual bool CreateCounterData(const SetConfigParams& config, std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch) const = 0;
        virtual bool SetConfig(const SetConfigParams& config) const = 0;
        virtual bool BeginPass() const = 0;
        virtual bool EndPass() const = 0;
        virtual bool PushRange(const char* pRangeName) = 0;
        virtual bool PopRange() = 0;
        virtual bool DecodeCounters(std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch, bool& onePassDecoded, bool& allPassesDecoded) const = 0;
    };

protected: // types
    struct CounterStateMachine
    {
        // state updated per-pass
        size_t numPassesSubmitted;                  /// number of passes submitted (incremented at EndPass)
        size_t numStatisticalSamplesCollected;      /// number of times all passes were collected

        // state derived from the configuration
        size_t numPassesPerStatisticalSample;       /// number of passes required by the {ConfigImage, numNestingLevels}
        size_t numStatisticalSamplesRequired;       /// number of repeated samplings required by SetConfig

        std::vector<uint8_t> counterDataImage;      /// opaque buffer containing HW counter data; updated in DecodeCounters on each frame
        std::vector<uint8_t> counterDataScratch;    /// opaque buffer needed by DecodeCounters

        /// True once every pass of every required statistical sample has been submitted.
        bool AllPassesSubmitted() const
        {
            const bool allPassesSubmitted = (numPassesSubmitted == numPassesPerStatisticalSample * numStatisticalSamplesRequired);
            return allPassesSubmitted;
        }
    };

protected: // members
    IProfilerApi& m_profilerApi;
    bool m_inPass;

    // Use std::list for stable iterators and a guarantee of no-copy.
    typedef std::list<SetConfigParams> ConfigQueue;
    typedef std::list<CounterStateMachine> CountersQueue;
    bool m_needSetConfig;                           // true when m_configQueue.front() has not been passed to SetConfig yet
    ConfigQueue m_configQueue;                      // m_configQueue.front() is the active configuration (by SetConfig), and is popped after all passes are submitted
    CountersQueue m_countersQueue;                  // queued CounterData, which may lag the configQueue when frames are rendered asynchronously
    CountersQueue::iterator m_submitCounterItr;     // points at the CounterData corresponding to m_configQueue.front()

private:
    // non-copyable
    RangeProfilerStateMachine(const RangeProfilerStateMachine&) = delete;

public:
    ~RangeProfilerStateMachine()
    {
        Reset();
    }

    /// Binds the state machine to `profilerApi`; only the reference is stored.
    RangeProfilerStateMachine(IProfilerApi& profilerApi)
        : m_profilerApi(profilerApi)
        , m_inPass(false)
        , m_needSetConfig()
        , m_configQueue()
        , m_countersQueue()
        , m_submitCounterItr()
    {
    }

    /// Drops everything that is queued and returns to the "outside of a pass" state.
    void Reset()
    {
        m_submitCounterItr = {};
        m_countersQueue.clear();
        m_configQueue.clear();
        m_needSetConfig = false;
        m_inPass = false;
    }

    bool IsInPass() const
    {
        return m_inPass;
    }

    /// Queues a counter collection described by `config`.
    /// Counter data buffers are created up front through the profiler API; when that fails,
    /// nothing is queued and false is returned.
    bool EnqueueCounterCollection(const SetConfigParams& config)
    {
        CounterStateMachine counterStateMachine = {};
        counterStateMachine.numPassesPerStatisticalSample = config.numPipelinedPasses + config.numIsolatedPasses * config.numNestingLevels;
        counterStateMachine.numStatisticalSamplesRequired = config.numStatisticalSamples;
        if (!m_profilerApi.CreateCounterData(config, counterStateMachine.counterDataImage, counterStateMachine.counterDataScratch))
        {
            return false;
        }

        if (m_configQueue.empty())
        {
            // The new config becomes the active one; it must be applied at the next BeginPass.
            m_needSetConfig = true;
        }
        m_configQueue.push_back(config);

        // The submit iterator has to be re-aimed at the new entry whenever no queued entry is still
        // waiting for passes to be submitted. That is the case when the counters queue is empty, but
        // also when every queued entry has been fully submitted and is merely waiting to be decoded:
        // the iterator then sits at end(), and a std::list end() iterator stays end() after an
        // insertion - leaving it there would make the next EndPass dereference end().
        // empty() is tested first so that a value-initialized iterator (after construction or Reset)
        // is never compared.
        const bool noEntryAwaitingSubmission = m_countersQueue.empty() || (m_submitCounterItr == m_countersQueue.end());
        m_countersQueue.emplace_back(std::move(counterStateMachine));
        if (noEntryAwaitingSubmission)
        {
            // point at the element that was just appended
            m_submitCounterItr = m_countersQueue.end();
            --m_submitCounterItr;
        }

        return true;
    }

    /// Starts a pass for the active configuration, applying it first via SetConfig when needed.
    /// With nothing queued this is a successful no-op and no pass is entered.
    bool BeginPass()
    {
        if (m_inPass)
        {
            // TODO: error - must be called in session, but outside of a pass
            return false;
        }
        if (m_configQueue.empty())
        {
            // Do not enqueue additional HW data collection.
            return true;
        }

        if (m_needSetConfig)
        {
            if (!m_profilerApi.SetConfig(m_configQueue.front()))
            {
                return false;
            }
            m_needSetConfig = false;
        }

        if (!m_profilerApi.BeginPass())
        {
            return false;
        }

        m_inPass = true;
        return true;
    }

    /// Ends the current pass and accounts for it. Once all passes of the active configuration
    /// have been submitted, the configuration is popped and the next one (if any) is scheduled
    /// to be applied at the following BeginPass.
    bool EndPass()
    {
        if (!m_inPass)
        {
            // TODO: error - must be called in session, and inside of a pass
            return false;
        }
        if (m_configQueue.empty())
        {
            // Do not enqueue additional HW data collection.
            return true;
        }

        if (!m_profilerApi.EndPass())
        {
            return false;
        }

        CounterStateMachine& counterStateMachine = *m_submitCounterItr;
        counterStateMachine.numPassesSubmitted += 1;
        if (counterStateMachine.AllPassesSubmitted())
        {
            ++m_submitCounterItr;
            m_configQueue.pop_front();
            if (!m_configQueue.empty())
            {
                m_needSetConfig = true;
            }
        }

        m_inPass = false;
        return true;
    }

    /// Forwards to the profiler API's PushRange; only valid inside a pass.
    bool PushRange(const char* pRangeName)
    {
        if (!m_inPass)
        {
            // TODO: error - must be called in session, and inside of a pass
            return false;
        }
        if (m_configQueue.empty())
        {
            // Do not enqueue additional HW data collection.
            return true;
        }

        if (!m_profilerApi.PushRange(pRangeName))
        {
            return false;
        }

        return true;
    }

    /// Forwards to the profiler API's PopRange; only valid inside a pass.
    bool PopRange()
    {
        if (!m_inPass)
        {
            // TODO: error - must be called in session, and inside of a pass
            return false;
        }
        if (m_configQueue.empty())
        {
            // Do not enqueue additional HW data collection.
            return true;
        }

        if (!m_profilerApi.PopRange())
        {
            return false;
        }

        return true;
    }

    /// Decodes into the oldest queued counter data and reports progress through `decodeResult`.
    /// When the required number of statistical samples has been collected, the counter data image
    /// is moved into `decodeResult` and the entry is removed from the queue.
    /// Returns false when nothing is queued (in which case `decodeResult` is left untouched)
    /// or when the profiler API's DecodeCounters fails.
    bool DecodeCounters(DecodeResult& decodeResult)
    {
        if (m_countersQueue.empty())
        {
            // TODO: error - nothing is queued for collection.  see SetConfig ...
            return false;
        }

        CounterStateMachine& counterStateMachine = m_countersQueue.front();

        decodeResult = {};
        if (!m_profilerApi.DecodeCounters(counterStateMachine.counterDataImage, counterStateMachine.counterDataScratch, decodeResult.onePassDecoded, decodeResult.allPassesDecoded))
        {
            // TODO: error - the session must be torn down
            return false;
        }

        if (decodeResult.allPassesDecoded)
        {
            counterStateMachine.numStatisticalSamplesCollected += 1;
            if (counterStateMachine.numStatisticalSamplesCollected == counterStateMachine.numStatisticalSamplesRequired)
            {
                decodeResult.allStatisticalSamplesCollected = true;
                decodeResult.counterDataImage = std::move(counterStateMachine.counterDataImage);
                m_countersQueue.pop_front();
            }
        }

        return true;
    }

    /// True when no configuration has passes left to submit (the config queue is empty).
    bool AllPassesSubmitted() const
    {
        const bool allPassesSubmitted = m_configQueue.empty();
        return allPassesSubmitted;
    }
};
}}}

View File

@@ -0,0 +1,373 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "NvPerfRangeProfiler.h"
#include "NvPerfD3D11.h"
#include <atlbase.h>
namespace nv { namespace perf { namespace profiler {
class RangeProfilerD3D11
{
private:
struct ProfilerApi : RangeProfilerStateMachine::IProfilerApi
{
CComPtr<ID3D11DeviceContext> pDeviceContext;
SessionOptions sessionOptions;
ProfilerApi()
: pDeviceContext(nullptr)
, sessionOptions()
{
}
virtual bool CreateCounterData(const SetConfigParams& config, std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch) const override
{
NVPA_Status nvpaStatus;
NVPW_D3D11_Profiler_CounterDataImageOptions counterDataImageOptions = { NVPW_D3D11_Profiler_CounterDataImageOptions_STRUCT_SIZE };
counterDataImageOptions.pCounterDataPrefix = config.pCounterDataPrefix;
counterDataImageOptions.counterDataPrefixSize = config.counterDataPrefixSize;
counterDataImageOptions.maxNumRanges = static_cast<uint32_t>(sessionOptions.maxNumRanges);
counterDataImageOptions.maxNumRangeTreeNodes = static_cast<uint32_t>(2 * sessionOptions.maxNumRanges);
counterDataImageOptions.maxRangeNameLength = static_cast<uint32_t>(sessionOptions.avgRangeNameLength);
NVPW_D3D11_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = { NVPW_D3D11_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE };
calculateSizeParams.counterDataImageOptionsSize = NVPW_D3D11_Profiler_CounterDataImageOptions_STRUCT_SIZE;
calculateSizeParams.pOptions = &counterDataImageOptions;
nvpaStatus = NVPW_D3D11_Profiler_CounterDataImage_CalculateSize(&calculateSizeParams);
if (nvpaStatus)
{
return false;
}
counterDataImage.resize(calculateSizeParams.counterDataImageSize);
NVPW_D3D11_Profiler_CounterDataImage_Initialize_Params initializeParams = { NVPW_D3D11_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE };
initializeParams.counterDataImageOptionsSize = NVPW_D3D11_Profiler_CounterDataImageOptions_STRUCT_SIZE;
initializeParams.pOptions = &counterDataImageOptions;
initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
initializeParams.pCounterDataImage = &counterDataImage[0];
nvpaStatus = NVPW_D3D11_Profiler_CounterDataImage_Initialize(&initializeParams);
if (nvpaStatus)
{
return false;
}
NVPW_D3D11_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = { NVPW_D3D11_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE };
scratchBufferSizeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
scratchBufferSizeParams.pCounterDataImage = initializeParams.pCounterDataImage;
nvpaStatus = NVPW_D3D11_Profiler_CounterDataImage_CalculateScratchBufferSize(&scratchBufferSizeParams);
if (nvpaStatus)
{
return false;
}
counterDataScratch.resize(scratchBufferSizeParams.counterDataScratchBufferSize);
NVPW_D3D11_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = { NVPW_D3D11_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE };
initScratchBufferParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
initScratchBufferParams.pCounterDataImage = initializeParams.pCounterDataImage;
initScratchBufferParams.counterDataScratchBufferSize = scratchBufferSizeParams.counterDataScratchBufferSize;
initScratchBufferParams.pCounterDataScratchBuffer = &counterDataScratch[0];
nvpaStatus = NVPW_D3D11_Profiler_CounterDataImage_InitializeScratchBuffer(&initScratchBufferParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool SetConfig(const SetConfigParams& config) const override
{
NVPW_D3D11_Profiler_DeviceContext_SetConfig_Params setConfigParams = { NVPW_D3D11_Profiler_DeviceContext_SetConfig_Params_STRUCT_SIZE };
setConfigParams.pDeviceContext = pDeviceContext;
setConfigParams.pConfig = config.pConfigImage;
setConfigParams.configSize = config.configImageSize;
setConfigParams.minNestingLevel = 1;
setConfigParams.numNestingLevels = config.numNestingLevels;
setConfigParams.passIndex = 0;
setConfigParams.targetNestingLevel = 1;
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_SetConfig(&setConfigParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool BeginPass() const override
{
NVPW_D3D11_Profiler_DeviceContext_BeginPass_Params beginPassParams = { NVPW_D3D11_Profiler_DeviceContext_BeginPass_Params_STRUCT_SIZE };
beginPassParams.pDeviceContext = pDeviceContext;
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_BeginPass(&beginPassParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool EndPass() const override
{
NVPW_D3D11_Profiler_DeviceContext_EndPass_Params endPassParams = { NVPW_D3D11_Profiler_DeviceContext_EndPass_Params_STRUCT_SIZE };
endPassParams.pDeviceContext = pDeviceContext;
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_EndPass(&endPassParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool PushRange(const char* pRangeName) override
{
NVPW_D3D11_Profiler_DeviceContext_PushRange_Params pushRangeParams = { NVPW_D3D11_Profiler_DeviceContext_PushRange_Params_STRUCT_SIZE };
pushRangeParams.pDeviceContext = pDeviceContext;
pushRangeParams.pRangeName = pRangeName;
pushRangeParams.rangeNameLength = 0;
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_PushRange(&pushRangeParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool PopRange() override
{
NVPW_D3D11_Profiler_DeviceContext_PopRange_Params popParams = { NVPW_D3D11_Profiler_DeviceContext_PopRange_Params_STRUCT_SIZE };
popParams.pDeviceContext = pDeviceContext;
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_PopRange(&popParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool DecodeCounters(std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch, bool& onePassDecoded, bool& allPassesDecoded) const
{
NVPW_D3D11_Profiler_DeviceContext_DecodeCounters_Params decodeParams = { NVPW_D3D11_Profiler_DeviceContext_DecodeCounters_Params_STRUCT_SIZE };
decodeParams.pDeviceContext = pDeviceContext;
decodeParams.counterDataImageSize = counterDataImage.size();
decodeParams.pCounterDataImage = counterDataImage.data();
decodeParams.counterDataScratchBufferSize = counterDataScratch.size();
decodeParams.pCounterDataScratchBuffer = counterDataScratch.data();
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_DecodeCounters(&decodeParams);
if (nvpaStatus)
{
return false;
}
onePassDecoded = decodeParams.onePassCollected;
allPassesDecoded = decodeParams.allPassesCollected;
return true;
}
bool Initialize(ID3D11DeviceContext* pDeviceContext_, SessionOptions sessionOptions_)
{
pDeviceContext = pDeviceContext_;
sessionOptions = sessionOptions_;
}
void Reset()
{
NVPW_D3D11_Profiler_DeviceContext_EndSession_Params endSessionParams = {NVPW_D3D11_Profiler_DeviceContext_EndSession_Params_STRUCT_SIZE};
endSessionParams.pDeviceContext = pDeviceContext;
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_EndSession(&endSessionParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(10, "NVPW_D3D11_Profiler_DeviceContext_EndSession failed, nvpaStatus = %d\n", nvpaStatus);
}
sessionOptions = {};
pDeviceContext = nullptr;
}
};
private:
// D3D11 backend; also holds the device context and the session options.
ProfilerApi m_profilerApi;
// API-independent pass/queue bookkeeping; constructed with a reference to m_profilerApi.
RangeProfilerStateMachine m_stateMachine;
public:
~RangeProfilerD3D11()
{
}
// non-copyable
RangeProfilerD3D11(const RangeProfilerD3D11&) = delete;
// Creates a profiler with an empty backend and wires the state machine to it.
RangeProfilerD3D11()
: m_profilerApi()
, m_stateMachine(m_profilerApi)
{
}
// TODO: make this move friendly
RangeProfilerD3D11& operator=(const RangeProfilerD3D11&) = delete;
/// True while a device context is stored in the backend.
bool IsInSession() const
{
    const bool hasNoDeviceContext = !m_profilerApi.pDeviceContext;
    return !hasNoDeviceContext;
}
/// True while the state machine is between a successful BeginPass and the matching EndPass.
bool IsInPass() const
{
    const bool inPass = m_stateMachine.IsInPass();
    return inPass;
}
/// Returns the device context stored in the backend as a raw pointer (null when none is stored).
ID3D11DeviceContext* GetDeviceContext() const
{
    ID3D11DeviceContext* const pStoredContext = m_profilerApi.pDeviceContext;
    return pStoredContext;
}
bool BeginSession(ID3D11DeviceContext* pDeviceContext, const SessionOptions& sessionOptions)
{
    // Starts a profiler session on pDeviceContext. Returns false (leaving this object
    // unchanged) if a session is already active, the device is unsupported, or any
    // library call fails.
    if (IsInSession())
    {
        NV_PERF_LOG_ERR(10, "already in a session\n");
        return false;
    }

    // The GPU support query is only evaluated for NVIDIA devices (short-circuit).
    const bool isSupportedDevice = nv::perf::D3D11IsNvidiaDevice(pDeviceContext) && nv::perf::profiler::D3D11IsGpuSupported(pDeviceContext);
    if (!isSupportedDevice)
    {
        NV_PERF_LOG_ERR(10, "device is not supported for profiling\n");
        return false;
    }

    // Let the library compute the trace buffer size for the requested range budget.
    NVPW_D3D11_Profiler_CalcTraceBufferSize_Params traceSizeParams = { NVPW_D3D11_Profiler_CalcTraceBufferSize_Params_STRUCT_SIZE };
    traceSizeParams.maxRangesPerPass = sessionOptions.maxNumRanges;
    traceSizeParams.avgRangeNameLength = sessionOptions.avgRangeNameLength;
    if (NVPW_D3D11_Profiler_CalcTraceBufferSize(&traceSizeParams))
    {
        return false;
    }

    NVPW_D3D11_Profiler_DeviceContext_BeginSession_Params sessionParams = { NVPW_D3D11_Profiler_DeviceContext_BeginSession_Params_STRUCT_SIZE };
    sessionParams.pDeviceContext = pDeviceContext;
    sessionParams.numTraceBuffers = sessionOptions.numTraceBuffers;
    sessionParams.traceBufferSize = traceSizeParams.traceBufferSize;
    sessionParams.maxRangesPerPass = sessionOptions.maxNumRanges;
    sessionParams.maxLaunchesPerPass = sessionOptions.maxNumRanges;
    const NVPA_Status beginStatus = NVPW_D3D11_Profiler_DeviceContext_BeginSession(&sessionParams);
    if (!beginStatus)
    {
        // Success: remember the session so IsInSession() reports true from now on.
        m_profilerApi.sessionOptions = sessionOptions;
        m_profilerApi.pDeviceContext = pDeviceContext;
        return true;
    }

    // Failure: pick the most specific diagnostic available for the status code.
    if (beginStatus == NVPA_STATUS_INSUFFICIENT_PRIVILEGE)
    {
        NV_PERF_LOG_ERR(10, "Failed to start profiler session: profiling permissions not enabled. Please follow these instructions: https://developer.nvidia.com/ERR_NVGPUCTRPERM\n");
    }
    else if (beginStatus == NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION)
    {
        NV_PERF_LOG_ERR(10, "Failed to start profiler session: insufficient driver version. Please install the latest NVIDIA driver from https://www.nvidia.com\n");
    }
    else
    {
        NV_PERF_LOG_ERR(10, "Failed to start profiler session: unknown error. It may be a resource conflict - only one profiler session can run at a time per GPU.\n");
    }
    return false;
}
bool EndSession()
{
    // Tears down the state machine first, then the API object (which ends the library session).
    if (IsInSession())
    {
        m_stateMachine.Reset();
        m_profilerApi.Reset();
        return true;
    }
    NV_PERF_LOG_ERR(10, "must be called in a session\n");
    return false;
}
bool EnqueueCounterCollection(const SetConfigParams& config)
{
    // Forwarded unchanged to the state machine.
    return m_stateMachine.EnqueueCounterCollection(config);
}
bool EnqueueCounterCollection(const CounterConfiguration& configuration, uint16_t numNestingLevels = 1, size_t numStatisticalSamples = 1)
{
    // Convenience overload: wraps the arguments in a SetConfigParams and forwards it.
    return m_stateMachine.EnqueueCounterCollection(SetConfigParams(configuration, numNestingLevels, numStatisticalSamples));
}
bool BeginPass()
{
    // Only valid inside a session; otherwise log and fail without touching the state machine.
    if (IsInSession())
    {
        return m_stateMachine.BeginPass();
    }
    NV_PERF_LOG_ERR(10, "must be called in a session\n");
    return false;
}
bool EndPass()
{
    // Only valid inside a session; otherwise log and fail without touching the state machine.
    if (IsInSession())
    {
        return m_stateMachine.EndPass();
    }
    NV_PERF_LOG_ERR(10, "must be called in a session\n");
    return false;
}
bool PushRange(const char* pRangeName)
{
    // Forwarded to the state machine; no session check is performed here.
    return m_stateMachine.PushRange(pRangeName);
}
bool PopRange()
{
    // Forwarded to the state machine; no session check is performed here.
    return m_stateMachine.PopRange();
}
bool DecodeCounters(DecodeResult& decodeResult)
{
    // Only valid inside a session; the actual decoding is delegated to the state machine.
    if (IsInSession())
    {
        return m_stateMachine.DecodeCounters(decodeResult);
    }
    NV_PERF_LOG_ERR(10, "must be called in a session\n");
    return false;
}
bool AllPassesSubmitted() const
{
    // Relays the state machine's view of whether every pass has been submitted.
    return m_stateMachine.AllPassesSubmitted();
}
};
}}}

View File

@@ -0,0 +1,419 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <stdio.h>
#include <atomic>
#include <thread>
#include <vector>
#include "NvPerfInit.h"
#include "NvPerfCounterConfiguration.h"
#include "NvPerfRangeProfiler.h"
#include "NvPerfD3D12.h"
struct ID3D12CommandQueue;
namespace nv { namespace perf { namespace profiler {
class RangeProfilerD3D12
{
protected:
struct ProfilerApi : RangeProfilerStateMachine::IProfilerApi
{
CComPtr<ID3D12CommandQueue> pCommandQueue;
SessionOptions sessionOptions;
ProfilerApi()
: pCommandQueue(nullptr)
, sessionOptions()
{
}
virtual bool CreateCounterData(const SetConfigParams& config, std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch) const override
{
NVPA_Status nvpaStatus;
NVPW_D3D12_Profiler_CounterDataImageOptions counterDataImageOptions = { NVPW_D3D12_Profiler_CounterDataImageOptions_STRUCT_SIZE };
counterDataImageOptions.pCounterDataPrefix = config.pCounterDataPrefix;
counterDataImageOptions.counterDataPrefixSize = config.counterDataPrefixSize;
counterDataImageOptions.maxNumRanges = static_cast<uint32_t>(sessionOptions.maxNumRanges);
counterDataImageOptions.maxNumRangeTreeNodes = static_cast<uint32_t>(2 * sessionOptions.maxNumRanges);
counterDataImageOptions.maxRangeNameLength = static_cast<uint32_t>(sessionOptions.avgRangeNameLength);
NVPW_D3D12_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = { NVPW_D3D12_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE };
calculateSizeParams.pOptions = &counterDataImageOptions;
calculateSizeParams.counterDataImageOptionsSize = NVPW_D3D12_Profiler_CounterDataImageOptions_STRUCT_SIZE;
nvpaStatus = NVPW_D3D12_Profiler_CounterDataImage_CalculateSize(&calculateSizeParams);
if (nvpaStatus)
{
return false;
}
counterDataImage.resize(calculateSizeParams.counterDataImageSize);
NVPW_D3D12_Profiler_CounterDataImage_Initialize_Params initializeParams = { NVPW_D3D12_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE };
initializeParams.counterDataImageOptionsSize = NVPW_D3D12_Profiler_CounterDataImageOptions_STRUCT_SIZE;
initializeParams.pOptions = &counterDataImageOptions;
initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
initializeParams.pCounterDataImage = &counterDataImage[0];
nvpaStatus = NVPW_D3D12_Profiler_CounterDataImage_Initialize(&initializeParams);
if (nvpaStatus)
{
return false;
}
NVPW_D3D12_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = { NVPW_D3D12_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE };
scratchBufferSizeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
scratchBufferSizeParams.pCounterDataImage = initializeParams.pCounterDataImage;
nvpaStatus = NVPW_D3D12_Profiler_CounterDataImage_CalculateScratchBufferSize(&scratchBufferSizeParams);
if (nvpaStatus)
{
return false;
}
counterDataScratch.resize(scratchBufferSizeParams.counterDataScratchBufferSize);
NVPW_D3D12_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = { NVPW_D3D12_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE };
initScratchBufferParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
initScratchBufferParams.pCounterDataImage = initializeParams.pCounterDataImage;
initScratchBufferParams.counterDataScratchBufferSize = scratchBufferSizeParams.counterDataScratchBufferSize;
initScratchBufferParams.pCounterDataScratchBuffer = &counterDataScratch[0];
nvpaStatus = NVPW_D3D12_Profiler_CounterDataImage_InitializeScratchBuffer(&initScratchBufferParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool SetConfig(const SetConfigParams& config) const override
{
NVPW_D3D12_Profiler_Queue_SetConfig_Params setConfigParams = { NVPW_D3D12_Profiler_Queue_SetConfig_Params_STRUCT_SIZE };
setConfigParams.pCommandQueue = pCommandQueue;
setConfigParams.pConfig = config.pConfigImage;
setConfigParams.configSize = config.configImageSize;
setConfigParams.minNestingLevel = 1;
setConfigParams.numNestingLevels = config.numNestingLevels;
setConfigParams.passIndex = 0;
setConfigParams.targetNestingLevel = 1;
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_SetConfig(&setConfigParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool BeginPass() const override
{
NVPW_D3D12_Profiler_Queue_BeginPass_Params beginPassParams = { NVPW_D3D12_Profiler_Queue_BeginPass_Params_STRUCT_SIZE };
beginPassParams.pCommandQueue = pCommandQueue;
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_BeginPass(&beginPassParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool EndPass() const override
{
NVPW_D3D12_Profiler_Queue_EndPass_Params endPassParams = { NVPW_D3D12_Profiler_Queue_EndPass_Params_STRUCT_SIZE };
endPassParams.pCommandQueue = pCommandQueue;
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_EndPass(&endPassParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool PushRange(const char* pRangeName) override
{
NVPW_D3D12_Profiler_Queue_PushRange_Params pushRangeParams = {NVPW_D3D12_Profiler_Queue_PushRange_Params_STRUCT_SIZE};
pushRangeParams.pRangeName = pRangeName;
pushRangeParams.rangeNameLength = 0;
pushRangeParams.pCommandQueue = pCommandQueue;
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_PushRange(&pushRangeParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool PopRange() override
{
NVPW_D3D12_Profiler_Queue_PopRange_Params popParams = {NVPW_D3D12_Profiler_Queue_PopRange_Params_STRUCT_SIZE};
popParams.pCommandQueue = pCommandQueue;
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_PopRange(&popParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool DecodeCounters(std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch, bool& onePassDecoded, bool& allPassesDecoded) const
{
NVPW_D3D12_Profiler_Queue_DecodeCounters_Params decodeParams = { NVPW_D3D12_Profiler_Queue_DecodeCounters_Params_STRUCT_SIZE };
decodeParams.pCommandQueue = pCommandQueue;
decodeParams.counterDataImageSize = counterDataImage.size();
decodeParams.pCounterDataImage = counterDataImage.data();
decodeParams.counterDataScratchBufferSize = counterDataScratch.size();
decodeParams.pCounterDataScratchBuffer = counterDataScratch.data();
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_DecodeCounters(&decodeParams);
if (nvpaStatus)
{
return false;
}
onePassDecoded = decodeParams.onePassCollected;
allPassesDecoded = decodeParams.allPassesCollected;
return true;
}
bool Initialize(ID3D12CommandQueue* pCommandQueue_, const SessionOptions& sessionOptions_)
{
pCommandQueue = pCommandQueue_;
sessionOptions = sessionOptions_;
return true;
}
void Reset()
{
NVPW_D3D12_Profiler_Queue_EndSession_Params endSessionParams = {NVPW_D3D12_Profiler_Queue_EndSession_Params_STRUCT_SIZE};
endSessionParams.pCommandQueue = pCommandQueue;
endSessionParams.timeout = INFINITE;
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_EndSession(&endSessionParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(10, "NVPW_D3D12_Profiler_Queue_EndSession failed, nvpaStatus = %d\n", nvpaStatus);
}
sessionOptions = {};
pCommandQueue = nullptr;
}
};
protected: // members
ProfilerApi m_profilerApi;
RangeProfilerStateMachine m_stateMachine;
std::thread m_spgoThread;
volatile bool m_spgoThreadExited;
private:
// non-copyable
RangeProfilerD3D12(const RangeProfilerD3D12&);
static void SpgoThreadProc(RangeProfilerD3D12* pRangeProfilerD3D12, ID3D12CommandQueue* pCommandQueue)
{
// Run continuously in the background, handling all BeginPass and EndPass GPU operations until EndSession().
NVPW_D3D12_Queue_ServicePendingGpuOperations_Params serviceGpuOpsParams = { NVPW_D3D12_Queue_ServicePendingGpuOperations_Params_STRUCT_SIZE };
serviceGpuOpsParams.pCommandQueue = pCommandQueue;
serviceGpuOpsParams.numOperations = 0; // run until EndSession()
serviceGpuOpsParams.timeout = INFINITE;
NVPA_Status nvpaStatus = NVPW_D3D12_Queue_ServicePendingGpuOperations(&serviceGpuOpsParams);
if (nvpaStatus)
{
// TODO: log an error
}
pRangeProfilerD3D12->m_spgoThreadExited = true;
}
public:
~RangeProfilerD3D12()
{
}
RangeProfilerD3D12()
: m_profilerApi()
, m_stateMachine(m_profilerApi)
, m_spgoThread()
, m_spgoThreadExited()
{
}
// TODO: make this move friendly
bool IsInSession() const
{
return !!m_profilerApi.pCommandQueue;
}
bool IsInPass() const
{
return m_stateMachine.IsInPass();
}
ID3D12CommandQueue* GetCommandQueue() const
{
return m_profilerApi.pCommandQueue;
}
bool BeginSession(
ID3D12CommandQueue* pCommandQueue,
const SessionOptions& sessionOptions)
{
if (IsInSession())
{
NV_PERF_LOG_ERR(10, "already in a session\n");
return false;
}
if (!D3D12IsNvidiaDevice(pCommandQueue) || !D3D12IsGpuSupported(pCommandQueue))
{
// TODO: error - device is not supported for profiling
return false;
}
NVPA_Status nvpaStatus;
NVPW_D3D12_Profiler_CalcTraceBufferSize_Params calcTraceBufferSizeParam = { NVPW_D3D12_Profiler_CalcTraceBufferSize_Params_STRUCT_SIZE };
calcTraceBufferSizeParam.maxRangesPerPass = sessionOptions.maxNumRanges;
calcTraceBufferSizeParam.avgRangeNameLength = sessionOptions.avgRangeNameLength;
nvpaStatus = NVPW_D3D12_Profiler_CalcTraceBufferSize(&calcTraceBufferSizeParam);
if (nvpaStatus)
{
return false;
}
NVPW_D3D12_Profiler_Queue_BeginSession_Params beginSessionParams = { NVPW_D3D12_Profiler_Queue_BeginSession_Params_STRUCT_SIZE };
beginSessionParams.pCommandQueue = pCommandQueue;
beginSessionParams.numTraceBuffers = sessionOptions.numTraceBuffers;
beginSessionParams.traceBufferSize = calcTraceBufferSizeParam.traceBufferSize;
beginSessionParams.maxRangesPerPass = sessionOptions.maxNumRanges;
beginSessionParams.maxLaunchesPerPass = sessionOptions.maxNumRanges;
nvpaStatus = NVPW_D3D12_Profiler_Queue_BeginSession(&beginSessionParams);
if (nvpaStatus)
{
if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_PRIVILEGE)
{
NV_PERF_LOG_ERR(10, "Failed to start profiler session: profiling permissions not enabled. Please follow these instructions: https://developer.nvidia.com/ERR_NVGPUCTRPERM\n");
}
else if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION)
{
NV_PERF_LOG_ERR(10, "Failed to start profiler session: insufficient driver version. Please install the latest NVIDIA driver from https://www.nvidia.com\n");
}
else
{
NV_PERF_LOG_ERR(10, "Failed to start profiler session: unknown error. It may be a resource conflict - only one profiler session can run at a time per GPU.\n");
}
return false;
}
m_spgoThreadExited = false;
m_spgoThread = std::thread(SpgoThreadProc, this, pCommandQueue);
m_profilerApi.Initialize(pCommandQueue, sessionOptions);
return true;
}
bool EndSession()
{
if (!IsInSession())
{
NV_PERF_LOG_ERR(10, "must be called in a session\n");
return false;
}
m_stateMachine.Reset();
m_profilerApi.Reset();
m_spgoThread.join();
m_spgoThreadExited = false;
return true;
}
bool EnqueueCounterCollection(const SetConfigParams& config)
{
const bool status = m_stateMachine.EnqueueCounterCollection(config);
return status;
}
bool EnqueueCounterCollection(const CounterConfiguration& configuration, uint16_t numNestingLevels = 1, size_t numStatisticalSamples = 1)
{
const bool status = m_stateMachine.EnqueueCounterCollection(SetConfigParams(configuration, numNestingLevels, numStatisticalSamples));
return status;
}
bool BeginPass()
{
if (!IsInSession())
{
NV_PERF_LOG_ERR(10, "must be called in a session\n");
return false;
}
const bool status = m_stateMachine.BeginPass();
return status;
}
bool EndPass()
{
if (!IsInSession())
{
NV_PERF_LOG_ERR(10, "must be called in a session\n");
return false;
}
const bool status = m_stateMachine.EndPass();
return status;
}
// Convenience method to start a Queue-level range. For CommandLists, use D3D12RangeCommands::PushRange.
bool PushRange(const char* pRangeName)
{
const bool status = m_stateMachine.PushRange(pRangeName);
return status;
}
// Convenience method to end a Queue-level range. For CommandLists, use D3D12RangeCommands::PopRange.
bool PopRange()
{
const bool status = m_stateMachine.PopRange();
return status;
}
bool DecodeCounters(DecodeResult& decodeResult)
{
if (!IsInSession())
{
NV_PERF_LOG_ERR(10, "must be called in a session\n");
return false;
}
if (m_spgoThreadExited)
{
NV_PERF_LOG_ERR(10, "the background thread exited; possible hang on subsequent CPU-waiting-on-GPU calls\n");
return false;
}
const bool status = m_stateMachine.DecodeCounters(decodeResult);
return status;
}
bool AllPassesSubmitted() const
{
const bool allPassesSubmitted = m_stateMachine.AllPassesSubmitted();
return allPassesSubmitted;
}
};
}}}

View File

@@ -0,0 +1,401 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <stdio.h>
#include <vector>
#include "NvPerfInit.h"
#include "NvPerfCounterConfiguration.h"
#include "NvPerfRangeProfiler.h"
#include "NvPerfOpenGL.h"
namespace nv { namespace perf { namespace profiler {
/// Range profiler for the current OpenGL graphics context.
///
/// Owns the NvPerf profiler session on the context and a RangeProfilerStateMachine that the
/// public pass/range/decode methods forward to.
class RangeProfilerOpenGL
{
protected:
    /// OpenGL backend handed to the state machine. Each method fills the matching NVPW_OpenGL_*
    /// parameter struct and maps a non-zero NVPA_Status to `false`.
    struct ProfilerApi : RangeProfilerStateMachine::IProfilerApi
    {
        size_t maxQueueRangesPerPass;                  // set via SetMaxQueueRangesPerPass(); defaults to 1
        size_t nextCommandBufferIdx;                   // NOTE(review): not referenced elsewhere in this class - confirm it is still needed
        SessionOptions sessionOptions;                 // options of the current session
        NVPW_OpenGL_GraphicsContext* pGraphicsContext; // set by Initialize(), cleared by Reset()

        ProfilerApi()
            : maxQueueRangesPerPass(1)
            , nextCommandBufferIdx()
            , sessionOptions()
            , pGraphicsContext()
        {
        }

        /// Sizes and initializes the counter data image and its scratch buffer for `config`.
        /// Both vectors are resized to the sizes reported by the library.
        /// @return false as soon as any library call fails.
        virtual bool CreateCounterData(const SetConfigParams& config, std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch) const override
        {
            NVPA_Status nvpaStatus;

            NVPW_OpenGL_Profiler_CounterDataImageOptions counterDataImageOption = { NVPW_OpenGL_Profiler_CounterDataImageOptions_STRUCT_SIZE };
            counterDataImageOption.pCounterDataPrefix = config.pCounterDataPrefix;
            counterDataImageOption.counterDataPrefixSize = config.counterDataPrefixSize;
            counterDataImageOption.maxNumRanges = static_cast<uint32_t>(sessionOptions.maxNumRanges);
            counterDataImageOption.maxNumRangeTreeNodes = static_cast<uint32_t>(2 * sessionOptions.maxNumRanges);
            counterDataImageOption.maxRangeNameLength = static_cast<uint32_t>(sessionOptions.avgRangeNameLength);

            NVPW_OpenGL_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = { NVPW_OpenGL_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE };
            calculateSizeParams.pOptions = &counterDataImageOption;
            calculateSizeParams.counterDataImageOptionsSize = NVPW_OpenGL_Profiler_CounterDataImageOptions_STRUCT_SIZE;
            nvpaStatus = NVPW_OpenGL_Profiler_CounterDataImage_CalculateSize(&calculateSizeParams);
            if (nvpaStatus)
            {
                return false;
            }

            NVPW_OpenGL_Profiler_CounterDataImage_Initialize_Params initializeParams = { NVPW_OpenGL_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE };
            initializeParams.counterDataImageOptionsSize = NVPW_OpenGL_Profiler_CounterDataImageOptions_STRUCT_SIZE;
            initializeParams.pOptions = &counterDataImageOption;
            initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
            counterDataImage.resize(calculateSizeParams.counterDataImageSize);
            // data() instead of &v[0]: indexing an empty vector is undefined behavior.
            initializeParams.pCounterDataImage = counterDataImage.data();
            nvpaStatus = NVPW_OpenGL_Profiler_CounterDataImage_Initialize(&initializeParams);
            if (nvpaStatus)
            {
                return false;
            }

            NVPW_OpenGL_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = { NVPW_OpenGL_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE };
            scratchBufferSizeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
            scratchBufferSizeParams.pCounterDataImage = initializeParams.pCounterDataImage;
            nvpaStatus = NVPW_OpenGL_Profiler_CounterDataImage_CalculateScratchBufferSize(&scratchBufferSizeParams);
            if (nvpaStatus)
            {
                return false;
            }
            counterDataScratch.resize(scratchBufferSizeParams.counterDataScratchBufferSize);

            NVPW_OpenGL_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = { NVPW_OpenGL_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE };
            initScratchBufferParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
            initScratchBufferParams.pCounterDataImage = initializeParams.pCounterDataImage;
            initScratchBufferParams.counterDataScratchBufferSize = scratchBufferSizeParams.counterDataScratchBufferSize;
            initScratchBufferParams.pCounterDataScratchBuffer = counterDataScratch.data();
            nvpaStatus = NVPW_OpenGL_Profiler_CounterDataImage_InitializeScratchBuffer(&initScratchBufferParams);
            if (nvpaStatus)
            {
                return false;
            }
            return true;
        }

        /// Applies the config image of `config`. Nesting starts at level 1 and the pass index is
        /// always 0 here.
        virtual bool SetConfig(const SetConfigParams& config) const override
        {
            NVPW_OpenGL_Profiler_GraphicsContext_SetConfig_Params setConfigParams = { NVPW_OpenGL_Profiler_GraphicsContext_SetConfig_Params_STRUCT_SIZE };
            setConfigParams.pConfig = config.pConfigImage;
            setConfigParams.configSize = config.configImageSize;
            setConfigParams.minNestingLevel = 1;
            setConfigParams.numNestingLevels = config.numNestingLevels;
            setConfigParams.passIndex = 0;
            setConfigParams.targetNestingLevel = 1;
            NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_SetConfig(&setConfigParams);
            if (nvpaStatus)
            {
                return false;
            }
            return true;
        }

        /// Begins a profiling pass.
        virtual bool BeginPass() const override
        {
            NVPW_OpenGL_Profiler_GraphicsContext_BeginPass_Params beginPassParams = { NVPW_OpenGL_Profiler_GraphicsContext_BeginPass_Params_STRUCT_SIZE };
            NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_BeginPass(&beginPassParams);
            if (nvpaStatus)
            {
                return false;
            }
            return true;
        }

        /// Ends the current profiling pass.
        virtual bool EndPass() const override
        {
            NVPW_OpenGL_Profiler_GraphicsContext_EndPass_Params endPassParams = { NVPW_OpenGL_Profiler_GraphicsContext_EndPass_Params_STRUCT_SIZE };
            NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_EndPass(&endPassParams);
            if (nvpaStatus)
            {
                return false;
            }
            return true;
        }

        /// Pushes a range named `pRangeName`; a failure is logged.
        virtual bool PushRange(const char* pRangeName) override
        {
            NVPW_OpenGL_Profiler_GraphicsContext_PushRange_Params pushRangeParams = {NVPW_OpenGL_Profiler_GraphicsContext_PushRange_Params_STRUCT_SIZE};
            pushRangeParams.pRangeName = pRangeName;
            NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_PushRange(&pushRangeParams);
            if (nvpaStatus)
            {
                NV_PERF_LOG_ERR(10, "NVPW_OpenGL_Profiler_GraphicsContext_PushRange failed, nvpaStatus = %d\n", nvpaStatus);
                return false;
            }
            return true;
        }

        /// Pops the innermost range; a failure is logged.
        virtual bool PopRange() override
        {
            NVPW_OpenGL_Profiler_GraphicsContext_PopRange_Params popRangeParams = {NVPW_OpenGL_Profiler_GraphicsContext_PopRange_Params_STRUCT_SIZE};
            NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_PopRange(&popRangeParams);
            if (nvpaStatus)
            {
                NV_PERF_LOG_ERR(10, "NVPW_OpenGL_Profiler_GraphicsContext_PopRange failed, nvpaStatus = %d\n", nvpaStatus);
                return false;
            }
            return true;
        }

        /// Decodes collected counters into `counterDataImage` using `counterDataScratch`.
        /// `onePassDecoded` / `allPassesDecoded` are written only on success.
        virtual bool DecodeCounters(std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch, bool& onePassDecoded, bool& allPassesDecoded) const
        {
            NVPW_OpenGL_Profiler_GraphicsContext_DecodeCounters_Params decodeParams = { NVPW_OpenGL_Profiler_GraphicsContext_DecodeCounters_Params_STRUCT_SIZE };
            decodeParams.counterDataImageSize = counterDataImage.size();
            decodeParams.pCounterDataImage = counterDataImage.data();
            decodeParams.counterDataScratchBufferSize = counterDataScratch.size();
            decodeParams.pCounterDataScratchBuffer = counterDataScratch.data();
            decodeParams.pGraphicsContext = pGraphicsContext;
            NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_DecodeCounters(&decodeParams);
            if (nvpaStatus)
            {
                return false;
            }
            onePassDecoded = decodeParams.onePassCollected;
            allPassesDecoded = decodeParams.allPassesCollected;
            return true;
        }

        /// Queries the current graphics context and stores it together with the session options.
        /// @return false (leaving this object unchanged) if the context query fails.
        bool Initialize(const SessionOptions& sessionOptions_)
        {
            NVPW_OpenGL_GetCurrentGraphicsContext_Params getCurrentGraphicsContextParams = {NVPW_OpenGL_GetCurrentGraphicsContext_Params_STRUCT_SIZE};
            NVPA_Status nvpaStatus = NVPW_OpenGL_GetCurrentGraphicsContext(&getCurrentGraphicsContextParams);
            if (nvpaStatus)
            {
                return false;
            }
            pGraphicsContext = getCurrentGraphicsContextParams.pGraphicsContext;
            sessionOptions = sessionOptions_;
            return true;
        }

        /// Ends the library session and clears the stored state. A failing EndSession call is
        /// logged but otherwise ignored.
        void Reset()
        {
            NVPW_OpenGL_Profiler_GraphicsContext_EndSession_Params endSessionParams = {NVPW_OpenGL_Profiler_GraphicsContext_EndSession_Params_STRUCT_SIZE};
            NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_EndSession(&endSessionParams);
            if (nvpaStatus)
            {
                NV_PERF_LOG_ERR(10, "NVPW_OpenGL_Profiler_GraphicsContext_EndSession failed, nvpaStatus = %d\n", nvpaStatus);
            }
            sessionOptions = {};
            pGraphicsContext = nullptr;
        }
    };

protected: // members
    ProfilerApi m_profilerApi;                // must precede m_stateMachine, which is constructed from it
    RangeProfilerStateMachine m_stateMachine;

private:
    // non-copyable
    RangeProfilerOpenGL(const RangeProfilerOpenGL&) = delete;

public:
    // Empty destructor: it does not call EndSession().
    ~RangeProfilerOpenGL()
    {
    }

    RangeProfilerOpenGL()
        : m_profilerApi()
        , m_stateMachine(m_profilerApi)
    {
    }

    /// True while a session is active, i.e. while a graphics context is stored.
    bool IsInSession() const
    {
        return m_profilerApi.pGraphicsContext != nullptr;
    }

    /// True while the state machine reports an open pass.
    bool IsInPass() const
    {
        return m_stateMachine.IsInPass();
    }

    /// Stores `maxQueueRangesPerPass`; rejected (with a log message) once a session is active.
    bool SetMaxQueueRangesPerPass(size_t maxQueueRangesPerPass)
    {
        if (IsInSession())
        {
            NV_PERF_LOG_ERR(10, "SetMaxQueueRangesPerPass must be called before the session starts.\n");
            return false;
        }
        m_profilerApi.maxQueueRangesPerPass = maxQueueRangesPerPass;
        return true;
    }

    /// Starts a profiler session on the current graphics context.
    /// @return false if a session is already active, the device is unsupported, or a library
    ///         call fails. On failure no session is left open.
    bool BeginSession(
        const SessionOptions& sessionOptions)
    {
        if (IsInSession())
        {
            NV_PERF_LOG_ERR(10, "already in a session\n");
            return false;
        }
        if (!OpenGLIsNvidiaDevice() || !OpenGLIsGpuSupported())
        {
            NV_PERF_LOG_ERR(10, "device is not supported for profiling\n");
            return false;
        }

        NVPA_Status nvpaStatus;
        NVPW_OpenGL_Profiler_CalcTraceBufferSize_Params calcTraceBufferSizeParam = { NVPW_OpenGL_Profiler_CalcTraceBufferSize_Params_STRUCT_SIZE };
        calcTraceBufferSizeParam.maxRangesPerPass = sessionOptions.maxNumRanges;
        calcTraceBufferSizeParam.avgRangeNameLength = sessionOptions.avgRangeNameLength;
        nvpaStatus = NVPW_OpenGL_Profiler_CalcTraceBufferSize(&calcTraceBufferSizeParam);
        if (nvpaStatus)
        {
            return false;
        }

        NVPW_OpenGL_Profiler_GraphicsContext_BeginSession_Params beginSessionParams = { NVPW_OpenGL_Profiler_GraphicsContext_BeginSession_Params_STRUCT_SIZE };
        beginSessionParams.numTraceBuffers = sessionOptions.numTraceBuffers;
        beginSessionParams.traceBufferSize = calcTraceBufferSizeParam.traceBufferSize;
        beginSessionParams.maxRangesPerPass = sessionOptions.maxNumRanges;
        beginSessionParams.maxLaunchesPerPass = sessionOptions.maxNumRanges;
        nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_BeginSession(&beginSessionParams);
        if (nvpaStatus)
        {
            if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_PRIVILEGE)
            {
                NV_PERF_LOG_ERR(10, "Failed to start profiler session: profiling permissions not enabled. Please follow these instructions: https://developer.nvidia.com/nvidia-development-tools-solutions-ERR_NVGPUCTRPERM-permission-issue-performance-counters \n");
            }
            else if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION)
            {
                NV_PERF_LOG_ERR(10, "Failed to start profiler session: insufficient driver version. Please install the latest NVIDIA driver from https://www.nvidia.com \n");
            }
            else
            {
                NV_PERF_LOG_ERR(10, "Failed to start profiler session: unknown error. It may be a resource conflict - only one profiler session can run at a time per GPU.\n");
            }
            return false;
        }

        if(!m_profilerApi.Initialize(sessionOptions))
        {
            // The library session is already open at this point, but IsInSession() stays false,
            // so EndSession() could never close it. End it here to avoid leaking the session.
            NVPW_OpenGL_Profiler_GraphicsContext_EndSession_Params endSessionParams = {NVPW_OpenGL_Profiler_GraphicsContext_EndSession_Params_STRUCT_SIZE};
            nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_EndSession(&endSessionParams);
            if (nvpaStatus)
            {
                NV_PERF_LOG_ERR(10, "NVPW_OpenGL_Profiler_GraphicsContext_EndSession failed, nvpaStatus = %d\n", nvpaStatus);
            }
            return false;
        }
        return true;
    }

    /// Ends the active session: resets the state machine, then ends the library session.
    bool EndSession()
    {
        if (!IsInSession())
        {
            NV_PERF_LOG_ERR(10, "must be called in a session\n");
            return false;
        }
        m_stateMachine.Reset();
        m_profilerApi.Reset();
        return true;
    }

    /// Forwards `config` to the state machine.
    bool EnqueueCounterCollection(const SetConfigParams& config)
    {
        const bool status = m_stateMachine.EnqueueCounterCollection(config);
        return status;
    }

    /// Convenience overload that builds the SetConfigParams from a CounterConfiguration.
    bool EnqueueCounterCollection(const CounterConfiguration& configuration, uint16_t numNestingLevels = 1, size_t numStatisticalSamples = 1)
    {
        const bool status = m_stateMachine.EnqueueCounterCollection(SetConfigParams(configuration, numNestingLevels, numStatisticalSamples));
        return status;
    }

    /// Begins a pass; fails (with a log message) outside a session.
    bool BeginPass()
    {
        if (!IsInSession())
        {
            NV_PERF_LOG_ERR(10, "must be called in a session\n");
            return false;
        }
        const bool status = m_stateMachine.BeginPass();
        return status;
    }

    /// Ends a pass; fails (with a log message) outside a session.
    bool EndPass()
    {
        if (!IsInSession())
        {
            NV_PERF_LOG_ERR(10, "must be called in a session\n");
            return false;
        }
        const bool status = m_stateMachine.EndPass();
        return status;
    }

    /// Pushes a range. Outside a pass this is a no-op that reports success.
    bool PushRange(const char* pRangeName)
    {
        if (!IsInPass())
        {
            return true;
        }
        const bool status = m_stateMachine.PushRange(pRangeName);
        return status;
    }

    /// Pops a range. Outside a pass this is a no-op that reports success.
    bool PopRange()
    {
        if (!IsInPass())
        {
            return true;
        }
        const bool status = m_stateMachine.PopRange();
        return status;
    }

    /// Decodes counters through the state machine; fails (with a log message) outside a session.
    bool DecodeCounters(DecodeResult& decodeResult)
    {
        if (!IsInSession())
        {
            NV_PERF_LOG_ERR(10, "must be called in a session\n");
            return false;
        }
        const bool status = m_stateMachine.DecodeCounters(decodeResult);
        return status;
    }

    /// Relays the state machine's view of whether every pass has been submitted.
    bool AllPassesSubmitted() const
    {
        const bool allPassesSubmitted = m_stateMachine.AllPassesSubmitted();
        return allPassesSubmitted;
    }
};
}}}

View File

@@ -0,0 +1,574 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <stdio.h>
#include <thread>
#include <vector>
#include "NvPerfInit.h"
#include "NvPerfCounterConfiguration.h"
#include "NvPerfRangeProfiler.h"
#include "NvPerfVulkan.h"
namespace nv { namespace perf { namespace profiler {
class RangeProfilerVulkan
{
protected:
struct ProfilerApi : RangeProfilerStateMachine::IProfilerApi
{
VkQueue queue;
VkDevice device;
VkCommandPool commandPool;
size_t maxQueueRangesPerPass;
std::vector<VkCommandBuffer> rangeCommandBuffers;
std::vector<VkFence> rangeFences;
size_t nextCommandBufferIdx;
SessionOptions sessionOptions;
// Value-initializes the Vulkan handles, the command buffer index and the session options;
// maxQueueRangesPerPass starts at 1.
ProfilerApi()
: queue()
, device()
, commandPool()
, maxQueueRangesPerPass(1)
, nextCommandBufferIdx()
, sessionOptions()
{
}
/// Sizes and initializes the counter data image and its scratch buffer.
/// @param config              supplies the counter data prefix (pointer and size).
/// @param counterDataImage    resized to the size NVPW reports, then initialized in place.
/// @param counterDataScratch  resized to the size NVPW reports, then initialized in place.
/// @return false as soon as any NVPW call returns a non-zero status, true otherwise.
virtual bool CreateCounterData(const SetConfigParams& config, std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch) const override
{
    // These options are shared by the size calculation and the initialization below.
    NVPW_VK_Profiler_CounterDataImageOptions counterDataImageOptions = { NVPW_VK_Profiler_CounterDataImageOptions_STRUCT_SIZE };
    counterDataImageOptions.pCounterDataPrefix = config.pCounterDataPrefix;
    counterDataImageOptions.counterDataPrefixSize = config.counterDataPrefixSize;
    counterDataImageOptions.maxNumRanges = static_cast<uint32_t>(sessionOptions.maxNumRanges);
    counterDataImageOptions.maxNumRangeTreeNodes = static_cast<uint32_t>(2 * sessionOptions.maxNumRanges);
    counterDataImageOptions.maxRangeNameLength = static_cast<uint32_t>(sessionOptions.avgRangeNameLength);

    NVPW_VK_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = { NVPW_VK_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE };
    calculateSizeParams.pOptions = &counterDataImageOptions;
    calculateSizeParams.counterDataImageOptionsSize = NVPW_VK_Profiler_CounterDataImageOptions_STRUCT_SIZE;
    NVPA_Status nvpaStatus = NVPW_VK_Profiler_CounterDataImage_CalculateSize(&calculateSizeParams);
    if (nvpaStatus)
    {
        return false;
    }

    counterDataImage.resize(calculateSizeParams.counterDataImageSize);
    NVPW_VK_Profiler_CounterDataImage_Initialize_Params initializeParams = { NVPW_VK_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE };
    initializeParams.counterDataImageOptionsSize = NVPW_VK_Profiler_CounterDataImageOptions_STRUCT_SIZE;
    initializeParams.pOptions = &counterDataImageOptions;
    initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
    // data() instead of &v[0]: indexing an empty vector is undefined behaviour.
    initializeParams.pCounterDataImage = counterDataImage.data();
    nvpaStatus = NVPW_VK_Profiler_CounterDataImage_Initialize(&initializeParams);
    if (nvpaStatus)
    {
        return false;
    }

    NVPW_VK_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = { NVPW_VK_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE };
    scratchBufferSizeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
    scratchBufferSizeParams.pCounterDataImage = initializeParams.pCounterDataImage;
    nvpaStatus = NVPW_VK_Profiler_CounterDataImage_CalculateScratchBufferSize(&scratchBufferSizeParams);
    if (nvpaStatus)
    {
        return false;
    }

    counterDataScratch.resize(scratchBufferSizeParams.counterDataScratchBufferSize);
    NVPW_VK_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = { NVPW_VK_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE };
    initScratchBufferParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
    initScratchBufferParams.pCounterDataImage = initializeParams.pCounterDataImage;
    initScratchBufferParams.counterDataScratchBufferSize = scratchBufferSizeParams.counterDataScratchBufferSize;
    initScratchBufferParams.pCounterDataScratchBuffer = counterDataScratch.data();
    nvpaStatus = NVPW_VK_Profiler_CounterDataImage_InitializeScratchBuffer(&initScratchBufferParams);
    if (nvpaStatus)
    {
        return false;
    }
    return true;
}
virtual bool SetConfig(const SetConfigParams& config) const override
{
NVPW_VK_Profiler_Queue_SetConfig_Params setConfigParams = { NVPW_VK_Profiler_Queue_SetConfig_Params_STRUCT_SIZE };
setConfigParams.queue = queue;
setConfigParams.pConfig = config.pConfigImage;
setConfigParams.configSize = config.configImageSize;
setConfigParams.minNestingLevel = 1;
setConfigParams.numNestingLevels = config.numNestingLevels;
setConfigParams.passIndex = 0;
setConfigParams.targetNestingLevel = 1;
NVPA_Status nvpaStatus = NVPW_VK_Profiler_Queue_SetConfig(&setConfigParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool BeginPass() const override
{
NVPW_VK_Profiler_Queue_BeginPass_Params beginPassParams = { NVPW_VK_Profiler_Queue_BeginPass_Params_STRUCT_SIZE };
beginPassParams.queue = queue;
NVPA_Status nvpaStatus = NVPW_VK_Profiler_Queue_BeginPass(&beginPassParams);
if (nvpaStatus)
{
return false;
}
return true;
}
virtual bool EndPass() const override
{
NVPW_VK_Profiler_Queue_EndPass_Params endPassParams = { NVPW_VK_Profiler_Queue_EndPass_Params_STRUCT_SIZE };
endPassParams.queue = queue;
NVPA_Status nvpaStatus = NVPW_VK_Profiler_Queue_EndPass(&endPassParams);
if (nvpaStatus)
{
return false;
}
return true;
}
template <typename Functor>
bool SubmitRangeCommandBufferFunctor(Functor&& functor)
{
VkFence fence = rangeFences[nextCommandBufferIdx];
VkResult vkResult = vkWaitForFences(device, 1, &fence, false, 0);
if (vkResult == VK_TIMEOUT)
{
NV_PERF_LOG_ERR(10, "No more command buffer available for queue level ranges, consider increasing sessionOptions.maxNumRange\n");
return false;
}
if (vkResult)
{
NV_PERF_LOG_ERR(10, "vkWaitForFences failed, VkResult = %d\n", vkResult);
return false;
}
VkCommandBuffer commandBuffer = rangeCommandBuffers[nextCommandBufferIdx];
++nextCommandBufferIdx;
if (nextCommandBufferIdx >= rangeCommandBuffers.size())
{
nextCommandBufferIdx = 0;
}
vkResult = vkResetCommandBuffer(commandBuffer, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
if (vkResult)
{
NV_PERF_LOG_ERR(10, "vkResetCommandBuffer failed, VkResult = %d\n", vkResult);
return false;
}
VkCommandBufferBeginInfo commandBufferBeginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
vkResult = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
if (vkResult)
{
NV_PERF_LOG_ERR(10, "vkBeginCommandBuffer failed, VkResult = %d\n", vkResult);
return false;
}
if (!functor(commandBuffer))
{
return false;
}
vkResult = vkEndCommandBuffer(commandBuffer);
if (vkResult)
{
NV_PERF_LOG_ERR(10, "vkEndCommandBuffer failed, VkResult = %d\n", vkResult);
return false;
}
vkResult = vkResetFences(device, 1, &fence);
if (vkResult)
{
NV_PERF_LOG_ERR(10, "vkResetFences failed, VkResult = %d\n", vkResult);
return false;
}
VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBuffer;
vkResult = vkQueueSubmit(queue, 1, &submitInfo, fence);
if (vkResult)
{
NV_PERF_LOG_ERR(10, "vkQueueSubmit failed, VkResult = %d\n", vkResult);
return false;
}
return true;
}
virtual bool PushRange(const char* pRangeName) override
{
return SubmitRangeCommandBufferFunctor([&](VkCommandBuffer commandBuffer)
{
NVPW_VK_Profiler_CommandBuffer_PushRange_Params pushRangeParams = {NVPW_VK_Profiler_CommandBuffer_PushRange_Params_STRUCT_SIZE};
pushRangeParams.commandBuffer = commandBuffer;
pushRangeParams.pRangeName = pRangeName;
NVPA_Status nvpaStatus = NVPW_VK_Profiler_CommandBuffer_PushRange(&pushRangeParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_CommandBuffer_PushRange failed, nvpaStatus = %d\n", nvpaStatus);
return false;
}
return true;
});
}
virtual bool PopRange() override
{
return SubmitRangeCommandBufferFunctor([&](VkCommandBuffer commandBuffer)
{
NVPW_VK_Profiler_CommandBuffer_PopRange_Params popRangeParams = {NVPW_VK_Profiler_CommandBuffer_PopRange_Params_STRUCT_SIZE};
popRangeParams.commandBuffer = commandBuffer;
NVPA_Status nvpaStatus = NVPW_VK_Profiler_CommandBuffer_PopRange(&popRangeParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_CommandBuffer_PopRange failed, nvpaStatus = %d\n", nvpaStatus);
return false;
}
return true;
});
}
virtual bool DecodeCounters(std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch, bool& onePassDecoded, bool& allPassesDecoded) const
{
NVPW_VK_Profiler_Queue_DecodeCounters_Params decodeParams = { NVPW_VK_Profiler_Queue_DecodeCounters_Params_STRUCT_SIZE };
decodeParams.queue = queue;
decodeParams.counterDataImageSize = counterDataImage.size();
decodeParams.pCounterDataImage = counterDataImage.data();
decodeParams.counterDataScratchBufferSize = counterDataScratch.size();
decodeParams.pCounterDataScratchBuffer = counterDataScratch.data();
NVPA_Status nvpaStatus = NVPW_VK_Profiler_Queue_DecodeCounters(&decodeParams);
if (nvpaStatus)
{
return false;
}
onePassDecoded = decodeParams.onePassCollected;
allPassesDecoded = decodeParams.allPassesCollected;
return true;
}
/// Stores the device, queue and session options, then creates the command pool, the command buffers
/// and the fences used for queue-level ranges.
/// The pool holds maxQueueRangesPerPass * 2 * sessionOptions.numTraceBuffers command buffers, each paired
/// with one fence. Fences are created signaled, so the first poll in SubmitRangeCommandBufferFunctor()
/// sees every command buffer as available.
/// @return false as soon as a Vulkan call fails; objects created up to that point are not released here.
bool Initialize(VkDevice device_, VkQueue queue_, uint32_t queueFamilyIndex, const SessionOptions& sessionOptions_)
{
    device = device_;
    queue = queue_;
    sessionOptions = sessionOptions_;

    VkCommandPoolCreateInfo poolInfo = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO};
    poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
    poolInfo.queueFamilyIndex = queueFamilyIndex;
    if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool))
    {
        return false;
    }

    const size_t numCommandBuffers = maxQueueRangesPerPass * 2 * sessionOptions.numTraceBuffers;
    rangeCommandBuffers.resize(numCommandBuffers);
    VkCommandBufferAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
    allocateInfo.commandPool = commandPool;
    allocateInfo.commandBufferCount = static_cast<uint32_t>(numCommandBuffers);
    if (vkAllocateCommandBuffers(device, &allocateInfo, rangeCommandBuffers.data()))
    {
        return false;
    }

    rangeFences.resize(numCommandBuffers);
    VkFenceCreateInfo fenceInfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
    fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT;
    for (VkFence& fence : rangeFences)
    {
        if (vkCreateFence(device, &fenceInfo, nullptr, &fence))
        {
            return false;
        }
    }
    return true;
}
void Reset()
{
NVPW_VK_Profiler_Queue_EndSession_Params endSessionParams = {NVPW_VK_Profiler_Queue_EndSession_Params_STRUCT_SIZE};
endSessionParams.queue = queue;
endSessionParams.timeout = 0xFFFFFFFF;
NVPA_Status nvpaStatus = NVPW_VK_Profiler_Queue_EndSession(&endSessionParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_Queue_EndSession failed, nvpaStatus = %d\n", nvpaStatus);
}
sessionOptions = {};
nextCommandBufferIdx = 0;
vkFreeCommandBuffers(device, commandPool, (uint32_t)rangeCommandBuffers.size(), rangeCommandBuffers.data());
rangeCommandBuffers.clear();
vkDestroyCommandPool(device, commandPool, nullptr);
commandPool = VK_NULL_HANDLE;
for (auto fence : rangeFences)
{
vkDestroyFence(device, fence, nullptr);
}
queue = VK_NULL_HANDLE;
device = VK_NULL_HANDLE;
}
};
protected: // members
ProfilerApi m_profilerApi;
RangeProfilerStateMachine m_stateMachine;
// Runs SpgoThreadProc() from BeginSession() until the session ends.
std::thread m_spgoThread;
// Set by the background thread when it returns and read by DecodeCounters() on the caller's thread.
// Atomic because `volatile` gives no inter-thread synchronization guarantees in C++.
std::atomic<bool> m_spgoThreadExited;
private:
// non-copyable
RangeProfilerVulkan(const RangeProfilerVulkan&);
/// Entry point of the background thread started by BeginSession().
/// Services the queue's pending GPU operations until NVPW_VK_Queue_ServicePendingGpuOperations returns,
/// logs a failure status, then flags the thread as exited on pRangeProfiler.
static void SpgoThreadProc(RangeProfilerVulkan* pRangeProfiler, VkQueue queue)
{
    // Run continuously in the background, handling all BeginPass and EndPass GPU operations until EndSession().
    NVPW_VK_Queue_ServicePendingGpuOperations_Params serviceGpuOpsParams = { NVPW_VK_Queue_ServicePendingGpuOperations_Params_STRUCT_SIZE };
    serviceGpuOpsParams.queue = queue;
    serviceGpuOpsParams.numOperations = 0; // run until EndSession()
    serviceGpuOpsParams.timeout = 0xFFFFFFFF;
    NVPA_Status nvpaStatus = NVPW_VK_Queue_ServicePendingGpuOperations(&serviceGpuOpsParams);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_VK_Queue_ServicePendingGpuOperations failed, nvpaStatus = %d\n", nvpaStatus);
    }
    pRangeProfiler->m_spgoThreadExited = true;
}
public:
/// Ends a session that is still active.
/// Without this, destroying the object during a session would destroy the joinable m_spgoThread,
/// which calls std::terminate(). The VkDevice used by the session must still be alive at this point.
~RangeProfilerVulkan()
{
    if (IsInSession())
    {
        EndSession();
    }
}
/// Creates a profiler that is not in a session.
/// The state machine is bound to m_profilerApi, no background thread is running yet,
/// and the thread-exited flag starts out false.
RangeProfilerVulkan()
    : m_profilerApi()
    , m_stateMachine(m_profilerApi)
    , m_spgoThread()
    , m_spgoThreadExited(false)
{
}
// TODO: make this move friendly
/// A session counts as active while the profiler API holds a non-null queue handle.
bool IsInSession() const
{
    return m_profilerApi.queue != nullptr;
}
/// Reports the state machine's IsInPass() result.
bool IsInPass() const
{
return m_stateMachine.IsInPass();
}
/// Returns the queue handle currently held by the profiler API.
/// IsInSession() treats a null handle here as "no session".
VkQueue GetVkQueue() const
{
return m_profilerApi.queue;
}
/// Sets maxQueueRangesPerPass, one of the factors that sizes the command buffer pool for queue-level ranges.
/// Only allowed outside of a session; inside one, an error is logged and false is returned.
bool SetMaxQueueRangesPerPass(size_t maxQueueRangesPerPass)
{
    if (!IsInSession())
    {
        m_profilerApi.maxQueueRangesPerPass = maxQueueRangesPerPass;
        return true;
    }
    NV_PERF_LOG_ERR(10, "SetMaxQueueRangesPerPass must be called before the session starts.\n");
    return false;
}
/// Starts a profiler session on `queue`.
/// Steps: validate the device, compute the trace buffer size, begin the NVPW session, start the background
/// thread that services pending GPU operations, then create the Vulkan objects used for queue-level ranges.
/// If that last step fails, the session and the background thread are torn down again so the object is
/// left outside of a session.
/// @return false when already in a session, when the device is unsupported, or when any step fails.
bool BeginSession(
    VkInstance instance,
    VkPhysicalDevice physicalDevice,
    VkDevice device,
    VkQueue queue,
    uint32_t queueFamilyIndex,
    const SessionOptions& sessionOptions)
{
    if (IsInSession())
    {
        NV_PERF_LOG_ERR(10, "already in a session\n");
        return false;
    }
    if (!VulkanIsNvidiaDevice(physicalDevice) || !VulkanIsGpuSupported(instance, physicalDevice, device))
    {
        NV_PERF_LOG_ERR(10, "device is not supported for profiling\n");
        return false;
    }
    NVPA_Status nvpaStatus;
    NVPW_VK_Profiler_CalcTraceBufferSize_Params calcTraceBufferSizeParam = { NVPW_VK_Profiler_CalcTraceBufferSize_Params_STRUCT_SIZE };
    calcTraceBufferSizeParam.maxRangesPerPass = sessionOptions.maxNumRanges;
    calcTraceBufferSizeParam.avgRangeNameLength = sessionOptions.avgRangeNameLength;
    nvpaStatus = NVPW_VK_Profiler_CalcTraceBufferSize(&calcTraceBufferSizeParam);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_CalcTraceBufferSize failed, nvpaStatus = %d\n", nvpaStatus);
        return false;
    }
    NVPW_VK_Profiler_Queue_BeginSession_Params beginSessionParams = { NVPW_VK_Profiler_Queue_BeginSession_Params_STRUCT_SIZE };
    beginSessionParams.instance = instance;
    beginSessionParams.physicalDevice = physicalDevice;
    beginSessionParams.device = device;
    beginSessionParams.queue = queue;
    beginSessionParams.pfnGetInstanceProcAddr = (void*)vkGetInstanceProcAddr;
    beginSessionParams.pfnGetDeviceProcAddr = (void*)vkGetDeviceProcAddr;
    beginSessionParams.numTraceBuffers = sessionOptions.numTraceBuffers;
    beginSessionParams.traceBufferSize = calcTraceBufferSizeParam.traceBufferSize;
    beginSessionParams.maxRangesPerPass = sessionOptions.maxNumRanges;
    beginSessionParams.maxLaunchesPerPass = sessionOptions.maxNumRanges;
    nvpaStatus = NVPW_VK_Profiler_Queue_BeginSession(&beginSessionParams);
    if (nvpaStatus)
    {
        if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_PRIVILEGE)
        {
            NV_PERF_LOG_ERR(10, "Failed to start profiler session: profiling permissions not enabled. Please follow these instructions: https://developer.nvidia.com/ERR_NVGPUCTRPERM\n");
        }
        else if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION)
        {
            NV_PERF_LOG_ERR(10, "Failed to start profiler session: insufficient driver version. Please install the latest NVIDIA driver from https://www.nvidia.com\n");
        }
        else
        {
            NV_PERF_LOG_ERR(10, "Failed to start profiler session: unknown error. It may be a resource conflict - only one profiler session can run at a time per GPU.\n");
        }
        return false;
    }
    m_spgoThreadExited = false;
    m_spgoThread = std::thread(SpgoThreadProc, this, queue);
    if (!m_profilerApi.Initialize(device, queue, queueFamilyIndex, sessionOptions))
    {
        NV_PERF_LOG_ERR(10, "failed to create the Vulkan objects for queue level ranges\n");
        // Roll back: the NVPW session is live and the background thread is running. Leaving them behind
        // would keep the session open and make the std::thread member terminate the process on destruction.
        m_profilerApi.Reset(); // ends the NVPW session and releases whatever Initialize() created
        m_spgoThread.join();
        m_spgoThreadExited = false;
        return false;
    }
    return true;
}
/// Ends the active session: resets the state machine, resets the profiler API (which ends the NVPW
/// session and releases its Vulkan objects), then joins the background thread.
/// Logs an error and returns false when no session is active.
bool EndSession()
{
    if (IsInSession())
    {
        m_stateMachine.Reset();
        m_profilerApi.Reset();
        m_spgoThread.join();
        m_spgoThreadExited = false;
        return true;
    }
    NV_PERF_LOG_ERR(10, "must be called in a session\n");
    return false;
}
/// Hands `config` to the state machine's EnqueueCounterCollection() and returns its result.
bool EnqueueCounterCollection(const SetConfigParams& config)
{
    return m_stateMachine.EnqueueCounterCollection(config);
}
/// Convenience overload: builds SetConfigParams from the configuration, the nesting-level count and the
/// statistical-sample count, then enqueues it on the state machine.
bool EnqueueCounterCollection(const CounterConfiguration& configuration, uint16_t numNestingLevels = 1, size_t numStatisticalSamples = 1)
{
    return m_stateMachine.EnqueueCounterCollection(SetConfigParams(configuration, numNestingLevels, numStatisticalSamples));
}
/// Starts a pass on the state machine.
/// Logs an error and returns false when no session is active.
bool BeginPass()
{
    if (IsInSession())
    {
        return m_stateMachine.BeginPass();
    }
    NV_PERF_LOG_ERR(10, "must be called in a session\n");
    return false;
}
/// Ends the current pass on the state machine.
/// Logs an error and returns false when no session is active.
bool EndPass()
{
    if (IsInSession())
    {
        return m_stateMachine.EndPass();
    }
    NV_PERF_LOG_ERR(10, "must be called in a session\n");
    return false;
}
// Convenience method to start a Queue-level range; the call goes through the state machine.
// For ranges recorded into a command buffer, use VulkanRangeCommands::PushRange.
bool PushRange(const char* pRangeName)
{
    return m_stateMachine.PushRange(pRangeName);
}
// Convenience method to end a Queue-level range; the call goes through the state machine.
// For ranges recorded into a command buffer, use VulkanRangeCommands::PopRange.
bool PopRange()
{
    return m_stateMachine.PopRange();
}
/// Asks the state machine to decode counters into decodeResult.
/// Refuses (with an error log) when no session is active, or when the background thread has already
/// exited, since later CPU-waiting-on-GPU calls could then hang.
bool DecodeCounters(DecodeResult& decodeResult)
{
    if (!IsInSession())
    {
        NV_PERF_LOG_ERR(10, "must be called in a session\n");
        return false;
    }
    if (!m_spgoThreadExited)
    {
        return m_stateMachine.DecodeCounters(decodeResult);
    }
    NV_PERF_LOG_ERR(10, "the background thread exited; possible hang on subsequent CPU-waiting-on-GPU calls\n");
    return false;
}
/// Reports the state machine's AllPassesSubmitted() result.
bool AllPassesSubmitted() const
{
    return m_stateMachine.AllPassesSubmitted();
}
};
}}}

View File

@@ -0,0 +1,34 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <stddef.h>
namespace nv { namespace perf {
/// Plain description of a report: three name lists, each with its element count, plus a report string.
/// None of the pointers are freed by this struct.
struct ReportDefinition
{
// presumably an array of numCounters counter-name strings - TODO confirm against the per-chip definitions
const char* const* ppCounterNames;
size_t numCounters;
// presumably an array of numRatios ratio-name strings - TODO confirm
const char* const* ppRatioNames;
size_t numRatios;
// presumably an array of numThroughputs throughput-name strings - TODO confirm
const char* const* ppThroughputNames;
size_t numThroughputs;
// presumably the HTML template of the report - TODO confirm
const char* pReportHtml;
};
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,79 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <string.h>
#include "NvPerfInit.h"
#include "NvPerfReportDefinition.h"
#include "NvPerfReportDefinitionGV100.h"
#include "NvPerfReportDefinitionTU10X.h"
#include "NvPerfReportDefinitionTU11X.h"
#include "NvPerfReportDefinitionGA10X.h"
namespace nv { namespace perf {
namespace PerRangeReport {
/// Selects the per-range report definition for the chip named pChipName.
/// The name is compared exactly (case-sensitive) against the chip names listed below.
/// @return the matching chip family's definition, or a value-initialized ReportDefinition when
///         pChipName is null or not one of the handled names.
inline ReportDefinition GetReportDefinition(const char* pChipName)
{
    if (!pChipName)
    {
        // strcmp() on a null pointer is undefined behaviour; treat it like an unknown chip.
        return {};
    }
    if (!strcmp(pChipName, "GV100"))
    {
        return gv100::PerRangeReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "TU102") || !strcmp(pChipName, "TU104") || !strcmp(pChipName, "TU106"))
    {
        return tu10x::PerRangeReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "TU116") || !strcmp(pChipName, "TU117"))
    {
        return tu11x::PerRangeReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "GA102") || !strcmp(pChipName, "GA104") || !strcmp(pChipName, "GA106"))
    {
        return ga10x::PerRangeReport::GetReportDefinition();
    }
    return {};
}
} // namespace PerRangeReport
namespace SummaryReport {
/// Selects the summary report definition for the chip named pChipName.
/// The name is compared exactly (case-sensitive) against the chip names listed below.
/// @return the matching chip family's definition, or a value-initialized ReportDefinition when
///         pChipName is null or not one of the handled names.
inline ReportDefinition GetReportDefinition(const char* pChipName)
{
    if (!pChipName)
    {
        // strcmp() on a null pointer is undefined behaviour; treat it like an unknown chip.
        return {};
    }
    if (!strcmp(pChipName, "GV100"))
    {
        return gv100::SummaryReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "TU102") || !strcmp(pChipName, "TU104") || !strcmp(pChipName, "TU106"))
    {
        return tu10x::SummaryReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "TU116") || !strcmp(pChipName, "TU117"))
    {
        return tu11x::SummaryReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "GA102") || !strcmp(pChipName, "GA104") || !strcmp(pChipName, "GA106"))
    {
        return ga10x::SummaryReport::GetReportDefinition();
    }
    return {};
}
} // namespace SummaryReport
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,414 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "NvPerfReportGenerator.h"
#include "NvPerfD3D11.h"
#include "NvPerfRangeProfilerD3D11.h"
namespace nv { namespace perf { namespace profiler {
class ReportGeneratorD3D11
{
protected:
// Adapter that lets ReportGeneratorStateMachine drive a RangeProfilerD3D11 through the IReportProfiler
// interface. Every override forwards to the rangeProfiler member method of the same name and returns
// its result unchanged.
struct ReportProfiler : ReportGeneratorStateMachine::IReportProfiler
{
// The profiler that does the actual work; also accessed directly by the enclosing class.
RangeProfilerD3D11 rangeProfiler;
ReportProfiler()
: rangeProfiler()
{
}
virtual bool IsInSession() const override
{
return rangeProfiler.IsInSession();
}
virtual bool IsInPass() const override
{
return rangeProfiler.IsInPass();
}
virtual bool EndSession() override
{
return rangeProfiler.EndSession();
}
virtual bool EnqueueCounterCollection(const SetConfigParams& config) override
{
return rangeProfiler.EnqueueCounterCollection(config);
}
virtual bool BeginPass() override
{
return rangeProfiler.BeginPass();
}
virtual bool EndPass() override
{
return rangeProfiler.EndPass();
}
virtual bool PushRange(const char* pRangeName) override
{
return rangeProfiler.PushRange(pRangeName);
}
virtual bool PopRange() override
{
return rangeProfiler.PopRange();
}
virtual bool DecodeCounters(DecodeResult& decodeResult) override
{
return rangeProfiler.DecodeCounters(decodeResult);
}
virtual bool AllPassesSubmitted() const override
{
return rangeProfiler.AllPassesSubmitted();
}
};
protected:
ReportProfiler m_reportProfiler;
ReportGeneratorStateMachine m_stateMachine;
// When enabled, BeginSession() and OnFrameStart() will check whether their argument's ID3D11Device == m_pDevice.
bool m_enableDeviceContextValidation;
CComPtr<ID3D11Device> m_pDevice;
ReportGeneratorInitStatus m_initStatus; // the state of InitializeReportGenerator()
protected:
// Starts a range profiler session on pDeviceContext.
// Caller-supplied options are used as-is. Without them, value-initialized options are used with
// maxNumRanges set to ReportGeneratorStateMachine::MaxNumRangesDefault.
// Logs an error and returns false when the range profiler fails to begin the session.
bool BeginSessionWithOptions(ID3D11DeviceContext* pDeviceContext, const SessionOptions* pSessionOptions = nullptr)
{
    SessionOptions effectiveOptions = {};
    if (pSessionOptions)
    {
        effectiveOptions = *pSessionOptions;
    }
    else
    {
        effectiveOptions.maxNumRanges = ReportGeneratorStateMachine::MaxNumRangesDefault;
    }
    if (m_reportProfiler.rangeProfiler.BeginSession(pDeviceContext, effectiveOptions))
    {
        return true;
    }
    NV_PERF_LOG_ERR(10, "m_reportProfiler.rangeProfiler.BeginSession failed\n");
    return false;
}
// Checks that pDeviceContext belongs to the ID3D11Device captured by InitializeReportGenerator().
// pFunctionName is only used to name the caller in the error messages.
// Returns true without checking when validation is disabled, or when no device was captured (a warning
// is logged in that case). Returns false for a null context, a context whose device cannot be
// retrieved, or a context that belongs to a different device.
bool IsDeviceContextValid(ID3D11DeviceContext* pDeviceContext, const char* pFunctionName) const
{
    if (!m_enableDeviceContextValidation)
    {
        return true; // when validation is disabled, always assume the pDeviceContext is valid
    }
    if (!m_pDevice)
    {
        NV_PERF_LOG_WRN(50, "Cannot validate DeviceContext. Please call EnableDeviceContextValidation(true) before InitializeReportGenerator().\n");
        return true; // allow it to proceed unvalidated
    }
    if (!pDeviceContext)
    {
        // GetDevice() below would dereference the null pointer.
        NV_PERF_LOG_ERR(10, "The pDeviceContext passed to %s is NULL.\n", pFunctionName);
        return false;
    }
    CComPtr<ID3D11Device> pDevice;
    pDeviceContext->GetDevice(&pDevice);
    if (!pDevice)
    {
        NV_PERF_LOG_ERR(10, "pDeviceContext->GetDevice() failed\n");
        return false;
    }
    if (!pDevice.IsEqualObject(m_pDevice))
    {
        NV_PERF_LOG_ERR(10, "The pDeviceContext passed to %s does not match the ID3D11Device passed to InitializeReportGenerator().\n", pFunctionName);
        return false;
    }
    return true;
}
public:
DeviceIdentifiers deviceIdentifiers;
std::vector<std::string> additionalMetrics;
public:
/// Calls Reset(), which ends a session that is still active and releases the captured device.
~ReportGeneratorD3D11()
{
Reset();
}
/// Creates an uninitialized report generator: the state machine is bound to m_reportProfiler,
/// device-context validation is enabled, no device is captured, and the init status is NeverCalled.
ReportGeneratorD3D11()
: m_reportProfiler()
, m_stateMachine(m_reportProfiler)
, m_enableDeviceContextValidation(true)
, m_pDevice()
, m_initStatus(ReportGeneratorInitStatus::NeverCalled)
, deviceIdentifiers()
, additionalMetrics()
{
}
/// Returns the status recorded by the last InitializeReportGenerator() or Reset() call
/// (NeverCalled until InitializeReportGenerator() has been called).
ReportGeneratorInitStatus GetInitStatus() const
{
return m_initStatus;
}
/// Ends all current sessions and frees all internal memory.
/// This object may be reused by calling InitializeReportGenerator() again.
/// Does not reset deviceIdentifiers.
void Reset()
{
if (m_reportProfiler.rangeProfiler.IsInSession())
{
const bool endSessionStatus = m_reportProfiler.rangeProfiler.EndSession();
if (!endSessionStatus)
{
NV_PERF_LOG_ERR(10, "m_reportProfiler.EndSession failed\n");
}
}
m_stateMachine.Reset();
m_pDevice.Release();
if (m_initStatus != ReportGeneratorInitStatus::NeverCalled)
{
m_initStatus = ReportGeneratorInitStatus::Reset;
}
}
/// Prepares this object for collecting reports on pDevice.
/// Checks in order: pDevice is non-null, it is an NVIDIA device, NvPerf initializes, the driver loads,
/// the GPU is supported, and its chip name is known. Then the report metrics are initialized from
/// deviceIdentifiers and additionalMetrics.
/// The init status is Failed from the start of the call until every step has succeeded.
/// pDevice is retained for later validation only while device-context validation is enabled.
/// @return true on success; false (after logging the reason) otherwise.
bool InitializeReportGenerator(ID3D11Device* pDevice)
{
    m_pDevice.Release();
    m_initStatus = ReportGeneratorInitStatus::Failed;
    if (!pDevice)
    {
        NV_PERF_LOG_ERR(10, "pDevice is NULL\n");
        return false;
    }
    // Can this device be profiled by Nsight Perf SDK?
    if (!nv::perf::D3D11IsNvidiaDevice(pDevice))
    {
        NV_PERF_LOG_ERR(10, "%ls is not an NVIDIA Device\n", D3D11GetDeviceName(pDevice).c_str());
        return false;
    }
    if (!InitializeNvPerf())
    {
        NV_PERF_LOG_ERR(10, "InitializeNvPerf failed\n");
        return false;
    }
    if (!nv::perf::D3D11LoadDriver())
    {
        NV_PERF_LOG_ERR(10, "Could not load driver\n");
        return false;
    }
    if (!nv::perf::profiler::D3D11IsGpuSupported(pDevice))
    {
        NV_PERF_LOG_ERR(10, "GPU is not supported\n");
        return false;
    }
    deviceIdentifiers = D3D11GetDeviceIdentifiers(pDevice);
    if (!deviceIdentifiers.pChipName)
    {
        NV_PERF_LOG_ERR(10, "Unrecognized GPU\n");
        return false;
    }
    // Factory handed to the state machine: sizes the scratch buffer for this chip and creates the
    // metrics evaluator in it; yields nullptr when the size query reports 0.
    auto createMetricsEvaluator = [&](std::vector<uint8_t>& scratchBuffer) {
        const size_t scratchBufferSize = nv::perf::D3D11CalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName);
        if (!scratchBufferSize)
        {
            return (NVPW_MetricsEvaluator*)nullptr;
        }
        scratchBuffer.resize(scratchBufferSize);
        NVPW_MetricsEvaluator* pMetricsEvaluator = nv::perf::D3D11CreateMetricsEvaluator(scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName);
        return pMetricsEvaluator;
    };
    // Factory handed to the state machine: creates the raw metrics config for this chip.
    auto createRawMetricsConfig = [&]() {
        NVPA_RawMetricsConfig* pRawMetricsConfig = nv::perf::profiler::D3D11CreateRawMetricsConfig(deviceIdentifiers.pChipName);
        return pRawMetricsConfig;
    };
    if (!m_stateMachine.InitializeReportMetrics(deviceIdentifiers, createMetricsEvaluator, createRawMetricsConfig, additionalMetrics))
    {
        NV_PERF_LOG_ERR(100, "m_stateMachine.InitializeReportMetrics failed\n");
        return false;
    }
    if (m_enableDeviceContextValidation)
    {
        m_pDevice = pDevice;
    }
    m_initStatus = ReportGeneratorInitStatus::Succeeded;
    NV_PERF_LOG_INF(50, "Initialization succeeded\n");
    return true;
}
/// Explicitly starts a session, which lets the caller control resource allocation through pSessionOptions.
/// Calling this function is optional; by default, OnFrameStart() will start a session if this isn't called.
/// The session must be explicitly ended by calling Reset().
/// The pDeviceContext must belong to the ID3D11Device passed into InitializeReportGenerator().
/// Returns false when initialization has not succeeded or the device context fails validation.
bool BeginSession(ID3D11DeviceContext* pDeviceContext, const SessionOptions* pSessionOptions = nullptr)
{
    if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
    {
        NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
        return false;
    }
    if (!IsDeviceContextValid(pDeviceContext, "BeginSession"))
    {
        return false;
    }
    // The state machine invokes this callback when it needs the session to be started.
    auto startSession = [&]() {
        return BeginSessionWithOptions(pDeviceContext, pSessionOptions);
    };
    return m_stateMachine.OnFrameStart(startSession);
}
/// Automatically starts collecting counters after StartCollectionOnNextFrame().
/// Call this at the start of each frame.
/// The pDeviceContext must belong to the ID3D11Device passed into InitializeReportGenerator().
/// Returns false when initialization has not succeeded or the device context fails validation.
bool OnFrameStart(ID3D11DeviceContext* pDeviceContext)
{
    if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
    {
        NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
        return false;
    }
    if (!IsDeviceContextValid(pDeviceContext, "OnFrameStart"))
    {
        return false;
    }
    // Sessions started from here use the default session options.
    auto startSession = [&]() {
        return BeginSessionWithOptions(pDeviceContext);
    };
    return m_stateMachine.OnFrameStart(startSession);
}
/// Advances the counter-collection state-machine after rendering.
/// Call this at the end of each frame.
/// Logs a warning and returns false when InitializeReportGenerator() has not succeeded.
bool OnFrameEnd()
{
    if (m_initStatus == ReportGeneratorInitStatus::Succeeded)
    {
        return m_stateMachine.OnFrameEnd();
    }
    NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
    return false;
}
/// Starts a range named pRangeName on the report profiler.
/// Logs a warning and returns false when InitializeReportGenerator() has not succeeded or when no
/// profiler pass is active.
bool PushRange(const char* pRangeName)
{
    if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
    {
        NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
        return false;
    }
    if (!m_reportProfiler.IsInPass())
    {
        // Newline-terminated like every other log message in this file.
        NV_PERF_LOG_WRN(100, "skipping; not in a profiler pass\n");
        return false;
    }
    if (!m_reportProfiler.PushRange(pRangeName))
    {
        return false;
    }
    return true;
}
/// Ends the current range on the report profiler.
/// Logs a warning and returns false when InitializeReportGenerator() has not succeeded or when no
/// profiler pass is active.
bool PopRange()
{
    if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
    {
        NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
        return false;
    }
    if (!m_reportProfiler.IsInPass())
    {
        // Newline-terminated like every other log message in this file.
        NV_PERF_LOG_WRN(100, "skipping; not in a profiler pass\n");
        return false;
    }
    if (!m_reportProfiler.PopRange())
    {
        return false;
    }
    return true;
}
/// Reports true after StartCollectionOnNextFrame() is called, until the HTML Report has been written to disk.
/// This state is cleared by OnFrameEnd().
/// The value comes straight from the state machine's IsCollectingReport().
bool IsCollectingReport() const
{
return m_stateMachine.IsCollectingReport();
}
/// Enqueues report collection, starting on the next frame.
/// pDirectoryName and appendDateTime are passed through to the state machine unchanged.
/// Logs a warning and returns false when InitializeReportGenerator() has not succeeded.
bool StartCollectionOnNextFrame(const char* pDirectoryName, AppendDateTime appendDateTime)
{
    if (m_initStatus == ReportGeneratorInitStatus::Succeeded)
    {
        return m_stateMachine.StartCollectionOnNextFrame(pDirectoryName, appendDateTime);
    }
    NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
    return false;
}
/// Enables a frame-level parent range.
/// When enabled (non-NULL, non-empty pRangeName), every frame will have a parent range.
/// Pass in NULL or an empty string to disable this behavior.
/// The pRangeName string is copied by value, and may be modified or freed after this function returns.
/// The name is handed to the state machine; this function does not check the init status.
void SetFrameLevelRangeName(const char* pRangeName)
{
m_stateMachine.SetFrameLevelRangeName(pRangeName);
}
/// Retrieves the current frame-level parent range. An empty string signifies no parent range.
/// The returned reference is the one handed out by the state machine.
const std::string& GetFrameLevelRangeName() const
{
return m_stateMachine.GetFrameLevelRangeName();
}
/// Sets the number of Push/Pop nesting levels to collect in the report.
/// The value is handed to the state machine; this function does not check the init status.
void SetNumNestingLevels(uint16_t numNestingLevels)
{
m_stateMachine.SetNumNestingLevels(numNestingLevels);
}
/// Retrieves the number of Push/Pop nesting levels being collected in the report,
/// as reported by the state machine.
uint16_t GetNumNestingLevels() const
{
return m_stateMachine.GetNumNestingLevels();
}
/// When enabled, BeginSession() and OnFrameStart() will check whether their ID3D11DeviceContext argument
/// corresponds to the device passed into InitializeReportGenerator().
/// The device is only captured by InitializeReportGenerator() while validation is enabled, so call this
/// before InitializeReportGenerator(); otherwise the context cannot be validated and is accepted with a warning.
void EnableDeviceContextValidation(bool enable = true)
{
m_enableDeviceContextValidation = enable;
}
};
}}}

View File

@@ -0,0 +1,394 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "NvPerfReportGenerator.h"
#include "NvPerfRangeProfilerD3D12.h"
namespace nv { namespace perf { namespace profiler {
/// Drives per-frame report collection for a D3D12 application.
/// Combines a RangeProfilerD3D12 with the API-agnostic ReportGeneratorStateMachine and adds
/// D3D12-specific initialization plus optional validation of the command queue passed by the caller.
///
/// Typical call order, as implied by the guards in the methods below:
///   InitializeReportGenerator() -> [BeginSession()] -> StartCollectionOnNextFrame()
///   -> OnFrameStart()/OnFrameEnd() every frame -> Reset().
class ReportGeneratorD3D12
{
protected:
    /// Adapter exposing the D3D12 range profiler through the state machine's IReportProfiler interface.
    /// Every override forwards directly to the identically named RangeProfilerD3D12 method.
    struct ReportProfiler : public ReportGeneratorStateMachine::IReportProfiler
    {
        RangeProfilerD3D12 rangeProfiler;

        ReportProfiler()
            : rangeProfiler()
        {
        }

        virtual bool IsInSession() const override
        {
            return rangeProfiler.IsInSession();
        }

        virtual bool IsInPass() const override
        {
            return rangeProfiler.IsInPass();
        }

        virtual bool EndSession() override
        {
            return rangeProfiler.EndSession();
        }

        virtual bool EnqueueCounterCollection(const SetConfigParams& config) override
        {
            return rangeProfiler.EnqueueCounterCollection(config);
        }

        virtual bool BeginPass() override
        {
            return rangeProfiler.BeginPass();
        }

        virtual bool EndPass() override
        {
            return rangeProfiler.EndPass();
        }

        virtual bool PushRange(const char* pRangeName) override
        {
            return rangeProfiler.PushRange(pRangeName);
        }

        virtual bool PopRange() override
        {
            return rangeProfiler.PopRange();
        }

        virtual bool DecodeCounters(DecodeResult& decodeResult) override
        {
            return rangeProfiler.DecodeCounters(decodeResult);
        }

        virtual bool AllPassesSubmitted() const override
        {
            return rangeProfiler.AllPassesSubmitted();
        }
    };

protected:
    // NOTE: m_reportProfiler must stay declared before m_stateMachine, because the state machine
    // is constructed from it in the constructor's initializer list.
    ReportProfiler m_reportProfiler;
    ReportGeneratorStateMachine m_stateMachine;
    // When enabled, BeginSession() and OnFrameStart() check whether their argument's ID3D12Device == m_pDevice.
    bool m_enableCommandQueueValidation;
    // Only set by InitializeReportGenerator() when validation is enabled at that time; released by Reset().
    CComPtr<ID3D12Device> m_pDevice;
    ReportGeneratorInitStatus m_initStatus; // the state of InitializeReportGenerator()

protected:
    /// Begins a range-profiler session on pCommandQueue.
    /// Uses *pSessionOptions when provided; otherwise uses value-initialized options with
    /// maxNumRanges set to ReportGeneratorStateMachine::MaxNumRangesDefault.
    /// Returns false (after logging) if the underlying BeginSession fails.
    bool BeginSessionWithOptions(ID3D12CommandQueue* pCommandQueue, const SessionOptions* pSessionOptions = nullptr)
    {
        SessionOptions sessionOptions = {};
        sessionOptions.maxNumRanges = ReportGeneratorStateMachine::MaxNumRangesDefault;
        if (pSessionOptions)
        {
            // Caller-supplied options replace the defaults wholesale, including maxNumRanges.
            sessionOptions = *pSessionOptions;
        }
        if (!m_reportProfiler.rangeProfiler.BeginSession(pCommandQueue, sessionOptions))
        {
            NV_PERF_LOG_ERR(10, "m_reportProfiler.rangeProfiler.BeginSession failed\n");
            return false;
        }
        return true;
    }

    /// Checks that pCommandQueue was created on the device given to InitializeReportGenerator().
    /// Returns true when validation is disabled, or when no device was recorded (a warning is logged).
    /// Returns false when the queue is NULL, its device cannot be queried, or the devices differ.
    /// pFunctionName is only used to identify the caller in log messages.
    bool IsCommandQueueValid(ID3D12CommandQueue* pCommandQueue, const char* pFunctionName) const
    {
        if (!m_enableCommandQueueValidation)
        {
            return true; // when validation is disabled, always assume the CommandQueue is valid
        }
        if (!m_pDevice)
        {
            NV_PERF_LOG_WRN(50, "Cannot validate CommandQueue. Please call EnableCommandQueueValidation(true) before InitializeReportGenerator().\n");
            return true; // allow it to proceed unvalidated
        }
        if (!pCommandQueue)
        {
            // Guard the dereference below; a NULL queue can never match the recorded device.
            NV_PERF_LOG_ERR(10, "The pCommandQueue passed to %s is NULL.\n", pFunctionName);
            return false;
        }
        CComPtr<ID3D12Device> pDevice;
        HRESULT hr = pCommandQueue->GetDevice(IID_PPV_ARGS(&pDevice));
        if (FAILED(hr) || !pDevice)
        {
            NV_PERF_LOG_ERR(10, "pCommandQueue->GetDevice() failed\n");
            return false;
        }
        if (!pDevice.IsEqualObject(m_pDevice))
        {
            NV_PERF_LOG_ERR(10, "The pCommandQueue passed to %s does not match the ID3D12Device passed to InitializeReportGenerator().\n", pFunctionName);
            return false;
        }
        return true;
    }

public:
    /// RangeCommands is safe to use on any CommandList belonging to the ID3D12Device used for initialization.
    /// RangeCommands perform no operation when called on unsupported or non-NVIDIA devices.
    D3D12RangeCommands rangeCommands;
    /// NVIDIA device identifiers, filled in by InitializeReportGenerator().
    DeviceIdentifiers deviceIdentifiers;
    /// Extra metric names handed to the state machine by InitializeReportGenerator();
    /// populate this before calling InitializeReportGenerator().
    std::vector<std::string> additionalMetrics;

public:
    /// Ends any active session via Reset().
    ~ReportGeneratorD3D12()
    {
        Reset();
    }

    /// Constructs an uninitialized generator; command-queue validation is enabled by default.
    ReportGeneratorD3D12()
        : m_reportProfiler()
        , m_stateMachine(m_reportProfiler)
        , m_enableCommandQueueValidation(true)
        , m_pDevice()
        , m_initStatus(ReportGeneratorInitStatus::NeverCalled)
        , rangeCommands()
        , deviceIdentifiers()
        , additionalMetrics()
    {
    }

    /// Returns the outcome of the most recent InitializeReportGenerator() / Reset() sequence.
    ReportGeneratorInitStatus GetInitStatus() const
    {
        return m_initStatus;
    }

    /// Ends all current sessions and frees all internal memory.
    /// This object may be reused by calling InitializeReportGenerator() again.
    /// Does not reset rangeCommands and deviceIdentifiers.
    void Reset()
    {
        if (m_reportProfiler.rangeProfiler.IsInSession())
        {
            const bool endSessionStatus = m_reportProfiler.rangeProfiler.EndSession();
            if (!endSessionStatus)
            {
                // Best effort: log and keep tearing down the remaining state.
                NV_PERF_LOG_ERR(10, "m_reportProfiler.EndSession failed\n");
            }
        }
        m_stateMachine.Reset();
        m_pDevice.Release();
        // NeverCalled is preserved so callers can tell "never initialized" apart from "reset".
        if (m_initStatus != ReportGeneratorInitStatus::NeverCalled)
        {
            m_initStatus = ReportGeneratorInitStatus::Reset;
        }
    }

    /// Initialize this object on the provided ID3D12Device.
    /// Returns true on success. On any failure an error is logged, false is returned and
    /// GetInitStatus() reports Failed.
    bool InitializeReportGenerator(ID3D12Device* pDevice)
    {
        // Do this first, in case this object is re-initialized on a different device.
        rangeCommands.Initialize(pDevice);
        m_pDevice.Release();
        // Pessimistically mark as failed; only the success path at the end overrides this.
        m_initStatus = ReportGeneratorInitStatus::Failed;

        // Can this device be profiled by Nsight Perf SDK?
        if (!nv::perf::D3D12IsNvidiaDevice(pDevice))
        {
            // NOTE(review): %ls assumes D3D12GetDeviceName() returns a wide string - confirm against its declaration.
            NV_PERF_LOG_ERR(10, "%ls is not an NVIDIA Device\n", D3D12GetDeviceName(pDevice).c_str());
            return false;
        }
        if (!InitializeNvPerf())
        {
            NV_PERF_LOG_ERR(10, "InitializeNvPerf failed\n");
            return false;
        }
        if (!nv::perf::D3D12LoadDriver())
        {
            NV_PERF_LOG_ERR(10, "Could not load driver\n");
            return false;
        }
        if (!nv::perf::profiler::D3D12IsGpuSupported(pDevice))
        {
            NV_PERF_LOG_ERR(10, "GPU is not supported\n");
            return false;
        }
        deviceIdentifiers = D3D12GetDeviceIdentifiers(pDevice);
        if (!deviceIdentifiers.pChipName)
        {
            NV_PERF_LOG_ERR(10, "Unrecognized GPU\n");
            return false;
        }

        // Factory callbacks handed to the state machine. The evaluator callback sizes the
        // caller-provided scratch buffer and yields nullptr when the size query returns 0.
        auto createMetricsEvaluator = [&](std::vector<uint8_t>& scratchBuffer) {
            const size_t scratchBufferSize = nv::perf::D3D12CalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName);
            if (!scratchBufferSize)
            {
                return (NVPW_MetricsEvaluator*)nullptr;
            }
            scratchBuffer.resize(scratchBufferSize);
            NVPW_MetricsEvaluator* pMetricsEvaluator = nv::perf::D3D12CreateMetricsEvaluator(scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName);
            return pMetricsEvaluator;
        };
        auto createRawMetricsConfig = [&]() {
            NVPA_RawMetricsConfig* pRawMetricsConfig = nv::perf::profiler::D3D12CreateRawMetricsConfig(deviceIdentifiers.pChipName);
            return pRawMetricsConfig;
        };
        if (!m_stateMachine.InitializeReportMetrics(deviceIdentifiers, createMetricsEvaluator, createRawMetricsConfig, additionalMetrics))
        {
            NV_PERF_LOG_ERR(100, "m_stateMachine.InitializeReportMetrics failed\n");
            return false;
        }

        // The device is only retained when validation is on; IsCommandQueueValid() compares against it.
        if (m_enableCommandQueueValidation)
        {
            m_pDevice = pDevice;
        }
        m_initStatus = ReportGeneratorInitStatus::Succeeded;
        NV_PERF_LOG_INF(50, "Initialization succeeded\n");
        return true;
    }

    /// Explicitly starts a session. This allows you to control resource allocation.
    /// Calling this function is optional; by default, OnFrameStart() will start a session if this isn't called.
    /// The session must be explicitly ended by calling Reset().
    /// The pCommandQueue must belong to the ID3D12Device passed into InitializeReportGenerator().
    /// Returns false if initialization has not succeeded, the queue fails validation,
    /// or the state machine fails to begin the session.
    bool BeginSession(ID3D12CommandQueue* pCommandQueue, const SessionOptions* pSessionOptions = nullptr)
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        if (!IsCommandQueueValid(pCommandQueue, "BeginSession"))
        {
            return false;
        }
        auto beginSessionFn = [&]() {
            return BeginSessionWithOptions(pCommandQueue, pSessionOptions);
        };
        if (!m_stateMachine.BeginSession(beginSessionFn))
        {
            return false;
        }
        return true;
    }

    /// Automatically starts collecting counters after StartCollectionOnNextFrame().
    /// Call this at the start of each frame.
    /// The pCommandQueue must belong to the ID3D12Device passed into InitializeReportGenerator().
    /// Returns false if initialization has not succeeded, the queue fails validation,
    /// or the state machine reports failure.
    bool OnFrameStart(ID3D12CommandQueue* pCommandQueue)
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        if (!IsCommandQueueValid(pCommandQueue, "OnFrameStart"))
        {
            return false;
        }
        // Handed to the state machine so it can begin a default-options session on this queue.
        auto beginSessionFn = [&]() {
            return BeginSessionWithOptions(pCommandQueue);
        };
        if (!m_stateMachine.OnFrameStart(beginSessionFn))
        {
            return false;
        }
        return true;
    }

    /// Advances the counter-collection state-machine after rendering.
    /// Call this at the end of each frame.
    /// Returns false if initialization has not succeeded or the state machine reports failure.
    bool OnFrameEnd()
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        if (!m_stateMachine.OnFrameEnd())
        {
            return false;
        }
        return true;
    }

    /// Reports true after StartCollectionOnNextFrame() is called, until the HTML Report has been written to disk.
    /// This state is cleared by OnFrameEnd().
    bool IsCollectingReport() const
    {
        return m_stateMachine.IsCollectingReport();
    }

    /// Returns the report directory name as tracked by the state machine.
    const std::string& GetReportDirectoryName() const
    {
        return m_stateMachine.GetReportDirectoryName();
    }

    /// Enqueues report collection, starting on the next frame.
    /// Returns false without enqueuing anything if initialization has not succeeded.
    bool StartCollectionOnNextFrame(const char* pDirectoryName, AppendDateTime appendDateTime)
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        return m_stateMachine.StartCollectionOnNextFrame(pDirectoryName, appendDateTime);
    }

    /// Enables a frame-level parent range.
    /// When enabled (non-NULL, non-empty pRangeName), every frame will have a parent range.
    /// This is also convenient for programs that have no CommandList-level ranges.
    /// Pass in NULL or an empty string to disable this behavior.
    /// The pRangeName string is copied by value, and may be modified or freed after this function returns.
    void SetFrameLevelRangeName(const char* pRangeName)
    {
        m_stateMachine.SetFrameLevelRangeName(pRangeName);
    }

    /// Retrieves the current frame-level parent range. An empty string signifies no parent range.
    const std::string& GetFrameLevelRangeName() const
    {
        return m_stateMachine.GetFrameLevelRangeName();
    }

    /// Sets the number of Push/Pop nesting levels to collect in the report.
    void SetNumNestingLevels(uint16_t numNestingLevels)
    {
        m_stateMachine.SetNumNestingLevels(numNestingLevels);
    }

    /// Retrieves the number of Push/Pop nesting levels being collected in the report.
    uint16_t GetNumNestingLevels() const
    {
        return m_stateMachine.GetNumNestingLevels();
    }

    /// Open the report directory in file browser after perf data collection.
    /// The default behavior is false, and can be changed by environment variable NV_PERF_OPEN_REPORT_DIR_AFTER_COLLECTION.
    void SetOpenReportDirectoryAfterCollection(bool openReportDirectoryAfterCollection)
    {
        m_stateMachine.SetOpenReportDirectoryAfterCollection(openReportDirectoryAfterCollection);
    }

    /// When enabled, BeginSession() and OnFrameStart() will check whether their argument's ID3D12Device
    /// corresponds to the device passed into InitializeReportGenerator().
    /// The device is only recorded during InitializeReportGenerator(), so enable validation before
    /// initializing; otherwise the check is skipped with a warning.
    void EnableCommandQueueValidation(bool enable = true)
    {
        m_enableCommandQueueValidation = enable;
    }
};
}}}

View File

@@ -0,0 +1,367 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "NvPerfReportGenerator.h"
#include "NvPerfRangeProfilerOpenGL.h"
namespace nv { namespace perf { namespace profiler {
/// Drives per-frame report collection for an OpenGL application.
/// Combines a RangeProfilerOpenGL with the API-agnostic ReportGeneratorStateMachine.
/// Unlike the command-list based APIs, ranges are pushed and popped directly through
/// PushRange()/PopRange() on this object.
///
/// Typical call order, as implied by the guards in the methods below:
///   InitializeReportGenerator() -> [BeginSession()] -> StartCollectionOnNextFrame()
///   -> OnFrameStart()/OnFrameEnd() every frame -> Reset().
class ReportGeneratorOpenGL
{
protected:
    /// Adapter exposing the OpenGL range profiler through the state machine's IReportProfiler interface.
    /// Every override forwards directly to the identically named RangeProfilerOpenGL method.
    struct ReportProfiler : public ReportGeneratorStateMachine::IReportProfiler
    {
        RangeProfilerOpenGL rangeProfiler;

        ReportProfiler()
            : rangeProfiler()
        {
        }

        virtual bool IsInSession() const override
        {
            return rangeProfiler.IsInSession();
        }

        virtual bool IsInPass() const override
        {
            return rangeProfiler.IsInPass();
        }

        virtual bool EndSession() override
        {
            return rangeProfiler.EndSession();
        }

        virtual bool EnqueueCounterCollection(const SetConfigParams& config) override
        {
            return rangeProfiler.EnqueueCounterCollection(config);
        }

        virtual bool BeginPass() override
        {
            return rangeProfiler.BeginPass();
        }

        virtual bool EndPass() override
        {
            return rangeProfiler.EndPass();
        }

        virtual bool PushRange(const char* pRangeName) override
        {
            return rangeProfiler.PushRange(pRangeName);
        }

        virtual bool PopRange() override
        {
            return rangeProfiler.PopRange();
        }

        virtual bool DecodeCounters(DecodeResult& decodeResult) override
        {
            return rangeProfiler.DecodeCounters(decodeResult);
        }

        virtual bool AllPassesSubmitted() const override
        {
            return rangeProfiler.AllPassesSubmitted();
        }
    };

protected:
    // NOTE: m_reportProfiler must stay declared before m_stateMachine, because the state machine
    // is constructed from it in the constructor's initializer list.
    ReportProfiler m_reportProfiler;
    ReportGeneratorStateMachine m_stateMachine;
    ReportGeneratorInitStatus m_initStatus; // the state of InitializeReportGenerator()

protected:
    /// Begins a range-profiler session.
    /// Uses *pSessionOptions when provided; otherwise uses value-initialized options with
    /// maxNumRanges set to ReportGeneratorStateMachine::MaxNumRangesDefault.
    /// Returns false (after logging) if the underlying BeginSession fails.
    bool BeginSessionWithOptions(
        const SessionOptions* pSessionOptions = nullptr)
    {
        SessionOptions sessionOptions = {};
        sessionOptions.maxNumRanges = ReportGeneratorStateMachine::MaxNumRangesDefault;
        if (pSessionOptions)
        {
            // Caller-supplied options replace the defaults wholesale, including maxNumRanges.
            sessionOptions = *pSessionOptions;
        }
        if (!m_reportProfiler.rangeProfiler.BeginSession(sessionOptions))
        {
            NV_PERF_LOG_ERR(10, "m_reportProfiler.rangeProfiler.BeginSession failed\n");
            return false;
        }
        return true;
    }

public:
    /// NVIDIA device identifiers, filled in by InitializeReportGenerator().
    DeviceIdentifiers deviceIdentifiers;
    /// Extra metric names handed to the state machine by InitializeReportGenerator();
    /// populate this before calling InitializeReportGenerator().
    std::vector<std::string> additionalMetrics;

public:
    /// Ends any active session via Reset().
    ~ReportGeneratorOpenGL()
    {
        Reset();
    }

    /// Constructs an uninitialized generator.
    ReportGeneratorOpenGL()
        : m_reportProfiler()
        , m_stateMachine(m_reportProfiler)
        , m_initStatus(ReportGeneratorInitStatus::NeverCalled)
        , deviceIdentifiers()
        , additionalMetrics()
    {
    }

    /// Returns the outcome of the most recent InitializeReportGenerator() / Reset() sequence.
    ReportGeneratorInitStatus GetInitStatus() const
    {
        return m_initStatus;
    }

    /// Ends all current sessions and frees all internal memory.
    /// This object may be reused by calling InitializeReportGenerator() again.
    /// Does not reset deviceIdentifiers.
    void Reset()
    {
        if (m_reportProfiler.rangeProfiler.IsInSession())
        {
            const bool endSessionStatus = m_reportProfiler.rangeProfiler.EndSession();
            if (!endSessionStatus)
            {
                // Best effort: log and keep tearing down the remaining state.
                NV_PERF_LOG_ERR(10, "m_reportProfiler.EndSession failed\n");
            }
        }
        m_stateMachine.Reset();
        // NeverCalled is preserved so callers can tell "never initialized" apart from "reset".
        if (m_initStatus != ReportGeneratorInitStatus::NeverCalled)
        {
            m_initStatus = ReportGeneratorInitStatus::Reset;
        }
    }

    /// Initialize this object on the current OpenGL context.
    /// Returns true on success. On any failure an error is logged, false is returned and
    /// GetInitStatus() reports Failed.
    bool InitializeReportGenerator()
    {
        // Pessimistically mark as failed; only the success path at the end overrides this.
        m_initStatus = ReportGeneratorInitStatus::Failed;

        // Can this device be profiled by Nsight Perf SDK?
        if (!nv::perf::OpenGLIsNvidiaDevice())
        {
            NV_PERF_LOG_ERR(10, "%s is not an NVIDIA Device\n", OpenGLGetDeviceName().c_str());
            return false;
        }
        if (!InitializeNvPerf())
        {
            NV_PERF_LOG_ERR(10, "InitializeNvPerf failed\n");
            return false;
        }
        if (!nv::perf::OpenGLLoadDriver())
        {
            NV_PERF_LOG_ERR(10, "Could not load driver\n");
            return false;
        }
        if (!nv::perf::profiler::OpenGLIsGpuSupported())
        {
            NV_PERF_LOG_ERR(10, "GPU is not supported\n");
            return false;
        }
        deviceIdentifiers = OpenGLGetDeviceIdentifiers();
        if (!deviceIdentifiers.pChipName)
        {
            NV_PERF_LOG_ERR(10, "Unrecognized GPU\n");
            return false;
        }

        // Factory callbacks handed to the state machine. The evaluator callback sizes the
        // caller-provided scratch buffer and yields nullptr when the size query returns 0.
        auto createMetricsEvaluator = [&](std::vector<uint8_t>& scratchBuffer) {
            const size_t scratchBufferSize = nv::perf::OpenGLCalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName);
            if (!scratchBufferSize)
            {
                return (NVPW_MetricsEvaluator*)nullptr;
            }
            scratchBuffer.resize(scratchBufferSize);
            NVPW_MetricsEvaluator* pMetricsEvaluator = nv::perf::OpenGLCreateMetricsEvaluator(scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName);
            return pMetricsEvaluator;
        };
        auto createRawMetricsConfig = [&]() {
            NVPA_RawMetricsConfig* pRawMetricsConfig = nv::perf::profiler::OpenGLCreateRawMetricsConfig(deviceIdentifiers.pChipName);
            return pRawMetricsConfig;
        };
        if (!m_stateMachine.InitializeReportMetrics(deviceIdentifiers, createMetricsEvaluator, createRawMetricsConfig, additionalMetrics))
        {
            NV_PERF_LOG_ERR(10, "m_stateMachine.InitializeReportMetrics failed\n");
            return false;
        }

        m_initStatus = ReportGeneratorInitStatus::Succeeded;
        NV_PERF_LOG_INF(50, "Initialization succeeded\n");
        return true;
    }

    /// Explicitly starts a session. This allows you to control resource allocation.
    /// Calling this function is optional; by default, OnFrameStart() will start a session if this isn't called.
    /// The session must be explicitly ended by calling Reset().
    /// Returns false if initialization has not succeeded or the state machine fails to begin the session.
    bool BeginSession(const SessionOptions* pSessionOptions = nullptr)
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        auto beginSessionFn = [&]() {
            return BeginSessionWithOptions(pSessionOptions);
        };
        if (!m_stateMachine.BeginSession(beginSessionFn))
        {
            return false;
        }
        return true;
    }

    /// Automatically starts collecting counters for a report, after StartCollectionOnNextFrame().
    /// Call this at the start of each frame.
    /// Returns false if initialization has not succeeded or the state machine reports failure.
    bool OnFrameStart()
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        // Handed to the state machine so it can begin a default-options session.
        auto beginSessionFn = [&]() {
            return BeginSessionWithOptions();
        };
        if (!m_stateMachine.OnFrameStart(beginSessionFn))
        {
            return false;
        }
        return true;
    }

    /// Advances the counter-collection state-machine after rendering.
    /// Call this at the end of each frame.
    /// Returns false if initialization has not succeeded or the state machine reports failure.
    bool OnFrameEnd()
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        if (!m_stateMachine.OnFrameEnd())
        {
            return false;
        }
        return true;
    }

    /// Opens a named range on the underlying profiler.
    /// Only takes effect while a profiler pass is active; otherwise a warning is logged and
    /// false is returned. Also returns false if initialization has not succeeded or the
    /// profiler's PushRange fails.
    bool PushRange(const char* pRangeName)
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        if (!m_reportProfiler.IsInPass())
        {
            NV_PERF_LOG_WRN(100, "skipping; not in a profiler pass\n");
            return false;
        }
        if (!m_reportProfiler.PushRange(pRangeName))
        {
            return false;
        }
        return true;
    }

    /// Closes the most recently pushed range on the underlying profiler.
    /// Only takes effect while a profiler pass is active; otherwise a warning is logged and
    /// false is returned. Also returns false if initialization has not succeeded or the
    /// profiler's PopRange fails.
    bool PopRange()
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        if (!m_reportProfiler.IsInPass())
        {
            NV_PERF_LOG_WRN(100, "skipping; not in a profiler pass\n");
            return false;
        }
        if (!m_reportProfiler.PopRange())
        {
            return false;
        }
        return true;
    }

    /// Reports true after StartCollectionOnNextFrame() is called, until the HTML Report has been written to disk.
    /// This state is cleared by OnFrameEnd().
    bool IsCollectingReport() const
    {
        return m_stateMachine.IsCollectingReport();
    }

    /// Returns the report directory name as tracked by the state machine.
    const std::string& GetReportDirectoryName() const
    {
        return m_stateMachine.GetReportDirectoryName();
    }

    /// Enqueues report collection, starting on the next frame.
    /// Returns false without enqueuing anything if initialization has not succeeded.
    bool StartCollectionOnNextFrame(const char* pDirectoryName, AppendDateTime appendDateTime)
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        return m_stateMachine.StartCollectionOnNextFrame(pDirectoryName, appendDateTime);
    }

    /// Enables a frame-level parent range.
    /// When enabled (non-NULL, non-empty pRangeName), every frame will have a parent range.
    /// This is also convenient for programs that define no ranges of their own.
    /// Pass in NULL or an empty string to disable this behavior.
    /// The pRangeName string is copied by value, and may be modified or freed after this function returns.
    void SetFrameLevelRangeName(const char* pRangeName)
    {
        m_stateMachine.SetFrameLevelRangeName(pRangeName);
    }

    /// Retrieves the current frame-level parent range. An empty string signifies no parent range.
    const std::string& GetFrameLevelRangeName() const
    {
        return m_stateMachine.GetFrameLevelRangeName();
    }

    /// Sets the number of Push/Pop nesting levels to collect in the report.
    void SetNumNestingLevels(uint16_t numNestingLevels)
    {
        m_stateMachine.SetNumNestingLevels(numNestingLevels);
    }

    /// Retrieves the number of Push/Pop nesting levels being collected in the report.
    uint16_t GetNumNestingLevels() const
    {
        return m_stateMachine.GetNumNestingLevels();
    }

    /// Open the report directory in file browser after perf data collection.
    /// The default behavior is false, and can be changed by environment variable NV_PERF_OPEN_REPORT_DIR_AFTER_COLLECTION.
    void SetOpenReportDirectoryAfterCollection(bool openReportDirectoryAfterCollection)
    {
        m_stateMachine.SetOpenReportDirectoryAfterCollection(openReportDirectoryAfterCollection);
    }
};
}}}

View File

@@ -0,0 +1,358 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "NvPerfReportGenerator.h"
#include "NvPerfRangeProfilerVulkan.h"
namespace nv { namespace perf { namespace profiler {
/// Drives per-frame report collection for a Vulkan application.
/// Combines a RangeProfilerVulkan with the API-agnostic ReportGeneratorStateMachine and remembers
/// the VkInstance/VkPhysicalDevice/VkDevice given at initialization so later calls only need a queue.
///
/// Typical call order, as implied by the guards in the methods below:
///   InitializeReportGenerator() -> [BeginSession()] -> StartCollectionOnNextFrame()
///   -> OnFrameStart()/OnFrameEnd() every frame -> Reset().
class ReportGeneratorVulkan
{
protected:
    /// Adapter exposing the Vulkan range profiler through the state machine's IReportProfiler interface.
    /// Every override forwards directly to the identically named RangeProfilerVulkan method.
    struct ReportProfiler : public ReportGeneratorStateMachine::IReportProfiler
    {
        RangeProfilerVulkan rangeProfiler;

        ReportProfiler()
            : rangeProfiler()
        {
        }

        virtual bool IsInSession() const override
        {
            return rangeProfiler.IsInSession();
        }

        virtual bool IsInPass() const override
        {
            return rangeProfiler.IsInPass();
        }

        virtual bool EndSession() override
        {
            return rangeProfiler.EndSession();
        }

        virtual bool EnqueueCounterCollection(const SetConfigParams& config) override
        {
            return rangeProfiler.EnqueueCounterCollection(config);
        }

        virtual bool BeginPass() override
        {
            return rangeProfiler.BeginPass();
        }

        virtual bool EndPass() override
        {
            return rangeProfiler.EndPass();
        }

        virtual bool PushRange(const char* pRangeName) override
        {
            return rangeProfiler.PushRange(pRangeName);
        }

        virtual bool PopRange() override
        {
            return rangeProfiler.PopRange();
        }

        virtual bool DecodeCounters(DecodeResult& decodeResult) override
        {
            return rangeProfiler.DecodeCounters(decodeResult);
        }

        virtual bool AllPassesSubmitted() const override
        {
            return rangeProfiler.AllPassesSubmitted();
        }
    };

protected:
    // NOTE: m_reportProfiler must stay declared before m_stateMachine, because the state machine
    // is constructed from it in the constructor's initializer list.
    ReportProfiler m_reportProfiler;
    ReportGeneratorStateMachine m_stateMachine;
    // Vulkan device state; set by a successful InitializeReportGenerator(), cleared by Reset().
    VkInstance m_instance;
    VkPhysicalDevice m_physicalDevice;
    VkDevice m_device;
    ReportGeneratorInitStatus m_initStatus; // the state of InitializeReportGenerator()

protected:
    /// Begins a range-profiler session on the given queue.
    /// Uses *pSessionOptions when provided; otherwise uses value-initialized options with
    /// maxNumRanges set to ReportGeneratorStateMachine::MaxNumRangesDefault.
    /// Returns false (after logging) if the underlying BeginSession fails.
    bool BeginSessionWithOptions(
        VkInstance instance,
        VkPhysicalDevice physicalDevice,
        VkDevice device,
        VkQueue queue,
        uint32_t queueFamilyIndex,
        const SessionOptions* pSessionOptions = nullptr)
    {
        SessionOptions sessionOptions = {};
        sessionOptions.maxNumRanges = ReportGeneratorStateMachine::MaxNumRangesDefault;
        if (pSessionOptions)
        {
            // Caller-supplied options replace the defaults wholesale, including maxNumRanges.
            sessionOptions = *pSessionOptions;
        }
        if (!m_reportProfiler.rangeProfiler.BeginSession(instance, physicalDevice, device, queue, queueFamilyIndex, sessionOptions))
        {
            NV_PERF_LOG_ERR(10, "m_reportProfiler.rangeProfiler.BeginSession failed\n");
            return false;
        }
        return true;
    }

public:
    /// RangeCommands is safe to use on any CommandBuffer belonging to the VkDevice used for initialization.
    /// RangeCommands perform no operation when called on unsupported or non-NVIDIA devices.
    VulkanRangeCommands rangeCommands;
    /// NVIDIA device identifiers, filled in by InitializeReportGenerator().
    DeviceIdentifiers deviceIdentifiers;
    /// Extra metric names handed to the state machine by InitializeReportGenerator();
    /// populate this before calling InitializeReportGenerator().
    std::vector<std::string> additionalMetrics;

public:
    /// Ends any active session via Reset().
    ~ReportGeneratorVulkan()
    {
        Reset();
    }

    /// Constructs an uninitialized generator with null Vulkan handles.
    ReportGeneratorVulkan()
        : m_reportProfiler()
        , m_stateMachine(m_reportProfiler)
        , m_instance(VK_NULL_HANDLE)
        , m_physicalDevice(VK_NULL_HANDLE)
        , m_device(VK_NULL_HANDLE)
        , m_initStatus(ReportGeneratorInitStatus::NeverCalled)
        , rangeCommands()
        , deviceIdentifiers()
        , additionalMetrics()
    {
    }

    /// Returns the outcome of the most recent InitializeReportGenerator() / Reset() sequence.
    ReportGeneratorInitStatus GetInitStatus() const
    {
        return m_initStatus;
    }

    /// Ends all current sessions and frees all internal memory.
    /// This object may be reused by calling InitializeReportGenerator() again.
    /// Does not reset rangeCommands and deviceIdentifiers.
    void Reset()
    {
        if (m_reportProfiler.rangeProfiler.IsInSession())
        {
            const bool endSessionStatus = m_reportProfiler.rangeProfiler.EndSession();
            if (!endSessionStatus)
            {
                // Best effort: log and keep tearing down the remaining state.
                NV_PERF_LOG_ERR(10, "m_reportProfiler.EndSession failed\n");
            }
        }
        m_stateMachine.Reset();
        m_device = VK_NULL_HANDLE;
        m_physicalDevice = VK_NULL_HANDLE;
        m_instance = VK_NULL_HANDLE;
        // NeverCalled is preserved so callers can tell "never initialized" apart from "reset".
        if (m_initStatus != ReportGeneratorInitStatus::NeverCalled)
        {
            m_initStatus = ReportGeneratorInitStatus::Reset;
        }
    }

    /// Initialize this object on the provided VkDevice.
    /// Returns true on success. On any failure an error is logged, false is returned,
    /// GetInitStatus() reports Failed and the stored Vulkan handles stay null.
    bool InitializeReportGenerator(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device)
    {
        // Do this first, in case this object was previously initialized on an NVIDIA device, and is now re-initialized on non-NVIDIA.
        rangeCommands.Initialize(physicalDevice);
        m_instance = VK_NULL_HANDLE;
        m_physicalDevice = VK_NULL_HANDLE;
        m_device = VK_NULL_HANDLE;
        // Pessimistically mark as failed; only the success path at the end overrides this.
        m_initStatus = ReportGeneratorInitStatus::Failed;

        // Can this device be profiled by Nsight Perf SDK?
        if (!nv::perf::VulkanIsNvidiaDevice(physicalDevice))
        {
            // VulkanGetDeviceName() returns a narrow std::string, so the matching specifier is %s (not %ls).
            NV_PERF_LOG_ERR(10, "%s is not an NVIDIA Device\n", VulkanGetDeviceName(physicalDevice).c_str());
            return false;
        }
        if (!InitializeNvPerf())
        {
            NV_PERF_LOG_ERR(10, "InitializeNvPerf failed\n");
            return false;
        }
        if (!nv::perf::VulkanLoadDriver(instance))
        {
            NV_PERF_LOG_ERR(10, "Could not load driver\n");
            return false;
        }
        if (!nv::perf::profiler::VulkanIsGpuSupported(instance, physicalDevice, device))
        {
            NV_PERF_LOG_ERR(10, "GPU is not supported\n");
            return false;
        }
        deviceIdentifiers = VulkanGetDeviceIdentifiers(instance, physicalDevice, device);
        if (!deviceIdentifiers.pChipName)
        {
            NV_PERF_LOG_ERR(10, "Unrecognized GPU\n");
            return false;
        }

        // Factory callbacks handed to the state machine. The evaluator callback sizes the
        // caller-provided scratch buffer and yields nullptr when the size query returns 0.
        auto createMetricsEvaluator = [&](std::vector<uint8_t>& scratchBuffer) {
            const size_t scratchBufferSize = nv::perf::VulkanCalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName);
            if (!scratchBufferSize)
            {
                return (NVPW_MetricsEvaluator*)nullptr;
            }
            scratchBuffer.resize(scratchBufferSize);
            NVPW_MetricsEvaluator* pMetricsEvaluator = nv::perf::VulkanCreateMetricsEvaluator(scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName);
            return pMetricsEvaluator;
        };
        auto createRawMetricsConfig = [&]() {
            NVPA_RawMetricsConfig* pRawMetricsConfig = nv::perf::profiler::VulkanCreateRawMetricsConfig(deviceIdentifiers.pChipName);
            return pRawMetricsConfig;
        };
        if (!m_stateMachine.InitializeReportMetrics(deviceIdentifiers, createMetricsEvaluator, createRawMetricsConfig, additionalMetrics))
        {
            NV_PERF_LOG_ERR(10, "m_stateMachine.InitializeReportMetrics failed\n");
            return false;
        }

        // Remember the handles so BeginSession()/OnFrameStart() only need the queue.
        m_instance = instance;
        m_physicalDevice = physicalDevice;
        m_device = device;
        m_initStatus = ReportGeneratorInitStatus::Succeeded;
        NV_PERF_LOG_INF(50, "Initialization succeeded\n");
        return true;
    }

    /// Explicitly starts a session. This allows you to control resource allocation.
    /// Calling this function is optional; by default, OnFrameStart() will start a session if this isn't called.
    /// The session must be explicitly ended by calling Reset().
    /// The queue must belong to the VkDevice passed into InitializeReportGenerator().
    /// Returns false if initialization has not succeeded or the state machine fails to begin the session.
    bool BeginSession(VkQueue queue, uint32_t queueFamilyIndex, const SessionOptions* pSessionOptions = nullptr)
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        auto beginSessionFn = [&]() {
            return BeginSessionWithOptions(m_instance, m_physicalDevice, m_device, queue, queueFamilyIndex, pSessionOptions);
        };
        if (!m_stateMachine.BeginSession(beginSessionFn))
        {
            return false;
        }
        return true;
    }

    /// Automatically starts collecting counters for a report, after StartCollectionOnNextFrame().
    /// Call this at the start of each frame.
    /// The queue must belong to the VkDevice passed into InitializeReportGenerator().
    /// Returns false if initialization has not succeeded or the state machine reports failure.
    bool OnFrameStart(VkQueue queue, uint32_t queueFamilyIndex)
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        // Handed to the state machine so it can begin a default-options session on this queue.
        auto beginSessionFn = [&]() {
            return BeginSessionWithOptions(m_instance, m_physicalDevice, m_device, queue, queueFamilyIndex);
        };
        if (!m_stateMachine.OnFrameStart(beginSessionFn))
        {
            return false;
        }
        return true;
    }

    /// Advances the counter-collection state-machine after rendering.
    /// Call this at the end of each frame.
    /// Returns false if initialization has not succeeded or the state machine reports failure.
    bool OnFrameEnd()
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        if (!m_stateMachine.OnFrameEnd())
        {
            return false;
        }
        return true;
    }

    /// Reports true after StartCollectionOnNextFrame() is called, until the HTML Report has been written to disk.
    /// This state is cleared by OnFrameEnd().
    bool IsCollectingReport() const
    {
        return m_stateMachine.IsCollectingReport();
    }

    /// Returns the report directory name as tracked by the state machine.
    const std::string& GetReportDirectoryName() const
    {
        return m_stateMachine.GetReportDirectoryName();
    }

    /// Enqueues report collection, starting on the next frame.
    /// Returns false without enqueuing anything if initialization has not succeeded.
    bool StartCollectionOnNextFrame(const char* pDirectoryName, AppendDateTime appendDateTime)
    {
        if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
        {
            NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
            return false;
        }
        return m_stateMachine.StartCollectionOnNextFrame(pDirectoryName, appendDateTime);
    }

    /// Enables a frame-level parent range.
    /// When enabled (non-NULL, non-empty pRangeName), every frame will have a parent range.
    /// This is also convenient for programs that have no CommandBuffer-level ranges.
    /// Pass in NULL or an empty string to disable this behavior.
    /// The pRangeName string is copied by value, and may be modified or freed after this function returns.
    void SetFrameLevelRangeName(const char* pRangeName)
    {
        m_stateMachine.SetFrameLevelRangeName(pRangeName);
    }

    /// Retrieves the current frame-level parent range. An empty string signifies no parent range.
    const std::string& GetFrameLevelRangeName() const
    {
        return m_stateMachine.GetFrameLevelRangeName();
    }

    /// Sets the number of Push/Pop nesting levels to collect in the report.
    void SetNumNestingLevels(uint16_t numNestingLevels)
    {
        m_stateMachine.SetNumNestingLevels(numNestingLevels);
    }

    /// Retrieves the number of Push/Pop nesting levels being collected in the report.
    uint16_t GetNumNestingLevels() const
    {
        return m_stateMachine.GetNumNestingLevels();
    }

    /// Open the report directory in file browser after perf data collection.
    /// The default behavior is false, and can be changed by environment variable NV_PERF_OPEN_REPORT_DIR_AFTER_COLLECTION.
    void SetOpenReportDirectoryAfterCollection(bool openReportDirectoryAfterCollection)
    {
        m_stateMachine.SetOpenReportDirectoryAfterCollection(openReportDirectoryAfterCollection);
    }
};
}}}

View File

@@ -0,0 +1,374 @@
/*
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <vulkan/vulkan.h>
#include "NvPerfInit.h"
#include "NvPerfDeviceProperties.h"
#include "nvperf_vulkan_host.h"
#include "nvperf_vulkan_target.h"
namespace nv { namespace perf {
//
// Vulkan Only Utilities
//
/// Returns the device name that Vulkan reports for `physicalDevice`.
/// `physicalDevice` is passed straight to vkGetPhysicalDeviceProperties, so it must be a valid handle.
inline std::string VulkanGetDeviceName(VkPhysicalDevice physicalDevice)
{
    VkPhysicalDeviceProperties deviceProperties;
    vkGetPhysicalDeviceProperties(physicalDevice, &deviceProperties);
    return std::string(deviceProperties.deviceName);
}
/// Returns true when the vendor ID Vulkan reports for `physicalDevice` equals NVIDIA_VENDOR_ID.
/// `physicalDevice` is passed straight to vkGetPhysicalDeviceProperties, so it must be a valid handle.
inline bool VulkanIsNvidiaDevice(VkPhysicalDevice physicalDevice)
{
    VkPhysicalDeviceProperties deviceProperties;
    vkGetPhysicalDeviceProperties(physicalDevice, &deviceProperties);
    return deviceProperties.vendorID == NVIDIA_VENDOR_ID;
}
inline uint32_t VulkanGetInstanceApiVersion()
{
PFN_vkEnumerateInstanceVersion pfnVkEnumerateInstanceVersion = (PFN_vkEnumerateInstanceVersion)vkGetInstanceProcAddr(VK_NULL_HANDLE, "vkEnumerateInstanceVersion");
//This API doesn't exist on 1.0 loader
if (!pfnVkEnumerateInstanceVersion)
{
return VK_API_VERSION_1_0;
}
uint32_t loaderVersion;
VkResult res = pfnVkEnumerateInstanceVersion(&loaderVersion);
if (res != VK_SUCCESS)
{
NV_PERF_LOG_ERR(10, "Couldn't enumerate instance version!\n");
return 0;
}
return loaderVersion;
}
/// Returns the `apiVersion` field of the properties Vulkan reports for `physicalDevice`.
/// `physicalDevice` is passed straight to vkGetPhysicalDeviceProperties, so it must be a valid handle.
inline uint32_t VulkanGetPhysicalDeviceApiVersion(VkPhysicalDevice physicalDevice)
{
    VkPhysicalDeviceProperties deviceProperties;
    vkGetPhysicalDeviceProperties(physicalDevice, &deviceProperties);
    const uint32_t deviceApiVersion = deviceProperties.apiVersion;
    return deviceApiVersion;
}
//
// Vulkan NvPerf Utilities
//
inline bool VulkanAppendInstanceRequiredExtensions(std::vector<const char*>& instanceExtensionNames)
{
NVPW_VK_Profiler_GetRequiredInstanceExtensions_Params getRequiredInstanceExtensionsParams = { NVPW_VK_Profiler_GetRequiredInstanceExtensions_Params_STRUCT_SIZE };
getRequiredInstanceExtensionsParams.apiVersion = VulkanGetInstanceApiVersion();
NVPA_Status nvpaStatus = NVPW_VK_Profiler_GetRequiredInstanceExtensions(&getRequiredInstanceExtensionsParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_GetRequiredInstanceExtensions failed\n");
return false;
}
if (!getRequiredInstanceExtensionsParams.isOfficiallySupportedVersion)
{
uint32_t major = VK_VERSION_MAJOR(getRequiredInstanceExtensionsParams.apiVersion);
uint32_t minor = VK_VERSION_MINOR(getRequiredInstanceExtensionsParams.apiVersion);
uint32_t patch = VK_VERSION_PATCH(getRequiredInstanceExtensionsParams.apiVersion);
// not an error - NvPerf treats any unknown version as the same as its latest known version.
// Unknown version warnings should be reported back to the Nsight Perf team to get official support
NV_PERF_LOG_WRN(10, "Vulkan Instance API Version: %u.%u.%u - is not an officially supported version\n", major, minor, patch);
}
for (uint32_t extensionIndex=0; extensionIndex < getRequiredInstanceExtensionsParams.numInstanceExtensionNames; ++ extensionIndex)
{
instanceExtensionNames.push_back(getRequiredInstanceExtensionsParams.ppInstanceExtensionNames[extensionIndex]);
}
return true;
}
/// Appends the device extensions NvPerf requires to `deviceExtensionNames`.
///
/// @param instance                optional; forwarded to NvPerf so it can query advanced features.
/// @param physicalDevice          optional; when VK_NULL_HANDLE (as VulkanAppendRequiredExtensions passes),
///                                no physical device is queried: the NVIDIA vendor check is skipped and the
///                                instance API version is used in place of the device API version.
/// @param pfnGetInstanceProcAddr  optional; forwarded to NvPerf.
/// @param deviceExtensionNames    receives the required extension name pointers (appended).
/// @return true on success, including the no-op case of a non-NVIDIA physical device;
///         false (after logging) when NVPW_VK_Profiler_GetRequiredDeviceExtensions fails.
inline bool VulkanAppendDeviceRequiredExtensions(VkInstance instance, VkPhysicalDevice physicalDevice, void* pfnGetInstanceProcAddr, std::vector<const char*>& deviceExtensionNames)
{
    // vkGetPhysicalDeviceProperties requires a valid physical device handle, so every query that goes
    // through it must be guarded when the caller did not supply a device.
    const bool hasPhysicalDevice = (physicalDevice != VK_NULL_HANDLE);
    if (hasPhysicalDevice && !VulkanIsNvidiaDevice(physicalDevice))
    {
        return true; // do nothing on non-NVIDIA devices
    }
    NVPW_VK_Profiler_GetRequiredDeviceExtensions_Params getRequiredDeviceExtensionsParams = { NVPW_VK_Profiler_GetRequiredDeviceExtensions_Params_STRUCT_SIZE };
    getRequiredDeviceExtensionsParams.apiVersion = hasPhysicalDevice
        ? VulkanGetPhysicalDeviceApiVersion(physicalDevice)
        : VulkanGetInstanceApiVersion();
    // optional parameters - this allows NvPerf to query if certain advanced features are available for use
    getRequiredDeviceExtensionsParams.instance = instance;
    getRequiredDeviceExtensionsParams.physicalDevice = physicalDevice;
    getRequiredDeviceExtensionsParams.pfnGetInstanceProcAddr = pfnGetInstanceProcAddr;
    NVPA_Status nvpaStatus = NVPW_VK_Profiler_GetRequiredDeviceExtensions(&getRequiredDeviceExtensionsParams);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_GetRequiredDeviceExtensions failed\n");
        return false;
    }
    if (!getRequiredDeviceExtensionsParams.isOfficiallySupportedVersion)
    {
        uint32_t major = VK_VERSION_MAJOR(getRequiredDeviceExtensionsParams.apiVersion);
        uint32_t minor = VK_VERSION_MINOR(getRequiredDeviceExtensionsParams.apiVersion);
        uint32_t patch = VK_VERSION_PATCH(getRequiredDeviceExtensionsParams.apiVersion);
        // not an error - NvPerf treats any unknown version as the same as its latest known version.
        // Unknown version warnings should be reported back to the Nsight Perf team to get official support
        NV_PERF_LOG_WRN(100, "Vulkan Device API Version: %u.%u.%u - is not an officially supported version\n", major, minor, patch);
    }
    // Copy the name pointers reported by NvPerf into the caller's list, preserving their order.
    for (uint32_t extensionIndex = 0; extensionIndex < getRequiredDeviceExtensionsParams.numDeviceExtensionNames; ++extensionIndex)
    {
        deviceExtensionNames.push_back(getRequiredDeviceExtensionsParams.ppDeviceExtensionNames[extensionIndex]);
    }
    return true;
}
/// Convenience wrapper that appends both the instance and the device extensions NvPerf requires.
/// No instance or physical device is supplied for the device query: null handles and a null
/// proc-address pointer are passed.
/// @return true only when both queries succeed; the device query is not attempted if the instance query fails.
inline bool VulkanAppendRequiredExtensions(std::vector<const char*>& instanceExtensionNames, std::vector<const char*>& deviceExtensionNames)
{
    return VulkanAppendInstanceRequiredExtensions(instanceExtensionNames)
        && VulkanAppendDeviceRequiredExtensions(VK_NULL_HANDLE, VK_NULL_HANDLE, nullptr, deviceExtensionNames);
}
/// Calls NVPW_VK_LoadDriver for the given Vulkan instance.
/// @return true on success; false (after logging) when NVPW_VK_LoadDriver reports a non-zero status.
inline bool VulkanLoadDriver(VkInstance instance)
{
    NVPW_VK_LoadDriver_Params params = { NVPW_VK_LoadDriver_Params_STRUCT_SIZE };
    params.instance = instance;

    const NVPA_Status status = NVPW_VK_LoadDriver(&params);
    if (!status)
    {
        return true;
    }

    NV_PERF_LOG_ERR(10, "NVPW_VK_LoadDriver failed\n");
    return false;
}
/// Asks NvPerf for its device index corresponding to the given Vulkan instance/physical device/device.
/// @param sliIndex  forwarded to NvPerf as `sliIndex`; defaults to 0.
/// @return the NvPerf device index, or ~size_t(0) when NVPW_VK_Device_GetDeviceIndex fails (nothing is logged).
inline size_t VulkanGetNvperfDeviceIndex(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device, size_t sliIndex = 0)
{
    NVPW_VK_Device_GetDeviceIndex_Params params = { NVPW_VK_Device_GetDeviceIndex_Params_STRUCT_SIZE };
    // NvPerf takes the loader entry points as untyped pointers.
    params.pfnGetInstanceProcAddr = reinterpret_cast<void*>(vkGetInstanceProcAddr);
    params.pfnGetDeviceProcAddr = reinterpret_cast<void*>(vkGetDeviceProcAddr);
    params.instance = instance;
    params.physicalDevice = physicalDevice;
    params.device = device;
    params.sliIndex = sliIndex;

    const NVPA_Status status = NVPW_VK_Device_GetDeviceIndex(&params);
    if (!status)
    {
        return params.deviceIndex;
    }
    return ~size_t(0); // sentinel for "no NvPerf device index"
}
/// Resolves the NvPerf device index for the given Vulkan device and returns GetDeviceIdentifiers() for it.
/// The index is passed on as-is, even when the lookup returned its failure sentinel.
inline DeviceIdentifiers VulkanGetDeviceIdentifiers(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device, size_t sliIndex = 0)
{
    const size_t nvperfDeviceIndex = VulkanGetNvperfDeviceIndex(instance, physicalDevice, device, sliIndex);
    return GetDeviceIdentifiers(nvperfDeviceIndex);
}
/// Returns GetDeviceClockState() for the NvPerf device index that corresponds to the given Vulkan device
/// (looked up with the default sliIndex).
inline NVPW_Device_ClockStatus VulkanGetDeviceClockState(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device)
{
    return GetDeviceClockState(VulkanGetNvperfDeviceIndex(instance, physicalDevice, device));
}
/// Applies an NVPW_Device_ClockSetting to the NvPerf device that corresponds to the given Vulkan device
/// (looked up with the default sliIndex). Returns the result of SetDeviceClockState().
inline bool VulkanSetDeviceClockState(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device, NVPW_Device_ClockSetting clockStatus)
{
    const size_t deviceIndex = VulkanGetNvperfDeviceIndex(instance, physicalDevice, device);
    const bool succeeded = SetDeviceClockState(deviceIndex, clockStatus);
    return succeeded;
}
/// Overload taking an NVPW_Device_ClockStatus: forwards it to SetDeviceClockState() for the NvPerf device
/// that corresponds to the given Vulkan device (looked up with the default sliIndex).
inline bool VulkanSetDeviceClockState(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device, NVPW_Device_ClockStatus clockStatus)
{
    const size_t deviceIndex = VulkanGetNvperfDeviceIndex(instance, physicalDevice, device);
    const bool succeeded = SetDeviceClockState(deviceIndex, clockStatus);
    return succeeded;
}
inline size_t VulkanCalculateMetricsEvaluatorScratchBufferSize(const char* pChipName)
{
NVPW_VK_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParams = { NVPW_VK_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE };
calculateScratchBufferSizeParams.pChipName = pChipName;
NVPA_Status nvpaStatus = NVPW_VK_MetricsEvaluator_CalculateScratchBufferSize(&calculateScratchBufferSizeParams);
if (nvpaStatus)
{
NV_PERF_LOG_ERR(20, "NVPW_VK_MetricsEvaluator_CalculateScratchBufferSize failed\n");
return 0;
}
return calculateScratchBufferSizeParams.scratchBufferSize;
}
/// Initializes an NvPerf metrics evaluator for chip `pChipName` using the caller-provided scratch buffer.
/// @param pScratchBuffer     forwarded to NvPerf as the evaluator's scratch memory.
/// @param scratchBufferSize  size of `pScratchBuffer` as forwarded to NvPerf.
/// @return the evaluator pointer reported by NvPerf, or nullptr (after logging) on failure.
inline NVPW_MetricsEvaluator* VulkanCreateMetricsEvaluator(uint8_t* pScratchBuffer, size_t scratchBufferSize, const char* pChipName)
{
    NVPW_VK_MetricsEvaluator_Initialize_Params params = { NVPW_VK_MetricsEvaluator_Initialize_Params_STRUCT_SIZE };
    params.pChipName = pChipName;
    params.pScratchBuffer = pScratchBuffer;
    params.scratchBufferSize = scratchBufferSize;

    const NVPA_Status status = NVPW_VK_MetricsEvaluator_Initialize(&params);
    if (!status)
    {
        return params.pMetricsEvaluator;
    }

    NV_PERF_LOG_ERR(20, "NVPW_VK_MetricsEvaluator_Initialize failed\n");
    return nullptr;
}
}}
namespace nv { namespace perf { namespace profiler {
/// Creates an NvPerf raw metrics config for chip `pChipName` with activity kind NVPA_ACTIVITY_KIND_PROFILER.
/// @return the config pointer reported by NvPerf, or nullptr when creation fails (nothing is logged).
inline NVPA_RawMetricsConfig* VulkanCreateRawMetricsConfig(const char* pChipName)
{
    NVPW_VK_RawMetricsConfig_Create_Params createParams = { NVPW_VK_RawMetricsConfig_Create_Params_STRUCT_SIZE };
    createParams.pChipName = pChipName;
    createParams.activityKind = NVPA_ACTIVITY_KIND_PROFILER;

    const NVPA_Status status = NVPW_VK_RawMetricsConfig_Create(&createParams);
    if (!status)
    {
        return createParams.pRawMetricsConfig;
    }
    return nullptr;
}
/// Checks whether NvPerf profiling is supported on the given Vulkan device.
/// On failure the reason is logged: the query itself failing, an unsupported GPU architecture,
/// and/or an unsupported SLI configuration.
/// @param sliIndex  forwarded to the NvPerf device index lookup; defaults to 0.
/// @return true only when NvPerf reports the GPU as supported.
inline bool VulkanIsGpuSupported(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device, size_t sliIndex = 0)
{
    NVPW_VK_Profiler_IsGpuSupported_Params isGpuSupportedParams = { NVPW_VK_Profiler_IsGpuSupported_Params_STRUCT_SIZE };
    isGpuSupportedParams.deviceIndex = VulkanGetNvperfDeviceIndex(instance, physicalDevice, device, sliIndex);

    const NVPA_Status status = NVPW_VK_Profiler_IsGpuSupported(&isGpuSupportedParams);
    if (status)
    {
        NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_IsGpuSupported failed on %s\n", VulkanGetDeviceName(physicalDevice).c_str());
        return false;
    }

    if (isGpuSupportedParams.isSupported)
    {
        return true;
    }

    // Unsupported: report the device, then each specific reason NvPerf gave.
    NV_PERF_LOG_ERR(10, "%s is not supported\n", VulkanGetDeviceName(physicalDevice).c_str());
    const bool architectureSupported = (isGpuSupportedParams.gpuArchitectureSupportLevel == NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_SUPPORTED);
    if (!architectureSupported)
    {
        const DeviceIdentifiers identifiers = VulkanGetDeviceIdentifiers(instance, physicalDevice, device, sliIndex);
        NV_PERF_LOG_ERR(10, "Unsupported GPU architecture %s\n", identifiers.pChipName);
    }
    const bool sliUnsupported = (isGpuSupportedParams.sliSupportLevel == NVPW_SLI_SUPPORT_LEVEL_UNSUPPORTED);
    if (sliUnsupported)
    {
        NV_PERF_LOG_ERR(10, "Devices in SLI configuration are not supported.\n");
    }
    return false;
}
/// Records an NvPerf profiler range push named `pRangeName` into `commandBuffer`.
/// @return true on success; false (after logging) when the NvPerf call fails.
inline bool VulkanPushRange(VkCommandBuffer commandBuffer, const char* pRangeName)
{
    NVPW_VK_Profiler_CommandBuffer_PushRange_Params params = { NVPW_VK_Profiler_CommandBuffer_PushRange_Params_STRUCT_SIZE };
    params.commandBuffer = commandBuffer;
    params.pRangeName = pRangeName;
    // NOTE(review): a length of 0 presumably lets NvPerf derive the length from the null terminator - confirm.
    params.rangeNameLength = 0;

    const NVPA_Status status = NVPW_VK_Profiler_CommandBuffer_PushRange(&params);
    if (!status)
    {
        return true;
    }

    NV_PERF_LOG_ERR(50, "NVPW_VK_Profiler_CommandBuffer_PushRange failed\n");
    return false;
}
/// Records an NvPerf profiler range pop into `commandBuffer`.
/// @return true on success; false (after logging) when the NvPerf call fails.
inline bool VulkanPopRange(VkCommandBuffer commandBuffer)
{
    NVPW_VK_Profiler_CommandBuffer_PopRange_Params params = { NVPW_VK_Profiler_CommandBuffer_PopRange_Params_STRUCT_SIZE };
    params.commandBuffer = commandBuffer;

    const NVPA_Status status = NVPW_VK_Profiler_CommandBuffer_PopRange(&params);
    if (!status)
    {
        return true;
    }

    NV_PERF_LOG_ERR(50, "NVPW_VK_Profiler_CommandBuffer_PopRange failed\n");
    return false;
}
/// Stand-in with the same signature as VulkanPushRange that does nothing.
/// Both arguments are ignored and the result is always false.
inline bool VulkanPushRange_Nop(VkCommandBuffer commandBuffer, const char* pRangeName)
{
    (void)commandBuffer;
    (void)pRangeName;
    return false;
}
/// Stand-in with the same signature as VulkanPopRange that does nothing.
/// The argument is ignored and the result is always false.
inline bool VulkanPopRange_Nop(VkCommandBuffer commandBuffer)
{
    (void)commandBuffer;
    return false;
}
//
/// Dispatch table for range push/pop commands.
/// PushRange/PopRange point at the real NvPerf-backed functions on NVIDIA devices and at the
/// do-nothing `_Nop` variants (which always return false) otherwise. A default-constructed
/// instance uses the `_Nop` variants until Initialize() is called.
struct VulkanRangeCommands
{
    bool isNvidiaDevice;
    bool(*PushRange)(VkCommandBuffer commandBuffer, const char* pRangeName);
    bool(*PopRange)(VkCommandBuffer commandBuffer);

    /// Starts in the non-NVIDIA state with both commands bound to the no-op variants.
    VulkanRangeCommands()
        : isNvidiaDevice(false)
        , PushRange(&VulkanPushRange_Nop)
        , PopRange(&VulkanPopRange_Nop)
    {
    }

    /// Binds the commands according to whether the target device is an NVIDIA device.
    void Initialize(bool isNvidiaDevice_)
    {
        isNvidiaDevice = isNvidiaDevice_;
        PushRange = isNvidiaDevice_ ? &VulkanPushRange : &VulkanPushRange_Nop;
        PopRange = isNvidiaDevice_ ? &VulkanPopRange : &VulkanPopRange_Nop;
    }

    /// Determines the vendor via VulkanIsNvidiaDevice(physicalDevice), then binds the commands accordingly.
    void Initialize(VkPhysicalDevice physicalDevice)
    {
        Initialize(VulkanIsNvidiaDevice(physicalDevice));
    }
};
}}}