port from perforce
This commit is contained in:
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
#include "NvPerfInit.h"
|
||||
#include "NvPerfMetricsConfigBuilder.h"
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
/// Result of CreateConfiguration(): the byte images copied out of a MetricsConfigBuilder
/// plus the pass counts reported by NVPW_Config_GetNumPasses.
struct CounterConfiguration
{
    std::vector<uint8_t> configImage;        // filled by MetricsConfigBuilder::GetConfigImage
    std::vector<uint8_t> counterDataPrefix;  // filled by MetricsConfigBuilder::GetCounterDataPrefix
    // Zero-initialized so a default-constructed object never exposes indeterminate counts.
    size_t numPipelinedPasses = 0;
    size_t numIsolatedPasses = 0;
};
|
||||
|
||||
/// Transforms configBuilder into configuration.
|
||||
inline bool CreateConfiguration(
|
||||
MetricsConfigBuilder& configBuilder,
|
||||
CounterConfiguration& configuration)
|
||||
{
|
||||
bool res = false;
|
||||
res = configBuilder.PrepareConfigImage();
|
||||
if (!res)
|
||||
{
|
||||
//std::cerr << "FAILED: D3D12CreateConfiguration - failed PrepareConfigImage\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t configImageSize = configBuilder.GetConfigImageSize();
|
||||
if (!configImageSize)
|
||||
{
|
||||
// std::cerr << "FAILED: GetConfigImageSize - failed PrepareConfigImage\n";
|
||||
return false;
|
||||
}
|
||||
configuration.configImage.resize(configImageSize);
|
||||
if (!configBuilder.GetConfigImage(configuration.configImage.size(), &configuration.configImage[0]))
|
||||
{
|
||||
//std::cerr << "FAILED: GetConfigImage - failed PrepareConfigImage\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t counterDataPrefixSize = configBuilder.GetCounterDataPrefixSize();
|
||||
if (!counterDataPrefixSize)
|
||||
{
|
||||
//std::cerr << "FAILED: GetCounterDataPrefixSize - failed PrepareConfigImage\n";
|
||||
return false;
|
||||
}
|
||||
configuration.counterDataPrefix.resize(counterDataPrefixSize);
|
||||
if (!configBuilder.GetCounterDataPrefix(configuration.counterDataPrefix.size(), &configuration.counterDataPrefix[0]))
|
||||
{
|
||||
//std::cerr << "FAILED: GetCounterDataPrefix - failed PrepareConfigImage\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_Config_GetNumPasses_Params getNumPassesParams = { NVPW_Config_GetNumPasses_Params_STRUCT_SIZE };
|
||||
getNumPassesParams.pConfig = &configuration.configImage[0];
|
||||
NVPA_Status nvpaStatus = NVPW_Config_GetNumPasses(&getNumPassesParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
configuration.numPipelinedPasses = getNumPassesParams.numPipelinedPasses;
|
||||
configuration.numIsolatedPasses = getNumPassesParams.numIsolatedPasses;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// Adds pMetricNames[0..numMetrics-1] into configBuilder, then transforms configBuilder into configuration.
|
||||
inline bool CreateConfiguration(
|
||||
MetricsConfigBuilder& configBuilder,
|
||||
size_t numMetrics,
|
||||
const char* const pMetricNames[],
|
||||
CounterConfiguration& configuration)
|
||||
{
|
||||
bool succeeded = configBuilder.AddMetrics(pMetricNames, numMetrics);
|
||||
if (!succeeded)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
succeeded = CreateConfiguration(configBuilder, configuration);
|
||||
if (!succeeded)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}}
|
||||
80
ruins64k/tools/NvPerfUtility/include/NvPerfCounterData.h
Normal file
80
ruins64k/tools/NvPerfUtility/include/NvPerfCounterData.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "nvperf_host.h"
|
||||
#include "nvperf_target.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace nv { namespace perf {
|
||||
inline size_t CounterDataGetNumRanges(const uint8_t* pCounterDataImage)
|
||||
{
|
||||
NVPW_CounterData_GetNumRanges_Params getNumRangeParams = { NVPW_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE };
|
||||
getNumRangeParams.pCounterDataImage = pCounterDataImage;
|
||||
NVPA_Status nvpaStatus = NVPW_CounterData_GetNumRanges(&getNumRangeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return getNumRangeParams.numRanges;
|
||||
}
|
||||
|
||||
// TODO: this function performs dynamic allocations; either need a non-malloc'ing variant, or move this to an appropriate place
|
||||
inline std::string CounterDataGetRangeName(const uint8_t* pCounterDataImage, size_t rangeIndex, char delimiter, const char** ppLeafName = nullptr)
|
||||
{
|
||||
std::string rangeName;
|
||||
|
||||
NVPW_CounterData_GetRangeDescriptions_Params params = { NVPW_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE };
|
||||
params.pCounterDataImage = pCounterDataImage;
|
||||
params.rangeIndex = rangeIndex;
|
||||
NVPA_Status nvpaStatus = NVPW_CounterData_GetRangeDescriptions(¶ms);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
if (!params.numDescriptions)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
std::vector<const char*> descriptions;
|
||||
descriptions.resize(params.numDescriptions);
|
||||
params.ppDescriptions = descriptions.data();
|
||||
nvpaStatus = NVPW_CounterData_GetRangeDescriptions(¶ms);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
rangeName += descriptions[0];
|
||||
for (size_t descriptionIdx = 1; descriptionIdx < params.numDescriptions; ++descriptionIdx)
|
||||
{
|
||||
const char* pDescription = params.ppDescriptions[descriptionIdx];
|
||||
rangeName += delimiter;
|
||||
rangeName += pDescription;
|
||||
}
|
||||
|
||||
if (ppLeafName)
|
||||
{
|
||||
*ppLeafName = descriptions.back();
|
||||
}
|
||||
|
||||
return rangeName;
|
||||
}
|
||||
}}
|
||||
82
ruins64k/tools/NvPerfUtility/include/NvPerfD3D.h
Normal file
82
ruins64k/tools/NvPerfUtility/include/NvPerfD3D.h
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "NvPerfInit.h"
|
||||
#include "NvPerfDeviceProperties.h"
|
||||
|
||||
#include <dxgi.h>
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
/// Returns true when the adapter's DXGI description reports NVIDIA's vendor ID.
/// Returns false if the description cannot be retrieved.
inline bool DxgiIsNvidiaDevice(IDXGIAdapter* pAdapter)
{
    DXGI_ADAPTER_DESC adapterDesc = {};
    HRESULT hr = pAdapter->GetDesc(&adapterDesc);
    if (FAILED(hr))
    {
        return false;
    }

    // Use the shared constant from NvPerfDeviceProperties.h instead of repeating 0x10de.
    return adapterDesc.VendorId == static_cast<UINT>(NVIDIA_VENDOR_ID);
}
|
||||
|
||||
/// Maps a DXGI adapter (and SLI index) to an NvPerf device index via NVPW_Adapter_GetDeviceIndex.
/// Returns ~size_t(0) when the lookup fails.
inline size_t D3DGetNvperfDeviceIndex(IDXGIAdapter* pDXGIAdapter, size_t sliIndex = 0)
{
    NVPW_Adapter_GetDeviceIndex_Params query = { NVPW_Adapter_GetDeviceIndex_Params_STRUCT_SIZE };
    query.pAdapter = pDXGIAdapter;
    query.sliIndex = sliIndex;

    const NVPA_Status status = NVPW_Adapter_GetDeviceIndex(&query);
    if (!status)
    {
        return query.deviceIndex;
    }
    return ~size_t(0);
}
|
||||
|
||||
/// Resolves the adapter to an NvPerf device index and returns that device's identifiers.
/// The index is forwarded to GetDeviceIdentifiers() without being checked here.
inline DeviceIdentifiers D3DGetDeviceIdentifiers(IDXGIAdapter* pDXGIAdapter, size_t sliIndex = 0)
{
    const size_t nvperfDeviceIndex = D3DGetNvperfDeviceIndex(pDXGIAdapter, sliIndex);
    return GetDeviceIdentifiers(nvperfDeviceIndex);
}
|
||||
|
||||
/// Returns the clock status of the NvPerf device backing pDXGIAdapter (SLI index 0).
inline NVPW_Device_ClockStatus D3DGetDeviceClockState(IDXGIAdapter* pDXGIAdapter)
{
    return GetDeviceClockState(D3DGetNvperfDeviceIndex(pDXGIAdapter));
}
|
||||
|
||||
/// Applies clockSetting to the NvPerf device backing pDXGIAdapter (SLI index 0).
inline bool D3DSetDeviceClockState(IDXGIAdapter* pDXGIAdapter, NVPW_Device_ClockSetting clockSetting)
{
    size_t deviceIdx = D3DGetNvperfDeviceIndex(pDXGIAdapter);
    const bool applied = SetDeviceClockState(deviceIdx, clockSetting);
    return applied;
}
|
||||
|
||||
/// Applies a setting derived from clockStatus to the NvPerf device backing pDXGIAdapter (SLI index 0).
inline bool D3DSetDeviceClockState(IDXGIAdapter* pDXGIAdapter, NVPW_Device_ClockStatus clockStatus)
{
    size_t deviceIdx = D3DGetNvperfDeviceIndex(pDXGIAdapter);
    const bool applied = SetDeviceClockState(deviceIdx, clockStatus);
    return applied;
}
|
||||
}}
|
||||
252
ruins64k/tools/NvPerfUtility/include/NvPerfD3D11.h
Normal file
252
ruins64k/tools/NvPerfUtility/include/NvPerfD3D11.h
Normal file
@@ -0,0 +1,252 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "NvPerfD3D.h"
|
||||
|
||||
#include "nvperf_d3d11_host.h"
|
||||
#include "nvperf_d3d11_target.h"
|
||||
#include <D3D11.h>
|
||||
#include <atlbase.h>
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
//
|
||||
// D3D11 Only Utilities
|
||||
//
|
||||
|
||||
/// Retrieves the DXGI adapter that pDevice was created on.
///
/// @param pDevice        D3D11 device to query
/// @param ppDXGIAdapter  receives an owning reference to the adapter; written only on success
/// @param pAdapterDesc   optional; when non-null it is filled via IDXGIAdapter::GetDesc
/// @return true on success; false if any of the COM calls fails.
inline bool D3D11FindAdapterForDevice(ID3D11Device* pDevice, IDXGIAdapter** ppDXGIAdapter, DXGI_ADAPTER_DESC* pAdapterDesc = nullptr)
{
    CComPtr<IDXGIDevice> pDXGIDevice;
    HRESULT hr = pDevice->QueryInterface(IID_PPV_ARGS(&pDXGIDevice));
    if (FAILED(hr))
    {
        return false;
    }

    // Hold the adapter locally so a later failure releases it instead of handing
    // the caller a reference together with a `false` return value.
    CComPtr<IDXGIAdapter> pDXGIAdapter;
    hr = pDXGIDevice->GetAdapter(&pDXGIAdapter);
    if (FAILED(hr))
    {
        return false;
    }

    if (pAdapterDesc)
    {
        hr = pDXGIAdapter->GetDesc(pAdapterDesc);
        if (FAILED(hr))
        {
            return false;
        }
    }

    *ppDXGIAdapter = pDXGIAdapter.Detach();
    return true;
}
|
||||
|
||||
inline std::wstring D3D11GetDeviceName(ID3D11Device* pDevice)
|
||||
{
|
||||
DXGI_ADAPTER_DESC adapterDesc = {};
|
||||
CComPtr<IDXGIAdapter> pDXGIAdapter;
|
||||
if (!D3D11FindAdapterForDevice(pDevice, &pDXGIAdapter, &adapterDesc))
|
||||
{
|
||||
return L"";
|
||||
}
|
||||
|
||||
return adapterDesc.Description;
|
||||
}
|
||||
|
||||
/// Returns true when the adapter behind pDevice is an NVIDIA adapter; false if the adapter cannot be found.
inline bool D3D11IsNvidiaDevice(ID3D11Device* pDevice)
{
    CComPtr<IDXGIAdapter> pAdapter;
    const bool found = D3D11FindAdapterForDevice(pDevice, &pAdapter);
    if (!found)
    {
        return false;
    }
    return DxgiIsNvidiaDevice(pAdapter);
}
|
||||
|
||||
/// Device-context overload: fetches the owning device and forwards to the ID3D11Device overload.
/// Returns false if no device is returned.
inline bool D3D11IsNvidiaDevice(ID3D11DeviceContext* pDeviceContext)
{
    CComPtr<ID3D11Device> pOwningDevice;
    pDeviceContext->GetDevice(&pOwningDevice);
    if (!pOwningDevice)
    {
        return false;
    }
    return D3D11IsNvidiaDevice(pOwningDevice);
}
|
||||
|
||||
//
|
||||
// D3D11 NvPerf Utilities
|
||||
//
|
||||
|
||||
inline bool D3D11LoadDriver()
|
||||
{
|
||||
NVPW_D3D11_LoadDriver_Params loadDriverParams = { NVPW_D3D11_LoadDriver_Params_STRUCT_SIZE };
|
||||
NVPA_Status nvpaStatus = NVPW_D3D11_LoadDriver(&loadDriverParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_D3D11_LoadDriver failed\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Maps a D3D11 device (and SLI index) to an NvPerf device index via NVPW_D3D11_Device_GetDeviceIndex.
/// Returns ~size_t(0) when the lookup fails.
inline size_t D3D11GetNvperfDeviceIndex(ID3D11Device* pDevice, size_t sliIndex = 0)
{
    NVPW_D3D11_Device_GetDeviceIndex_Params query = { NVPW_D3D11_Device_GetDeviceIndex_Params_STRUCT_SIZE };
    query.pDevice = pDevice;
    query.sliIndex = sliIndex;

    const NVPA_Status status = NVPW_D3D11_Device_GetDeviceIndex(&query);
    if (!status)
    {
        return query.deviceIndex;
    }
    return ~size_t(0);
}
|
||||
|
||||
/// Returns the NvPerf identifiers of the adapter behind pDevice, or a value-initialized
/// DeviceIdentifiers if the adapter cannot be found.
inline DeviceIdentifiers D3D11GetDeviceIdentifiers(ID3D11Device* pDevice, size_t sliIndex = 0)
{
    CComPtr<IDXGIAdapter> pAdapter;
    const bool found = D3D11FindAdapterForDevice(pDevice, &pAdapter);
    if (found)
    {
        return D3DGetDeviceIdentifiers(pAdapter, sliIndex);
    }
    return {};
}
|
||||
|
||||
/// Returns the clock status of the NvPerf device backing pDevice (SLI index 0).
inline NVPW_Device_ClockStatus D3D11GetDeviceClockState(ID3D11Device* pDevice)
{
    return GetDeviceClockState(D3D11GetNvperfDeviceIndex(pDevice));
}
|
||||
|
||||
/// Applies clockSetting to the NvPerf device backing pDevice (SLI index 0).
inline bool D3D11SetDeviceClockState(ID3D11Device* pDevice, NVPW_Device_ClockSetting clockSetting)
{
    size_t deviceIdx = D3D11GetNvperfDeviceIndex(pDevice);
    const bool applied = SetDeviceClockState(deviceIdx, clockSetting);
    return applied;
}
|
||||
|
||||
/// Applies a setting derived from clockStatus to the NvPerf device backing pDevice (SLI index 0).
inline bool D3D11SetDeviceClockState(ID3D11Device* pDevice, NVPW_Device_ClockStatus clockStatus)
{
    size_t deviceIdx = D3D11GetNvperfDeviceIndex(pDevice);
    const bool applied = SetDeviceClockState(deviceIdx, clockStatus);
    return applied;
}
|
||||
|
||||
inline size_t D3D11CalculateMetricsEvaluatorScratchBufferSize(const char* pChipName)
|
||||
{
|
||||
NVPW_D3D11_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParams = { NVPW_D3D11_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE };
|
||||
calculateScratchBufferSizeParams.pChipName = pChipName;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D11_MetricsEvaluator_CalculateScratchBufferSize(&calculateScratchBufferSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(20, "NVPW_D3D11_MetricsEvaluator_CalculateScratchBufferSize failed\n");
|
||||
return 0;
|
||||
}
|
||||
return calculateScratchBufferSizeParams.scratchBufferSize;
|
||||
}
|
||||
|
||||
/// Initializes a D3D11 metrics evaluator for pChipName using the caller-provided scratch buffer.
/// Logs an error and returns nullptr on failure.
inline NVPW_MetricsEvaluator* D3D11CreateMetricsEvaluator(uint8_t* pScratchBuffer, size_t scratchBufferSize, const char* pChipName)
{
    NVPW_D3D11_MetricsEvaluator_Initialize_Params init = { NVPW_D3D11_MetricsEvaluator_Initialize_Params_STRUCT_SIZE };
    init.pScratchBuffer = pScratchBuffer;
    init.scratchBufferSize = scratchBufferSize;
    init.pChipName = pChipName;

    const NVPA_Status status = NVPW_D3D11_MetricsEvaluator_Initialize(&init);
    if (!status)
    {
        return init.pMetricsEvaluator;
    }
    NV_PERF_LOG_ERR(20, "NVPW_D3D11_MetricsEvaluator_Initialize failed\n");
    return nullptr;
}
|
||||
}}
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
/// Creates a raw metrics config for pChipName with activity kind NVPA_ACTIVITY_KIND_PROFILER.
/// Logs an error and returns nullptr on failure.
inline NVPA_RawMetricsConfig* D3D11CreateRawMetricsConfig(const char* pChipName)
{
    NVPW_D3D11_RawMetricsConfig_Create_Params create = { NVPW_D3D11_RawMetricsConfig_Create_Params_STRUCT_SIZE };
    create.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
    create.pChipName = pChipName;

    const NVPA_Status status = NVPW_D3D11_RawMetricsConfig_Create(&create);
    if (!status)
    {
        return create.pRawMetricsConfig;
    }
    NV_PERF_LOG_ERR(20, "NVPW_D3D11_RawMetricsConfig_Create failed\n");
    return nullptr;
}
|
||||
|
||||
/// Reports whether the GPU behind pDevice is supported by the D3D11 profiler.
///
/// Resolves the NvPerf device index, then calls NVPW_D3D11_Profiler_IsGpuSupported.
/// Every failure path logs the reason and returns false.
///
/// @param pDevice   D3D11 device to test
/// @param sliIndex  SLI index forwarded to the device-index lookup
inline bool D3D11IsGpuSupported(ID3D11Device* pDevice, size_t sliIndex = 0)
{
    const size_t deviceIndex = D3D11GetNvperfDeviceIndex(pDevice, sliIndex);
    if (deviceIndex == ~size_t(0))
    {
        NV_PERF_LOG_ERR(10, "D3D11GetNvperfDeviceIndex failed on %ls\n", D3D11GetDeviceName(pDevice).c_str());
        return false;
    }

    NVPW_D3D11_Profiler_IsGpuSupported_Params params = { NVPW_D3D11_Profiler_IsGpuSupported_Params_STRUCT_SIZE };
    params.deviceIndex = deviceIndex;
    NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_IsGpuSupported(&params);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_D3D11_Profiler_IsGpuSupported failed on %ls\n", D3D11GetDeviceName(pDevice).c_str());
        return false;
    }

    if (!params.isSupported)
    {
        NV_PERF_LOG_ERR(10, "%ls is not supported\n", D3D11GetDeviceName(pDevice).c_str());
        if (params.gpuArchitectureSupportLevel != NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_SUPPORTED)
        {
            const DeviceIdentifiers deviceIdentifiers = D3D11GetDeviceIdentifiers(pDevice, sliIndex);
            // pChipName is null when the identifier lookup fails; never hand null to %s.
            const char* pChipName = deviceIdentifiers.pChipName ? deviceIdentifiers.pChipName : "Unknown";
            NV_PERF_LOG_ERR(10, "Unsupported GPU architecture %s\n", pChipName);
        }
        if (params.sliSupportLevel == NVPW_SLI_SUPPORT_LEVEL_UNSUPPORTED)
        {
            NV_PERF_LOG_ERR(10, "Devices in SLI configuration are not supported.\n");
        }
        return false;
    }

    return true;
}
|
||||
|
||||
/// Device-context overload: fetches the owning device and forwards to the ID3D11Device overload.
/// Returns false if no device is returned.
inline bool D3D11IsGpuSupported(ID3D11DeviceContext* pDeviceContext, size_t sliIndex = 0)
{
    CComPtr<ID3D11Device> pOwningDevice;
    pDeviceContext->GetDevice(&pOwningDevice);
    if (!pOwningDevice)
    {
        return false;
    }
    return D3D11IsGpuSupported(pOwningDevice, sliIndex);
}
|
||||
|
||||
}}}
|
||||
351
ruins64k/tools/NvPerfUtility/include/NvPerfD3D12.h
Normal file
351
ruins64k/tools/NvPerfUtility/include/NvPerfD3D12.h
Normal file
@@ -0,0 +1,351 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "NvPerfInit.h"
|
||||
#include "NvPerfDeviceProperties.h"
|
||||
#include "NvPerfD3D.h"
|
||||
#include "nvperf_d3d12_host.h"
|
||||
#include "nvperf_d3d12_target.h"
|
||||
#include <D3D12.h>
|
||||
#include <atlbase.h>
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
//
|
||||
// D3D Only Utilities
|
||||
//
|
||||
|
||||
/// Finds the DXGI adapter whose LUID matches pDevice->GetAdapterLuid().
///
/// @param pDevice        D3D12 device to match
/// @param ppDXGIAdapter  receives an owning reference to the matching adapter; written only on success
/// @param pAdapterDesc   optional; receives the matching adapter's DXGI_ADAPTER_DESC1
/// @return true if a matching adapter was found; false if the factory cannot be created
///         or enumeration ends without a match.
inline bool D3D12FindAdapterForDevice(ID3D12Device* pDevice, IDXGIAdapter1** ppDXGIAdapter, DXGI_ADAPTER_DESC1* pAdapterDesc = nullptr)
{
    const LUID deviceLuid = pDevice->GetAdapterLuid();

    CComPtr<IDXGIFactory1> pDXGIFactory;
    HRESULT hr = CreateDXGIFactory1(IID_PPV_ARGS(&pDXGIFactory));
    if (FAILED(hr))
    {
        return false;
    }

    for (UINT adapterIndex = 0; ; ++adapterIndex)
    {
        CComPtr<IDXGIAdapter1> pDXGIAdapter;
        hr = pDXGIFactory->EnumAdapters1(adapterIndex, &pDXGIAdapter);
        if (FAILED(hr))
        {
            break; // the intended loop termination
        }

        // Reuse the outer `hr`; the previous code declared a second one that shadowed it.
        DXGI_ADAPTER_DESC1 adapterDesc = {};
        hr = pDXGIAdapter->GetDesc1(&adapterDesc);
        if (FAILED(hr))
        {
            continue; // skip adapters whose description cannot be read
        }

        // Field-wise LUID comparison; avoids relying on memcmp being declared.
        const bool sameLuid =
            adapterDesc.AdapterLuid.LowPart == deviceLuid.LowPart &&
            adapterDesc.AdapterLuid.HighPart == deviceLuid.HighPart;
        if (sameLuid)
        {
            *ppDXGIAdapter = pDXGIAdapter.Detach();
            if (pAdapterDesc)
            {
                *pAdapterDesc = adapterDesc;
            }
            return true;
        }
    }

    return false;
}
|
||||
|
||||
inline std::wstring D3D12GetDeviceName(ID3D12Device* pDevice)
|
||||
{
|
||||
DXGI_ADAPTER_DESC1 adapterDesc = {};
|
||||
CComPtr<IDXGIAdapter1> pDXGIAdapter;
|
||||
if (!D3D12FindAdapterForDevice(pDevice, &pDXGIAdapter, &adapterDesc))
|
||||
{
|
||||
return L"";
|
||||
}
|
||||
|
||||
return adapterDesc.Description;
|
||||
}
|
||||
|
||||
|
||||
/// Returns true when the adapter behind pDevice is an NVIDIA adapter; false if the adapter cannot be found.
inline bool D3D12IsNvidiaDevice(ID3D12Device* pDevice)
{
    CComPtr<IDXGIAdapter1> pAdapter;
    const bool found = D3D12FindAdapterForDevice(pDevice, &pAdapter);
    if (!found)
    {
        return false;
    }
    return DxgiIsNvidiaDevice(pAdapter);
}
|
||||
|
||||
/// Command-queue overload: fetches the owning device and forwards to the ID3D12Device overload.
/// Returns false if the device cannot be obtained.
inline bool D3D12IsNvidiaDevice(ID3D12CommandQueue* pCommandQueue)
{
    CComPtr<ID3D12Device> pOwningDevice;
    const HRESULT result = pCommandQueue->GetDevice(IID_PPV_ARGS(&pOwningDevice));
    if (FAILED(result))
    {
        return false;
    }
    return D3D12IsNvidiaDevice(pOwningDevice);
}
|
||||
|
||||
//
|
||||
// D3D12 NvPerf Utilities
|
||||
//
|
||||
|
||||
inline bool D3D12LoadDriver()
|
||||
{
|
||||
NVPW_D3D12_LoadDriver_Params loadDriverParams = { NVPW_D3D12_LoadDriver_Params_STRUCT_SIZE };
|
||||
NVPA_Status nvpaStatus = NVPW_D3D12_LoadDriver(&loadDriverParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_D3D12_LoadDriver failed\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// Maps a D3D12 device (and SLI index) to an NvPerf device index via NVPW_D3D12_Device_GetDeviceIndex.
/// Returns ~size_t(0) when the lookup fails.
inline size_t D3D12GetNvperfDeviceIndex(ID3D12Device* pDevice, size_t sliIndex = 0)
{
    NVPW_D3D12_Device_GetDeviceIndex_Params query = { NVPW_D3D12_Device_GetDeviceIndex_Params_STRUCT_SIZE };
    query.pDevice = pDevice;
    query.sliIndex = sliIndex;

    const NVPA_Status status = NVPW_D3D12_Device_GetDeviceIndex(&query);
    if (!status)
    {
        return query.deviceIndex;
    }
    return ~size_t(0);
}
|
||||
|
||||
/// Returns the NvPerf identifiers of the adapter behind pDevice, or a value-initialized
/// DeviceIdentifiers if the adapter cannot be found.
inline DeviceIdentifiers D3D12GetDeviceIdentifiers(ID3D12Device* pDevice, size_t sliIndex = 0)
{
    CComPtr<IDXGIAdapter1> pAdapter;
    const bool found = D3D12FindAdapterForDevice(pDevice, &pAdapter);
    if (found)
    {
        return D3DGetDeviceIdentifiers(pAdapter, sliIndex);
    }
    return {};
}
|
||||
|
||||
/// Returns the clock status of the NvPerf device backing pDevice (SLI index 0).
inline NVPW_Device_ClockStatus D3D12GetDeviceClockState(ID3D12Device* pDevice)
{
    return GetDeviceClockState(D3D12GetNvperfDeviceIndex(pDevice));
}
|
||||
|
||||
/// Applies clockSetting to the NvPerf device backing pDevice (SLI index 0).
inline bool D3D12SetDeviceClockState(ID3D12Device* pDevice, NVPW_Device_ClockSetting clockSetting)
{
    size_t deviceIdx = D3D12GetNvperfDeviceIndex(pDevice);
    const bool applied = SetDeviceClockState(deviceIdx, clockSetting);
    return applied;
}
|
||||
|
||||
/// Applies a setting derived from clockStatus to the NvPerf device backing pDevice (SLI index 0).
inline bool D3D12SetDeviceClockState(ID3D12Device* pDevice, NVPW_Device_ClockStatus clockStatus)
{
    size_t deviceIdx = D3D12GetNvperfDeviceIndex(pDevice);
    const bool applied = SetDeviceClockState(deviceIdx, clockStatus);
    return applied;
}
|
||||
|
||||
inline size_t D3D12CalculateMetricsEvaluatorScratchBufferSize(const char* pChipName)
|
||||
{
|
||||
NVPW_D3D12_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParams = { NVPW_D3D12_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE };
|
||||
calculateScratchBufferSizeParams.pChipName = pChipName;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D12_MetricsEvaluator_CalculateScratchBufferSize(&calculateScratchBufferSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(20, "NVPW_D3D12_MetricsEvaluator_CalculateScratchBufferSize failed\n");
|
||||
return 0;
|
||||
}
|
||||
return calculateScratchBufferSizeParams.scratchBufferSize;
|
||||
}
|
||||
|
||||
/// Initializes a D3D12 metrics evaluator for pChipName using the caller-provided scratch buffer.
/// Logs an error and returns nullptr on failure.
inline NVPW_MetricsEvaluator* D3D12CreateMetricsEvaluator(uint8_t* pScratchBuffer, size_t scratchBufferSize, const char* pChipName)
{
    NVPW_D3D12_MetricsEvaluator_Initialize_Params init = { NVPW_D3D12_MetricsEvaluator_Initialize_Params_STRUCT_SIZE };
    init.pScratchBuffer = pScratchBuffer;
    init.scratchBufferSize = scratchBufferSize;
    init.pChipName = pChipName;

    const NVPA_Status status = NVPW_D3D12_MetricsEvaluator_Initialize(&init);
    if (!status)
    {
        return init.pMetricsEvaluator;
    }
    NV_PERF_LOG_ERR(20, "NVPW_D3D12_MetricsEvaluator_Initialize failed\n");
    return nullptr;
}
|
||||
|
||||
}}
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
/// Creates a raw metrics config for pChipName with activity kind NVPA_ACTIVITY_KIND_PROFILER.
/// Logs an error and returns nullptr on failure.
inline NVPA_RawMetricsConfig* D3D12CreateRawMetricsConfig(const char* pChipName)
{
    NVPW_D3D12_RawMetricsConfig_Create_Params create = { NVPW_D3D12_RawMetricsConfig_Create_Params_STRUCT_SIZE };
    create.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
    create.pChipName = pChipName;

    const NVPA_Status status = NVPW_D3D12_RawMetricsConfig_Create(&create);
    if (!status)
    {
        return create.pRawMetricsConfig;
    }
    NV_PERF_LOG_ERR(20, "NVPW_D3D12_RawMetricsConfig_Create failed\n");
    return nullptr;
}
|
||||
|
||||
/// Reports whether the GPU behind pDevice is supported by the D3D12 profiler.
///
/// Resolves the NvPerf device index, then calls NVPW_D3D12_Profiler_IsGpuSupported.
/// Every failure path logs the reason and returns false.
///
/// @param pDevice   D3D12 device to test
/// @param sliIndex  SLI index forwarded to the device-index lookup
inline bool D3D12IsGpuSupported(ID3D12Device* pDevice, size_t sliIndex = 0)
{
    const size_t deviceIndex = D3D12GetNvperfDeviceIndex(pDevice, sliIndex);
    if (deviceIndex == ~size_t(0))
    {
        NV_PERF_LOG_ERR(10, "D3D12GetNvperfDeviceIndex failed on %ls\n", D3D12GetDeviceName(pDevice).c_str());
        return false;
    }

    NVPW_D3D12_Profiler_IsGpuSupported_Params params = { NVPW_D3D12_Profiler_IsGpuSupported_Params_STRUCT_SIZE };
    params.deviceIndex = deviceIndex;
    NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_IsGpuSupported(&params);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_D3D12_Profiler_IsGpuSupported failed on %ls\n", D3D12GetDeviceName(pDevice).c_str());
        return false;
    }

    if (!params.isSupported)
    {
        NV_PERF_LOG_ERR(10, "%ls is not supported\n", D3D12GetDeviceName(pDevice).c_str());
        if (params.gpuArchitectureSupportLevel != NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_SUPPORTED)
        {
            const DeviceIdentifiers deviceIdentifiers = D3D12GetDeviceIdentifiers(pDevice, sliIndex);
            // pChipName is null when the identifier lookup fails; never hand null to %s.
            const char* pChipName = deviceIdentifiers.pChipName ? deviceIdentifiers.pChipName : "Unknown";
            NV_PERF_LOG_ERR(10, "Unsupported GPU architecture %s\n", pChipName);
        }
        if (params.sliSupportLevel == NVPW_SLI_SUPPORT_LEVEL_UNSUPPORTED)
        {
            NV_PERF_LOG_ERR(10, "Devices in SLI configuration are not supported.\n");
        }
        return false;
    }

    return true;
}
|
||||
|
||||
/// Command-queue overload: fetches the owning device and forwards to the ID3D12Device overload.
/// Returns false if the device cannot be obtained.
inline bool D3D12IsGpuSupported(ID3D12CommandQueue* pCommandQueue, size_t sliIndex = 0)
{
    CComPtr<ID3D12Device> pOwningDevice;
    const HRESULT result = pCommandQueue->GetDevice(IID_PPV_ARGS(&pOwningDevice));
    if (FAILED(result))
    {
        return false;
    }
    return D3D12IsGpuSupported(pOwningDevice, sliIndex);
}
|
||||
|
||||
|
||||
/// Records a profiler range push named pRangeName on pCommandList.
/// Logs an error and returns false if NVPW_D3D12_Profiler_CommandList_PushRange fails.
inline bool D3D12PushRange(ID3D12GraphicsCommandList* pCommandList, const char* pRangeName)
{
    NVPW_D3D12_Profiler_CommandList_PushRange_Params push = { NVPW_D3D12_Profiler_CommandList_PushRange_Params_STRUCT_SIZE };
    push.pCommandList = pCommandList;
    push.pRangeName = pRangeName;
    // NOTE(review): a length of 0 presumably means "pRangeName is NUL-terminated" — confirm against the SDK docs.
    push.rangeNameLength = 0;

    const NVPA_Status status = NVPW_D3D12_Profiler_CommandList_PushRange(&push);
    if (!status)
    {
        return true;
    }
    NV_PERF_LOG_ERR(50, "NVPW_D3D12_Profiler_CommandList_PushRange failed\n");
    return false;
}
|
||||
|
||||
/// Records a profiler range pop on pCommandList.
/// Logs an error and returns false if NVPW_D3D12_Profiler_CommandList_PopRange fails.
inline bool D3D12PopRange(ID3D12GraphicsCommandList* pCommandList)
{
    NVPW_D3D12_Profiler_CommandList_PopRange_Params pop = { NVPW_D3D12_Profiler_CommandList_PopRange_Params_STRUCT_SIZE };
    pop.pCommandList = pCommandList;

    const NVPA_Status status = NVPW_D3D12_Profiler_CommandList_PopRange(&pop);
    if (!status)
    {
        return true;
    }
    NV_PERF_LOG_ERR(50, "NVPW_D3D12_Profiler_CommandList_PopRange failed\n");
    return false;
}
|
||||
|
||||
/// Stand-in with the same signature as D3D12PushRange: ignores its arguments and always returns false.
inline bool D3D12PushRange_Nop(ID3D12GraphicsCommandList* pCommandList, const char* pRangeName)
{
    (void)pCommandList; // intentionally unused
    (void)pRangeName;   // intentionally unused
    return false;
}
|
||||
|
||||
/// Stand-in with the same signature as D3D12PopRange: ignores its argument and always returns false.
inline bool D3D12PopRange_Nop(ID3D12GraphicsCommandList* pCommandList)
{
    (void)pCommandList; // intentionally unused
    return false;
}
|
||||
|
||||
//
|
||||
struct D3D12RangeCommands
|
||||
{
|
||||
bool isNvidiaDevice;
|
||||
bool(*PushRange)(ID3D12GraphicsCommandList* pCommandList, const char* pRangeName);
|
||||
bool(*PopRange)(ID3D12GraphicsCommandList* pCommandList);
|
||||
|
||||
public:
|
||||
D3D12RangeCommands()
|
||||
: isNvidiaDevice(false)
|
||||
, PushRange(&D3D12PushRange_Nop)
|
||||
, PopRange(&D3D12PopRange_Nop)
|
||||
{
|
||||
}
|
||||
|
||||
void Initialize(bool isNvidiaDevice_)
|
||||
{
|
||||
isNvidiaDevice = isNvidiaDevice_;
|
||||
if (isNvidiaDevice_)
|
||||
{
|
||||
PushRange = &D3D12PushRange;
|
||||
PopRange = &D3D12PopRange;
|
||||
}
|
||||
else
|
||||
{
|
||||
PushRange = &D3D12PushRange_Nop;
|
||||
PopRange = &D3D12PopRange_Nop;
|
||||
}
|
||||
}
|
||||
|
||||
void Initialize(IDXGIAdapter* pDXGIAdapter)
|
||||
{
|
||||
const bool isNvidiaDevice_ = DxgiIsNvidiaDevice(pDXGIAdapter);
|
||||
return Initialize(isNvidiaDevice_);
|
||||
}
|
||||
|
||||
void Initialize(ID3D12Device* pDevice)
|
||||
{
|
||||
const bool isNvidiaDevice_ = D3D12IsNvidiaDevice(pDevice);
|
||||
return Initialize(isNvidiaDevice_);
|
||||
}
|
||||
};
|
||||
|
||||
}}}
|
||||
125
ruins64k/tools/NvPerfUtility/include/NvPerfDeviceProperties.h
Normal file
125
ruins64k/tools/NvPerfUtility/include/NvPerfDeviceProperties.h
Normal file
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "nvperf_host.h"
|
||||
#include "nvperf_target.h"
|
||||
#include "NvPerfInit.h"
|
||||
#include <vector>
|
||||
|
||||
namespace nv { namespace perf {
|
||||
// Vendor id constant used to recognise NVIDIA devices.
enum
{
    NVIDIA_VENDOR_ID = 0x10DE
};
|
||||
|
||||
// Pair of names describing one device, as filled in by GetDeviceIdentifiers().
// Both pointers are null in a value-initialized instance.
struct DeviceIdentifiers
{
    const char* pDeviceName; // device name string
    const char* pChipName;   // chip name string
};
|
||||
|
||||
// Queries the device and chip names for deviceIndex via NVPW_Device_GetNames.
// Returns a value-initialized DeviceIdentifiers (both pointers null) and logs
// an error when the query fails.
inline DeviceIdentifiers GetDeviceIdentifiers(size_t deviceIndex)
{
    DeviceIdentifiers result = {};

    NVPW_Device_GetNames_Params params = { NVPW_Device_GetNames_Params_STRUCT_SIZE };
    params.deviceIndex = deviceIndex;

    const NVPA_Status status = NVPW_Device_GetNames(&params);
    if (status)
    {
        NV_PERF_LOG_ERR(10, "NVPW_Device_GetNames failed\n");
        return result;
    }

    result.pDeviceName = params.pDeviceName;
    result.pChipName = params.pChipName;
    return result;
}
|
||||
|
||||
// Returns the current clock status of the device at nvperfDeviceIndex, or
// NVPW_DEVICE_CLOCK_STATUS_UNKNOWN (after logging an error) when
// NVPW_Device_GetClockStatus fails.
inline NVPW_Device_ClockStatus GetDeviceClockState(size_t nvperfDeviceIndex)
{
    NVPW_Device_GetClockStatus_Params getClockStatusParams = { NVPW_Device_GetClockStatus_Params_STRUCT_SIZE };
    getClockStatusParams.deviceIndex = nvperfDeviceIndex;
    NVPA_Status nvpaStatus = NVPW_Device_GetClockStatus(&getClockStatusParams);
    if (nvpaStatus)
    {
        // GetDeviceIdentifiers() yields null names when its own query fails;
        // never hand a null pointer to "%s".
        const char* const pDeviceName = GetDeviceIdentifiers(nvperfDeviceIndex).pDeviceName;
        NV_PERF_LOG_ERR(10, "NVPW_Device_GetClockStatus() failed on %s\n", pDeviceName ? pDeviceName : "<unknown device>");
        return NVPW_DEVICE_CLOCK_STATUS_UNKNOWN;
    }
    return getClockStatusParams.clockStatus;
}
|
||||
|
||||
// Maps a clock setting to the spelling of its enumerator; values without a
// dedicated name map to "Unknown NVPW_Device_ClockSetting".
inline const char* ToCString(NVPW_Device_ClockSetting clockSetting)
{
    if (clockSetting == NVPW_DEVICE_CLOCK_SETTING_INVALID)
    {
        return "NVPW_DEVICE_CLOCK_SETTING_INVALID";
    }
    if (clockSetting == NVPW_DEVICE_CLOCK_SETTING_DEFAULT)
    {
        return "NVPW_DEVICE_CLOCK_SETTING_DEFAULT";
    }
    if (clockSetting == NVPW_DEVICE_CLOCK_SETTING_LOCK_TO_RATED_TDP)
    {
        return "NVPW_DEVICE_CLOCK_SETTING_LOCK_TO_RATED_TDP";
    }
    return "Unknown NVPW_Device_ClockSetting";
}
|
||||
|
||||
inline bool SetDeviceClockState(size_t nvperfDeviceIndex, NVPW_Device_ClockSetting clockSetting)
|
||||
{
|
||||
NVPW_Device_SetClockSetting_Params setClockSettingParams = { NVPW_Device_SetClockSetting_Params_STRUCT_SIZE };
|
||||
setClockSettingParams.deviceIndex = nvperfDeviceIndex;
|
||||
setClockSettingParams.clockSetting = clockSetting;
|
||||
NVPA_Status nvpaStatus = NVPW_Device_SetClockSetting(&setClockSettingParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_Device_SetClockSetting( %s ) failed on %s\n", ToCString(clockSetting), GetDeviceIdentifiers(nvperfDeviceIndex).pDeviceName);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Maps a clock status to the spelling of its enumerator; values without a
// dedicated name map to "Unknown NVPW_Device_ClockStatus".
inline const char* ToCString(NVPW_Device_ClockStatus clockStatus)
{
    if (clockStatus == NVPW_DEVICE_CLOCK_STATUS_UNKNOWN)
    {
        return "NVPW_DEVICE_CLOCK_STATUS_UNKNOWN";
    }
    if (clockStatus == NVPW_DEVICE_CLOCK_STATUS_LOCKED_TO_RATED_TDP)
    {
        return "NVPW_DEVICE_CLOCK_STATUS_LOCKED_TO_RATED_TDP";
    }
    if (clockStatus == NVPW_DEVICE_CLOCK_STATUS_BOOST_ENABLED)
    {
        return "NVPW_DEVICE_CLOCK_STATUS_BOOST_ENABLED";
    }
    if (clockStatus == NVPW_DEVICE_CLOCK_STATUS_BOOST_DISABLED)
    {
        return "NVPW_DEVICE_CLOCK_STATUS_BOOST_DISABLED";
    }
    if (clockStatus == NVPW_DEVICE_CLOCK_STATUS__COUNT)
    {
        return "NVPW_DEVICE_CLOCK_STATUS__COUNT";
    }
    return "Unknown NVPW_Device_ClockStatus";
}
|
||||
|
||||
inline bool SetDeviceClockState(size_t nvperfDeviceIndex, NVPW_Device_ClockStatus clockStatus)
|
||||
{
|
||||
// convert to NVPW_Device_ClockSetting
|
||||
NVPW_Device_ClockSetting clockSetting = NVPW_DEVICE_CLOCK_SETTING_INVALID;
|
||||
switch (clockStatus)
|
||||
{
|
||||
case NVPW_DEVICE_CLOCK_STATUS_UNKNOWN:
|
||||
case NVPW_DEVICE_CLOCK_STATUS_BOOST_ENABLED:
|
||||
case NVPW_DEVICE_CLOCK_STATUS_BOOST_DISABLED:
|
||||
// default driver setting (normally unlocked and not boosted, but could be unlocked boosted, or locked to rated TDP)
|
||||
clockSetting = NVPW_DEVICE_CLOCK_SETTING_DEFAULT;
|
||||
break;
|
||||
case NVPW_DEVICE_CLOCK_STATUS_LOCKED_TO_RATED_TDP:
|
||||
clockSetting = NVPW_DEVICE_CLOCK_SETTING_LOCK_TO_RATED_TDP;
|
||||
break;
|
||||
default:
|
||||
NV_PERF_LOG_ERR(10, "Invalid clockStatus: %s\n", ToCString(clockStatus));
|
||||
return false;
|
||||
}
|
||||
return SetDeviceClockState(nvperfDeviceIndex, clockSetting);
|
||||
}
|
||||
}}
|
||||
432
ruins64k/tools/NvPerfUtility/include/NvPerfInit.h
Normal file
432
ruins64k/tools/NvPerfUtility/include/NvPerfInit.h
Normal file
@@ -0,0 +1,432 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <string>
|
||||
#include <cassert>
|
||||
#include "nvperf_host.h"
|
||||
#include "nvperf_target.h"
|
||||
#if defined(_WIN32)
|
||||
#include <Windows.h>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
// Writes "HH:MM:SS:mmm" (zero padded) into pBuf, truncating to `size` bytes.
// Returns snprintf's result: the length the full text requires, excluding the
// terminator.
inline int FormatTimeCommon(char* pBuf, size_t size, uint32_t hour, uint32_t minute, uint32_t second, uint32_t milliSecond)
{
    return snprintf(pBuf, size, "%02u:%02u:%02u:%03u", hour, minute, second, milliSecond);
}
|
||||
|
||||
// Writes "YYYY-Mon-DD" into pBuf, truncating to `size` bytes.
// `month` is 1-based; any value outside 1..12 is rendered as "???".
// Returns snprintf's result: the length the full text requires, excluding the
// terminator.
inline int FormatDateCommon(char* pBuf, size_t size, uint32_t year, uint32_t month, uint32_t day)
{
    static const char* s_monthNames[12] = {
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
    };

    const bool monthInRange = (month >= 1) && (month <= 12);
    const char* const pMonthName = monthInRange ? s_monthNames[month - 1] : "???";

    return snprintf(pBuf, size, "%4u-%s-%02u", year, pMonthName, day);
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
// Windows: log timestamps are stored as a FILETIME (filled in by GetTimeStamp below).
typedef struct _FILETIME LogTimeStamp;
|
||||
|
||||
inline void UserLogImplPlatform(const char* pMessage)
|
||||
{
|
||||
OutputDebugStringA(pMessage);
|
||||
}
|
||||
|
||||
// Stores the current system time, as reported by GetSystemTimeAsFileTime,
// into *pTimestamp.
inline void GetTimeStamp(LogTimeStamp* pTimestamp)
{
    GetSystemTimeAsFileTime(pTimestamp);
}
|
||||
|
||||
// Formats the local time-of-day of *pTimestamp as "HH:MM:SS:mmm" into pBuf.
// If the time-zone conversion fails the UTC time is used; if the FILETIME
// itself cannot be converted, an all-zero time is formatted.
// Returns the length reported by FormatTimeCommon, or 0 if formatting failed.
inline size_t FormatTime(LogTimeStamp* pTimestamp, char* pBuf, size_t size)
{
    // Zero-initialized so a failed conversion never formats indeterminate values.
    SYSTEMTIME utc = {};
    SYSTEMTIME stime = {};
    if (!FileTimeToSystemTime(pTimestamp, &utc))
    {
        utc = SYSTEMTIME();
    }
    if (!SystemTimeToTzSpecificLocalTime(NULL, &utc, &stime))
    {
        stime = utc;
    }
    const int written = FormatTimeCommon(pBuf, size, (uint32_t)stime.wHour, (uint32_t)stime.wMinute, (uint32_t)stime.wSecond, (uint32_t)stime.wMilliseconds);
    // A negative snprintf result must not wrap around to a huge size_t.
    return (written < 0) ? 0 : (size_t)written;
}
|
||||
|
||||
// Formats the local calendar date of *pTimestamp as "YYYY-Mon-DD" into pBuf.
// If the time-zone conversion fails the UTC date is used; if the FILETIME
// itself cannot be converted, an all-zero date is formatted.
// Returns the length reported by FormatDateCommon, or 0 if formatting failed.
inline size_t FormatDate(LogTimeStamp* pTimestamp, char* pBuf, size_t size)
{
    // Zero-initialized so a failed conversion never formats indeterminate values.
    SYSTEMTIME utc = {};
    SYSTEMTIME stime = {};
    if (!FileTimeToSystemTime(pTimestamp, &utc))
    {
        utc = SYSTEMTIME();
    }
    if (!SystemTimeToTzSpecificLocalTime(NULL, &utc, &stime))
    {
        stime = utc;
    }
    const int written = FormatDateCommon(pBuf, size, (uint32_t)stime.wYear, (uint32_t)stime.wMonth, (uint32_t)stime.wDay);
    // A negative snprintf result must not wrap around to a huge size_t.
    return (written < 0) ? 0 : (size_t)written;
}
|
||||
#else // !defined(_WIN32)
|
||||
// Non-Windows: log timestamps are stored as a struct timeval (filled in by GetTimeStamp below).
typedef struct timeval LogTimeStamp;
|
||||
|
||||
// Non-Windows platform sink: there is no platform debug channel here, so the
// message is intentionally discarded.
inline void UserLogImplPlatform(const char* pMessage)
{
    (void)pMessage; // explicitly unused
}
|
||||
|
||||
// Stores the current time, as reported by gettimeofday, into *pTimestamp.
// No time-zone argument is passed.
inline void GetTimeStamp(LogTimeStamp* pTimestamp)
{
    gettimeofday(pTimestamp, nullptr);
}
|
||||
|
||||
// Formats the local time-of-day of *pTimestamp as "HH:MM:SS:mmm" into pBuf.
// Uses the re-entrant localtime_r so concurrent loggers do not share the
// static buffer behind localtime(); if the conversion fails, an all-zero
// broken-down time is formatted instead of dereferencing a null pointer.
// Returns the length reported by FormatTimeCommon, or 0 if formatting failed.
inline size_t FormatTime(LogTimeStamp* pTimestamp, char* pBuf, size_t size)
{
    // Copy instead of taking &tv_sec: its type is not required to be time_t.
    const time_t seconds = (time_t)pTimestamp->tv_sec;
    struct tm localTm = {};
    if (!localtime_r(&seconds, &localTm))
    {
        localTm = tm();
    }
    const uint32_t milliseconds = (uint32_t)(pTimestamp->tv_usec / 1000);
    const int written = FormatTimeCommon(pBuf, size, (uint32_t)localTm.tm_hour, (uint32_t)localTm.tm_min, (uint32_t)localTm.tm_sec, milliseconds);
    // A negative snprintf result must not wrap around to a huge size_t.
    return (written < 0) ? 0 : (size_t)written;
}
|
||||
|
||||
// Formats the local calendar date of *pTimestamp as "YYYY-Mon-DD" into pBuf.
// Uses the re-entrant localtime_r so concurrent loggers do not share the
// static buffer behind localtime(); if the conversion fails, an all-zero
// broken-down time is formatted instead of dereferencing a null pointer.
// Returns the length reported by FormatDateCommon, or 0 if formatting failed.
inline size_t FormatDate(LogTimeStamp* pTimestamp, char* pBuf, size_t size)
{
    // Copy instead of taking &tv_sec: its type is not required to be time_t.
    const time_t seconds = (time_t)pTimestamp->tv_sec;
    struct tm localTm = {};
    if (!localtime_r(&seconds, &localTm))
    {
        localTm = tm();
    }
    // struct tm counts years from 1900 and months from 0.
    const int written = FormatDateCommon(pBuf, size, (uint32_t)localTm.tm_year + 1900, (uint32_t)localTm.tm_mon + 1, (uint32_t)localTm.tm_mday);
    // A negative snprintf result must not wrap around to a huge size_t.
    return (written < 0) ? 0 : (size_t)written;
}
|
||||
#endif // defined(_WIN32)
|
||||
|
||||
}}
|
||||
|
||||
// Logging entry points. Each macro forwards to ::nv::perf::UserLog with the
// matching severity and the name of the calling function (__FUNCTION__).
// A macro that is already defined when this header is included is left alone.
// The severity is fully qualified so the macros also expand correctly in code
// that lives outside namespace nv::perf.
#ifndef NV_PERF_LOG_INF
#define NV_PERF_LOG_INF(level_, ...) ::nv::perf::UserLog(::nv::perf::LogSeverity::Inf, level_, __FUNCTION__, __VA_ARGS__)
#endif

#ifndef NV_PERF_LOG_WRN
#define NV_PERF_LOG_WRN(level_, ...) ::nv::perf::UserLog(::nv::perf::LogSeverity::Wrn, level_, __FUNCTION__, __VA_ARGS__)
#endif

#ifndef NV_PERF_LOG_ERR
#define NV_PERF_LOG_ERR(level_, ...) ::nv::perf::UserLog(::nv::perf::LogSeverity::Err, level_, __FUNCTION__, __VA_ARGS__)
#endif
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
// Severity of a log record. The enumerators double as indices into
// per-severity tables; COUNT is the number of real severities.
enum class LogSeverity
{
    Inf = 0, // informational
    Wrn = 1, // warning
    Err = 2, // error
    COUNT = 3
};
|
||||
|
||||
struct LogSettings
|
||||
{
|
||||
uint32_t volumeLevels[(unsigned)LogSeverity::COUNT] = { 50, 50, 50 };
|
||||
|
||||
#if defined(_WIN32)
|
||||
bool writePlatform = true;
|
||||
#else
|
||||
bool writePlatform = false;
|
||||
#endif
|
||||
bool writeStderr = true;
|
||||
FILE* writeFileFD = nullptr;
|
||||
bool appendToFile = true;
|
||||
LogSeverity flushFileSeverity = LogSeverity::Err;
|
||||
|
||||
bool logDate = true;
|
||||
bool logTime = true;
|
||||
|
||||
LogSettings()
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
{
|
||||
const char* const pEnvValue = getenv("NV_PERF_LOG_ENABLE_PLATFORM");
|
||||
if (pEnvValue)
|
||||
{
|
||||
char* pEnd = nullptr;
|
||||
writePlatform = !!strtol(pEnvValue, &pEnd, 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
{
|
||||
const char* const pEnvValue = getenv("NV_PERF_LOG_ENABLE_STDERR");
|
||||
if (pEnvValue)
|
||||
{
|
||||
char* pEnd = nullptr;
|
||||
writeStderr = !!strtol(pEnvValue, &pEnd, 0);
|
||||
}
|
||||
}
|
||||
{
|
||||
const char* const pEnvValue = getenv("NV_PERF_LOG_ENABLE_FILE");
|
||||
if (pEnvValue)
|
||||
{
|
||||
FILE* fp = fopen(pEnvValue, appendToFile ? "a" : "w");
|
||||
assert(fp);
|
||||
writeFileFD = fp;
|
||||
}
|
||||
}
|
||||
{
|
||||
const char* const pEnvValue = getenv("NV_PERF_LOG_FILE_FLUSH_SEVERITY");
|
||||
if (pEnvValue)
|
||||
{
|
||||
char* pEnd = nullptr;
|
||||
int severity = strtol(pEnvValue, &pEnd, 0);
|
||||
if (0 <= severity && severity < (int)LogSeverity::COUNT)
|
||||
{
|
||||
flushFileSeverity = (LogSeverity)severity;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~LogSettings()
|
||||
{
|
||||
if (writeFileFD)
|
||||
{
|
||||
fclose(writeFileFD);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Returns the single LogSettings instance, a function-local static that is
// constructed on first call.
inline LogSettings* GetLogSettingsStorage_()
{
    static LogSettings s_logSettings;
    LogSettings* const pStorage = &s_logSettings;
    return pStorage;
}
|
||||
|
||||
// Returns the volume level configured for `severity`, or 0 when `severity`
// is not one of the real severities (i.e. not below LogSeverity::COUNT).
inline uint32_t GetLogVolumeLevel(LogSeverity severity)
{
    LogSettings* pSettings = GetLogSettingsStorage_();
    // Bound by the enum itself rather than a literal so the check tracks the table size.
    if ((uint32_t)severity < (uint32_t)LogSeverity::COUNT)
    {
        return pSettings->volumeLevels[(uint32_t)severity];
    }
    return 0;
}
|
||||
|
||||
// Higher values produce more log output. 0 <= volumeLevel <= 100
|
||||
// Technically it's more like a noise floor (all messages below this level are treated as noise and discarded).
|
||||
inline void SetLogVolumeLevel(LogSeverity severity, uint32_t volumeLevel)
|
||||
{
|
||||
LogSettings* pSettings = GetLogSettingsStorage_();
|
||||
if ((uint32_t)severity < 3)
|
||||
{
|
||||
pSettings->volumeLevels[(uint32_t)severity] = volumeLevel;
|
||||
}
|
||||
}
|
||||
|
||||
inline void SetLogAppendToFile(bool enable)
|
||||
{
|
||||
LogSettings* pSettings = GetLogSettingsStorage_();
|
||||
pSettings->appendToFile = enable;
|
||||
}
|
||||
|
||||
// Sets the minimum severity at which UserLog flushes the log file after a
// write. Values outside [0, LogSeverity::COUNT) are ignored.
inline void SetLogFlushSeverity(LogSeverity severity)
{
    LogSettings* const pStorage = GetLogSettingsStorage_();

    const int severityIndex = (int)severity;
    const bool isValid = (severityIndex >= 0) && (severityIndex < (int)LogSeverity::COUNT);
    if (!isValid)
    {
        return;
    }

    pStorage->flushFileSeverity = severity;
}
|
||||
|
||||
inline void SetLogDate(bool enable)
|
||||
{
|
||||
LogSettings* pSettings = GetLogSettingsStorage_();
|
||||
pSettings->logDate = enable;
|
||||
}
|
||||
|
||||
inline void SetLogTime(bool enable)
|
||||
{
|
||||
LogSettings* pSettings = GetLogSettingsStorage_();
|
||||
pSettings->logTime = enable;
|
||||
}
|
||||
|
||||
inline bool UserLogEnablePlatform(bool enable)
|
||||
{
|
||||
LogSettings* pSettings = GetLogSettingsStorage_();
|
||||
pSettings->writePlatform = enable;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool UserLogEnableStderr(bool enable)
|
||||
{
|
||||
LogSettings* pSettings = GetLogSettingsStorage_();
|
||||
pSettings->writeStderr = enable;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool UserLogEnableFile(const char* filename)
|
||||
{
|
||||
LogSettings* pSettings = GetLogSettingsStorage_();
|
||||
if (filename)
|
||||
{
|
||||
FILE* fp = fopen(filename, pSettings->appendToFile ? "a" : "w");
|
||||
if (!fp)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
pSettings->writeFileFD = fp;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// stderr sink: writes the text verbatim (no newline is appended).
inline void UserLogImplStderr(const char* pMessage)
{
    FILE* const pStream = stderr;
    fprintf(pStream, "%s", pMessage);
}
|
||||
|
||||
// File sink: writes the text verbatim to `fd` (no newline is appended, no flush).
inline void UserLogImplFile(const char* pMessage, FILE* fd)
{
    FILE* const pStream = fd;
    fprintf(pStream, "%s", pMessage);
}
|
||||
|
||||
// Flushes buffered output of the file sink.
inline void UserLogImplFileFlush(FILE* fd)
{
    FILE* const pStream = fd;
    fflush(pStream);
}
|
||||
|
||||
// Formats one log record and writes it to every enabled sink.
//
// Record layout: "NVPERF|<SEV>|[<date>|][<time>|]<function> || <message>".
//
// @param severity      severity of the record; selects the prefix and the volume threshold.
// @param level         the record is dropped when it exceeds the volume level configured for `severity`.
// @param pFunctionName name of the calling function (the NV_PERF_LOG_* macros pass __FUNCTION__).
// @param pFormat       printf-style format string followed by its arguments.
//
// The record is assembled once and handed to each sink in a single call, so a
// record is no longer emitted as several separate fragments per sink.
inline void UserLog(LogSeverity severity, uint32_t level, const char* pFunctionName, const char* pFormat, ...)
{
    const uint32_t volumeLevel = GetLogVolumeLevel(severity);
    if (volumeLevel < level)
    {
        return;
    }

    LogSettings& settings = *GetLogSettingsStorage_();
    if (!settings.writePlatform && !settings.writeStderr && !settings.writeFileFD)
    {
        return; // no sink enabled, nothing to format
    }

    // Two-pass formatting: measure first, then print into an exactly sized buffer.
    std::string message;
    {
        va_list args;
        va_start(args, pFormat);
        const int length = vsnprintf(nullptr, 0, pFormat, args);
        va_end(args);

        // vsnprintf reports failure with a negative value; treat that as an
        // empty message instead of sizing a buffer from it.
        if (length > 0)
        {
            message.resize((size_t)length + 1); // +1: vsnprintf also writes the terminator
            va_start(args, pFormat);
            vsnprintf(&message[0], message.size(), pFormat, args);
            va_end(args);
            message.resize((size_t)length); // drop the terminator again
        }
    }

    const char* pPrefix = "NVPERF|???|";
    switch (severity)
    {
        case LogSeverity::Inf: pPrefix = "NVPERF|INF|"; break;
        case LogSeverity::Wrn: pPrefix = "NVPERF|WRN|"; break;
        case LogSeverity::Err: pPrefix = "NVPERF|ERR|"; break;
        default: break;
    }

    std::string record(pPrefix);
    if (settings.logDate || settings.logTime)
    {
        LogTimeStamp time;
        GetTimeStamp(&time);
        if (settings.logDate)
        {
            char datebuf[16] = {};
            FormatDate(&time, datebuf, sizeof(datebuf));
            record += datebuf;
            record += '|';
        }
        if (settings.logTime)
        {
            char timebuf[16] = {};
            FormatTime(&time, timebuf, sizeof(timebuf));
            record += timebuf;
            record += '|';
        }
    }
    record += pFunctionName ? pFunctionName : "";
    record += " || ";
    record += message;

    if (settings.writePlatform)
    {
        UserLogImplPlatform(record.c_str());
    }
    if (settings.writeStderr)
    {
        UserLogImplStderr(record.c_str());
    }
    if (settings.writeFileFD)
    {
        UserLogImplFile(record.c_str(), settings.writeFileFD);
        if (severity >= settings.flushFileSeverity)
        {
            UserLogImplFileFlush(settings.writeFileFD);
        }
    }
}
|
||||
|
||||
inline bool InitializeNvPerf()
|
||||
{
|
||||
NVPA_Status nvpaStatus;
|
||||
|
||||
NVPW_InitializeHost_Params initializeHostParams = { NVPW_InitializeHost_Params_STRUCT_SIZE };
|
||||
nvpaStatus = NVPW_InitializeHost(&initializeHostParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_InitalizeHost failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_InitializeTarget_Params initializeTargetParams = { NVPW_InitializeTarget_Params_STRUCT_SIZE };
|
||||
nvpaStatus = NVPW_InitializeTarget(&initializeTargetParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_InitializeTarget failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}}
|
||||
@@ -0,0 +1,299 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
#include "NvPerfMetricsEvaluator.h"
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
class MetricsConfigBuilder
|
||||
{
|
||||
protected:
|
||||
NVPW_MetricsEvaluator* m_pMetricsEvaluator; // not owned
|
||||
NVPA_RawMetricsConfig* m_pRawMetricsConfig; // owned
|
||||
NVPA_CounterDataBuilder* m_pCounterDataBuilder; // owned
|
||||
bool m_configuring;
|
||||
|
||||
protected:
|
||||
void MoveAssign(MetricsConfigBuilder&& rhs)
|
||||
{
|
||||
Reset();
|
||||
m_pMetricsEvaluator = rhs.m_pMetricsEvaluator;
|
||||
m_pRawMetricsConfig = rhs.m_pRawMetricsConfig;
|
||||
m_pCounterDataBuilder = rhs.m_pCounterDataBuilder;
|
||||
m_configuring = rhs.m_configuring;
|
||||
|
||||
rhs.m_pMetricsEvaluator = nullptr;
|
||||
rhs.m_pRawMetricsConfig = nullptr;
|
||||
rhs.m_pCounterDataBuilder = nullptr;
|
||||
}
|
||||
|
||||
public:
|
||||
~MetricsConfigBuilder()
|
||||
{
|
||||
Reset();
|
||||
}
|
||||
MetricsConfigBuilder() : m_pMetricsEvaluator(nullptr), m_pRawMetricsConfig(nullptr), m_pCounterDataBuilder(nullptr), m_configuring(false)
|
||||
{
|
||||
}
|
||||
MetricsConfigBuilder(MetricsConfigBuilder&& rhs) : m_pMetricsEvaluator(nullptr), m_pRawMetricsConfig(nullptr), m_pCounterDataBuilder(nullptr), m_configuring(false)
|
||||
{
|
||||
MoveAssign(std::forward<MetricsConfigBuilder>(rhs));
|
||||
}
|
||||
MetricsConfigBuilder& operator=(MetricsConfigBuilder&& rhs)
|
||||
{
|
||||
MoveAssign(std::forward<MetricsConfigBuilder>(rhs));
|
||||
return *this;
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigParams = { NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE };
|
||||
rawMetricsConfigParams.pRawMetricsConfig = m_pRawMetricsConfig;
|
||||
NVPW_RawMetricsConfig_Destroy(&rawMetricsConfigParams);
|
||||
|
||||
NVPW_CounterDataBuilder_Destroy_Params counterDataBuilderParams = { NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE };
|
||||
counterDataBuilderParams.pCounterDataBuilder = m_pCounterDataBuilder;
|
||||
NVPW_CounterDataBuilder_Destroy(&counterDataBuilderParams);
|
||||
|
||||
m_pMetricsEvaluator = nullptr;
|
||||
m_pRawMetricsConfig = nullptr;
|
||||
m_pCounterDataBuilder = nullptr;
|
||||
}
|
||||
|
||||
bool Initialize(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPA_RawMetricsConfig* pRawMetricsConfig, const char* chipName)
|
||||
{
|
||||
NVPA_Status nvpaStatus;
|
||||
|
||||
Reset(); // destroy any existing objects
|
||||
m_pMetricsEvaluator = pMetricsEvaluator;
|
||||
m_pRawMetricsConfig = pRawMetricsConfig;
|
||||
NVPW_CounterDataBuilder_Create_Params counterDataBuilderParams = { NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE };
|
||||
counterDataBuilderParams.pChipName = chipName;
|
||||
nvpaStatus = NVPW_CounterDataBuilder_Create(&counterDataBuilderParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
m_pCounterDataBuilder = counterDataBuilderParams.pCounterDataBuilder;
|
||||
|
||||
NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = { NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE };
|
||||
beginPassGroupParams.pRawMetricsConfig = m_pRawMetricsConfig;
|
||||
nvpaStatus = NVPW_RawMetricsConfig_BeginPassGroup(&beginPassGroupParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
m_configuring = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AddMetrics(const NVPW_MetricEvalRequest* pMetricEvalRequests, size_t numMetricEvalRequests)
|
||||
{
|
||||
NVPA_Status nvpaStatus;
|
||||
NVPW_MetricsEvaluator_GetMetricRawDependencies_Params getMetricRawDependenciesParams = { NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE };
|
||||
getMetricRawDependenciesParams.pMetricsEvaluator = m_pMetricsEvaluator;
|
||||
getMetricRawDependenciesParams.pMetricEvalRequests = pMetricEvalRequests;
|
||||
getMetricRawDependenciesParams.numMetricEvalRequests = numMetricEvalRequests;
|
||||
getMetricRawDependenciesParams.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
|
||||
getMetricRawDependenciesParams.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest);
|
||||
nvpaStatus = NVPW_MetricsEvaluator_GetMetricRawDependencies(&getMetricRawDependenciesParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(50, "NVPW_MetricsEvaluator_GetMetricRawDependencies failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<const char*> rawDependencies(getMetricRawDependenciesParams.numRawDependencies);
|
||||
getMetricRawDependenciesParams.ppRawDependencies = rawDependencies.data();
|
||||
nvpaStatus = NVPW_MetricsEvaluator_GetMetricRawDependencies(&getMetricRawDependenciesParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(50, "NVPW_MetricsEvaluator_GetMetricRawDependencies failed\n");
|
||||
return false;
|
||||
}
|
||||
for (const char* const pRawMetricName : rawDependencies)
|
||||
{
|
||||
NVPA_RawMetricRequest rawMetricRequest = { NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE };
|
||||
rawMetricRequest.pMetricName = pRawMetricName;
|
||||
rawMetricRequest.isolated = true;
|
||||
rawMetricRequest.keepInstances = true;
|
||||
|
||||
NVPW_CounterDataBuilder_AddMetrics_Params addMetricParams = { NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE };
|
||||
addMetricParams.numMetricRequests = 1;
|
||||
addMetricParams.pCounterDataBuilder = m_pCounterDataBuilder;
|
||||
addMetricParams.pRawMetricRequests = &rawMetricRequest;
|
||||
nvpaStatus = NVPW_CounterDataBuilder_AddMetrics(&addMetricParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(50, "NVPW_CounterDataBuilder_AddMetrics failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_RawMetricsConfig_AddMetrics_Params configAddMetricParams = { NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE };
|
||||
configAddMetricParams.numMetricRequests = 1;
|
||||
configAddMetricParams.pRawMetricRequests = &rawMetricRequest;
|
||||
configAddMetricParams.pRawMetricsConfig = m_pRawMetricsConfig;
|
||||
nvpaStatus = NVPW_RawMetricsConfig_AddMetrics(&configAddMetricParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(50, "NVPW_RawMetricsConfig_AddMetrics failed\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AddMetric(const char* pMetricName)
|
||||
{
|
||||
NVPW_MetricEvalRequest metricEvalRequest{};
|
||||
bool success = ToMetricEvalRequest(m_pMetricsEvaluator, pMetricName, metricEvalRequest);
|
||||
if (!success)
|
||||
{
|
||||
NV_PERF_LOG_ERR(50, "ToMetricEvalRequest failed for metric: %s\n", pMetricName);
|
||||
return false;
|
||||
}
|
||||
success = AddMetrics(&metricEvalRequest, 1);
|
||||
if (!success)
|
||||
{
|
||||
NV_PERF_LOG_ERR(50, "AddMetrics failed for metric: %s\n", pMetricName);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AddMetrics(const char* const pMetricNames[], size_t numMetrics)
|
||||
{
|
||||
bool success = true;
|
||||
for (size_t metricIdx = 0; metricIdx < numMetrics; ++metricIdx)
|
||||
{
|
||||
const bool addMetricSuccess = AddMetric(pMetricNames[metricIdx]);
|
||||
if (!addMetricSuccess)
|
||||
{
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
if (!success)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PrepareConfigImage()
|
||||
{
|
||||
NVPA_Status nvpaStatus;
|
||||
m_configuring = false;
|
||||
|
||||
NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParam = { NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE };
|
||||
endPassGroupParam.pRawMetricsConfig = m_pRawMetricsConfig;
|
||||
nvpaStatus = NVPW_RawMetricsConfig_EndPassGroup(&endPassGroupParam);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_RawMetricsConfig_GenerateConfigImage_Params generateConfigImageParam = { NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE };
|
||||
generateConfigImageParam.pRawMetricsConfig = m_pRawMetricsConfig;
|
||||
nvpaStatus = NVPW_RawMetricsConfig_GenerateConfigImage(&generateConfigImageParam);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Start a new PassGroup so that subsequent AddMetrics() calls will succeed.
|
||||
// This will not result in optimal scheduling, but it obeys the principle of least surprise.
|
||||
NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = { NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE };
|
||||
beginPassGroupParams.pRawMetricsConfig = m_pRawMetricsConfig;
|
||||
nvpaStatus = NVPW_RawMetricsConfig_BeginPassGroup(&beginPassGroupParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
m_configuring = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the buffer size needed for the ConfigImage, or zero on error.
|
||||
size_t GetConfigImageSize() const
|
||||
{
|
||||
NVPW_RawMetricsConfig_GetConfigImage_Params getConfigImageParam = { NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE };
|
||||
getConfigImageParam.pBuffer = nullptr;
|
||||
getConfigImageParam.bytesAllocated = 0;
|
||||
getConfigImageParam.pRawMetricsConfig = m_pRawMetricsConfig;
|
||||
NVPA_Status nvpaStatus = NVPW_RawMetricsConfig_GetConfigImage(&getConfigImageParam);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
return getConfigImageParam.bytesCopied;
|
||||
}
|
||||
|
||||
// Copies the generated ConfigImage into pBuffer.
|
||||
bool GetConfigImage(size_t bufferSize, uint8_t* pBuffer) const
|
||||
{
|
||||
NVPW_RawMetricsConfig_GetConfigImage_Params getConfigImageParam = { NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE };
|
||||
getConfigImageParam.pRawMetricsConfig = m_pRawMetricsConfig;
|
||||
getConfigImageParam.bytesAllocated = bufferSize;
|
||||
getConfigImageParam.pBuffer = pBuffer;
|
||||
NVPA_Status nvpaStatus = NVPW_RawMetricsConfig_GetConfigImage(&getConfigImageParam);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the buffer size needed for the CounterDataPrefix, or zero on error.
|
||||
size_t GetCounterDataPrefixSize() const
|
||||
{
|
||||
NVPW_CounterDataBuilder_GetCounterDataPrefix_Params getCounterDataPrefixParams = { NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE };
|
||||
getCounterDataPrefixParams.bytesAllocated = 0;
|
||||
getCounterDataPrefixParams.pBuffer = nullptr;
|
||||
getCounterDataPrefixParams.pCounterDataBuilder = m_pCounterDataBuilder;
|
||||
NVPA_Status nvpaStatus = NVPW_CounterDataBuilder_GetCounterDataPrefix(&getCounterDataPrefixParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
return getCounterDataPrefixParams.bytesCopied;
|
||||
}
|
||||
|
||||
// Copies the generated CounterDataPrefix into pBuffer.
|
||||
bool GetCounterDataPrefix(size_t bufferSize, uint8_t* pBuffer) const
|
||||
{
|
||||
NVPW_CounterDataBuilder_GetCounterDataPrefix_Params getCounterDataPrefixParams = { NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE };
|
||||
getCounterDataPrefixParams.bytesAllocated = bufferSize;
|
||||
getCounterDataPrefixParams.pBuffer = pBuffer;
|
||||
getCounterDataPrefixParams.pCounterDataBuilder = m_pCounterDataBuilder;
|
||||
NVPA_Status nvpaStatus = NVPW_CounterDataBuilder_GetCounterDataPrefix(&getCounterDataPrefixParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
}}
|
||||
766
ruins64k/tools/NvPerfUtility/include/NvPerfMetricsEvaluator.h
Normal file
766
ruins64k/tools/NvPerfUtility/include/NvPerfMetricsEvaluator.h
Normal file
@@ -0,0 +1,766 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "NvPerfInit.h"
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
// Smart Pointer for NVPW_MetricsEvaluator
|
||||
class MetricsEvaluator
|
||||
{
|
||||
protected:
|
||||
NVPW_MetricsEvaluator* m_pMetricsEvaluator;
|
||||
std::vector<uint8_t> m_scratchBuffer;
|
||||
|
||||
private:
|
||||
// Prevent accidental use of "delete" keyword on this class' implicit conversions.
|
||||
// Introducing a second 'operator CompileErrorOnOperatorDelete*()' triggers an 'ambiguous conversion to void*'
|
||||
// on the 'delete', which catches the usage error at compile time. c.f. http://stackoverflow.com/a/3312507
|
||||
struct CompileErrorOnOperatorDelete;
|
||||
operator CompileErrorOnOperatorDelete*() const;
|
||||
|
||||
private:
|
||||
// non-copyable
|
||||
MetricsEvaluator(const MetricsEvaluator& rhs);
|
||||
MetricsEvaluator& operator=(const MetricsEvaluator& rhs);
|
||||
|
||||
public:
|
||||
~MetricsEvaluator()
|
||||
{
|
||||
Reset();
|
||||
}
|
||||
|
||||
MetricsEvaluator()
|
||||
: m_pMetricsEvaluator()
|
||||
{
|
||||
}
|
||||
|
||||
// takes the ownership
|
||||
MetricsEvaluator(NVPW_MetricsEvaluator* pMetricsEvaluator, std::vector<uint8_t>&& scratchBuffer)
|
||||
: m_pMetricsEvaluator(pMetricsEvaluator)
|
||||
, m_scratchBuffer(std::move(scratchBuffer))
|
||||
{
|
||||
scratchBuffer.clear();
|
||||
}
|
||||
|
||||
MetricsEvaluator(MetricsEvaluator&& evaluator)
|
||||
: m_pMetricsEvaluator(evaluator.m_pMetricsEvaluator)
|
||||
, m_scratchBuffer(std::move(evaluator.m_scratchBuffer))
|
||||
{
|
||||
evaluator.m_pMetricsEvaluator = nullptr;
|
||||
evaluator.m_scratchBuffer.clear();
|
||||
}
|
||||
|
||||
MetricsEvaluator& operator=(MetricsEvaluator&& evaluator)
|
||||
{
|
||||
Reset();
|
||||
m_pMetricsEvaluator = evaluator.m_pMetricsEvaluator;
|
||||
m_scratchBuffer = std::move(evaluator.m_scratchBuffer);
|
||||
evaluator.m_pMetricsEvaluator = nullptr;
|
||||
evaluator.m_scratchBuffer.clear();
|
||||
return *this;
|
||||
}
|
||||
|
||||
operator NVPW_MetricsEvaluator*() const
|
||||
{
|
||||
return m_pMetricsEvaluator;
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
if (m_pMetricsEvaluator != nullptr)
|
||||
{
|
||||
NVPW_MetricsEvaluator_Destroy_Params destroyParams = { NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE };
|
||||
destroyParams.pMetricsEvaluator = m_pMetricsEvaluator;
|
||||
NVPA_Status status = NVPW_MetricsEvaluator_Destroy(&destroyParams);
|
||||
if (status != NVPA_STATUS_SUCCESS)
|
||||
{
|
||||
NV_PERF_LOG_ERR(80, "NVPW_MetricsEvaluator_Destroy failed\n");
|
||||
}
|
||||
m_pMetricsEvaluator = nullptr;
|
||||
}
|
||||
m_scratchBuffer.clear();
|
||||
}
|
||||
};
|
||||
|
||||
// Copyable, non-owning view over a table of metric names.
// All names are stored in one character buffer (`m_pMetricNames`); `m_pMetricNameBeginIndices[i]` is the
// offset of the i-th name inside that buffer. Supports indexing and range-based for loops.
class MetricsEnumerator
{
public:
    // Iterator over the metric names; dereferencing yields the `const char*` name at the current index.
    // Copy construction and copy assignment are the compiler-generated member-wise versions.
    class Iterator
    {
    private:
        // note these are pointing to the .RO section of the library, so their lifetime are not bound to any particular metrics enumerator or metrics evaluator instance
        const char* m_pMetricNames;
        const size_t* m_pMetricNameBeginIndices;
        size_t m_numMetrics;
        size_t m_metricIndex;

    public:
        Iterator()
            : m_pMetricNames(nullptr)
            , m_pMetricNameBeginIndices(nullptr)
            , m_numMetrics(0)
            , m_metricIndex(0)
        {
        }

        Iterator(const char* pMetricNames, const size_t* pMetricNameBeginIndices, size_t numMetrics, size_t metricIndex)
            : m_pMetricNames(pMetricNames)
            , m_pMetricNameBeginIndices(pMetricNameBeginIndices)
            , m_numMetrics(numMetrics)
            , m_metricIndex(metricIndex)
        {
        }

        // Two iterators are equal when they refer to the same table and the same position.
        bool operator==(const Iterator& rhs) const
        {
            return m_pMetricNames == rhs.m_pMetricNames
                && m_pMetricNameBeginIndices == rhs.m_pMetricNameBeginIndices
                && m_numMetrics == rhs.m_numMetrics
                && m_metricIndex == rhs.m_metricIndex;
        }

        bool operator!=(const Iterator& rhs) const
        {
            return !(*this == rhs);
        }

        // Pre-increment. Returns a reference to this iterator so that chained increments such as `++(++it)`
        // advance the same object. The index saturates at `m_numMetrics` (the end position).
        Iterator& operator++()
        {
            if (m_metricIndex < m_numMetrics)
            {
                ++m_metricIndex;
            }
            return *this;
        }

        // Post-increment. Returns a copy of the iterator as it was before advancing.
        Iterator operator++(int)
        {
            Iterator prev = *this;
            ++*this;
            return prev;
        }

        // no validity check
        const char* operator*() const
        {
            const char* pMetricName = &m_pMetricNames[m_pMetricNameBeginIndices[m_metricIndex]];
            return pMetricName;
        }
    };

private:
    // note these are pointing to the .RO section of the library, so their lifetime are not bound to any particular metrics evaluator instance
    const char* m_pMetricNames;
    const size_t* m_pMetricNameBeginIndices;
    size_t m_numMetrics;

public:
    // Creates an empty enumerator (no names).
    MetricsEnumerator()
        : m_pMetricNames(nullptr)
        , m_pMetricNameBeginIndices(nullptr)
        , m_numMetrics(0)
    {
    }

    MetricsEnumerator(const char* pMetricNames, const size_t* pMetricNameBeginIndices, size_t numMetrics)
        : m_pMetricNames(pMetricNames)
        , m_pMetricNameBeginIndices(pMetricNameBeginIndices)
        , m_numMetrics(numMetrics)
    {
    }

    // no bounds check
    const char* operator[](size_t index) const
    {
        const char* pMetricName = &m_pMetricNames[m_pMetricNameBeginIndices[index]];
        return pMetricName;
    }

    Iterator begin() const
    {
        return Iterator(m_pMetricNames, m_pMetricNameBeginIndices, m_numMetrics, 0);
    }

    Iterator end() const
    {
        return Iterator(m_pMetricNames, m_pMetricNameBeginIndices, m_numMetrics, m_numMetrics);
    }

    size_t size() const
    {
        return m_numMetrics;
    }

    bool empty() const
    {
        return !m_numMetrics;
    }
};
|
||||
|
||||
// Queries the names of all metrics of `metricType` from `pMetricsEvaluator`.
// Returns a default-constructed (empty) MetricsEnumerator when NVPW_MetricsEvaluator_GetMetricNames does not succeed.
inline MetricsEnumerator EnumerateMetrics(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType)
{
    NVPW_MetricsEvaluator_GetMetricNames_Params params = { NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.metricType = static_cast<uint8_t>(metricType);

    if (NVPW_MetricsEvaluator_GetMetricNames(&params) == NVPA_STATUS_SUCCESS)
    {
        return MetricsEnumerator(params.pMetricNames, params.pMetricNameBeginIndices, params.numMetrics);
    }
    return MetricsEnumerator();
}
|
||||
|
||||
// Shorthand for EnumerateMetrics() with NVPW_METRIC_TYPE_COUNTER.
inline MetricsEnumerator EnumerateCounters(NVPW_MetricsEvaluator* pMetricsEvaluator)
{
    const NVPW_MetricType counterType = NVPW_METRIC_TYPE_COUNTER;
    return EnumerateMetrics(pMetricsEvaluator, counterType);
}
|
||||
|
||||
// Shorthand for EnumerateMetrics() with NVPW_METRIC_TYPE_RATIO.
inline MetricsEnumerator EnumerateRatios(NVPW_MetricsEvaluator* pMetricsEvaluator)
{
    const NVPW_MetricType ratioType = NVPW_METRIC_TYPE_RATIO;
    return EnumerateMetrics(pMetricsEvaluator, ratioType);
}
|
||||
|
||||
// Shorthand for EnumerateMetrics() with NVPW_METRIC_TYPE_THROUGHPUT.
inline MetricsEnumerator EnumerateThroughputs(NVPW_MetricsEvaluator* pMetricsEvaluator)
{
    const NVPW_MetricType throughputType = NVPW_METRIC_TYPE_THROUGHPUT;
    return EnumerateMetrics(pMetricsEvaluator, throughputType);
}
|
||||
|
||||
// Maps a metric type to its display name; any value other than counter/ratio/throughput yields "".
inline const char* ToCString(NVPW_MetricType metricType)
{
    if (metricType == NVPW_METRIC_TYPE_COUNTER)
    {
        return "Counter";
    }
    if (metricType == NVPW_METRIC_TYPE_RATIO)
    {
        return "Ratio";
    }
    if (metricType == NVPW_METRIC_TYPE_THROUGHPUT)
    {
        return "Throughput";
    }
    return "";
}
|
||||
|
||||
// Maps a rollup operation to its metric-name suffix (".avg", ".max", ".min", ".sum"); any other value yields "".
inline const char* ToCString(NVPW_RollupOp rollupOp)
{
    if (rollupOp == NVPW_ROLLUP_OP_AVG)
    {
        return ".avg";
    }
    if (rollupOp == NVPW_ROLLUP_OP_MAX)
    {
        return ".max";
    }
    if (rollupOp == NVPW_ROLLUP_OP_MIN)
    {
        return ".min";
    }
    if (rollupOp == NVPW_ROLLUP_OP_SUM)
    {
        return ".sum";
    }
    return "";
}
|
||||
|
||||
// Maps a submetric to its metric-name suffix. NVPW_SUBMETRIC_NONE and any value not listed below yield "".
inline const char* ToCString(NVPW_Submetric submetric)
{
    struct SubmetricSuffix
    {
        NVPW_Submetric submetric;
        const char* pSuffix;
    };
    // One row per submetric handled by this function, in the same order as the enum values are tested.
    static const SubmetricSuffix s_suffixes[] = {
        { NVPW_SUBMETRIC_NONE,                              "" },
        { NVPW_SUBMETRIC_PEAK_SUSTAINED,                    ".peak_sustained" },
        { NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE,             ".peak_sustained_active" },
        { NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE_PER_SECOND,  ".peak_sustained_active.per_second" },
        { NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED,            ".peak_sustained_elapsed" },
        { NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED_PER_SECOND, ".peak_sustained_elapsed.per_second" },
        { NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME,              ".peak_sustained_frame" },
        { NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME_PER_SECOND,   ".peak_sustained_frame.per_second" },
        { NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION,             ".peak_sustained_region" },
        { NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION_PER_SECOND,  ".peak_sustained_region.per_second" },
        { NVPW_SUBMETRIC_PER_CYCLE_ACTIVE,                  ".per_cycle_active" },
        { NVPW_SUBMETRIC_PER_CYCLE_ELAPSED,                 ".per_cycle_elapsed" },
        { NVPW_SUBMETRIC_PER_CYCLE_IN_FRAME,                ".per_cycle_in_frame" },
        { NVPW_SUBMETRIC_PER_CYCLE_IN_REGION,               ".per_cycle_in_region" },
        { NVPW_SUBMETRIC_PER_SECOND,                        ".per_second" },
        { NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ACTIVE,      ".pct_of_peak_sustained_active" },
        { NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ELAPSED,     ".pct_of_peak_sustained_elapsed" },
        { NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_FRAME,       ".pct_of_peak_sustained_frame" },
        { NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_REGION,      ".pct_of_peak_sustained_region" },
        { NVPW_SUBMETRIC_MAX_RATE,                          ".max_rate" },
        { NVPW_SUBMETRIC_PCT,                               ".pct" },
        { NVPW_SUBMETRIC_RATIO,                             ".ratio" },
    };

    for (const SubmetricSuffix& entry : s_suffixes)
    {
        if (entry.submetric == submetric)
        {
            return entry.pSuffix;
        }
    }
    return "";
}
|
||||
|
||||
inline const char* ToCString(const MetricsEnumerator& countersEnumerator, const MetricsEnumerator& ratiosEnumerator, const MetricsEnumerator& throughputsEnumerator, NVPW_MetricType metricType, size_t metricIndex)
|
||||
{
|
||||
if (metricType == NVPW_METRIC_TYPE_COUNTER)
|
||||
{
|
||||
if (metricIndex < countersEnumerator.size())
|
||||
{
|
||||
return countersEnumerator[metricIndex];
|
||||
}
|
||||
}
|
||||
else if (metricType == NVPW_METRIC_TYPE_RATIO)
|
||||
{
|
||||
if (metricIndex < ratiosEnumerator.size())
|
||||
{
|
||||
return ratiosEnumerator[metricIndex];
|
||||
}
|
||||
}
|
||||
else if (metricType == NVPW_METRIC_TYPE_THROUGHPUT)
|
||||
{
|
||||
if (metricIndex < throughputsEnumerator.size())
|
||||
{
|
||||
return throughputsEnumerator[metricIndex];
|
||||
}
|
||||
}
|
||||
NV_PERF_LOG_WRN(50, "ToCString failed\n");
|
||||
return "";
|
||||
}
|
||||
|
||||
// Looks up the base name of metric `metricIndex` of `metricType` by enumerating that type on `pMetricsEvaluator`.
// Logs a warning and returns "" when the type is not counter/ratio/throughput or the index is out of range.
inline const char* ToCString(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType, size_t metricIndex)
{
    // Stays empty (size 0) for an unrecognized type, so the bounds check below fails in that case.
    MetricsEnumerator enumerator;
    switch (metricType)
    {
        case NVPW_METRIC_TYPE_COUNTER:
            enumerator = EnumerateCounters(pMetricsEvaluator);
            break;
        case NVPW_METRIC_TYPE_RATIO:
            enumerator = EnumerateRatios(pMetricsEvaluator);
            break;
        case NVPW_METRIC_TYPE_THROUGHPUT:
            enumerator = EnumerateThroughputs(pMetricsEvaluator);
            break;
        default:
            break;
    }

    if (metricIndex < enumerator.size())
    {
        return enumerator[metricIndex];
    }
    NV_PERF_LOG_WRN(50, "ToCString failed\n");
    return "";
}
|
||||
|
||||
inline std::string ToString(const MetricsEnumerator& countersEnumerator, const MetricsEnumerator& ratiosEnumerator, const MetricsEnumerator& throughputsEnumerator, const NVPW_MetricEvalRequest& metricEvalRequest)
|
||||
{
|
||||
std::string metricName(ToCString(countersEnumerator, ratiosEnumerator, throughputsEnumerator, static_cast<NVPW_MetricType>(metricEvalRequest.metricType), metricEvalRequest.metricIndex));
|
||||
if (metricEvalRequest.metricType == NVPW_METRIC_TYPE_COUNTER || metricEvalRequest.metricType == NVPW_METRIC_TYPE_THROUGHPUT)
|
||||
{
|
||||
metricName += ToCString(static_cast<NVPW_RollupOp>(metricEvalRequest.rollupOp));
|
||||
}
|
||||
metricName += ToCString(static_cast<NVPW_Submetric>(metricEvalRequest.submetric));
|
||||
return metricName;
|
||||
}
|
||||
|
||||
inline std::string ToString(NVPW_MetricsEvaluator* pMetricsEvaluator, const NVPW_MetricEvalRequest& metricEvalRequest)
|
||||
{
|
||||
std::string metricName(ToCString(pMetricsEvaluator, static_cast<NVPW_MetricType>(metricEvalRequest.metricType), metricEvalRequest.metricIndex));
|
||||
if (metricEvalRequest.metricType == NVPW_METRIC_TYPE_COUNTER || metricEvalRequest.metricType == NVPW_METRIC_TYPE_THROUGHPUT)
|
||||
{
|
||||
metricName += ToCString(static_cast<NVPW_RollupOp>(metricEvalRequest.rollupOp));
|
||||
}
|
||||
metricName += ToCString(static_cast<NVPW_Submetric>(metricEvalRequest.submetric));
|
||||
return metricName;
|
||||
}
|
||||
|
||||
// Converts `pMetricName` into `metricEvalRequest` via NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest.
// Returns true on success; logs a warning and returns false otherwise.
inline bool ToMetricEvalRequest(NVPW_MetricsEvaluator* pMetricsEvaluator, const char* pMetricName, NVPW_MetricEvalRequest& metricEvalRequest)
{
    NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params params = { NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.pMetricName = pMetricName;
    params.pMetricEvalRequest = &metricEvalRequest;
    params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;

    if (NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(&params) == NVPA_STATUS_SUCCESS)
    {
        return true;
    }
    NV_PERF_LOG_WRN(80, "NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest failed\n");
    return false;
}
|
||||
|
||||
// Resolves `pMetricName` to its metric type and index via NVPW_MetricsEvaluator_GetMetricTypeAndIndex.
// On success writes `metricType` / `metricIndex` and returns true; on failure logs a warning, leaves both
// outputs untouched and returns false.
inline bool GetMetricTypeAndIndex(NVPW_MetricsEvaluator* pMetricsEvaluator, const char* pMetricName, NVPW_MetricType& metricType, size_t& metricIndex)
{
    NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params params = { NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.pMetricName = pMetricName;

    const bool succeeded = (NVPW_MetricsEvaluator_GetMetricTypeAndIndex(&params) == NVPA_STATUS_SUCCESS);
    if (!succeeded)
    {
        NV_PERF_LOG_WRN(80, "NVPW_MetricsEvaluator_GetMetricTypeAndIndex failed\n");
        return false;
    }

    metricType = static_cast<NVPW_MetricType>(params.metricType);
    metricIndex = params.metricIndex;
    return true;
}
|
||||
|
||||
// Appends the submetrics supported for `metricType` to `submetrics`.
// Existing elements of `submetrics` are kept; the new ones are pushed after them.
// Returns true on success; logs an error and returns false (leaving `submetrics` untouched) when
// NVPW_MetricsEvaluator_GetSupportedSubmetrics fails.
inline bool GetSupportedSubmetrics(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType, std::vector<NVPW_Submetric>& submetrics)
{
    NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params getSupportedSubmetrics = { NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params_STRUCT_SIZE };
    getSupportedSubmetrics.pMetricsEvaluator = pMetricsEvaluator;
    getSupportedSubmetrics.metricType = static_cast<uint8_t>(metricType);
    NVPA_Status status = NVPW_MetricsEvaluator_GetSupportedSubmetrics(&getSupportedSubmetrics);
    if (status != NVPA_STATUS_SUCCESS)
    {
        NV_PERF_LOG_ERR(80, "NVPW_MetricsEvaluator_GetSupportedSubmetrics failed for metric type: %u\n", getSupportedSubmetrics.metricType);
        return false;
    }

    // This function appends, so the reservation has to account for what is already stored.
    const size_t numSupportedSubmetrics = getSupportedSubmetrics.numSupportedSubmetrics;
    submetrics.reserve(submetrics.size() + numSupportedSubmetrics);
    for (size_t ii = 0; ii < numSupportedSubmetrics; ++ii)
    {
        submetrics.push_back(static_cast<NVPW_Submetric>(getSupportedSubmetrics.pSupportedSubmetrics[ii]));
    }
    return true;
}
|
||||
|
||||
// Forwards the given counter data image to NVPW_MetricsEvaluator_SetDeviceAttributes.
// Returns true on success; logs an error and returns false otherwise.
inline bool MetricsEvaluatorSetDeviceAttributes(NVPW_MetricsEvaluator* pMetricsEvaluator, const uint8_t* pCounterDataImage, size_t counterDataImageSize)
{
    NVPW_MetricsEvaluator_SetDeviceAttributes_Params params = { NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.pCounterDataImage = pCounterDataImage;
    params.counterDataImageSize = counterDataImageSize;

    if (NVPW_MetricsEvaluator_SetDeviceAttributes(&params) == NVPA_STATUS_SUCCESS)
    {
        return true;
    }
    NV_PERF_LOG_ERR(50, "NVPW_MetricsEvaluator_SetDeviceAttributes failed\n");
    return false;
}
|
||||
|
||||
// Evaluate the named metrics from (CounterDataImage, rangeIndex) and store them in pMetricValues.
|
||||
inline bool EvaluateToGpuValues(
|
||||
NVPW_MetricsEvaluator* pMetricsEvaluator,
|
||||
const uint8_t* pCounterDataImage,
|
||||
size_t counterDataImageSize,
|
||||
size_t rangeIndex,
|
||||
size_t numMetricEvalRequests,
|
||||
const NVPW_MetricEvalRequest* pMetricEvalRequests,
|
||||
double* pMetricValues)
|
||||
{
|
||||
NVPW_MetricsEvaluator_EvaluateToGpuValues_Params evaluateToGpuValuesParams = { NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE };
|
||||
evaluateToGpuValuesParams.pMetricsEvaluator = pMetricsEvaluator;
|
||||
evaluateToGpuValuesParams.pMetricEvalRequests = pMetricEvalRequests;
|
||||
evaluateToGpuValuesParams.numMetricEvalRequests = numMetricEvalRequests;
|
||||
evaluateToGpuValuesParams.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
|
||||
evaluateToGpuValuesParams.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest);
|
||||
evaluateToGpuValuesParams.pCounterDataImage = pCounterDataImage;
|
||||
evaluateToGpuValuesParams.counterDataImageSize = counterDataImageSize;
|
||||
evaluateToGpuValuesParams.rangeIndex = rangeIndex;
|
||||
evaluateToGpuValuesParams.isolated = (NVPA_Bool)true;
|
||||
evaluateToGpuValuesParams.pMetricValues = pMetricValues;
|
||||
NVPA_Status status = NVPW_MetricsEvaluator_EvaluateToGpuValues(&evaluateToGpuValuesParams);
|
||||
if (status != NVPA_STATUS_SUCCESS)
|
||||
{
|
||||
NV_PERF_LOG_ERR(80, "NVPW_MetricsEvaluator_EvaluateToGpuValues failed\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool operator==(const NVPW_DimUnitFactor& lhs, const NVPW_DimUnitFactor& rhs)
|
||||
{
|
||||
return (lhs.dimUnit == rhs.dimUnit) && (lhs.exponent == rhs.exponent);
|
||||
}
|
||||
|
||||
inline bool operator<(const NVPW_DimUnitFactor& lhs, const NVPW_DimUnitFactor& rhs)
|
||||
{
|
||||
if (lhs.dimUnit != rhs.dimUnit)
|
||||
{
|
||||
return lhs.dimUnit < rhs.dimUnit;
|
||||
}
|
||||
if (lhs.exponent != rhs.exponent)
|
||||
{
|
||||
return lhs.exponent < rhs.exponent;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Retrieves the dimensional unit factors of `metricRequest` into `dimUnits`.
// NVPW_MetricsEvaluator_GetMetricDimUnits is called twice: first to obtain the number of factors, then again
// with `dimUnits` resized to receive them. Returns false (after logging a warning) when either call fails or
// when the first call reports zero factors; `dimUnits` is only resized once the first call has succeeded.
inline bool GetMetricDimUnits(NVPW_MetricsEvaluator* pMetricsEvaluator, const NVPW_MetricEvalRequest& metricRequest, std::vector<NVPW_DimUnitFactor>& dimUnits)
{
    NVPW_MetricsEvaluator_GetMetricDimUnits_Params params = { NVPW_MetricsEvaluator_GetMetricDimUnits_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.pMetricEvalRequest = &metricRequest;
    params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
    params.dimUnitFactorStructSize = NVPW_DimUnitFactor_STRUCT_SIZE;

    // First call: size query.
    const NVPA_Status sizeQueryStatus = NVPW_MetricsEvaluator_GetMetricDimUnits(&params);
    const bool sizeQueryFailed = (sizeQueryStatus != NVPA_STATUS_SUCCESS);
    if (sizeQueryFailed || !params.numDimUnits)
    {
        NV_PERF_LOG_WRN(80, "NVPW_MetricsEvaluator_GetMetricDimUnits failed for metric = %s\n", ToString(pMetricsEvaluator, metricRequest).c_str());
        return false;
    }

    // Second call: fill the caller's vector.
    dimUnits.resize(params.numDimUnits);
    params.pDimUnits = dimUnits.data();
    const NVPA_Status fillStatus = NVPW_MetricsEvaluator_GetMetricDimUnits(&params);
    if (fillStatus == NVPA_STATUS_SUCCESS)
    {
        return true;
    }
    NV_PERF_LOG_WRN(80, "NVPW_MetricsEvaluator_GetMetricDimUnits failed for metric = %s\n", ToString(pMetricsEvaluator, metricRequest).c_str());
    return false;
}
|
||||
|
||||
// Returns the description string of metric `metricIndex` of `metricType`, taken from the matching
// Get*Properties call. Logs a warning and returns nullptr when the type is not counter/ratio/throughput
// or when the properties call fails.
inline const char* GetMetricDescription(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType, size_t metricIndex)
{
    if (metricType == NVPW_METRIC_TYPE_COUNTER)
    {
        NVPW_MetricsEvaluator_GetCounterProperties_Params params{ NVPW_MetricsEvaluator_GetCounterProperties_Params_STRUCT_SIZE };
        params.pMetricsEvaluator = pMetricsEvaluator;
        params.counterIndex = metricIndex;
        NVPA_Status status = NVPW_MetricsEvaluator_GetCounterProperties(&params);
        if (status == NVPA_STATUS_SUCCESS)
        {
            return params.pDescription;
        }
    }
    else if (metricType == NVPW_METRIC_TYPE_RATIO)
    {
        NVPW_MetricsEvaluator_GetRatioMetricProperties_Params params{ NVPW_MetricsEvaluator_GetRatioMetricProperties_Params_STRUCT_SIZE };
        params.pMetricsEvaluator = pMetricsEvaluator;
        params.ratioMetricIndex = metricIndex;
        NVPA_Status status = NVPW_MetricsEvaluator_GetRatioMetricProperties(&params);
        if (status == NVPA_STATUS_SUCCESS)
        {
            return params.pDescription;
        }
    }
    else if (metricType == NVPW_METRIC_TYPE_THROUGHPUT)
    {
        NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params params{ NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params_STRUCT_SIZE };
        params.pMetricsEvaluator = pMetricsEvaluator;
        params.throughputMetricIndex = metricIndex;
        NVPA_Status status = NVPW_MetricsEvaluator_GetThroughputMetricProperties(&params);
        if (status == NVPA_STATUS_SUCCESS)
        {
            return params.pDescription;
        }
    }
    // Reached for an unrecognized metric type or a failed properties query.
    NV_PERF_LOG_WRN(50, "GetMetricDescription failed for metricType = %u, metricIndex = %u\n", static_cast<uint32_t>(metricType), static_cast<uint32_t>(metricIndex));
    return nullptr;
}
|
||||
|
||||
// Returns the name of `hwUnit` as reported by NVPW_MetricsEvaluator_HwUnitToString.
// Logs a warning and returns nullptr when that call fails.
inline const char* ToCString(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_HwUnit hwUnit)
{
    NVPW_MetricsEvaluator_HwUnitToString_Params params{ NVPW_MetricsEvaluator_HwUnitToString_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.hwUnit = hwUnit;
    NVPA_Status status = NVPW_MetricsEvaluator_HwUnitToString(&params);
    if (status != NVPA_STATUS_SUCCESS)
    {
        NV_PERF_LOG_WRN(50, "NVPW_MetricsEvaluator_HwUnitToString failed for hwUnit: %u\n", hwUnit);
        return nullptr;
    }
    return params.pHwUnitName;
}
|
||||
|
||||
// Returns the hardware unit of metric `metricIndex` of `metricType`, taken from the matching
// Get*Properties call. Logs a warning and returns NVPW_HW_UNIT_INVALID when the type is not
// counter/ratio/throughput or when the properties call fails.
inline NVPW_HwUnit GetMetricHwUnit(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType, size_t metricIndex)
{
    if (metricType == NVPW_METRIC_TYPE_COUNTER)
    {
        NVPW_MetricsEvaluator_GetCounterProperties_Params params{ NVPW_MetricsEvaluator_GetCounterProperties_Params_STRUCT_SIZE };
        params.pMetricsEvaluator = pMetricsEvaluator;
        params.counterIndex = metricIndex;
        NVPA_Status status = NVPW_MetricsEvaluator_GetCounterProperties(&params);
        if (status == NVPA_STATUS_SUCCESS)
        {
            return static_cast<NVPW_HwUnit>(params.hwUnit);
        }
    }
    else if (metricType == NVPW_METRIC_TYPE_RATIO)
    {
        NVPW_MetricsEvaluator_GetRatioMetricProperties_Params params{ NVPW_MetricsEvaluator_GetRatioMetricProperties_Params_STRUCT_SIZE };
        params.pMetricsEvaluator = pMetricsEvaluator;
        params.ratioMetricIndex = metricIndex;
        NVPA_Status status = NVPW_MetricsEvaluator_GetRatioMetricProperties(&params);
        if (status == NVPA_STATUS_SUCCESS)
        {
            return static_cast<NVPW_HwUnit>(params.hwUnit);
        }
    }
    else if (metricType == NVPW_METRIC_TYPE_THROUGHPUT)
    {
        NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params params{ NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params_STRUCT_SIZE };
        params.pMetricsEvaluator = pMetricsEvaluator;
        params.throughputMetricIndex = metricIndex;
        NVPA_Status status = NVPW_MetricsEvaluator_GetThroughputMetricProperties(&params);
        if (status == NVPA_STATUS_SUCCESS)
        {
            return static_cast<NVPW_HwUnit>(params.hwUnit);
        }
    }
    // Reached for an unrecognized metric type or a failed properties query.
    NV_PERF_LOG_WRN(50, "GetMetricHwUnit failed for metricType = %u, metricIndex = %u\n", static_cast<uint32_t>(metricType), static_cast<uint32_t>(metricIndex));
    return NVPW_HW_UNIT_INVALID;
}
|
||||
|
||||
// Resolves the hardware unit of the given metric with GetMetricHwUnit() and converts it to a string with
// the NVPW_HwUnit overload of ToCString(); the result of that conversion is returned as-is.
inline const char* GetMetricHwUnitStr(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_MetricType metricType, size_t metricIndex)
{
    return ToCString(pMetricsEvaluator, GetMetricHwUnit(pMetricsEvaluator, metricType, metricIndex));
}
|
||||
|
||||
// Returns the plural or singular name of `dimUnit` as reported by NVPW_MetricsEvaluator_DimUnitToString.
// Logs a warning and returns "" when that call fails.
inline const char* ToCString(NVPW_MetricsEvaluator* pMetricsEvaluator, NVPW_DimUnitName dimUnit, bool plural)
{
    NVPW_MetricsEvaluator_DimUnitToString_Params params = { NVPW_MetricsEvaluator_DimUnitToString_Params_STRUCT_SIZE };
    params.pMetricsEvaluator = pMetricsEvaluator;
    params.dimUnit = static_cast<uint32_t>(dimUnit);

    if (NVPW_MetricsEvaluator_DimUnitToString(&params) != NVPA_STATUS_SUCCESS)
    {
        NV_PERF_LOG_WRN(80, "NVPW_MetricsEvaluator_DimUnitToString failed for dimUnit = %u\n", dimUnit);
        return "";
    }

    if (plural)
    {
        return params.pPluralName;
    }
    return params.pSingularName;
}
|
||||
|
||||
// `getDimUnitStrFunctor` must be in the form of const char*(NVPW_DimUnitName dimUnit, bool plural)
|
||||
template <typename GetDimUnitStrFunctor>
|
||||
inline std::string ToString(const std::vector<NVPW_DimUnitFactor>& dimUnitFactors, GetDimUnitStrFunctor&& getDimUnitStrFunctor)
|
||||
{
|
||||
if (dimUnitFactors.empty())
|
||||
{
|
||||
return "<unitless>";
|
||||
}
|
||||
|
||||
std::stringstream sstream;
|
||||
size_t numeratorCount = 0;
|
||||
size_t denominatorCount = 0;
|
||||
auto isNumerator = [](const NVPW_DimUnitFactor& dimUnitFactor) {
|
||||
return dimUnitFactor.exponent > 0;
|
||||
};
|
||||
// if printNumerator == false, print the denominator
|
||||
auto printFormattedDimUnits = [&](size_t count, bool printNumerator) {
|
||||
if (count > 1)
|
||||
{
|
||||
sstream << "(";
|
||||
}
|
||||
bool isFirst = true;
|
||||
for (const NVPW_DimUnitFactor& dimUnitFactor : dimUnitFactors)
|
||||
{
|
||||
if (printNumerator != isNumerator(dimUnitFactor))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!isFirst)
|
||||
{
|
||||
sstream << " * ";
|
||||
}
|
||||
const bool plural = printNumerator;
|
||||
sstream << getDimUnitStrFunctor(static_cast<NVPW_DimUnitName>(dimUnitFactor.dimUnit), plural);
|
||||
if (std::abs(dimUnitFactor.exponent) != 1)
|
||||
{
|
||||
sstream << "^" << (uint32_t)std::abs(dimUnitFactor.exponent);
|
||||
}
|
||||
isFirst = false;
|
||||
}
|
||||
if (count > 1)
|
||||
{
|
||||
sstream << ")";
|
||||
}
|
||||
};
|
||||
|
||||
for (const NVPW_DimUnitFactor& dimUnitFactor : dimUnitFactors)
|
||||
{
|
||||
isNumerator(dimUnitFactor) ? ++numeratorCount : ++denominatorCount;
|
||||
}
|
||||
|
||||
if (numeratorCount)
|
||||
{
|
||||
const bool printNumerator = true;
|
||||
printFormattedDimUnits(numeratorCount, printNumerator);
|
||||
}
|
||||
else
|
||||
{
|
||||
sstream << "1";
|
||||
}
|
||||
|
||||
if (denominatorCount)
|
||||
{
|
||||
sstream << " / ";
|
||||
const bool printNumerator = false;
|
||||
printFormattedDimUnits(denominatorCount, printNumerator);
|
||||
}
|
||||
return sstream.str();
|
||||
}
|
||||
|
||||
}}
|
||||
185
ruins64k/tools/NvPerfUtility/include/NvPerfOpenGL.h
Normal file
185
ruins64k/tools/NvPerfUtility/include/NvPerfOpenGL.h
Normal file
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "NvPerfInit.h"
|
||||
#include "NvPerfDeviceProperties.h"
|
||||
#include "nvperf_opengl_host.h"
|
||||
#include "nvperf_opengl_target.h"
|
||||
#include "GL/gl.h"
|
||||
#include <string.h>
|
||||
namespace nv { namespace perf {
|
||||
|
||||
// OpenGL Only Utilities
|
||||
//
|
||||
inline std::string OpenGLGetDeviceName()
|
||||
{
|
||||
const GLubyte* pRenderer = glGetString(GL_RENDERER);
|
||||
if (!pRenderer)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
return (const char*) pRenderer;
|
||||
}
|
||||
|
||||
inline bool OpenGLIsNvidiaDevice()
|
||||
{
|
||||
const GLubyte* pVendor = glGetString(GL_VENDOR);
|
||||
if (!pVendor)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (strstr((const char*)pVendor, "NVIDIA"))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool OpenGLLoadDriver()
|
||||
{
|
||||
NVPW_OpenGL_LoadDriver_Params loadDriverParams = { NVPW_OpenGL_LoadDriver_Params_STRUCT_SIZE };
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_LoadDriver(&loadDriverParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_OpenGL_LoadDriver failed\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline size_t OpenGLGetNvperfDeviceIndex(size_t sliIndex = 0)
|
||||
{
|
||||
NVPW_OpenGL_GraphicsContext_GetDeviceIndex_Params getDeviceIndexParams = { NVPW_OpenGL_GraphicsContext_GetDeviceIndex_Params_STRUCT_SIZE };
|
||||
getDeviceIndexParams.sliIndex = sliIndex;
|
||||
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_GraphicsContext_GetDeviceIndex(&getDeviceIndexParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return ~size_t(0);
|
||||
}
|
||||
|
||||
return getDeviceIndexParams.deviceIndex;
|
||||
}
|
||||
|
||||
// Returns GetDeviceIdentifiers() for the device index obtained from OpenGLGetNvperfDeviceIndex(sliIndex).
inline DeviceIdentifiers OpenGLGetDeviceIdentifiers(size_t sliIndex = 0)
{
    return GetDeviceIdentifiers(OpenGLGetNvperfDeviceIndex(sliIndex));
}
|
||||
|
||||
/// Returns GetDeviceClockState() for the device index resolved with the default SLI index.
inline NVPW_Device_ClockStatus OpenGLGetDeviceClockState()
{
    const size_t deviceIndex = OpenGLGetNvperfDeviceIndex();
    const NVPW_Device_ClockStatus clockState = GetDeviceClockState(deviceIndex);
    return clockState;
}
|
||||
|
||||
/// Forwards a clock setting to SetDeviceClockState() for the device index resolved with the default SLI index.
/// @param clockStatus  The NVPW_Device_ClockSetting value to apply.
/// @return The result of SetDeviceClockState().
inline bool OpenGLSetDeviceClockState(NVPW_Device_ClockSetting clockStatus)
{
    const size_t deviceIndex = OpenGLGetNvperfDeviceIndex();
    const bool succeeded = SetDeviceClockState(deviceIndex, clockStatus);
    return succeeded;
}
|
||||
|
||||
/// Forwards a clock status to SetDeviceClockState() for the device index resolved with the default SLI index.
/// @param clockStatus  The NVPW_Device_ClockStatus value to apply.
/// @return The result of SetDeviceClockState().
inline bool OpenGLSetDeviceClockState(NVPW_Device_ClockStatus clockStatus)
{
    const size_t deviceIndex = OpenGLGetNvperfDeviceIndex();
    const bool succeeded = SetDeviceClockState(deviceIndex, clockStatus);
    return succeeded;
}
|
||||
|
||||
inline size_t OpenGLCalculateMetricsEvaluatorScratchBufferSize(const char* pChipName)
|
||||
{
|
||||
NVPW_OpenGL_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParams = { NVPW_OpenGL_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE };
|
||||
calculateScratchBufferSizeParams.pChipName = pChipName;
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_MetricsEvaluator_CalculateScratchBufferSize(&calculateScratchBufferSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(20, "NVPW_OpenGL_MetricsEvaluator_CalculateScratchBufferSize failed\n");
|
||||
return 0;
|
||||
}
|
||||
return calculateScratchBufferSizeParams.scratchBufferSize;
|
||||
}
|
||||
|
||||
/// Initializes an OpenGL metrics evaluator through NVPW_OpenGL_MetricsEvaluator_Initialize.
/// @param pScratchBuffer     Scratch buffer handed to the evaluator.
/// @param scratchBufferSize  Size of pScratchBuffer in bytes.
/// @param pChipName          Chip name passed through to the initialize call.
/// @return The evaluator pointer reported by the call, or nullptr (after logging an error) on failure.
inline NVPW_MetricsEvaluator* OpenGLCreateMetricsEvaluator(uint8_t* pScratchBuffer, size_t scratchBufferSize, const char* pChipName)
{
    NVPW_OpenGL_MetricsEvaluator_Initialize_Params params = { NVPW_OpenGL_MetricsEvaluator_Initialize_Params_STRUCT_SIZE };
    params.pScratchBuffer = pScratchBuffer;
    params.scratchBufferSize = scratchBufferSize;
    params.pChipName = pChipName;

    const NVPA_Status status = NVPW_OpenGL_MetricsEvaluator_Initialize(&params);
    if (!status)
    {
        return params.pMetricsEvaluator;
    }

    NV_PERF_LOG_ERR(20, "NVPW_OpenGL_MetricsEvaluator_Initialize failed\n");
    return nullptr;
}
|
||||
|
||||
}}
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
/// Creates a raw metrics config for the profiler activity kind via NVPW_OpenGL_RawMetricsConfig_Create.
/// @param pChipName  Chip name passed through to the create call.
/// @return The created config, or nullptr when the call fails (no error is logged).
inline NVPA_RawMetricsConfig* OpenGLCreateRawMetricsConfig(const char* pChipName)
{
    NVPW_OpenGL_RawMetricsConfig_Create_Params createParams = { NVPW_OpenGL_RawMetricsConfig_Create_Params_STRUCT_SIZE };
    createParams.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
    createParams.pChipName = pChipName;

    const NVPA_Status status = NVPW_OpenGL_RawMetricsConfig_Create(&createParams);
    return status ? nullptr : createParams.pRawMetricsConfig;
}
|
||||
|
||||
inline bool OpenGLIsGpuSupported(size_t sliIndex = 0)
|
||||
{
|
||||
const size_t deviceIndex = OpenGLGetNvperfDeviceIndex(sliIndex);
|
||||
|
||||
NVPW_OpenGL_Profiler_IsGpuSupported_Params params = { NVPW_OpenGL_Profiler_IsGpuSupported_Params_STRUCT_SIZE };
|
||||
params.deviceIndex = deviceIndex;
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_IsGpuSupported(¶ms);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_OpenGL_Profiler_IsGpuSupported failed on %s\n", OpenGLGetDeviceName().c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!params.isSupported)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "%s is not supported\n", OpenGLGetDeviceName().c_str());
|
||||
if (params.gpuArchitectureSupportLevel != NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_SUPPORTED)
|
||||
{
|
||||
const DeviceIdentifiers deviceIdentifiers = OpenGLGetDeviceIdentifiers(sliIndex);
|
||||
NV_PERF_LOG_ERR(10, "Unsupported GPU architecture %s\n", deviceIdentifiers.pChipName);
|
||||
}
|
||||
if (params.sliSupportLevel == NVPW_SLI_SUPPORT_LEVEL_UNSUPPORTED)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Devices in SLI configuration are not supported.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}}}
|
||||
336
ruins64k/tools/NvPerfUtility/include/NvPerfRangeProfiler.h
Normal file
336
ruins64k/tools/NvPerfUtility/include/NvPerfRangeProfiler.h
Normal file
@@ -0,0 +1,336 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#pragma once
|
||||
#include <list>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
|
||||
#include "NvPerfCounterData.h"
|
||||
#include "NvPerfCounterConfiguration.h"
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
/// Sizing options for a profiler session; the defaults are described by the original author as safe for realtime use.
struct SessionOptions
{
    size_t maxNumRanges{16};          ///< used as the per-pass range limit when sizing buffers
    size_t avgRangeNameLength{128};   ///< average range name length used when sizing buffers
    size_t numTraceBuffers{5};        ///< recommended: SwapChainDepth + 2
};
|
||||
|
||||
struct SetConfigParams
|
||||
{
|
||||
const uint8_t* pConfigImage;
|
||||
size_t configImageSize;
|
||||
const uint8_t* pCounterDataPrefix;
|
||||
size_t counterDataPrefixSize;
|
||||
size_t numPipelinedPasses;
|
||||
size_t numIsolatedPasses;
|
||||
uint16_t numNestingLevels;
|
||||
size_t numStatisticalSamples;
|
||||
|
||||
SetConfigParams()
|
||||
: pConfigImage()
|
||||
, configImageSize()
|
||||
, pCounterDataPrefix()
|
||||
, counterDataPrefixSize()
|
||||
, numPipelinedPasses()
|
||||
, numIsolatedPasses()
|
||||
, numNestingLevels()
|
||||
, numStatisticalSamples()
|
||||
{
|
||||
}
|
||||
|
||||
SetConfigParams(const CounterConfiguration& configuration, uint16_t numNestingLevels = 1, size_t numStatisticalSamples = 1)
|
||||
: pConfigImage(configuration.configImage.data())
|
||||
, configImageSize(configuration.configImage.size())
|
||||
, pCounterDataPrefix(configuration.counterDataPrefix.data())
|
||||
, counterDataPrefixSize(configuration.counterDataPrefix.size())
|
||||
, numPipelinedPasses(configuration.numPipelinedPasses)
|
||||
, numIsolatedPasses(configuration.numIsolatedPasses)
|
||||
, numNestingLevels(numNestingLevels)
|
||||
, numStatisticalSamples(numStatisticalSamples)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/// Out-param filled in by DecodeCounters.
/// Every flag defaults to false so that a DecodeResult is well-defined even when
/// DecodeCounters returns before assigning to it.
struct DecodeResult
{
    bool onePassDecoded = false;                  ///< set from the backend's "one pass decoded" output
    bool allPassesDecoded = false;                ///< set from the backend's "all passes decoded" output
    bool allStatisticalSamplesCollected = false;  ///< true once the required number of statistical samples has been collected
    std::vector<uint8_t> counterDataImage;        ///< moved out of the state machine once all statistical samples are collected; empty otherwise
};
|
||||
|
||||
class RangeProfilerStateMachine
|
||||
{
|
||||
public: // types
|
||||
struct IProfilerApi
|
||||
{
|
||||
virtual bool CreateCounterData(const SetConfigParams& config, std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch) const = 0;
|
||||
virtual bool SetConfig(const SetConfigParams& config) const = 0;
|
||||
virtual bool BeginPass() const = 0;
|
||||
virtual bool EndPass() const = 0;
|
||||
virtual bool PushRange(const char* pRangeName) = 0;
|
||||
virtual bool PopRange() = 0;
|
||||
virtual bool DecodeCounters(std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch, bool& onePassDecoded, bool& allPassesDecoded) const = 0;
|
||||
};
|
||||
|
||||
protected: // types
|
||||
struct CounterStateMachine
|
||||
{
|
||||
// state updated per-pass
|
||||
size_t numPassesSubmitted; /// number of passes submitted (incremented at EndPass)
|
||||
size_t numStatisticalSamplesCollected; /// number of times all passes were collected
|
||||
|
||||
// state derived from the configuration
|
||||
size_t numPassesPerStatisticalSample; /// number of passes required by the {ConfigImage, numNestingLevels}
|
||||
size_t numStatisticalSamplesRequired; /// number of repeated samplings required by SetConfig
|
||||
std::vector<uint8_t> counterDataImage; /// opaque buffer containing HW counter data; updated in DecodeCounters on each frame
|
||||
std::vector<uint8_t> counterDataScratch; /// opaque buffer needed by DecodeCounters
|
||||
|
||||
bool AllPassesSubmitted() const
|
||||
{
|
||||
const bool allPassesSubmitted = (numPassesSubmitted == numPassesPerStatisticalSample * numStatisticalSamplesRequired);
|
||||
return allPassesSubmitted;
|
||||
}
|
||||
};
|
||||
|
||||
protected: // members
|
||||
IProfilerApi& m_profilerApi;
|
||||
bool m_inPass;
|
||||
|
||||
// Use std::list for stable iterators and a guarantee of no-copy.
|
||||
typedef std::list<SetConfigParams> ConfigQueue;
|
||||
typedef std::list<CounterStateMachine> CountersQueue;
|
||||
bool m_needSetConfig;
|
||||
ConfigQueue m_configQueue; // m_configQueue.front() is the active configuration (by SetConfig), and is popped after all passes are submitted
|
||||
CountersQueue m_countersQueue; // queued CounterData, which may lag the configQueue when frames are rendered asynchronously
|
||||
CountersQueue::iterator m_submitCounterItr; // points at the CounterData corresponding to m_configQueue.front()
|
||||
|
||||
private:
|
||||
// non-copyable
|
||||
RangeProfilerStateMachine(const RangeProfilerStateMachine&);
|
||||
|
||||
public:
|
||||
~RangeProfilerStateMachine()
|
||||
{
|
||||
Reset();
|
||||
}
|
||||
|
||||
RangeProfilerStateMachine(IProfilerApi& profilerApi)
|
||||
: m_profilerApi(profilerApi)
|
||||
, m_inPass(false)
|
||||
, m_needSetConfig()
|
||||
, m_configQueue()
|
||||
, m_countersQueue()
|
||||
, m_submitCounterItr()
|
||||
{
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
m_submitCounterItr = {};
|
||||
m_countersQueue.clear();
|
||||
m_configQueue.clear();
|
||||
m_needSetConfig = false;
|
||||
m_inPass = false;
|
||||
}
|
||||
|
||||
bool IsInPass() const
|
||||
{
|
||||
return m_inPass;
|
||||
}
|
||||
|
||||
bool EnqueueCounterCollection(const SetConfigParams& config)
|
||||
{
|
||||
CounterStateMachine counterStateMachine = {};
|
||||
counterStateMachine.numPassesPerStatisticalSample = config.numPipelinedPasses + config.numIsolatedPasses * config.numNestingLevels;
|
||||
counterStateMachine.numStatisticalSamplesRequired = config.numStatisticalSamples;
|
||||
if (!m_profilerApi.CreateCounterData(config, counterStateMachine.counterDataImage, counterStateMachine.counterDataScratch))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_configQueue.empty())
|
||||
{
|
||||
m_needSetConfig = true;
|
||||
}
|
||||
m_configQueue.push_back(config);
|
||||
|
||||
const bool countersQueueWasEmpty = m_countersQueue.empty();
|
||||
m_countersQueue.emplace_back(std::move(counterStateMachine));
|
||||
if (countersQueueWasEmpty)
|
||||
{
|
||||
m_submitCounterItr = m_countersQueue.begin();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BeginPass()
|
||||
{
|
||||
if (m_inPass)
|
||||
{
|
||||
// TODO: error - must be called in session, but outside of a pass
|
||||
return false;
|
||||
}
|
||||
if (m_configQueue.empty())
|
||||
{
|
||||
// Do not enqueue additional HW data collection.
|
||||
return true;
|
||||
}
|
||||
|
||||
if (m_needSetConfig)
|
||||
{
|
||||
if (!m_profilerApi.SetConfig(m_configQueue.front()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
m_needSetConfig = false;
|
||||
}
|
||||
|
||||
if (!m_profilerApi.BeginPass())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
m_inPass = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EndPass()
|
||||
{
|
||||
if (!m_inPass)
|
||||
{
|
||||
// TODO: error - must be called in session, and inside of a pass
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_configQueue.empty())
|
||||
{
|
||||
// Do not enqueue additional HW data collection.
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!m_profilerApi.EndPass())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
CounterStateMachine& counterStateMachine = *m_submitCounterItr;
|
||||
counterStateMachine.numPassesSubmitted += 1;
|
||||
if (counterStateMachine.AllPassesSubmitted())
|
||||
{
|
||||
++m_submitCounterItr;
|
||||
m_configQueue.pop_front();
|
||||
if (!m_configQueue.empty())
|
||||
{
|
||||
m_needSetConfig = true;
|
||||
}
|
||||
}
|
||||
|
||||
m_inPass = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PushRange(const char* pRangeName)
|
||||
{
|
||||
if (!m_inPass)
|
||||
{
|
||||
// TODO: error - must be called in session, and inside of a pass
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_configQueue.empty())
|
||||
{
|
||||
// Do not enqueue additional HW data collection.
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!m_profilerApi.PushRange(pRangeName))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PopRange()
|
||||
{
|
||||
if (!m_inPass)
|
||||
{
|
||||
// TODO: error - must be called in session, and inside of a pass
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_configQueue.empty())
|
||||
{
|
||||
// Do not enqueue additional HW data collection.
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!m_profilerApi.PopRange())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DecodeCounters(DecodeResult& decodeResult)
|
||||
{
|
||||
if (m_countersQueue.empty())
|
||||
{
|
||||
// TODO: error - nothing is queued for collection. see SetConfig ...
|
||||
return false;
|
||||
}
|
||||
|
||||
CounterStateMachine& counterStateMachine = m_countersQueue.front();
|
||||
|
||||
decodeResult = {};
|
||||
if (!m_profilerApi.DecodeCounters(counterStateMachine.counterDataImage, counterStateMachine.counterDataScratch, decodeResult.onePassDecoded, decodeResult.allPassesDecoded))
|
||||
{
|
||||
// TODO: error - the session must be torn down
|
||||
return false;
|
||||
}
|
||||
|
||||
if (decodeResult.allPassesDecoded)
|
||||
{
|
||||
counterStateMachine.numStatisticalSamplesCollected += 1;
|
||||
if (counterStateMachine.numStatisticalSamplesCollected == counterStateMachine.numStatisticalSamplesRequired)
|
||||
{
|
||||
decodeResult.allStatisticalSamplesCollected = true;
|
||||
decodeResult.counterDataImage = std::move(counterStateMachine.counterDataImage);
|
||||
m_countersQueue.pop_front();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AllPassesSubmitted() const
|
||||
{
|
||||
const bool allPassesSubmitted = m_configQueue.empty();
|
||||
return allPassesSubmitted;
|
||||
}
|
||||
};
|
||||
|
||||
}}}
|
||||
373
ruins64k/tools/NvPerfUtility/include/NvPerfRangeProfilerD3D11.h
Normal file
373
ruins64k/tools/NvPerfUtility/include/NvPerfRangeProfilerD3D11.h
Normal file
@@ -0,0 +1,373 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "NvPerfRangeProfiler.h"
|
||||
#include "NvPerfD3D11.h"
|
||||
#include <atlbase.h>
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
class RangeProfilerD3D11
|
||||
{
|
||||
private:
|
||||
struct ProfilerApi : RangeProfilerStateMachine::IProfilerApi
|
||||
{
|
||||
CComPtr<ID3D11DeviceContext> pDeviceContext;
|
||||
SessionOptions sessionOptions;
|
||||
|
||||
ProfilerApi()
|
||||
: pDeviceContext(nullptr)
|
||||
, sessionOptions()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool CreateCounterData(const SetConfigParams& config, std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch) const override
|
||||
{
|
||||
NVPA_Status nvpaStatus;
|
||||
|
||||
NVPW_D3D11_Profiler_CounterDataImageOptions counterDataImageOptions = { NVPW_D3D11_Profiler_CounterDataImageOptions_STRUCT_SIZE };
|
||||
counterDataImageOptions.pCounterDataPrefix = config.pCounterDataPrefix;
|
||||
counterDataImageOptions.counterDataPrefixSize = config.counterDataPrefixSize;
|
||||
counterDataImageOptions.maxNumRanges = static_cast<uint32_t>(sessionOptions.maxNumRanges);
|
||||
counterDataImageOptions.maxNumRangeTreeNodes = static_cast<uint32_t>(2 * sessionOptions.maxNumRanges);
|
||||
counterDataImageOptions.maxRangeNameLength = static_cast<uint32_t>(sessionOptions.avgRangeNameLength);
|
||||
|
||||
NVPW_D3D11_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = { NVPW_D3D11_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE };
|
||||
calculateSizeParams.counterDataImageOptionsSize = NVPW_D3D11_Profiler_CounterDataImageOptions_STRUCT_SIZE;
|
||||
calculateSizeParams.pOptions = &counterDataImageOptions;
|
||||
nvpaStatus = NVPW_D3D11_Profiler_CounterDataImage_CalculateSize(&calculateSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
counterDataImage.resize(calculateSizeParams.counterDataImageSize);
|
||||
|
||||
NVPW_D3D11_Profiler_CounterDataImage_Initialize_Params initializeParams = { NVPW_D3D11_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE };
|
||||
initializeParams.counterDataImageOptionsSize = NVPW_D3D11_Profiler_CounterDataImageOptions_STRUCT_SIZE;
|
||||
initializeParams.pOptions = &counterDataImageOptions;
|
||||
initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
initializeParams.pCounterDataImage = &counterDataImage[0];
|
||||
nvpaStatus = NVPW_D3D11_Profiler_CounterDataImage_Initialize(&initializeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_D3D11_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = { NVPW_D3D11_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE };
|
||||
scratchBufferSizeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
scratchBufferSizeParams.pCounterDataImage = initializeParams.pCounterDataImage;
|
||||
nvpaStatus = NVPW_D3D11_Profiler_CounterDataImage_CalculateScratchBufferSize(&scratchBufferSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
counterDataScratch.resize(scratchBufferSizeParams.counterDataScratchBufferSize);
|
||||
|
||||
NVPW_D3D11_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = { NVPW_D3D11_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE };
|
||||
initScratchBufferParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
initScratchBufferParams.pCounterDataImage = initializeParams.pCounterDataImage;
|
||||
initScratchBufferParams.counterDataScratchBufferSize = scratchBufferSizeParams.counterDataScratchBufferSize;
|
||||
initScratchBufferParams.pCounterDataScratchBuffer = &counterDataScratch[0];
|
||||
nvpaStatus = NVPW_D3D11_Profiler_CounterDataImage_InitializeScratchBuffer(&initScratchBufferParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool SetConfig(const SetConfigParams& config) const override
|
||||
{
|
||||
NVPW_D3D11_Profiler_DeviceContext_SetConfig_Params setConfigParams = { NVPW_D3D11_Profiler_DeviceContext_SetConfig_Params_STRUCT_SIZE };
|
||||
setConfigParams.pDeviceContext = pDeviceContext;
|
||||
setConfigParams.pConfig = config.pConfigImage;
|
||||
setConfigParams.configSize = config.configImageSize;
|
||||
setConfigParams.minNestingLevel = 1;
|
||||
setConfigParams.numNestingLevels = config.numNestingLevels;
|
||||
setConfigParams.passIndex = 0;
|
||||
setConfigParams.targetNestingLevel = 1;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_SetConfig(&setConfigParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool BeginPass() const override
|
||||
{
|
||||
NVPW_D3D11_Profiler_DeviceContext_BeginPass_Params beginPassParams = { NVPW_D3D11_Profiler_DeviceContext_BeginPass_Params_STRUCT_SIZE };
|
||||
beginPassParams.pDeviceContext = pDeviceContext;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_BeginPass(&beginPassParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool EndPass() const override
|
||||
{
|
||||
NVPW_D3D11_Profiler_DeviceContext_EndPass_Params endPassParams = { NVPW_D3D11_Profiler_DeviceContext_EndPass_Params_STRUCT_SIZE };
|
||||
endPassParams.pDeviceContext = pDeviceContext;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_EndPass(&endPassParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool PushRange(const char* pRangeName) override
|
||||
{
|
||||
NVPW_D3D11_Profiler_DeviceContext_PushRange_Params pushRangeParams = { NVPW_D3D11_Profiler_DeviceContext_PushRange_Params_STRUCT_SIZE };
|
||||
pushRangeParams.pDeviceContext = pDeviceContext;
|
||||
pushRangeParams.pRangeName = pRangeName;
|
||||
pushRangeParams.rangeNameLength = 0;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_PushRange(&pushRangeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool PopRange() override
|
||||
{
|
||||
NVPW_D3D11_Profiler_DeviceContext_PopRange_Params popParams = { NVPW_D3D11_Profiler_DeviceContext_PopRange_Params_STRUCT_SIZE };
|
||||
popParams.pDeviceContext = pDeviceContext;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_PopRange(&popParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool DecodeCounters(std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch, bool& onePassDecoded, bool& allPassesDecoded) const
|
||||
{
|
||||
NVPW_D3D11_Profiler_DeviceContext_DecodeCounters_Params decodeParams = { NVPW_D3D11_Profiler_DeviceContext_DecodeCounters_Params_STRUCT_SIZE };
|
||||
decodeParams.pDeviceContext = pDeviceContext;
|
||||
decodeParams.counterDataImageSize = counterDataImage.size();
|
||||
decodeParams.pCounterDataImage = counterDataImage.data();
|
||||
decodeParams.counterDataScratchBufferSize = counterDataScratch.size();
|
||||
decodeParams.pCounterDataScratchBuffer = counterDataScratch.data();
|
||||
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_DecodeCounters(&decodeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
onePassDecoded = decodeParams.onePassCollected;
|
||||
allPassesDecoded = decodeParams.allPassesCollected;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Initialize(ID3D11DeviceContext* pDeviceContext_, SessionOptions sessionOptions_)
|
||||
{
|
||||
pDeviceContext = pDeviceContext_;
|
||||
sessionOptions = sessionOptions_;
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
NVPW_D3D11_Profiler_DeviceContext_EndSession_Params endSessionParams = {NVPW_D3D11_Profiler_DeviceContext_EndSession_Params_STRUCT_SIZE};
|
||||
endSessionParams.pDeviceContext = pDeviceContext;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_EndSession(&endSessionParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_D3D11_Profiler_DeviceContext_EndSession failed, nvpaStatus = %d\n", nvpaStatus);
|
||||
}
|
||||
|
||||
sessionOptions = {};
|
||||
pDeviceContext = nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
ProfilerApi m_profilerApi;
|
||||
RangeProfilerStateMachine m_stateMachine;
|
||||
|
||||
public:
|
||||
~RangeProfilerD3D11()
|
||||
{
|
||||
}
|
||||
|
||||
RangeProfilerD3D11(const RangeProfilerD3D11&) = delete;
|
||||
|
||||
RangeProfilerD3D11()
|
||||
: m_profilerApi()
|
||||
, m_stateMachine(m_profilerApi)
|
||||
{
|
||||
}
|
||||
// TODO: make this move friendly
|
||||
|
||||
RangeProfilerD3D11& operator=(const RangeProfilerD3D11&) = delete;
|
||||
|
||||
bool IsInSession() const
|
||||
{
|
||||
return !!m_profilerApi.pDeviceContext;
|
||||
}
|
||||
|
||||
bool IsInPass() const
|
||||
{
|
||||
return m_stateMachine.IsInPass();
|
||||
}
|
||||
|
||||
ID3D11DeviceContext* GetDeviceContext() const
|
||||
{
|
||||
return m_profilerApi.pDeviceContext;
|
||||
}
|
||||
|
||||
bool BeginSession(ID3D11DeviceContext* pDeviceContext, const SessionOptions& sessionOptions)
|
||||
{
|
||||
if (IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "already in a session\n");
|
||||
return false;
|
||||
}
|
||||
if (!nv::perf::D3D11IsNvidiaDevice(pDeviceContext) || !nv::perf::profiler::D3D11IsGpuSupported(pDeviceContext))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "device is not supported for profiling\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPA_Status nvpaStatus;
|
||||
|
||||
NVPW_D3D11_Profiler_CalcTraceBufferSize_Params calcTraceBufferSizeParam = { NVPW_D3D11_Profiler_CalcTraceBufferSize_Params_STRUCT_SIZE };
|
||||
calcTraceBufferSizeParam.maxRangesPerPass = sessionOptions.maxNumRanges;
|
||||
calcTraceBufferSizeParam.avgRangeNameLength = sessionOptions.avgRangeNameLength;
|
||||
nvpaStatus = NVPW_D3D11_Profiler_CalcTraceBufferSize(&calcTraceBufferSizeParam);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_D3D11_Profiler_DeviceContext_BeginSession_Params beginSessionParams = { NVPW_D3D11_Profiler_DeviceContext_BeginSession_Params_STRUCT_SIZE };
|
||||
beginSessionParams.pDeviceContext = pDeviceContext;
|
||||
beginSessionParams.numTraceBuffers = sessionOptions.numTraceBuffers;
|
||||
beginSessionParams.traceBufferSize = calcTraceBufferSizeParam.traceBufferSize;
|
||||
beginSessionParams.maxRangesPerPass = sessionOptions.maxNumRanges;
|
||||
beginSessionParams.maxLaunchesPerPass = sessionOptions.maxNumRanges;
|
||||
nvpaStatus = NVPW_D3D11_Profiler_DeviceContext_BeginSession(&beginSessionParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_PRIVILEGE)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Failed to start profiler session: profiling permissions not enabled. Please follow these instructions: https://developer.nvidia.com/ERR_NVGPUCTRPERM\n");
|
||||
}
|
||||
else if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Failed to start profiler session: insufficient driver version. Please install the latest NVIDIA driver from https://www.nvidia.com\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Failed to start profiler session: unknown error. It may be a resource conflict - only one profiler session can run at a time per GPU.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
m_profilerApi.sessionOptions = sessionOptions;
|
||||
m_profilerApi.pDeviceContext = pDeviceContext;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EndSession()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_stateMachine.Reset();
|
||||
m_profilerApi.Reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EnqueueCounterCollection(const SetConfigParams& config)
|
||||
{
|
||||
const bool status = m_stateMachine.EnqueueCounterCollection(config);
|
||||
return status;
|
||||
}
|
||||
|
||||
bool EnqueueCounterCollection(const CounterConfiguration& configuration, uint16_t numNestingLevels = 1, size_t numStatisticalSamples = 1)
|
||||
{
|
||||
const bool status = m_stateMachine.EnqueueCounterCollection(SetConfigParams(configuration, numNestingLevels, numStatisticalSamples));
|
||||
return status;
|
||||
}
|
||||
|
||||
bool BeginPass()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.BeginPass();
|
||||
return status;
|
||||
}
|
||||
|
||||
bool EndPass()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.EndPass();
|
||||
return status;
|
||||
}
|
||||
|
||||
bool PushRange(const char* pRangeName)
|
||||
{
|
||||
const bool status = m_stateMachine.PushRange(pRangeName);
|
||||
return status;
|
||||
}
|
||||
|
||||
bool PopRange()
|
||||
{
|
||||
const bool status = m_stateMachine.PopRange();
|
||||
return status;
|
||||
}
|
||||
|
||||
bool DecodeCounters(DecodeResult& decodeResult)
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.DecodeCounters(decodeResult);
|
||||
return status;
|
||||
}
|
||||
|
||||
bool AllPassesSubmitted() const
|
||||
{
|
||||
const bool allPassesSubmitted = m_stateMachine.AllPassesSubmitted();
|
||||
return allPassesSubmitted;
|
||||
}
|
||||
};
|
||||
}}}
|
||||
419
ruins64k/tools/NvPerfUtility/include/NvPerfRangeProfilerD3D12.h
Normal file
419
ruins64k/tools/NvPerfUtility/include/NvPerfRangeProfilerD3D12.h
Normal file
@@ -0,0 +1,419 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include "NvPerfInit.h"
|
||||
#include "NvPerfCounterConfiguration.h"
|
||||
#include "NvPerfRangeProfiler.h"
|
||||
#include "NvPerfD3D12.h"
|
||||
|
||||
struct ID3D12CommandQueue;
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
class RangeProfilerD3D12
|
||||
{
|
||||
protected:
|
||||
struct ProfilerApi : RangeProfilerStateMachine::IProfilerApi
|
||||
{
|
||||
CComPtr<ID3D12CommandQueue> pCommandQueue;
|
||||
SessionOptions sessionOptions;
|
||||
|
||||
ProfilerApi()
|
||||
: pCommandQueue(nullptr)
|
||||
, sessionOptions()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool CreateCounterData(const SetConfigParams& config, std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch) const override
|
||||
{
|
||||
NVPA_Status nvpaStatus;
|
||||
|
||||
NVPW_D3D12_Profiler_CounterDataImageOptions counterDataImageOptions = { NVPW_D3D12_Profiler_CounterDataImageOptions_STRUCT_SIZE };
|
||||
counterDataImageOptions.pCounterDataPrefix = config.pCounterDataPrefix;
|
||||
counterDataImageOptions.counterDataPrefixSize = config.counterDataPrefixSize;
|
||||
counterDataImageOptions.maxNumRanges = static_cast<uint32_t>(sessionOptions.maxNumRanges);
|
||||
counterDataImageOptions.maxNumRangeTreeNodes = static_cast<uint32_t>(2 * sessionOptions.maxNumRanges);
|
||||
counterDataImageOptions.maxRangeNameLength = static_cast<uint32_t>(sessionOptions.avgRangeNameLength);
|
||||
|
||||
NVPW_D3D12_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = { NVPW_D3D12_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE };
|
||||
calculateSizeParams.pOptions = &counterDataImageOptions;
|
||||
calculateSizeParams.counterDataImageOptionsSize = NVPW_D3D12_Profiler_CounterDataImageOptions_STRUCT_SIZE;
|
||||
nvpaStatus = NVPW_D3D12_Profiler_CounterDataImage_CalculateSize(&calculateSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
counterDataImage.resize(calculateSizeParams.counterDataImageSize);
|
||||
|
||||
NVPW_D3D12_Profiler_CounterDataImage_Initialize_Params initializeParams = { NVPW_D3D12_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE };
|
||||
initializeParams.counterDataImageOptionsSize = NVPW_D3D12_Profiler_CounterDataImageOptions_STRUCT_SIZE;
|
||||
initializeParams.pOptions = &counterDataImageOptions;
|
||||
initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
initializeParams.pCounterDataImage = &counterDataImage[0];
|
||||
nvpaStatus = NVPW_D3D12_Profiler_CounterDataImage_Initialize(&initializeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_D3D12_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = { NVPW_D3D12_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE };
|
||||
scratchBufferSizeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
scratchBufferSizeParams.pCounterDataImage = initializeParams.pCounterDataImage;
|
||||
nvpaStatus = NVPW_D3D12_Profiler_CounterDataImage_CalculateScratchBufferSize(&scratchBufferSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
counterDataScratch.resize(scratchBufferSizeParams.counterDataScratchBufferSize);
|
||||
|
||||
NVPW_D3D12_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = { NVPW_D3D12_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE };
|
||||
initScratchBufferParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
initScratchBufferParams.pCounterDataImage = initializeParams.pCounterDataImage;
|
||||
initScratchBufferParams.counterDataScratchBufferSize = scratchBufferSizeParams.counterDataScratchBufferSize;
|
||||
initScratchBufferParams.pCounterDataScratchBuffer = &counterDataScratch[0];
|
||||
|
||||
nvpaStatus = NVPW_D3D12_Profiler_CounterDataImage_InitializeScratchBuffer(&initScratchBufferParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool SetConfig(const SetConfigParams& config) const override
|
||||
{
|
||||
NVPW_D3D12_Profiler_Queue_SetConfig_Params setConfigParams = { NVPW_D3D12_Profiler_Queue_SetConfig_Params_STRUCT_SIZE };
|
||||
setConfigParams.pCommandQueue = pCommandQueue;
|
||||
setConfigParams.pConfig = config.pConfigImage;
|
||||
setConfigParams.configSize = config.configImageSize;
|
||||
setConfigParams.minNestingLevel = 1;
|
||||
setConfigParams.numNestingLevels = config.numNestingLevels;
|
||||
setConfigParams.passIndex = 0;
|
||||
setConfigParams.targetNestingLevel = 1;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_SetConfig(&setConfigParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool BeginPass() const override
|
||||
{
|
||||
NVPW_D3D12_Profiler_Queue_BeginPass_Params beginPassParams = { NVPW_D3D12_Profiler_Queue_BeginPass_Params_STRUCT_SIZE };
|
||||
beginPassParams.pCommandQueue = pCommandQueue;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_BeginPass(&beginPassParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool EndPass() const override
|
||||
{
|
||||
NVPW_D3D12_Profiler_Queue_EndPass_Params endPassParams = { NVPW_D3D12_Profiler_Queue_EndPass_Params_STRUCT_SIZE };
|
||||
endPassParams.pCommandQueue = pCommandQueue;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_EndPass(&endPassParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool PushRange(const char* pRangeName) override
|
||||
{
|
||||
NVPW_D3D12_Profiler_Queue_PushRange_Params pushRangeParams = {NVPW_D3D12_Profiler_Queue_PushRange_Params_STRUCT_SIZE};
|
||||
pushRangeParams.pRangeName = pRangeName;
|
||||
pushRangeParams.rangeNameLength = 0;
|
||||
pushRangeParams.pCommandQueue = pCommandQueue;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_PushRange(&pushRangeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool PopRange() override
|
||||
{
|
||||
NVPW_D3D12_Profiler_Queue_PopRange_Params popParams = {NVPW_D3D12_Profiler_Queue_PopRange_Params_STRUCT_SIZE};
|
||||
popParams.pCommandQueue = pCommandQueue;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_PopRange(&popParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool DecodeCounters(std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch, bool& onePassDecoded, bool& allPassesDecoded) const
|
||||
{
|
||||
NVPW_D3D12_Profiler_Queue_DecodeCounters_Params decodeParams = { NVPW_D3D12_Profiler_Queue_DecodeCounters_Params_STRUCT_SIZE };
|
||||
decodeParams.pCommandQueue = pCommandQueue;
|
||||
decodeParams.counterDataImageSize = counterDataImage.size();
|
||||
decodeParams.pCounterDataImage = counterDataImage.data();
|
||||
decodeParams.counterDataScratchBufferSize = counterDataScratch.size();
|
||||
decodeParams.pCounterDataScratchBuffer = counterDataScratch.data();
|
||||
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_DecodeCounters(&decodeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
onePassDecoded = decodeParams.onePassCollected;
|
||||
allPassesDecoded = decodeParams.allPassesCollected;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Stores the queue and session options for later API calls. Always returns true.
bool Initialize(ID3D12CommandQueue* pCommandQueue_, const SessionOptions& sessionOptions_)
{
    sessionOptions = sessionOptions_;
    pCommandQueue = pCommandQueue_;
    return true;
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
NVPW_D3D12_Profiler_Queue_EndSession_Params endSessionParams = {NVPW_D3D12_Profiler_Queue_EndSession_Params_STRUCT_SIZE};
|
||||
endSessionParams.pCommandQueue = pCommandQueue;
|
||||
endSessionParams.timeout = INFINITE;
|
||||
NVPA_Status nvpaStatus = NVPW_D3D12_Profiler_Queue_EndSession(&endSessionParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_D3D12_Profiler_Queue_EndSession failed, nvpaStatus = %d\n", nvpaStatus);
|
||||
}
|
||||
|
||||
sessionOptions = {};
|
||||
pCommandQueue = nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
protected: // members
|
||||
ProfilerApi m_profilerApi;
|
||||
RangeProfilerStateMachine m_stateMachine;
|
||||
std::thread m_spgoThread;
|
||||
volatile bool m_spgoThreadExited;
|
||||
|
||||
private:
|
||||
// non-copyable
|
||||
RangeProfilerD3D12(const RangeProfilerD3D12&);
|
||||
|
||||
/// Entry point of the background thread started by BeginSession().
/// Blocks in NVPW_D3D12_Queue_ServicePendingGpuOperations (numOperations = 0, INFINITE
/// timeout) and sets pRangeProfilerD3D12->m_spgoThreadExited once that call returns.
static void SpgoThreadProc(RangeProfilerD3D12* pRangeProfilerD3D12, ID3D12CommandQueue* pCommandQueue)
{
    // Run continuously in the background, handling all BeginPass and EndPass GPU operations until EndSession().
    NVPW_D3D12_Queue_ServicePendingGpuOperations_Params serviceGpuOpsParams = { NVPW_D3D12_Queue_ServicePendingGpuOperations_Params_STRUCT_SIZE };
    serviceGpuOpsParams.pCommandQueue = pCommandQueue;
    serviceGpuOpsParams.numOperations = 0; // run until EndSession()
    serviceGpuOpsParams.timeout = INFINITE;
    NVPA_Status nvpaStatus = NVPW_D3D12_Queue_ServicePendingGpuOperations(&serviceGpuOpsParams);
    if (nvpaStatus)
    {
        // Previously swallowed; surface it so an unexpected thread exit can be diagnosed.
        NV_PERF_LOG_ERR(10, "NVPW_D3D12_Queue_ServicePendingGpuOperations failed, nvpaStatus = %d\n", nvpaStatus);
    }

    pRangeProfilerD3D12->m_spgoThreadExited = true;
}
|
||||
|
||||
public:
|
||||
/// Ends a still-active session before the members are destroyed.
/// Without this, m_spgoThread would still be joinable when its destructor runs,
/// and std::thread's destructor calls std::terminate in that case.
~RangeProfilerD3D12()
{
    if (IsInSession())
    {
        EndSession();
    }
}
|
||||
|
||||
RangeProfilerD3D12()
|
||||
: m_profilerApi()
|
||||
, m_stateMachine(m_profilerApi)
|
||||
, m_spgoThread()
|
||||
, m_spgoThreadExited()
|
||||
{
|
||||
}
|
||||
// TODO: make this move friendly
|
||||
|
||||
bool IsInSession() const
|
||||
{
|
||||
return !!m_profilerApi.pCommandQueue;
|
||||
}
|
||||
|
||||
bool IsInPass() const
|
||||
{
|
||||
return m_stateMachine.IsInPass();
|
||||
}
|
||||
|
||||
ID3D12CommandQueue* GetCommandQueue() const
|
||||
{
|
||||
return m_profilerApi.pCommandQueue;
|
||||
}
|
||||
|
||||
/// Starts a profiler session on `pCommandQueue`.
///
/// Computes the trace buffer size from `sessionOptions`, begins the NVPW session,
/// launches the background thread (SpgoThreadProc) and stores the queue/options.
/// @return false (after logging) when a session is already active, the queue is null,
///         the device is unsupported, or any NVPW call fails.
bool BeginSession(
    ID3D12CommandQueue* pCommandQueue,
    const SessionOptions& sessionOptions)
{
    if (IsInSession())
    {
        NV_PERF_LOG_ERR(10, "already in a session\n");
        return false;
    }
    if (!pCommandQueue)
    {
        // IsInSession() is derived from the stored queue pointer, so a null queue could never form a usable session.
        NV_PERF_LOG_ERR(10, "pCommandQueue must not be null\n");
        return false;
    }
    if (!D3D12IsNvidiaDevice(pCommandQueue) || !D3D12IsGpuSupported(pCommandQueue))
    {
        NV_PERF_LOG_ERR(10, "device is not supported for profiling\n");
        return false;
    }

    NVPA_Status nvpaStatus;

    NVPW_D3D12_Profiler_CalcTraceBufferSize_Params calcTraceBufferSizeParam = { NVPW_D3D12_Profiler_CalcTraceBufferSize_Params_STRUCT_SIZE };
    calcTraceBufferSizeParam.maxRangesPerPass = sessionOptions.maxNumRanges;
    calcTraceBufferSizeParam.avgRangeNameLength = sessionOptions.avgRangeNameLength;
    nvpaStatus = NVPW_D3D12_Profiler_CalcTraceBufferSize(&calcTraceBufferSizeParam);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_D3D12_Profiler_CalcTraceBufferSize failed, nvpaStatus = %d\n", nvpaStatus);
        return false;
    }

    NVPW_D3D12_Profiler_Queue_BeginSession_Params beginSessionParams = { NVPW_D3D12_Profiler_Queue_BeginSession_Params_STRUCT_SIZE };
    beginSessionParams.pCommandQueue = pCommandQueue;
    beginSessionParams.numTraceBuffers = sessionOptions.numTraceBuffers;
    beginSessionParams.traceBufferSize = calcTraceBufferSizeParam.traceBufferSize;
    beginSessionParams.maxRangesPerPass = sessionOptions.maxNumRanges;
    beginSessionParams.maxLaunchesPerPass = sessionOptions.maxNumRanges;
    nvpaStatus = NVPW_D3D12_Profiler_Queue_BeginSession(&beginSessionParams);
    if (nvpaStatus)
    {
        if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_PRIVILEGE)
        {
            NV_PERF_LOG_ERR(10, "Failed to start profiler session: profiling permissions not enabled. Please follow these instructions: https://developer.nvidia.com/ERR_NVGPUCTRPERM\n");
        }
        else if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION)
        {
            NV_PERF_LOG_ERR(10, "Failed to start profiler session: insufficient driver version. Please install the latest NVIDIA driver from https://www.nvidia.com\n");
        }
        else
        {
            NV_PERF_LOG_ERR(10, "Failed to start profiler session: unknown error. It may be a resource conflict - only one profiler session can run at a time per GPU.\n");
        }
        return false;
    }

    // Clear the flag before the thread exists so the thread's own write cannot be overwritten.
    m_spgoThreadExited = false;
    m_spgoThread = std::thread(SpgoThreadProc, this, pCommandQueue);

    m_profilerApi.Initialize(pCommandQueue, sessionOptions);
    return true;
}
|
||||
|
||||
bool EndSession()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_stateMachine.Reset();
|
||||
m_profilerApi.Reset();
|
||||
m_spgoThread.join();
|
||||
m_spgoThreadExited = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EnqueueCounterCollection(const SetConfigParams& config)
|
||||
{
|
||||
const bool status = m_stateMachine.EnqueueCounterCollection(config);
|
||||
return status;
|
||||
}
|
||||
|
||||
bool EnqueueCounterCollection(const CounterConfiguration& configuration, uint16_t numNestingLevels = 1, size_t numStatisticalSamples = 1)
|
||||
{
|
||||
const bool status = m_stateMachine.EnqueueCounterCollection(SetConfigParams(configuration, numNestingLevels, numStatisticalSamples));
|
||||
return status;
|
||||
}
|
||||
|
||||
bool BeginPass()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.BeginPass();
|
||||
return status;
|
||||
}
|
||||
|
||||
bool EndPass()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.EndPass();
|
||||
return status;
|
||||
}
|
||||
|
||||
// Convenience method to start a Queue-level range. For CommandLists, use D3D12RangeCommands::PushRange.
|
||||
bool PushRange(const char* pRangeName)
|
||||
{
|
||||
const bool status = m_stateMachine.PushRange(pRangeName);
|
||||
return status;
|
||||
}
|
||||
|
||||
// Convenience method to end a Queue-level range. For CommandLists, use D3D12RangeCommands::PopRange.
|
||||
bool PopRange()
|
||||
{
|
||||
const bool status = m_stateMachine.PopRange();
|
||||
return status;
|
||||
}
|
||||
|
||||
/// Decodes counters into decodeResult through the state machine.
/// Refuses (logging an error, returning false) when no session is active or when the
/// background thread has already exited; the two checks run in that order.
bool DecodeCounters(DecodeResult& decodeResult)
{
    if (!IsInSession())
    {
        NV_PERF_LOG_ERR(10, "must be called in a session\n");
        return false;
    }
    else if (m_spgoThreadExited)
    {
        NV_PERF_LOG_ERR(10, "the background thread exited; possible hang on subsequent CPU-waiting-on-GPU calls\n");
        return false;
    }

    return m_stateMachine.DecodeCounters(decodeResult);
}
|
||||
|
||||
bool AllPassesSubmitted() const
|
||||
{
|
||||
const bool allPassesSubmitted = m_stateMachine.AllPassesSubmitted();
|
||||
return allPassesSubmitted;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}}}
|
||||
401
ruins64k/tools/NvPerfUtility/include/NvPerfRangeProfilerOpenGL.h
Normal file
401
ruins64k/tools/NvPerfUtility/include/NvPerfRangeProfilerOpenGL.h
Normal file
@@ -0,0 +1,401 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
#include "NvPerfInit.h"
|
||||
#include "NvPerfCounterConfiguration.h"
|
||||
#include "NvPerfRangeProfiler.h"
|
||||
#include "NvPerfOpenGL.h"
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
class RangeProfilerOpenGL
|
||||
{
|
||||
protected:
|
||||
struct ProfilerApi : RangeProfilerStateMachine::IProfilerApi
|
||||
{
|
||||
size_t maxQueueRangesPerPass;
|
||||
size_t nextCommandBufferIdx;
|
||||
SessionOptions sessionOptions;
|
||||
NVPW_OpenGL_GraphicsContext* pGraphicsContext;
|
||||
|
||||
ProfilerApi()
|
||||
: maxQueueRangesPerPass(1)
|
||||
, nextCommandBufferIdx()
|
||||
, sessionOptions()
|
||||
, pGraphicsContext()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool CreateCounterData(const SetConfigParams& config, std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch) const override
|
||||
{
|
||||
NVPA_Status nvpaStatus;
|
||||
|
||||
NVPW_OpenGL_Profiler_CounterDataImageOptions counterDataImageOption = { NVPW_OpenGL_Profiler_CounterDataImageOptions_STRUCT_SIZE };
|
||||
counterDataImageOption.pCounterDataPrefix = config.pCounterDataPrefix;
|
||||
counterDataImageOption.counterDataPrefixSize = config.counterDataPrefixSize;
|
||||
counterDataImageOption.maxNumRanges = static_cast<uint32_t>(sessionOptions.maxNumRanges);
|
||||
counterDataImageOption.maxNumRangeTreeNodes = static_cast<uint32_t>(2 * sessionOptions.maxNumRanges);
|
||||
counterDataImageOption.maxRangeNameLength = static_cast<uint32_t>(sessionOptions.avgRangeNameLength);
|
||||
|
||||
NVPW_OpenGL_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = { NVPW_OpenGL_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE };
|
||||
calculateSizeParams.pOptions = &counterDataImageOption;
|
||||
calculateSizeParams.counterDataImageOptionsSize = NVPW_OpenGL_Profiler_CounterDataImageOptions_STRUCT_SIZE;
|
||||
nvpaStatus = NVPW_OpenGL_Profiler_CounterDataImage_CalculateSize(&calculateSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_OpenGL_Profiler_CounterDataImage_Initialize_Params initializeParams = { NVPW_OpenGL_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE };
|
||||
initializeParams.counterDataImageOptionsSize = NVPW_OpenGL_Profiler_CounterDataImageOptions_STRUCT_SIZE;
|
||||
initializeParams.pOptions = &counterDataImageOption;
|
||||
initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
|
||||
counterDataImage.resize(calculateSizeParams.counterDataImageSize);
|
||||
initializeParams.pCounterDataImage = &counterDataImage[0];
|
||||
nvpaStatus = NVPW_OpenGL_Profiler_CounterDataImage_Initialize(&initializeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_OpenGL_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = { NVPW_OpenGL_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE };
|
||||
scratchBufferSizeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
scratchBufferSizeParams.pCounterDataImage = initializeParams.pCounterDataImage;
|
||||
nvpaStatus = NVPW_OpenGL_Profiler_CounterDataImage_CalculateScratchBufferSize(&scratchBufferSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
counterDataScratch.resize(scratchBufferSizeParams.counterDataScratchBufferSize);
|
||||
|
||||
NVPW_OpenGL_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = { NVPW_OpenGL_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE };
|
||||
initScratchBufferParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
initScratchBufferParams.pCounterDataImage = initializeParams.pCounterDataImage;
|
||||
initScratchBufferParams.counterDataScratchBufferSize = scratchBufferSizeParams.counterDataScratchBufferSize;
|
||||
initScratchBufferParams.pCounterDataScratchBuffer = &counterDataScratch[0];
|
||||
|
||||
nvpaStatus = NVPW_OpenGL_Profiler_CounterDataImage_InitializeScratchBuffer(&initScratchBufferParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool SetConfig(const SetConfigParams& config) const override
|
||||
{
|
||||
NVPW_OpenGL_Profiler_GraphicsContext_SetConfig_Params setConfigParams = { NVPW_OpenGL_Profiler_GraphicsContext_SetConfig_Params_STRUCT_SIZE };
|
||||
setConfigParams.pConfig = config.pConfigImage;
|
||||
setConfigParams.configSize = config.configImageSize;
|
||||
setConfigParams.minNestingLevel = 1;
|
||||
setConfigParams.numNestingLevels = config.numNestingLevels;
|
||||
setConfigParams.passIndex = 0;
|
||||
setConfigParams.targetNestingLevel = 1;
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_SetConfig(&setConfigParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool BeginPass() const override
|
||||
{
|
||||
NVPW_OpenGL_Profiler_GraphicsContext_BeginPass_Params beginPassParams = { NVPW_OpenGL_Profiler_GraphicsContext_BeginPass_Params_STRUCT_SIZE };
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_BeginPass(&beginPassParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool EndPass() const override
|
||||
{
|
||||
NVPW_OpenGL_Profiler_GraphicsContext_EndPass_Params endPassParams = { NVPW_OpenGL_Profiler_GraphicsContext_EndPass_Params_STRUCT_SIZE };
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_EndPass(&endPassParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool PushRange(const char* pRangeName) override
|
||||
{
|
||||
NVPW_OpenGL_Profiler_GraphicsContext_PushRange_Params pushRangeParams = {NVPW_OpenGL_Profiler_GraphicsContext_PushRange_Params_STRUCT_SIZE};
|
||||
pushRangeParams.pRangeName = pRangeName;
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_PushRange(&pushRangeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_OpenGL_Profiler_GraphicsContext_PushRange failed, nvpaStatus = %d\n", nvpaStatus);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool PopRange() override
|
||||
{
|
||||
NVPW_OpenGL_Profiler_GraphicsContext_PopRange_Params popRangeParams = {NVPW_OpenGL_Profiler_GraphicsContext_PopRange_Params_STRUCT_SIZE};
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_PopRange(&popRangeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_OpenGL_Profiler_GraphicsContext_PopRange failed, nvpaStatus = %d\n", nvpaStatus);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
virtual bool DecodeCounters(std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch, bool& onePassDecoded, bool& allPassesDecoded) const
|
||||
{
|
||||
NVPW_OpenGL_Profiler_GraphicsContext_DecodeCounters_Params decodeParams = { NVPW_OpenGL_Profiler_GraphicsContext_DecodeCounters_Params_STRUCT_SIZE };
|
||||
decodeParams.counterDataImageSize = counterDataImage.size();
|
||||
decodeParams.pCounterDataImage = counterDataImage.data();
|
||||
decodeParams.counterDataScratchBufferSize = counterDataScratch.size();
|
||||
decodeParams.pCounterDataScratchBuffer = counterDataScratch.data();
|
||||
decodeParams.pGraphicsContext = pGraphicsContext;
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_DecodeCounters(&decodeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
onePassDecoded = decodeParams.onePassCollected;
|
||||
allPassesDecoded = decodeParams.allPassesCollected;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Initialize(const SessionOptions& sessionOptions_)
|
||||
{
|
||||
NVPW_OpenGL_GetCurrentGraphicsContext_Params getCurrentGraphicsContextParams = {NVPW_OpenGL_GetCurrentGraphicsContext_Params_STRUCT_SIZE};
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_GetCurrentGraphicsContext(&getCurrentGraphicsContextParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
pGraphicsContext = getCurrentGraphicsContextParams.pGraphicsContext;
|
||||
sessionOptions = sessionOptions_;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
NVPW_OpenGL_Profiler_GraphicsContext_EndSession_Params endSessionParams = {NVPW_OpenGL_Profiler_GraphicsContext_EndSession_Params_STRUCT_SIZE};
|
||||
NVPA_Status nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_EndSession(&endSessionParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_OpenGL_Profiler_GraphicsContext_EndSession failed, nvpaStatus = %d\n", nvpaStatus);
|
||||
}
|
||||
sessionOptions = {};
|
||||
pGraphicsContext = nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
protected: // members
|
||||
ProfilerApi m_profilerApi;
|
||||
RangeProfilerStateMachine m_stateMachine;
|
||||
|
||||
private:
|
||||
// non-copyable
|
||||
RangeProfilerOpenGL(const RangeProfilerOpenGL&);
|
||||
|
||||
public:
|
||||
~RangeProfilerOpenGL()
|
||||
{
|
||||
}
|
||||
|
||||
RangeProfilerOpenGL()
|
||||
: m_profilerApi()
|
||||
, m_stateMachine(m_profilerApi)
|
||||
{
|
||||
}
|
||||
|
||||
bool IsInSession() const
|
||||
{
|
||||
return m_profilerApi.pGraphicsContext;
|
||||
}
|
||||
|
||||
bool IsInPass() const
|
||||
{
|
||||
return m_stateMachine.IsInPass();
|
||||
}
|
||||
|
||||
bool SetMaxQueueRangesPerPass(size_t maxQueueRangesPerPass)
|
||||
{
|
||||
if (IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "SetMaxQueueRangesPerPass must be called before the session starts.\n");
|
||||
return false;
|
||||
}
|
||||
m_profilerApi.maxQueueRangesPerPass = maxQueueRangesPerPass;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BeginSession(
|
||||
const SessionOptions& sessionOptions)
|
||||
{
|
||||
if (IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "already in a session\n");
|
||||
return false;
|
||||
}
|
||||
if (!OpenGLIsNvidiaDevice() || !OpenGLIsGpuSupported())
|
||||
{
|
||||
// TODO: error - device is not supported for profiling
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPA_Status nvpaStatus;
|
||||
|
||||
NVPW_OpenGL_Profiler_CalcTraceBufferSize_Params calcTraceBufferSizeParam = { NVPW_OpenGL_Profiler_CalcTraceBufferSize_Params_STRUCT_SIZE };
|
||||
calcTraceBufferSizeParam.maxRangesPerPass = sessionOptions.maxNumRanges;
|
||||
calcTraceBufferSizeParam.avgRangeNameLength = sessionOptions.avgRangeNameLength;
|
||||
nvpaStatus = NVPW_OpenGL_Profiler_CalcTraceBufferSize(&calcTraceBufferSizeParam);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_OpenGL_Profiler_GraphicsContext_BeginSession_Params beginSessionParams = { NVPW_OpenGL_Profiler_GraphicsContext_BeginSession_Params_STRUCT_SIZE };
|
||||
beginSessionParams.numTraceBuffers = sessionOptions.numTraceBuffers;
|
||||
beginSessionParams.traceBufferSize = calcTraceBufferSizeParam.traceBufferSize;
|
||||
beginSessionParams.maxRangesPerPass = sessionOptions.maxNumRanges;
|
||||
beginSessionParams.maxLaunchesPerPass = sessionOptions.maxNumRanges;
|
||||
nvpaStatus = NVPW_OpenGL_Profiler_GraphicsContext_BeginSession(&beginSessionParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_PRIVILEGE)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Failed to start profiler session: profiling permissions not enabled. Please follow these instructions: https://developer.nvidia.com/nvidia-development-tools-solutions-ERR_NVGPUCTRPERM-permission-issue-performance-counters \n");
|
||||
}
|
||||
else if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Failed to start profiler session: insufficient driver version. Please install the latest NVIDIA driver from https://www.nvidia.com \n");
|
||||
}
|
||||
else
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Failed to start profiler session: unknown error. It may be a resource conflict - only one profiler session can run at a time per GPU.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!m_profilerApi.Initialize(sessionOptions))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EndSession()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_stateMachine.Reset();
|
||||
m_profilerApi.Reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool EnqueueCounterCollection(const SetConfigParams& config)
|
||||
{
|
||||
const bool status = m_stateMachine.EnqueueCounterCollection(config);
|
||||
return status;
|
||||
}
|
||||
|
||||
bool EnqueueCounterCollection(const CounterConfiguration& configuration, uint16_t numNestingLevels = 1, size_t numStatisticalSamples = 1)
|
||||
{
|
||||
const bool status = m_stateMachine.EnqueueCounterCollection(SetConfigParams(configuration, numNestingLevels, numStatisticalSamples));
|
||||
return status;
|
||||
}
|
||||
|
||||
bool BeginPass()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.BeginPass();
|
||||
return status;
|
||||
}
|
||||
|
||||
bool EndPass()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.EndPass();
|
||||
return status;
|
||||
}
|
||||
|
||||
bool PushRange(const char* pRangeName)
|
||||
{
|
||||
if (!IsInPass())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.PushRange(pRangeName);
|
||||
return status;
|
||||
}
|
||||
|
||||
bool PopRange()
|
||||
{
|
||||
if (!IsInPass())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.PopRange();
|
||||
return status;
|
||||
}
|
||||
|
||||
/// Decodes counters into decodeResult through the state machine.
/// Logs an error and returns false when no session is active.
bool DecodeCounters(DecodeResult& decodeResult)
{
    if (IsInSession())
    {
        return m_stateMachine.DecodeCounters(decodeResult);
    }

    NV_PERF_LOG_ERR(10, "must be called in a session\n");
    return false;
}
|
||||
|
||||
bool AllPassesSubmitted() const
|
||||
{
|
||||
const bool allPassesSubmitted = m_stateMachine.AllPassesSubmitted();
|
||||
return allPassesSubmitted;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}}}
|
||||
574
ruins64k/tools/NvPerfUtility/include/NvPerfRangeProfilerVulkan.h
Normal file
574
ruins64k/tools/NvPerfUtility/include/NvPerfRangeProfilerVulkan.h
Normal file
@@ -0,0 +1,574 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include "NvPerfInit.h"
|
||||
#include "NvPerfCounterConfiguration.h"
|
||||
#include "NvPerfRangeProfiler.h"
|
||||
#include "NvPerfVulkan.h"
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
class RangeProfilerVulkan
|
||||
{
|
||||
protected:
|
||||
struct ProfilerApi : RangeProfilerStateMachine::IProfilerApi
|
||||
{
|
||||
VkQueue queue;
|
||||
VkDevice device;
|
||||
VkCommandPool commandPool;
|
||||
size_t maxQueueRangesPerPass;
|
||||
std::vector<VkCommandBuffer> rangeCommandBuffers;
|
||||
std::vector<VkFence> rangeFences;
|
||||
size_t nextCommandBufferIdx;
|
||||
SessionOptions sessionOptions;
|
||||
|
||||
ProfilerApi()
|
||||
: queue()
|
||||
, device()
|
||||
, commandPool()
|
||||
, maxQueueRangesPerPass(1)
|
||||
, nextCommandBufferIdx()
|
||||
, sessionOptions()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool CreateCounterData(const SetConfigParams& config, std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch) const override
|
||||
{
|
||||
NVPA_Status nvpaStatus;
|
||||
|
||||
NVPW_VK_Profiler_CounterDataImageOptions counterDataImageOptions = { NVPW_VK_Profiler_CounterDataImageOptions_STRUCT_SIZE };
|
||||
counterDataImageOptions.pCounterDataPrefix = config.pCounterDataPrefix;
|
||||
counterDataImageOptions.counterDataPrefixSize = config.counterDataPrefixSize;
|
||||
counterDataImageOptions.maxNumRanges = static_cast<uint32_t>(sessionOptions.maxNumRanges);
|
||||
counterDataImageOptions.maxNumRangeTreeNodes = static_cast<uint32_t>(2 * sessionOptions.maxNumRanges);
|
||||
counterDataImageOptions.maxRangeNameLength = static_cast<uint32_t>(sessionOptions.avgRangeNameLength);
|
||||
|
||||
NVPW_VK_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = { NVPW_VK_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE };
|
||||
calculateSizeParams.pOptions = &counterDataImageOptions;
|
||||
calculateSizeParams.counterDataImageOptionsSize = NVPW_VK_Profiler_CounterDataImageOptions_STRUCT_SIZE;
|
||||
nvpaStatus = NVPW_VK_Profiler_CounterDataImage_CalculateSize(&calculateSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
counterDataImage.resize(calculateSizeParams.counterDataImageSize);
|
||||
|
||||
NVPW_VK_Profiler_CounterDataImage_Initialize_Params initializeParams = { NVPW_VK_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE };
|
||||
initializeParams.counterDataImageOptionsSize = NVPW_VK_Profiler_CounterDataImageOptions_STRUCT_SIZE;
|
||||
initializeParams.pOptions = &counterDataImageOptions;
|
||||
initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
initializeParams.pCounterDataImage = &counterDataImage[0];
|
||||
nvpaStatus = NVPW_VK_Profiler_CounterDataImage_Initialize(&initializeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_VK_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = { NVPW_VK_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE };
|
||||
scratchBufferSizeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
scratchBufferSizeParams.pCounterDataImage = initializeParams.pCounterDataImage;
|
||||
nvpaStatus = NVPW_VK_Profiler_CounterDataImage_CalculateScratchBufferSize(&scratchBufferSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
counterDataScratch.resize(scratchBufferSizeParams.counterDataScratchBufferSize);
|
||||
|
||||
NVPW_VK_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = { NVPW_VK_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE };
|
||||
initScratchBufferParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
|
||||
initScratchBufferParams.pCounterDataImage = initializeParams.pCounterDataImage;
|
||||
initScratchBufferParams.counterDataScratchBufferSize = scratchBufferSizeParams.counterDataScratchBufferSize;
|
||||
initScratchBufferParams.pCounterDataScratchBuffer = &counterDataScratch[0];
|
||||
|
||||
nvpaStatus = NVPW_VK_Profiler_CounterDataImage_InitializeScratchBuffer(&initScratchBufferParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool SetConfig(const SetConfigParams& config) const override
|
||||
{
|
||||
NVPW_VK_Profiler_Queue_SetConfig_Params setConfigParams = { NVPW_VK_Profiler_Queue_SetConfig_Params_STRUCT_SIZE };
|
||||
setConfigParams.queue = queue;
|
||||
setConfigParams.pConfig = config.pConfigImage;
|
||||
setConfigParams.configSize = config.configImageSize;
|
||||
setConfigParams.minNestingLevel = 1;
|
||||
setConfigParams.numNestingLevels = config.numNestingLevels;
|
||||
setConfigParams.passIndex = 0;
|
||||
setConfigParams.targetNestingLevel = 1;
|
||||
NVPA_Status nvpaStatus = NVPW_VK_Profiler_Queue_SetConfig(&setConfigParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool BeginPass() const override
|
||||
{
|
||||
NVPW_VK_Profiler_Queue_BeginPass_Params beginPassParams = { NVPW_VK_Profiler_Queue_BeginPass_Params_STRUCT_SIZE };
|
||||
beginPassParams.queue = queue;
|
||||
NVPA_Status nvpaStatus = NVPW_VK_Profiler_Queue_BeginPass(&beginPassParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool EndPass() const override
|
||||
{
|
||||
NVPW_VK_Profiler_Queue_EndPass_Params endPassParams = { NVPW_VK_Profiler_Queue_EndPass_Params_STRUCT_SIZE };
|
||||
endPassParams.queue = queue;
|
||||
NVPA_Status nvpaStatus = NVPW_VK_Profiler_Queue_EndPass(&endPassParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Functor>
|
||||
bool SubmitRangeCommandBufferFunctor(Functor&& functor)
|
||||
{
|
||||
VkFence fence = rangeFences[nextCommandBufferIdx];
|
||||
VkResult vkResult = vkWaitForFences(device, 1, &fence, false, 0);
|
||||
if (vkResult == VK_TIMEOUT)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "No more command buffer available for queue level ranges, consider increasing sessionOptions.maxNumRange\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (vkResult)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "vkWaitForFences failed, VkResult = %d\n", vkResult);
|
||||
return false;
|
||||
}
|
||||
|
||||
VkCommandBuffer commandBuffer = rangeCommandBuffers[nextCommandBufferIdx];
|
||||
++nextCommandBufferIdx;
|
||||
if (nextCommandBufferIdx >= rangeCommandBuffers.size())
|
||||
{
|
||||
nextCommandBufferIdx = 0;
|
||||
}
|
||||
|
||||
vkResult = vkResetCommandBuffer(commandBuffer, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
|
||||
if (vkResult)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "vkResetCommandBuffer failed, VkResult = %d\n", vkResult);
|
||||
return false;
|
||||
}
|
||||
|
||||
VkCommandBufferBeginInfo commandBufferBeginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
|
||||
vkResult = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
|
||||
if (vkResult)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "vkBeginCommandBuffer failed, VkResult = %d\n", vkResult);
|
||||
return false;
|
||||
}
|
||||
if (!functor(commandBuffer))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
vkResult = vkEndCommandBuffer(commandBuffer);
|
||||
if (vkResult)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "vkEndCommandBuffer failed, VkResult = %d\n", vkResult);
|
||||
return false;
|
||||
}
|
||||
|
||||
vkResult = vkResetFences(device, 1, &fence);
|
||||
if (vkResult)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "vkResetFences failed, VkResult = %d\n", vkResult);
|
||||
return false;
|
||||
}
|
||||
|
||||
VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
|
||||
submitInfo.commandBufferCount = 1;
|
||||
submitInfo.pCommandBuffers = &commandBuffer;
|
||||
vkResult = vkQueueSubmit(queue, 1, &submitInfo, fence);
|
||||
if (vkResult)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "vkQueueSubmit failed, VkResult = %d\n", vkResult);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool PushRange(const char* pRangeName) override
|
||||
{
|
||||
return SubmitRangeCommandBufferFunctor([&](VkCommandBuffer commandBuffer)
|
||||
{
|
||||
NVPW_VK_Profiler_CommandBuffer_PushRange_Params pushRangeParams = {NVPW_VK_Profiler_CommandBuffer_PushRange_Params_STRUCT_SIZE};
|
||||
pushRangeParams.commandBuffer = commandBuffer;
|
||||
pushRangeParams.pRangeName = pRangeName;
|
||||
NVPA_Status nvpaStatus = NVPW_VK_Profiler_CommandBuffer_PushRange(&pushRangeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_CommandBuffer_PushRange failed, nvpaStatus = %d\n", nvpaStatus);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
virtual bool PopRange() override
|
||||
{
|
||||
return SubmitRangeCommandBufferFunctor([&](VkCommandBuffer commandBuffer)
|
||||
{
|
||||
NVPW_VK_Profiler_CommandBuffer_PopRange_Params popRangeParams = {NVPW_VK_Profiler_CommandBuffer_PopRange_Params_STRUCT_SIZE};
|
||||
popRangeParams.commandBuffer = commandBuffer;
|
||||
NVPA_Status nvpaStatus = NVPW_VK_Profiler_CommandBuffer_PopRange(&popRangeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_CommandBuffer_PopRange failed, nvpaStatus = %d\n", nvpaStatus);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
virtual bool DecodeCounters(std::vector<uint8_t>& counterDataImage, std::vector<uint8_t>& counterDataScratch, bool& onePassDecoded, bool& allPassesDecoded) const
|
||||
{
|
||||
NVPW_VK_Profiler_Queue_DecodeCounters_Params decodeParams = { NVPW_VK_Profiler_Queue_DecodeCounters_Params_STRUCT_SIZE };
|
||||
decodeParams.queue = queue;
|
||||
decodeParams.counterDataImageSize = counterDataImage.size();
|
||||
decodeParams.pCounterDataImage = counterDataImage.data();
|
||||
decodeParams.counterDataScratchBufferSize = counterDataScratch.size();
|
||||
decodeParams.pCounterDataScratchBuffer = counterDataScratch.data();
|
||||
NVPA_Status nvpaStatus = NVPW_VK_Profiler_Queue_DecodeCounters(&decodeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
onePassDecoded = decodeParams.onePassCollected;
|
||||
allPassesDecoded = decodeParams.allPassesCollected;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Initialize(VkDevice device_, VkQueue queue_, uint32_t queueFamilyIndex, const SessionOptions& sessionOptions_)
|
||||
{
|
||||
device = device_;
|
||||
queue = queue_;
|
||||
sessionOptions = sessionOptions_;
|
||||
|
||||
VkCommandPoolCreateInfo commandPoolCreateInfo = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO};
|
||||
commandPoolCreateInfo.queueFamilyIndex = queueFamilyIndex;
|
||||
commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
|
||||
VkResult vkResult = vkCreateCommandPool(device, &commandPoolCreateInfo, nullptr, &commandPool);
|
||||
if (vkResult)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t maxRangeCommandBuffers = maxQueueRangesPerPass * 2 * sessionOptions.numTraceBuffers;
|
||||
rangeCommandBuffers.resize(maxRangeCommandBuffers);
|
||||
VkCommandBufferAllocateInfo commandBufferAllocateInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
|
||||
commandBufferAllocateInfo.commandPool = commandPool;
|
||||
commandBufferAllocateInfo.commandBufferCount = (uint32_t)maxRangeCommandBuffers;
|
||||
vkResult = vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, rangeCommandBuffers.data());
|
||||
if (vkResult)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
rangeFences.resize(maxRangeCommandBuffers);
|
||||
VkFenceCreateInfo fenceCreateInfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
|
||||
fenceCreateInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT;
|
||||
for (auto& rangeFence : rangeFences)
|
||||
{
|
||||
vkResult = vkCreateFence(device, &fenceCreateInfo, nullptr, &rangeFence);
|
||||
if (vkResult)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
NVPW_VK_Profiler_Queue_EndSession_Params endSessionParams = {NVPW_VK_Profiler_Queue_EndSession_Params_STRUCT_SIZE};
|
||||
endSessionParams.queue = queue;
|
||||
endSessionParams.timeout = 0xFFFFFFFF;
|
||||
NVPA_Status nvpaStatus = NVPW_VK_Profiler_Queue_EndSession(&endSessionParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_Queue_EndSession failed, nvpaStatus = %d\n", nvpaStatus);
|
||||
}
|
||||
|
||||
sessionOptions = {};
|
||||
nextCommandBufferIdx = 0;
|
||||
|
||||
vkFreeCommandBuffers(device, commandPool, (uint32_t)rangeCommandBuffers.size(), rangeCommandBuffers.data());
|
||||
rangeCommandBuffers.clear();
|
||||
|
||||
vkDestroyCommandPool(device, commandPool, nullptr);
|
||||
commandPool = VK_NULL_HANDLE;
|
||||
|
||||
for (auto fence : rangeFences)
|
||||
{
|
||||
vkDestroyFence(device, fence, nullptr);
|
||||
}
|
||||
queue = VK_NULL_HANDLE;
|
||||
device = VK_NULL_HANDLE;
|
||||
}
|
||||
};
|
||||
|
||||
protected: // members
|
||||
ProfilerApi m_profilerApi;
|
||||
RangeProfilerStateMachine m_stateMachine;
|
||||
std::thread m_spgoThread;
|
||||
volatile bool m_spgoThreadExited;
|
||||
|
||||
private:
|
||||
// non-copyable
|
||||
RangeProfilerVulkan(const RangeProfilerVulkan&);
|
||||
|
||||
static void SpgoThreadProc(RangeProfilerVulkan* pRangeProfiler, VkQueue queue)
|
||||
{
|
||||
// Run continuously in the background, handling all BeginPass and EndPass GPU operations until EndSession().
|
||||
NVPW_VK_Queue_ServicePendingGpuOperations_Params serviceGpuOpsParams = { NVPW_VK_Queue_ServicePendingGpuOperations_Params_STRUCT_SIZE };
|
||||
serviceGpuOpsParams.queue = queue;
|
||||
serviceGpuOpsParams.numOperations = 0; // run until EndSession()
|
||||
serviceGpuOpsParams.timeout = 0xFFFFFFFF;
|
||||
NVPA_Status nvpaStatus = NVPW_VK_Queue_ServicePendingGpuOperations(&serviceGpuOpsParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
// TODO: log an error
|
||||
}
|
||||
|
||||
pRangeProfiler->m_spgoThreadExited = true;
|
||||
}
|
||||
|
||||
public:
|
||||
~RangeProfilerVulkan()
|
||||
{
|
||||
}
|
||||
|
||||
RangeProfilerVulkan()
|
||||
: m_profilerApi()
|
||||
, m_stateMachine(m_profilerApi)
|
||||
, m_spgoThread()
|
||||
, m_spgoThreadExited()
|
||||
{
|
||||
}
|
||||
// TODO: make this move friendly
|
||||
|
||||
bool IsInSession() const
|
||||
{
|
||||
return !!m_profilerApi.queue;
|
||||
}
|
||||
|
||||
bool IsInPass() const
|
||||
{
|
||||
return m_stateMachine.IsInPass();
|
||||
}
|
||||
|
||||
VkQueue GetVkQueue() const
|
||||
{
|
||||
return m_profilerApi.queue;
|
||||
}
|
||||
|
||||
bool SetMaxQueueRangesPerPass(size_t maxQueueRangesPerPass)
|
||||
{
|
||||
if (IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "SetMaxQueueRangesPerPass must be called before the session starts.\n");
|
||||
return false;
|
||||
}
|
||||
m_profilerApi.maxQueueRangesPerPass = maxQueueRangesPerPass;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BeginSession(
|
||||
VkInstance instance,
|
||||
VkPhysicalDevice physicalDevice,
|
||||
VkDevice device,
|
||||
VkQueue queue,
|
||||
uint32_t queueFamilyIndex,
|
||||
const SessionOptions& sessionOptions)
|
||||
{
|
||||
if (IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "already in a session\n");
|
||||
return false;
|
||||
}
|
||||
if (!VulkanIsNvidiaDevice(physicalDevice) || !VulkanIsGpuSupported(instance, physicalDevice, device))
|
||||
{
|
||||
// TODO: error - device is not supported for profiling
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPA_Status nvpaStatus;
|
||||
|
||||
NVPW_VK_Profiler_CalcTraceBufferSize_Params calcTraceBufferSizeParam = { NVPW_VK_Profiler_CalcTraceBufferSize_Params_STRUCT_SIZE };
|
||||
calcTraceBufferSizeParam.maxRangesPerPass = sessionOptions.maxNumRanges;
|
||||
calcTraceBufferSizeParam.avgRangeNameLength = sessionOptions.avgRangeNameLength;
|
||||
nvpaStatus = NVPW_VK_Profiler_CalcTraceBufferSize(&calcTraceBufferSizeParam);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
NVPW_VK_Profiler_Queue_BeginSession_Params beginSessionParams = { NVPW_VK_Profiler_Queue_BeginSession_Params_STRUCT_SIZE };
|
||||
beginSessionParams.instance = instance;
|
||||
beginSessionParams.physicalDevice = physicalDevice;
|
||||
beginSessionParams.device = device;
|
||||
beginSessionParams.queue = queue;
|
||||
beginSessionParams.pfnGetInstanceProcAddr = (void*)vkGetInstanceProcAddr;
|
||||
beginSessionParams.pfnGetDeviceProcAddr = (void*)vkGetDeviceProcAddr;
|
||||
beginSessionParams.numTraceBuffers = sessionOptions.numTraceBuffers;
|
||||
beginSessionParams.traceBufferSize = calcTraceBufferSizeParam.traceBufferSize;
|
||||
beginSessionParams.maxRangesPerPass = sessionOptions.maxNumRanges;
|
||||
beginSessionParams.maxLaunchesPerPass = sessionOptions.maxNumRanges;
|
||||
nvpaStatus = NVPW_VK_Profiler_Queue_BeginSession(&beginSessionParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_PRIVILEGE)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Failed to start profiler session: profiling permissions not enabled. Please follow these instructions: https://developer.nvidia.com/ERR_NVGPUCTRPERM\n");
|
||||
}
|
||||
else if (nvpaStatus == NVPA_STATUS_INSUFFICIENT_DRIVER_VERSION)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Failed to start profiler session: insufficient driver version. Please install the latest NVIDIA driver from https://www.nvidia.com\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Failed to start profiler session: unknown error. It may be a resource conflict - only one profiler session can run at a time per GPU.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
m_spgoThreadExited = false;
|
||||
m_spgoThread = std::thread(SpgoThreadProc, this, queue);
|
||||
if(!m_profilerApi.Initialize(device, queue, queueFamilyIndex, sessionOptions))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EndSession()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_stateMachine.Reset();
|
||||
m_profilerApi.Reset();
|
||||
|
||||
m_spgoThread.join();
|
||||
m_spgoThreadExited = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool EnqueueCounterCollection(const SetConfigParams& config)
|
||||
{
|
||||
const bool status = m_stateMachine.EnqueueCounterCollection(config);
|
||||
return status;
|
||||
}
|
||||
|
||||
bool EnqueueCounterCollection(const CounterConfiguration& configuration, uint16_t numNestingLevels = 1, size_t numStatisticalSamples = 1)
|
||||
{
|
||||
const bool status = m_stateMachine.EnqueueCounterCollection(SetConfigParams(configuration, numNestingLevels, numStatisticalSamples));
|
||||
return status;
|
||||
}
|
||||
|
||||
bool BeginPass()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.BeginPass();
|
||||
return status;
|
||||
}
|
||||
|
||||
bool EndPass()
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.EndPass();
|
||||
return status;
|
||||
}
|
||||
|
||||
// Convenience method to start a Queue-level range. For CommandLists, use VulkanRangeCommands::PushRange.
|
||||
bool PushRange(const char* pRangeName)
|
||||
{
|
||||
const bool status = m_stateMachine.PushRange(pRangeName);
|
||||
return status;
|
||||
}
|
||||
|
||||
// Convenience method to end a Queue-level range. For CommandLists, use VulkanRangeCommands::PopRange.
|
||||
bool PopRange()
|
||||
{
|
||||
const bool status = m_stateMachine.PopRange();
|
||||
return status;
|
||||
}
|
||||
|
||||
bool DecodeCounters(DecodeResult& decodeResult)
|
||||
{
|
||||
if (!IsInSession())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "must be called in a session\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_spgoThreadExited)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "the background thread exited; possible hang on subsequent CPU-waiting-on-GPU calls\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool status = m_stateMachine.DecodeCounters(decodeResult);
|
||||
return status;
|
||||
}
|
||||
|
||||
bool AllPassesSubmitted() const
|
||||
{
|
||||
const bool allPassesSubmitted = m_stateMachine.AllPassesSubmitted();
|
||||
return allPassesSubmitted;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}}}
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <stddef.h>
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
/// Plain aggregate describing one report: three (name array, count) pairs plus an HTML string.
/// A value-initialized instance (all pointers null, all counts zero) is what the chip lookup
/// functions return for an unknown chip.
struct ReportDefinition
{
    // Counter names; presumably numCounters entries - confirm against the generated tables.
    const char* const*  ppCounterNames;
    size_t              numCounters;

    // Ratio names; presumably numRatios entries.
    const char* const*  ppRatioNames;
    size_t              numRatios;

    // Throughput names; presumably numThroughputs entries.
    const char* const*  ppThroughputNames;
    size_t              numThroughputs;

    // HTML text associated with the report.
    const char*         pReportHtml;
};
|
||||
|
||||
} }
|
||||
16171
ruins64k/tools/NvPerfUtility/include/NvPerfReportDefinitionGA10X.h
Normal file
16171
ruins64k/tools/NvPerfUtility/include/NvPerfReportDefinitionGA10X.h
Normal file
File diff suppressed because it is too large
Load Diff
14115
ruins64k/tools/NvPerfUtility/include/NvPerfReportDefinitionGV100.h
Normal file
14115
ruins64k/tools/NvPerfUtility/include/NvPerfReportDefinitionGV100.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string.h>
|
||||
#include "NvPerfInit.h"
|
||||
#include "NvPerfReportDefinition.h"
|
||||
#include "NvPerfReportDefinitionGV100.h"
|
||||
#include "NvPerfReportDefinitionTU10X.h"
|
||||
#include "NvPerfReportDefinitionTU11X.h"
|
||||
#include "NvPerfReportDefinitionGA10X.h"
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
namespace PerRangeReport {
|
||||
|
||||
/// Returns the per-range report definition for the named chip.
/// @param pChipName  chip name such as "GV100" or "TU102"; may be null.
/// @return the chip family's definition, or a value-initialized ReportDefinition when
///         pChipName is null or names a chip not listed below.
inline ReportDefinition GetReportDefinition(const char* pChipName)
{
    // strcmp on a null pointer is undefined behaviour; treat null as "unknown chip".
    if (!pChipName)
    {
        return {};
    }

    if (!strcmp(pChipName, "GV100"))
    {
        return gv100::PerRangeReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "TU102") || !strcmp(pChipName, "TU104") || !strcmp(pChipName, "TU106"))
    {
        return tu10x::PerRangeReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "TU116") || !strcmp(pChipName, "TU117"))
    {
        return tu11x::PerRangeReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "GA102") || !strcmp(pChipName, "GA104") || !strcmp(pChipName, "GA106"))
    {
        return ga10x::PerRangeReport::GetReportDefinition();
    }
    return {};
}
|
||||
|
||||
} // namespace PerRangeReport
|
||||
|
||||
namespace SummaryReport {
|
||||
|
||||
/// Returns the summary report definition for the named chip.
/// @param pChipName  chip name such as "GV100" or "TU102"; may be null.
/// @return the chip family's definition, or a value-initialized ReportDefinition when
///         pChipName is null or names a chip not listed below.
inline ReportDefinition GetReportDefinition(const char* pChipName)
{
    // strcmp on a null pointer is undefined behaviour; treat null as "unknown chip".
    if (!pChipName)
    {
        return {};
    }

    if (!strcmp(pChipName, "GV100"))
    {
        return gv100::SummaryReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "TU102") || !strcmp(pChipName, "TU104") || !strcmp(pChipName, "TU106"))
    {
        return tu10x::SummaryReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "TU116") || !strcmp(pChipName, "TU117"))
    {
        return tu11x::SummaryReport::GetReportDefinition();
    }
    else if (!strcmp(pChipName, "GA102") || !strcmp(pChipName, "GA104") || !strcmp(pChipName, "GA106"))
    {
        return ga10x::SummaryReport::GetReportDefinition();
    }
    return {};
}
|
||||
|
||||
} // namespace SummaryReport
|
||||
|
||||
} }
|
||||
15733
ruins64k/tools/NvPerfUtility/include/NvPerfReportDefinitionTU10X.h
Normal file
15733
ruins64k/tools/NvPerfUtility/include/NvPerfReportDefinitionTU10X.h
Normal file
File diff suppressed because it is too large
Load Diff
15720
ruins64k/tools/NvPerfUtility/include/NvPerfReportDefinitionTU11X.h
Normal file
15720
ruins64k/tools/NvPerfUtility/include/NvPerfReportDefinitionTU11X.h
Normal file
File diff suppressed because it is too large
Load Diff
1360
ruins64k/tools/NvPerfUtility/include/NvPerfReportGenerator.h
Normal file
1360
ruins64k/tools/NvPerfUtility/include/NvPerfReportGenerator.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,414 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "NvPerfReportGenerator.h"
|
||||
#include "NvPerfD3D11.h"
|
||||
#include "NvPerfRangeProfilerD3D11.h"
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
class ReportGeneratorD3D11
|
||||
{
|
||||
protected:
|
||||
struct ReportProfiler : ReportGeneratorStateMachine::IReportProfiler
|
||||
{
|
||||
RangeProfilerD3D11 rangeProfiler;
|
||||
|
||||
ReportProfiler()
|
||||
: rangeProfiler()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool IsInSession() const override
|
||||
{
|
||||
return rangeProfiler.IsInSession();
|
||||
}
|
||||
virtual bool IsInPass() const override
|
||||
{
|
||||
return rangeProfiler.IsInPass();
|
||||
}
|
||||
virtual bool EndSession() override
|
||||
{
|
||||
return rangeProfiler.EndSession();
|
||||
}
|
||||
virtual bool EnqueueCounterCollection(const SetConfigParams& config) override
|
||||
{
|
||||
return rangeProfiler.EnqueueCounterCollection(config);
|
||||
}
|
||||
virtual bool BeginPass() override
|
||||
{
|
||||
return rangeProfiler.BeginPass();
|
||||
}
|
||||
virtual bool EndPass() override
|
||||
{
|
||||
return rangeProfiler.EndPass();
|
||||
}
|
||||
virtual bool PushRange(const char* pRangeName) override
|
||||
{
|
||||
return rangeProfiler.PushRange(pRangeName);
|
||||
}
|
||||
virtual bool PopRange() override
|
||||
{
|
||||
return rangeProfiler.PopRange();
|
||||
}
|
||||
virtual bool DecodeCounters(DecodeResult& decodeResult) override
|
||||
{
|
||||
return rangeProfiler.DecodeCounters(decodeResult);
|
||||
}
|
||||
virtual bool AllPassesSubmitted() const override
|
||||
{
|
||||
return rangeProfiler.AllPassesSubmitted();
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
ReportProfiler m_reportProfiler;
|
||||
ReportGeneratorStateMachine m_stateMachine;
|
||||
|
||||
// When enabled, device contexts passed in (e.g. to OnFrameStart()) are checked to belong to the ID3D11Device stored in m_pDevice; see IsDeviceContextValid().
|
||||
bool m_enableDeviceContextValidation;
|
||||
CComPtr<ID3D11Device> m_pDevice;
|
||||
ReportGeneratorInitStatus m_initStatus; // the state of InitializeReportGenerator()
|
||||
|
||||
protected:
|
||||
/// Starts a range-profiler session on `pDeviceContext`.
/// @param pSessionOptions  used verbatim when non-null; otherwise value-initialized options
///                         with maxNumRanges set to ReportGeneratorStateMachine::MaxNumRangesDefault.
/// @return the result of rangeProfiler.BeginSession(); a failure is logged.
bool BeginSessionWithOptions(ID3D11DeviceContext* pDeviceContext, const SessionOptions* pSessionOptions = nullptr)
{
    SessionOptions effectiveOptions = {};
    if (pSessionOptions)
    {
        effectiveOptions = *pSessionOptions;
    }
    else
    {
        effectiveOptions.maxNumRanges = ReportGeneratorStateMachine::MaxNumRangesDefault;
    }

    const bool sessionStarted = m_reportProfiler.rangeProfiler.BeginSession(pDeviceContext, effectiveOptions);
    if (!sessionStarted)
    {
        NV_PERF_LOG_ERR(10, "m_reportProfiler.rangeProfiler.BeginSession failed\n");
    }
    return sessionStarted;
}
|
||||
|
||||
/// Checks that `pDeviceContext` belongs to the device remembered in m_pDevice.
/// @param pDeviceContext  context to validate.
/// @param pFunctionName   name of the calling function, used only in the log message.
/// @return true when validation is disabled, when no device was remembered (a warning is logged),
///         or when the context's device is the same object as m_pDevice; false otherwise,
///         including when validation is active and pDeviceContext is null.
bool IsDeviceContextValid(ID3D11DeviceContext* pDeviceContext, const char* pFunctionName) const
{
    if (!m_enableDeviceContextValidation)
    {
        return true; // when validation is disabled, always assume the pDeviceContext is valid
    }

    if (!m_pDevice)
    {
        NV_PERF_LOG_WRN(50, "Cannot validate DeviceContext.  Please call EnableDeviceContextValidation(true) before InitializeReportGenerator().\n");
        return true; // allow it to proceed unvalidated
    }

    // Guard the dereference below; a null context can never match m_pDevice.
    if (!pDeviceContext)
    {
        NV_PERF_LOG_ERR(10, "The pDeviceContext passed to %s is null.\n", pFunctionName);
        return false;
    }

    CComPtr<ID3D11Device> pDevice;
    pDeviceContext->GetDevice(&pDevice);
    if (!pDevice)
    {
        NV_PERF_LOG_ERR(10, "pDeviceContext->GetDevice() failed\n");
        return false;
    }

    if (!pDevice.IsEqualObject(m_pDevice))
    {
        NV_PERF_LOG_ERR(10, "The pDeviceContext passed to %s does not match the ID3D11Device passed to InitializeReportGenerator().\n", pFunctionName);
        return false;
    }

    return true;
}
|
||||
|
||||
public:
|
||||
DeviceIdentifiers deviceIdentifiers;
|
||||
std::vector<std::string> additionalMetrics;
|
||||
|
||||
public:
|
||||
// Tear-down is delegated entirely to Reset().
~ReportGeneratorD3D11() { Reset(); }
|
||||
|
||||
ReportGeneratorD3D11()
|
||||
: m_reportProfiler()
|
||||
, m_stateMachine(m_reportProfiler)
|
||||
, m_enableDeviceContextValidation(true)
|
||||
, m_pDevice()
|
||||
, m_initStatus(ReportGeneratorInitStatus::NeverCalled)
|
||||
, deviceIdentifiers()
|
||||
, additionalMetrics()
|
||||
{
|
||||
}
|
||||
|
||||
// Accessor for m_initStatus (the state of InitializeReportGenerator()).
ReportGeneratorInitStatus GetInitStatus() const { return m_initStatus; }
|
||||
|
||||
/// Ends all current sessions and frees all internal memory.
|
||||
/// This object may be reused by calling InitializeReportGenerator() again.
|
||||
/// Does not reset deviceIdentifiers.
|
||||
void Reset()
|
||||
{
|
||||
if (m_reportProfiler.rangeProfiler.IsInSession())
|
||||
{
|
||||
const bool endSessionStatus = m_reportProfiler.rangeProfiler.EndSession();
|
||||
if (!endSessionStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "m_reportProfiler.EndSession failed\n");
|
||||
}
|
||||
}
|
||||
|
||||
m_stateMachine.Reset();
|
||||
|
||||
m_pDevice.Release();
|
||||
if (m_initStatus != ReportGeneratorInitStatus::NeverCalled)
|
||||
{
|
||||
m_initStatus = ReportGeneratorInitStatus::Reset;
|
||||
}
|
||||
}
|
||||
|
||||
/// Prepares this report generator for `pDevice`.
///
/// Verifies that the device is an NVIDIA device that can be profiled, initializes NvPerf and the
/// D3D11 driver hooks, fills in deviceIdentifiers, and initializes the report metrics
/// (including additionalMetrics) on the state machine.
///
/// @param pDevice  device to profile; a null pointer is rejected.
/// @return true on success, after which GetInitStatus() is Succeeded. On any failure false is
///         returned, the cause is logged, and GetInitStatus() is Failed.
bool InitializeReportGenerator(ID3D11Device* pDevice)
{
    m_pDevice.Release();
    // Pessimistically mark as failed; only the success path at the bottom overwrites this.
    m_initStatus = ReportGeneratorInitStatus::Failed;

    if (!pDevice)
    {
        NV_PERF_LOG_ERR(10, "pDevice is null\n");
        return false;
    }

    // Can this device be profiled by Nsight Perf SDK?
    if (!nv::perf::D3D11IsNvidiaDevice(pDevice))
    {
        NV_PERF_LOG_ERR(10, "%ls is not an NVIDIA Device\n", D3D11GetDeviceName(pDevice).c_str());
        return false;
    }

    if (!InitializeNvPerf())
    {
        NV_PERF_LOG_ERR(10, "InitializeNvPerf failed\n");
        return false;
    }

    if (!nv::perf::D3D11LoadDriver())
    {
        NV_PERF_LOG_ERR(10, "Could not load driver\n");
        return false;
    }

    if (!nv::perf::profiler::D3D11IsGpuSupported(pDevice))
    {
        NV_PERF_LOG_ERR(10, "GPU is not supported\n");
        return false;
    }

    deviceIdentifiers = D3D11GetDeviceIdentifiers(pDevice);
    if (!deviceIdentifiers.pChipName)
    {
        NV_PERF_LOG_ERR(10, "Unrecognized GPU\n");
        return false;
    }

    // Factory handed to the state machine: sizes `scratchBuffer` for this chip and creates the
    // metrics evaluator inside it. Returns nullptr when the scratch size query yields 0.
    auto createMetricsEvaluator = [&](std::vector<uint8_t>& scratchBuffer) {
        const size_t scratchBufferSize = nv::perf::D3D11CalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName);
        if (!scratchBufferSize)
        {
            return (NVPW_MetricsEvaluator*)nullptr;
        }
        scratchBuffer.resize(scratchBufferSize);
        NVPW_MetricsEvaluator* pMetricsEvaluator = nv::perf::D3D11CreateMetricsEvaluator(scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName);
        return pMetricsEvaluator;
    };
    // Factory handed to the state machine: creates the raw metrics config for this chip.
    auto createRawMetricsConfig = [&]() {
        NVPA_RawMetricsConfig* pRawMetricsConfig = nv::perf::profiler::D3D11CreateRawMetricsConfig(deviceIdentifiers.pChipName);
        return pRawMetricsConfig;
    };

    if (!m_stateMachine.InitializeReportMetrics(deviceIdentifiers, createMetricsEvaluator, createRawMetricsConfig, additionalMetrics))
    {
        NV_PERF_LOG_ERR(100, "m_stateMachine.InitializeReportMetrics failed\n");
        return false;
    }

    // The device is only remembered when validation is on; IsDeviceContextValid() compares against it.
    if (m_enableDeviceContextValidation)
    {
        m_pDevice = pDevice;
    }
    m_initStatus = ReportGeneratorInitStatus::Succeeded;

    NV_PERF_LOG_INF(50, "Initialization succeeded\n");

    return true;
}
|
||||
|
||||
/// Explicitly starts a session. This allows you to control resource allocation.
|
||||
/// Calling this function is optional; by default, OnFrameStart() will start a session if this isn't called.
|
||||
/// The session must be explicitly ended by calling Reset().
|
||||
/// The pDeviceContext must belong the ID3D11Device passed into InitializeReportGenerator().
|
||||
bool BeginSession(ID3D11DeviceContext* pDeviceContext, const SessionOptions* pSessionOptions = nullptr)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
if (!IsDeviceContextValid(pDeviceContext, "BeginSession"))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
auto beginSessionFn = [&]() {
|
||||
return BeginSessionWithOptions(pDeviceContext, pSessionOptions);
|
||||
};
|
||||
if (!m_stateMachine.OnFrameStart(beginSessionFn))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Automatically starts collecting counters after StartCollectionOnNextFrame().
|
||||
/// Call this at the start of each frame.
|
||||
/// The pDeviceContext must belong the ID3D11Device passed into InitializeReportGenerator().
|
||||
bool OnFrameStart(ID3D11DeviceContext* pDeviceContext)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
if (!IsDeviceContextValid(pDeviceContext, "OnFrameStart"))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
auto beginSessionFn = [&]() {
|
||||
return BeginSessionWithOptions(pDeviceContext);
|
||||
};
|
||||
if (!m_stateMachine.OnFrameStart(beginSessionFn))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Advances the counter-collection state-machine after rendering.
|
||||
/// Call this at the end of each frame.
|
||||
bool OnFrameEnd()
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!m_stateMachine.OnFrameEnd())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PushRange(const char* pRangeName)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
if (!m_reportProfiler.IsInPass())
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; not in a profiler pass");
|
||||
return false;
|
||||
}
|
||||
if (!m_reportProfiler.PushRange(pRangeName))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PopRange()
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
if (!m_reportProfiler.IsInPass())
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; not in a profiler pass");
|
||||
return false;
|
||||
}
|
||||
if (!m_reportProfiler.PopRange())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Reports true after StartCollectionOnNextFrame() is called, until the HTML Report has been written to disk.
|
||||
/// This state is cleared by OnFrameEnd().
|
||||
bool IsCollectingReport() const
|
||||
{
|
||||
return m_stateMachine.IsCollectingReport();
|
||||
}
|
||||
|
||||
/// Enqueues report collection, starting on the next frame.
|
||||
bool StartCollectionOnNextFrame(const char* pDirectoryName, AppendDateTime appendDateTime)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
return m_stateMachine.StartCollectionOnNextFrame(pDirectoryName, appendDateTime);
|
||||
}
|
||||
|
||||
/// Enables a frame-level parent range.
|
||||
/// When enabled (non-NULL, non-empty pRangeName), every frame will have a parent range.
|
||||
/// Pass in NULL or an empty string to disable this behavior.
|
||||
/// The pRangeName string is copied by value, and may be modified or freed after this function returns.
|
||||
void SetFrameLevelRangeName(const char* pRangeName)
|
||||
{
|
||||
m_stateMachine.SetFrameLevelRangeName(pRangeName);
|
||||
}
|
||||
|
||||
/// Retrieves the current frame-level parent range. An empty string signifies no parent range.
|
||||
const std::string& GetFrameLevelRangeName() const
|
||||
{
|
||||
return m_stateMachine.GetFrameLevelRangeName();
|
||||
}
|
||||
|
||||
/// Sets the number of Push/Pop nesting levels to collect in the report.
|
||||
void SetNumNestingLevels(uint16_t numNestingLevels)
|
||||
{
|
||||
m_stateMachine.SetNumNestingLevels(numNestingLevels);
|
||||
}
|
||||
|
||||
/// Retrieves the number of Push/Pop nesting levels being collected in the report.
|
||||
uint16_t GetNumNestingLevels() const
|
||||
{
|
||||
return m_stateMachine.GetNumNestingLevels();
|
||||
}
|
||||
|
||||
/// When enabled, OnFrameStart() will check whether its argument's ID3D11DeviceContext
|
||||
/// corresponds to the device passed into InitializeReportGenerator().
|
||||
void EnableDeviceContextValidation(bool enable = true)
|
||||
{
|
||||
m_enableDeviceContextValidation = enable;
|
||||
}
|
||||
};
|
||||
|
||||
}}}
|
||||
@@ -0,0 +1,394 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#pragma once
|
||||
#include "NvPerfReportGenerator.h"
|
||||
#include "NvPerfRangeProfilerD3D12.h"
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
class ReportGeneratorD3D12
|
||||
{
|
||||
protected:
|
||||
struct ReportProfiler : public ReportGeneratorStateMachine::IReportProfiler
|
||||
{
|
||||
RangeProfilerD3D12 rangeProfiler;
|
||||
|
||||
ReportProfiler()
|
||||
: rangeProfiler()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool IsInSession() const override
|
||||
{
|
||||
return rangeProfiler.IsInSession();
|
||||
}
|
||||
virtual bool IsInPass() const override
|
||||
{
|
||||
return rangeProfiler.IsInPass();
|
||||
}
|
||||
virtual bool EndSession() override
|
||||
{
|
||||
return rangeProfiler.EndSession();
|
||||
}
|
||||
virtual bool EnqueueCounterCollection(const SetConfigParams& config) override
|
||||
{
|
||||
return rangeProfiler.EnqueueCounterCollection(config);
|
||||
}
|
||||
virtual bool BeginPass() override
|
||||
{
|
||||
return rangeProfiler.BeginPass();
|
||||
}
|
||||
virtual bool EndPass() override
|
||||
{
|
||||
return rangeProfiler.EndPass();
|
||||
}
|
||||
virtual bool PushRange(const char* pRangeName) override
|
||||
{
|
||||
return rangeProfiler.PushRange(pRangeName);
|
||||
}
|
||||
virtual bool PopRange() override
|
||||
{
|
||||
return rangeProfiler.PopRange();
|
||||
}
|
||||
virtual bool DecodeCounters(DecodeResult& decodeResult) override
|
||||
{
|
||||
return rangeProfiler.DecodeCounters(decodeResult);
|
||||
}
|
||||
virtual bool AllPassesSubmitted() const override
|
||||
{
|
||||
return rangeProfiler.AllPassesSubmitted();
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
ReportProfiler m_reportProfiler;
|
||||
ReportGeneratorStateMachine m_stateMachine;
|
||||
|
||||
// When enabled, OnFrameStart() will check whether its argument's ID3D12Device == m_pDevice.
|
||||
bool m_enableCommandQueueValidation;
|
||||
CComPtr<ID3D12Device> m_pDevice;
|
||||
ReportGeneratorInitStatus m_initStatus; // the state of InitializeReportGenerator()
|
||||
|
||||
protected:
|
||||
bool BeginSessionWithOptions(ID3D12CommandQueue* pCommandQueue, const SessionOptions* pSessionOptions = nullptr)
|
||||
{
|
||||
SessionOptions sessionOptions = {};
|
||||
sessionOptions.maxNumRanges = ReportGeneratorStateMachine::MaxNumRangesDefault;
|
||||
if (pSessionOptions)
|
||||
{
|
||||
sessionOptions = *pSessionOptions;
|
||||
}
|
||||
|
||||
if (!m_reportProfiler.rangeProfiler.BeginSession(pCommandQueue, sessionOptions))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "m_reportProfiler.rangeProfiler.BeginSession failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IsCommandQueueValid(ID3D12CommandQueue* pCommandQueue, const char* pFunctionName) const
|
||||
{
|
||||
if (!m_enableCommandQueueValidation)
|
||||
{
|
||||
return true; // when validation is disabled, always assume the CommandQueue is valid
|
||||
}
|
||||
|
||||
if (!m_pDevice)
|
||||
{
|
||||
NV_PERF_LOG_WRN(50, "Cannot validate CommandQueue. Please call EnableCommandQueueValidation(true) before InitializeReportGenerator().\n");
|
||||
return true; // allow it to proceed unvalidated
|
||||
}
|
||||
|
||||
CComPtr<ID3D12Device> pDevice;
|
||||
HRESULT hr = pCommandQueue->GetDevice(IID_PPV_ARGS(&pDevice));
|
||||
if (FAILED(hr) || !pDevice)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "pCommandQueue->GetDevice() failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!pDevice.IsEqualObject(m_pDevice))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "The pCommandQueue passed to %s does not match the ID3D12Device passed to InitializeReportGenerator().\n", pFunctionName);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
/// RangeCommands is safe to use on any CommandList belonging to the ID3D12Device used for initialization.
|
||||
/// RangeCommands perform no operation when called on unsupported or non-NVIDIA devices.
|
||||
D3D12RangeCommands rangeCommands;
|
||||
/// NVIDIA device identifiers.
|
||||
DeviceIdentifiers deviceIdentifiers;
|
||||
std::vector<std::string> additionalMetrics;
|
||||
|
||||
public:
|
||||
~ReportGeneratorD3D12()
|
||||
{
|
||||
Reset();
|
||||
}
|
||||
|
||||
ReportGeneratorD3D12()
|
||||
: m_reportProfiler()
|
||||
, m_stateMachine(m_reportProfiler)
|
||||
, m_enableCommandQueueValidation(true)
|
||||
, m_pDevice()
|
||||
, m_initStatus(ReportGeneratorInitStatus::NeverCalled)
|
||||
, rangeCommands()
|
||||
, deviceIdentifiers()
|
||||
, additionalMetrics()
|
||||
{
|
||||
}
|
||||
|
||||
ReportGeneratorInitStatus GetInitStatus() const
|
||||
{
|
||||
return m_initStatus;
|
||||
}
|
||||
|
||||
/// Ends all current sessions and frees all internal memory.
|
||||
/// This object may be reused by calling InitializeReportGenerator() again.
|
||||
/// Does not reset rangeCommands and deviceIdentifiers.
|
||||
void Reset()
|
||||
{
|
||||
if (m_reportProfiler.rangeProfiler.IsInSession())
|
||||
{
|
||||
const bool endSessionStatus = m_reportProfiler.rangeProfiler.EndSession();
|
||||
if (!endSessionStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "m_reportProfiler.EndSession failed\n");
|
||||
}
|
||||
}
|
||||
|
||||
m_stateMachine.Reset();
|
||||
|
||||
m_pDevice.Release();
|
||||
if (m_initStatus != ReportGeneratorInitStatus::NeverCalled)
|
||||
{
|
||||
m_initStatus = ReportGeneratorInitStatus::Reset;
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize this object on the provided ID3D12Device.
|
||||
bool InitializeReportGenerator(ID3D12Device* pDevice)
|
||||
{
|
||||
// Do this first, in case this object is re-initialized on a different device.
|
||||
rangeCommands.Initialize(pDevice);
|
||||
|
||||
m_pDevice.Release();
|
||||
m_initStatus = ReportGeneratorInitStatus::Failed;
|
||||
|
||||
// Can this device be profiled by Nsight Perf SDK?
|
||||
if (!nv::perf::D3D12IsNvidiaDevice(pDevice))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "%ls is not an NVIDIA Device\n", D3D12GetDeviceName(pDevice).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!InitializeNvPerf())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "InitializeNvPerf failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nv::perf::D3D12LoadDriver())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Could not load driver\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nv::perf::profiler::D3D12IsGpuSupported(pDevice))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "GPU is not supported\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
deviceIdentifiers = D3D12GetDeviceIdentifiers(pDevice);
|
||||
if (!deviceIdentifiers.pChipName)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Unrecognaized GPU\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
auto createMetricsEvaluator = [&](std::vector<uint8_t>& scratchBuffer) {
|
||||
const size_t scratchBufferSize = nv::perf::D3D12CalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName);
|
||||
if (!scratchBufferSize)
|
||||
{
|
||||
return (NVPW_MetricsEvaluator*)nullptr;
|
||||
}
|
||||
scratchBuffer.resize(scratchBufferSize);
|
||||
NVPW_MetricsEvaluator* pMetricsEvaluator = nv::perf::D3D12CreateMetricsEvaluator(scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName);
|
||||
return pMetricsEvaluator;
|
||||
};
|
||||
auto createRawMetricsConfig = [&]() {
|
||||
NVPA_RawMetricsConfig* pRawMetricsConfig = nv::perf::profiler::D3D12CreateRawMetricsConfig(deviceIdentifiers.pChipName);
|
||||
return pRawMetricsConfig;
|
||||
};
|
||||
if (!m_stateMachine.InitializeReportMetrics(deviceIdentifiers, createMetricsEvaluator, createRawMetricsConfig, additionalMetrics))
|
||||
{
|
||||
NV_PERF_LOG_ERR(100, "m_stateMachine.InitializeReportMetrics failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_enableCommandQueueValidation)
|
||||
{
|
||||
m_pDevice = pDevice;
|
||||
}
|
||||
m_initStatus = ReportGeneratorInitStatus::Succeeded;
|
||||
|
||||
NV_PERF_LOG_INF(50, "Initialization succeeded\n");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Explicitly starts a session. This allows you to control resource allocation.
|
||||
/// Calling this function is optional; by default, OnFrameStart() will start a session if this isn't called.
|
||||
/// The session must be explicitly ended by calling Reset().
|
||||
/// The pCommandQueue must belong the ID3D12Device passed into InitializeReportGenerator().
|
||||
bool BeginSession(ID3D12CommandQueue* pCommandQueue, const SessionOptions* pSessionOptions = nullptr)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
if (!IsCommandQueueValid(pCommandQueue, "BeginSession"))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
auto beginSessionFn = [&]() {
|
||||
return BeginSessionWithOptions(pCommandQueue, pSessionOptions);
|
||||
};
|
||||
if (!m_stateMachine.BeginSession(beginSessionFn))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Automatically starts collecting counters after StartCollectionOnNextFrame().
|
||||
/// Call this at the start of each frame.
|
||||
/// The pCommandQueue must belong the ID3D12Device passed into InitializeReportGenerator().
|
||||
bool OnFrameStart(ID3D12CommandQueue* pCommandQueue)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
if (!IsCommandQueueValid(pCommandQueue, "OnFrameStart"))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
auto beginSessionFn = [&]() {
|
||||
return BeginSessionWithOptions(pCommandQueue);
|
||||
};
|
||||
if (!m_stateMachine.OnFrameStart(beginSessionFn))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Advances the counter-collection state-machine after rendering.
|
||||
/// Call this at the end of each frame.
|
||||
bool OnFrameEnd()
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!m_stateMachine.OnFrameEnd())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Reports true after StartCollectionOnNextFrame() is called, until the HTML Report has been written to disk.
|
||||
/// This state is cleared by OnFrameEnd().
|
||||
bool IsCollectingReport() const
|
||||
{
|
||||
return m_stateMachine.IsCollectingReport();
|
||||
}
|
||||
|
||||
const std::string& GetReportDirectoryName() const
|
||||
{
|
||||
return m_stateMachine.GetReportDirectoryName();
|
||||
}
|
||||
|
||||
/// Enqueues report collection, starting on the next frame.
|
||||
bool StartCollectionOnNextFrame(const char* pDirectoryName, AppendDateTime appendDateTime)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
return m_stateMachine.StartCollectionOnNextFrame(pDirectoryName, appendDateTime);
|
||||
}
|
||||
|
||||
/// Enables a frame-level parent range.
|
||||
/// When enabled (non-NULL, non-empty pRangeName), every frame will have a parent range.
|
||||
/// This is also convenient for programs that have no CommandList-level ranges.
|
||||
/// Pass in NULL or an empty string to disable this behavior.
|
||||
/// The pRangeName string is copied by value, and may be modified or freed after this function returns.
|
||||
void SetFrameLevelRangeName(const char* pRangeName)
|
||||
{
|
||||
m_stateMachine.SetFrameLevelRangeName(pRangeName);
|
||||
}
|
||||
|
||||
/// Retrieves the current frame-level parent range. An empty string signifies no parent range.
|
||||
const std::string& GetFrameLevelRangeName() const
|
||||
{
|
||||
return m_stateMachine.GetFrameLevelRangeName();
|
||||
}
|
||||
|
||||
/// Sets the number of Push/Pop nesting levels to collect in the report.
|
||||
void SetNumNestingLevels(uint16_t numNestingLevels)
|
||||
{
|
||||
m_stateMachine.SetNumNestingLevels(numNestingLevels);
|
||||
}
|
||||
|
||||
/// Retrieves the number of Push/Pop nesting levels being collected in the report.
|
||||
uint16_t GetNumNestingLevels() const
|
||||
{
|
||||
return m_stateMachine.GetNumNestingLevels();
|
||||
}
|
||||
|
||||
/// Open the report directory in file browser after perf data collection.
|
||||
/// The default behavor is false, and can be changed by enviroment variable NV_PERF_OPEN_REPORT_DIR_AFTER_COLLECTION.
|
||||
void SetOpenReportDirectoryAfterCollection(bool openReportDirectoryAfterCollection)
|
||||
{
|
||||
m_stateMachine.SetOpenReportDirectoryAfterCollection(openReportDirectoryAfterCollection);
|
||||
}
|
||||
|
||||
/// When enabled, OnFrameStart() will check whether its argument's ID3D12Device
|
||||
/// corresponds to the device passed into InitializeReportGenerator().
|
||||
void EnableCommandQueueValidation(bool enable = true)
|
||||
{
|
||||
m_enableCommandQueueValidation = enable;
|
||||
}
|
||||
};
|
||||
|
||||
}}}
|
||||
@@ -0,0 +1,367 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#pragma once
|
||||
#include "NvPerfReportGenerator.h"
|
||||
#include "NvPerfRangeProfilerOpenGL.h"
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
class ReportGeneratorOpenGL
|
||||
{
|
||||
protected:
|
||||
struct ReportProfiler : public ReportGeneratorStateMachine::IReportProfiler
|
||||
{
|
||||
RangeProfilerOpenGL rangeProfiler;
|
||||
|
||||
ReportProfiler()
|
||||
: rangeProfiler()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool IsInSession() const override
|
||||
{
|
||||
return rangeProfiler.IsInSession();
|
||||
}
|
||||
virtual bool IsInPass() const override
|
||||
{
|
||||
return rangeProfiler.IsInPass();
|
||||
}
|
||||
virtual bool EndSession() override
|
||||
{
|
||||
return rangeProfiler.EndSession();
|
||||
}
|
||||
virtual bool EnqueueCounterCollection(const SetConfigParams& config) override
|
||||
{
|
||||
return rangeProfiler.EnqueueCounterCollection(config);
|
||||
}
|
||||
virtual bool BeginPass() override
|
||||
{
|
||||
return rangeProfiler.BeginPass();
|
||||
}
|
||||
virtual bool EndPass() override
|
||||
{
|
||||
return rangeProfiler.EndPass();
|
||||
}
|
||||
virtual bool PushRange(const char* pRangeName) override
|
||||
{
|
||||
return rangeProfiler.PushRange(pRangeName);
|
||||
}
|
||||
virtual bool PopRange() override
|
||||
{
|
||||
return rangeProfiler.PopRange();
|
||||
}
|
||||
virtual bool DecodeCounters(DecodeResult& decodeResult) override
|
||||
{
|
||||
return rangeProfiler.DecodeCounters(decodeResult);
|
||||
}
|
||||
virtual bool AllPassesSubmitted() const override
|
||||
{
|
||||
return rangeProfiler.AllPassesSubmitted();
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
ReportProfiler m_reportProfiler;
|
||||
ReportGeneratorStateMachine m_stateMachine;
|
||||
|
||||
// OpenGL device state, set at initialize
|
||||
ReportGeneratorInitStatus m_initStatus; // the state of InitializeReportGenerator()
|
||||
|
||||
protected:
|
||||
bool BeginSessionWithOptions(
|
||||
const SessionOptions* pSessionOptions = nullptr)
|
||||
{
|
||||
SessionOptions sessionOptions = {};
|
||||
sessionOptions.maxNumRanges = ReportGeneratorStateMachine::MaxNumRangesDefault;
|
||||
if (pSessionOptions)
|
||||
{
|
||||
sessionOptions = *pSessionOptions;
|
||||
}
|
||||
|
||||
if (!m_reportProfiler.rangeProfiler.BeginSession(sessionOptions))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "m_reportProfiler.rangeProfiler.BeginSession failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
DeviceIdentifiers deviceIdentifiers;
|
||||
std::vector<std::string> additionalMetrics;
|
||||
|
||||
public:
|
||||
~ReportGeneratorOpenGL()
|
||||
{
|
||||
Reset();
|
||||
}
|
||||
|
||||
ReportGeneratorOpenGL()
|
||||
: m_reportProfiler()
|
||||
, m_stateMachine(m_reportProfiler)
|
||||
, m_initStatus(ReportGeneratorInitStatus::NeverCalled)
|
||||
, deviceIdentifiers()
|
||||
, additionalMetrics()
|
||||
{
|
||||
}
|
||||
|
||||
ReportGeneratorInitStatus GetInitStatus() const
|
||||
{
|
||||
return m_initStatus;
|
||||
}
|
||||
|
||||
/// Ends all current sessions and frees all internal memory.
|
||||
/// This object may be reused by calling InitializeReportGenerator() again.
|
||||
/// Does not reset rangeCommands and deviceIdentifiers.
|
||||
void Reset()
|
||||
{
|
||||
if (m_reportProfiler.rangeProfiler.IsInSession())
|
||||
{
|
||||
const bool endSessionStatus = m_reportProfiler.rangeProfiler.EndSession();
|
||||
if (!endSessionStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "m_reportProfiler.EndSession failed\n");
|
||||
}
|
||||
}
|
||||
|
||||
m_stateMachine.Reset();
|
||||
|
||||
if (m_initStatus != ReportGeneratorInitStatus::NeverCalled)
|
||||
{
|
||||
m_initStatus = ReportGeneratorInitStatus::Reset;
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize this object on the provided current context.
|
||||
bool InitializeReportGenerator()
|
||||
{
|
||||
m_initStatus = ReportGeneratorInitStatus::Failed;
|
||||
|
||||
// Can this device be profiled by Nsight Perf SDK?
|
||||
if (!nv::perf::OpenGLIsNvidiaDevice())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "%s is not an NVIDIA Device\n", OpenGLGetDeviceName().c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!InitializeNvPerf())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "InitializeNvPerf failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nv::perf::OpenGLLoadDriver())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Could not load driver\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nv::perf::profiler::OpenGLIsGpuSupported())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "GPU is not supported\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
deviceIdentifiers = OpenGLGetDeviceIdentifiers();
|
||||
if (!deviceIdentifiers.pChipName)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Unrecognized GPU\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
auto createMetricsEvaluator = [&](std::vector<uint8_t>& scratchBuffer) {
|
||||
const size_t scratchBufferSize = nv::perf::OpenGLCalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName);
|
||||
if (!scratchBufferSize)
|
||||
{
|
||||
return (NVPW_MetricsEvaluator*)nullptr;
|
||||
}
|
||||
scratchBuffer.resize(scratchBufferSize);
|
||||
NVPW_MetricsEvaluator* pMetricsEvaluator = nv::perf::OpenGLCreateMetricsEvaluator(scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName);
|
||||
return pMetricsEvaluator;
|
||||
};
|
||||
auto createRawMetricsConfig = [&]() {
|
||||
NVPA_RawMetricsConfig* pRawMetricsConfig = nv::perf::profiler::OpenGLCreateRawMetricsConfig(deviceIdentifiers.pChipName);
|
||||
return pRawMetricsConfig;
|
||||
};
|
||||
if (!m_stateMachine.InitializeReportMetrics(deviceIdentifiers, createMetricsEvaluator, createRawMetricsConfig, additionalMetrics))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "m_stateMachine.InitializeReportMetrics failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_initStatus = ReportGeneratorInitStatus::Succeeded;
|
||||
|
||||
NV_PERF_LOG_INF(50, "Initialization succeeded\n");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Explicitly starts a session. This allows you to control resource allocation.
|
||||
/// Calling this function is optional; by default, OnFrameStart() will start a session if this isn't called.
|
||||
/// The session must be explicitly ended by calling Reset().
|
||||
bool BeginSession(const SessionOptions* pSessionOptions = nullptr)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
|
||||
auto beginSessionFn = [&]() {
|
||||
return BeginSessionWithOptions(pSessionOptions);
|
||||
};
|
||||
if (!m_stateMachine.BeginSession(beginSessionFn))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Automatically starts collecting counters for a report, after StartCollectionOnNextFrame().
|
||||
/// Call this at the start of each frame.
|
||||
bool OnFrameStart()
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
|
||||
auto beginSessionFn = [&]() {
|
||||
return BeginSessionWithOptions();
|
||||
};
|
||||
if (!m_stateMachine.OnFrameStart(beginSessionFn))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Advances the counter-collection state-machine after rendering.
|
||||
/// Call this at the end of each frame.
|
||||
bool OnFrameEnd()
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!m_stateMachine.OnFrameEnd())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PushRange(const char* pRangeName)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
if (!m_reportProfiler.IsInPass())
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; not in a profiler pass");
|
||||
return false;
|
||||
}
|
||||
if (!m_reportProfiler.PushRange(pRangeName))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PopRange()
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
if (!m_reportProfiler.IsInPass())
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; not in a profiler pass");
|
||||
return false;
|
||||
}
|
||||
if (!m_reportProfiler.PopRange())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Reports true after StartCollectionOnNextFrame() is called, until the HTML Report has been written to disk.
|
||||
/// This state is cleared by OnFrameEnd().
|
||||
bool IsCollectingReport() const
|
||||
{
|
||||
return m_stateMachine.IsCollectingReport();
|
||||
}
|
||||
|
||||
const std::string& GetReportDirectoryName() const
|
||||
{
|
||||
return m_stateMachine.GetReportDirectoryName();
|
||||
}
|
||||
|
||||
/// Enqueues report collection, starting on the next frame.
|
||||
bool StartCollectionOnNextFrame(const char* pDirectoryName, AppendDateTime appendDateTime)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
return m_stateMachine.StartCollectionOnNextFrame(pDirectoryName, appendDateTime);
|
||||
}
|
||||
|
||||
/// Enables a frame-level parent range.
|
||||
/// When enabled (non-NULL, non-empty pRangeName), every frame will have a parent range.
|
||||
/// This is also convenient for programs that have no CommandList-level ranges.
|
||||
/// Pass in NULL or an empty string to disable this behavior.
|
||||
/// The pRangeName string is copied by value, and may be modified or freed after this function returns.
|
||||
void SetFrameLevelRangeName(const char* pRangeName)
|
||||
{
|
||||
m_stateMachine.SetFrameLevelRangeName(pRangeName);
|
||||
}
|
||||
|
||||
/// Retrieves the current frame-level parent range. An empty string signifies no parent range.
|
||||
const std::string& GetFrameLevelRangeName() const
|
||||
{
|
||||
return m_stateMachine.GetFrameLevelRangeName();
|
||||
}
|
||||
|
||||
/// Sets the number of Push/Pop nesting levels to collect in the report.
|
||||
void SetNumNestingLevels(uint16_t numNestingLevels)
|
||||
{
|
||||
m_stateMachine.SetNumNestingLevels(numNestingLevels);
|
||||
}
|
||||
|
||||
/// Retrieves the number of Push/Pop nesting levels being collected in the report.
|
||||
uint16_t GetNumNestingLevels() const
|
||||
{
|
||||
return m_stateMachine.GetNumNestingLevels();
|
||||
}
|
||||
|
||||
/// Open the report directory in file browser after perf data collection.
|
||||
/// The default behavior is false, and can be changed by the environment variable NV_PERF_OPEN_REPORT_DIR_AFTER_COLLECTION.
|
||||
void SetOpenReportDirectoryAfterCollection(bool openReportDirectoryAfterCollection)
|
||||
{
|
||||
m_stateMachine.SetOpenReportDirectoryAfterCollection(openReportDirectoryAfterCollection);
|
||||
}
|
||||
};
|
||||
}}}
|
||||
@@ -0,0 +1,358 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#pragma once
|
||||
#include "NvPerfReportGenerator.h"
|
||||
#include "NvPerfRangeProfilerVulkan.h"
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
class ReportGeneratorVulkan
|
||||
{
|
||||
protected:
|
||||
struct ReportProfiler : public ReportGeneratorStateMachine::IReportProfiler
|
||||
{
|
||||
RangeProfilerVulkan rangeProfiler;
|
||||
|
||||
ReportProfiler()
|
||||
: rangeProfiler()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool IsInSession() const override
|
||||
{
|
||||
return rangeProfiler.IsInSession();
|
||||
}
|
||||
virtual bool IsInPass() const override
|
||||
{
|
||||
return rangeProfiler.IsInPass();
|
||||
}
|
||||
virtual bool EndSession() override
|
||||
{
|
||||
return rangeProfiler.EndSession();
|
||||
}
|
||||
virtual bool EnqueueCounterCollection(const SetConfigParams& config) override
|
||||
{
|
||||
return rangeProfiler.EnqueueCounterCollection(config);
|
||||
}
|
||||
virtual bool BeginPass() override
|
||||
{
|
||||
return rangeProfiler.BeginPass();
|
||||
}
|
||||
virtual bool EndPass() override
|
||||
{
|
||||
return rangeProfiler.EndPass();
|
||||
}
|
||||
virtual bool PushRange(const char* pRangeName) override
|
||||
{
|
||||
return rangeProfiler.PushRange(pRangeName);
|
||||
}
|
||||
virtual bool PopRange() override
|
||||
{
|
||||
return rangeProfiler.PopRange();
|
||||
}
|
||||
virtual bool DecodeCounters(DecodeResult& decodeResult) override
|
||||
{
|
||||
return rangeProfiler.DecodeCounters(decodeResult);
|
||||
}
|
||||
virtual bool AllPassesSubmitted() const override
|
||||
{
|
||||
return rangeProfiler.AllPassesSubmitted();
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
ReportProfiler m_reportProfiler;
|
||||
ReportGeneratorStateMachine m_stateMachine;
|
||||
|
||||
// vulkan device state, set at initialize
|
||||
VkInstance m_instance;
|
||||
VkPhysicalDevice m_physicalDevice;
|
||||
VkDevice m_device;
|
||||
ReportGeneratorInitStatus m_initStatus; // the state of InitializeReportGenerator()
|
||||
|
||||
protected:
|
||||
bool BeginSessionWithOptions(
|
||||
VkInstance instance,
|
||||
VkPhysicalDevice physicalDevice,
|
||||
VkDevice device,
|
||||
VkQueue queue,
|
||||
uint32_t queueFamilyIndex,
|
||||
const SessionOptions* pSessionOptions = nullptr)
|
||||
{
|
||||
SessionOptions sessionOptions = {};
|
||||
sessionOptions.maxNumRanges = ReportGeneratorStateMachine::MaxNumRangesDefault;
|
||||
if (pSessionOptions)
|
||||
{
|
||||
sessionOptions = *pSessionOptions;
|
||||
}
|
||||
|
||||
if (!m_reportProfiler.rangeProfiler.BeginSession(instance, physicalDevice, device, queue, queueFamilyIndex, sessionOptions))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "m_reportProfiler.rangeProfiler.BeginSession failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
/// RangeCommands is safe to use on any CommandBuffer belonging to the VkDevice used for initialization.
|
||||
/// RangeCommands perform no operation when called on unsupported or non-NVIDIA devices.
|
||||
VulkanRangeCommands rangeCommands;
|
||||
DeviceIdentifiers deviceIdentifiers;
|
||||
std::vector<std::string> additionalMetrics;
|
||||
|
||||
public:
|
||||
~ReportGeneratorVulkan()
|
||||
{
|
||||
Reset();
|
||||
}
|
||||
|
||||
ReportGeneratorVulkan()
|
||||
: m_reportProfiler()
|
||||
, m_stateMachine(m_reportProfiler)
|
||||
, m_instance(VK_NULL_HANDLE)
|
||||
, m_physicalDevice(VK_NULL_HANDLE)
|
||||
, m_device(VK_NULL_HANDLE)
|
||||
, m_initStatus(ReportGeneratorInitStatus::NeverCalled)
|
||||
, rangeCommands()
|
||||
, deviceIdentifiers()
|
||||
, additionalMetrics()
|
||||
{
|
||||
}
|
||||
|
||||
ReportGeneratorInitStatus GetInitStatus() const
|
||||
{
|
||||
return m_initStatus;
|
||||
}
|
||||
|
||||
/// Ends all current sessions and frees all internal memory.
|
||||
/// This object may be reused by calling InitializeReportGenerator() again.
|
||||
/// Does not reset rangeCommands and deviceIdentifiers.
|
||||
void Reset()
|
||||
{
|
||||
if (m_reportProfiler.rangeProfiler.IsInSession())
|
||||
{
|
||||
const bool endSessionStatus = m_reportProfiler.rangeProfiler.EndSession();
|
||||
if (!endSessionStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "m_reportProfiler.EndSession failed\n");
|
||||
}
|
||||
}
|
||||
|
||||
m_stateMachine.Reset();
|
||||
|
||||
m_device = VK_NULL_HANDLE;
|
||||
m_physicalDevice = VK_NULL_HANDLE;
|
||||
m_instance = VK_NULL_HANDLE;
|
||||
if (m_initStatus != ReportGeneratorInitStatus::NeverCalled)
|
||||
{
|
||||
m_initStatus = ReportGeneratorInitStatus::Reset;
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize this object on the provided VkDevice.
|
||||
bool InitializeReportGenerator(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device)
|
||||
{
|
||||
// Do this first, in case this object was previously initialized on an NVIDIA device, and is now re-initialized on non-NVIDIA.
|
||||
rangeCommands.Initialize(physicalDevice);
|
||||
|
||||
m_instance = VK_NULL_HANDLE;
|
||||
m_physicalDevice = VK_NULL_HANDLE;
|
||||
m_device = VK_NULL_HANDLE;
|
||||
m_initStatus = ReportGeneratorInitStatus::Failed;
|
||||
|
||||
// Can this device be profiled by Nsight Perf SDK?
|
||||
if (!nv::perf::VulkanIsNvidiaDevice(physicalDevice))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "%ls is not an NVIDIA Device\n", VulkanGetDeviceName(physicalDevice).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!InitializeNvPerf())
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "InitializeNvPerf failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nv::perf::VulkanLoadDriver(instance))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Could not load driver\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nv::perf::profiler::VulkanIsGpuSupported(instance, physicalDevice, device))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "GPU is not supported\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
deviceIdentifiers = VulkanGetDeviceIdentifiers(instance, physicalDevice, device);
|
||||
if (!deviceIdentifiers.pChipName)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Unrecognized GPU\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
auto createMetricsEvaluator = [&](std::vector<uint8_t>& scratchBuffer) {
|
||||
const size_t scratchBufferSize = nv::perf::VulkanCalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName);
|
||||
if (!scratchBufferSize)
|
||||
{
|
||||
return (NVPW_MetricsEvaluator*)nullptr;
|
||||
}
|
||||
scratchBuffer.resize(scratchBufferSize);
|
||||
NVPW_MetricsEvaluator* pMetricsEvaluator = nv::perf::VulkanCreateMetricsEvaluator(scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName);
|
||||
return pMetricsEvaluator;
|
||||
};
|
||||
auto createRawMetricsConfig = [&]() {
|
||||
NVPA_RawMetricsConfig* pRawMetricsConfig = nv::perf::profiler::VulkanCreateRawMetricsConfig(deviceIdentifiers.pChipName);
|
||||
return pRawMetricsConfig;
|
||||
};
|
||||
if (!m_stateMachine.InitializeReportMetrics(deviceIdentifiers, createMetricsEvaluator, createRawMetricsConfig, additionalMetrics))
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "m_stateMachine.InitializeReportMetrics failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_instance = instance;
|
||||
m_physicalDevice = physicalDevice;
|
||||
m_device = device;
|
||||
m_initStatus = ReportGeneratorInitStatus::Succeeded;
|
||||
|
||||
NV_PERF_LOG_INF(50, "Initialization succeeded\n");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Explicitly starts a session. This allows you to control resource allocation.
|
||||
/// Calling this function is optional; by default, OnFrameStart() will start a session if this isn't called.
|
||||
/// The session must be explicitly ended by calling Reset().
|
||||
/// The queue must belong the VkDevice passed into InitializeReportGenerator().
|
||||
bool BeginSession(VkQueue queue, uint32_t queueFamilyIndex, const SessionOptions* pSessionOptions = nullptr)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
|
||||
auto beginSessionFn = [&]() {
|
||||
return BeginSessionWithOptions(m_instance, m_physicalDevice, m_device, queue, queueFamilyIndex, pSessionOptions);
|
||||
};
|
||||
if (!m_stateMachine.BeginSession(beginSessionFn))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Automatically starts collecting counters for a report, after StartCollectionOnNextFrame().
|
||||
/// Call this at the start of each frame.
|
||||
/// The queue must belong the VkDevice passed into InitializeReportGenerator().
|
||||
bool OnFrameStart(VkQueue queue, uint32_t queueFamilyIndex)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
|
||||
auto beginSessionFn = [&]() {
|
||||
return BeginSessionWithOptions(m_instance, m_physicalDevice, m_device, queue, queueFamilyIndex);
|
||||
};
|
||||
if (!m_stateMachine.OnFrameStart(beginSessionFn))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Advances the counter-collection state-machine after rendering.
|
||||
/// Call this at the end of each frame.
|
||||
bool OnFrameEnd()
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!m_stateMachine.OnFrameEnd())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Reports true after StartCollectionOnNextFrame() is called, until the HTML Report has been written to disk.
|
||||
/// This state is cleared by OnFrameEnd().
|
||||
bool IsCollectingReport() const
|
||||
{
|
||||
return m_stateMachine.IsCollectingReport();
|
||||
}
|
||||
|
||||
const std::string& GetReportDirectoryName() const
|
||||
{
|
||||
return m_stateMachine.GetReportDirectoryName();
|
||||
}
|
||||
|
||||
/// Enqueues report collection, starting on the next frame.
|
||||
bool StartCollectionOnNextFrame(const char* pDirectoryName, AppendDateTime appendDateTime)
|
||||
{
|
||||
if (m_initStatus != ReportGeneratorInitStatus::Succeeded)
|
||||
{
|
||||
NV_PERF_LOG_WRN(100, "skipping; the state of InitializeReportGenerator() is %s.\n", ToCString(m_initStatus));
|
||||
return false;
|
||||
}
|
||||
return m_stateMachine.StartCollectionOnNextFrame(pDirectoryName, appendDateTime);
|
||||
}
|
||||
|
||||
/// Enables a frame-level parent range.
|
||||
/// When enabled (non-NULL, non-empty pRangeName), every frame will have a parent range.
|
||||
/// This is also convenient for programs that have no CommandList-level ranges.
|
||||
/// Pass in NULL or an empty string to disable this behavior.
|
||||
/// The pRangeName string is copied by value, and may be modified or freed after this function returns.
|
||||
void SetFrameLevelRangeName(const char* pRangeName)
|
||||
{
|
||||
m_stateMachine.SetFrameLevelRangeName(pRangeName);
|
||||
}
|
||||
|
||||
/// Retrieves the current frame-level parent range. An empty string signifies no parent range.
|
||||
const std::string& GetFrameLevelRangeName() const
|
||||
{
|
||||
return m_stateMachine.GetFrameLevelRangeName();
|
||||
}
|
||||
|
||||
/// Sets the number of Push/Pop nesting levels to collect in the report.
|
||||
void SetNumNestingLevels(uint16_t numNestingLevels)
|
||||
{
|
||||
m_stateMachine.SetNumNestingLevels(numNestingLevels);
|
||||
}
|
||||
|
||||
/// Retrieves the number of Push/Pop nesting levels being collected in the report.
|
||||
uint16_t GetNumNestingLevels() const
|
||||
{
|
||||
return m_stateMachine.GetNumNestingLevels();
|
||||
}
|
||||
|
||||
/// Open the report directory in file browser after perf data collection.
|
||||
/// The default behavor is false, and can be changed by enviroment variable NV_PERF_OPEN_REPORT_DIR_AFTER_COLLECTION.
|
||||
void SetOpenReportDirectoryAfterCollection(bool openReportDirectoryAfterCollection)
|
||||
{
|
||||
m_stateMachine.SetOpenReportDirectoryAfterCollection(openReportDirectoryAfterCollection);
|
||||
}
|
||||
};
|
||||
}}}
|
||||
374
ruins64k/tools/NvPerfUtility/include/NvPerfVulkan.h
Normal file
374
ruins64k/tools/NvPerfUtility/include/NvPerfVulkan.h
Normal file
@@ -0,0 +1,374 @@
|
||||
/*
|
||||
* Copyright 2014-2021 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vulkan/vulkan.h>
|
||||
#include "NvPerfInit.h"
|
||||
#include "NvPerfDeviceProperties.h"
|
||||
#include "nvperf_vulkan_host.h"
|
||||
#include "nvperf_vulkan_target.h"
|
||||
|
||||
namespace nv { namespace perf {
|
||||
|
||||
//
|
||||
// Vulkan Only Utilities
|
||||
//
|
||||
|
||||
inline std::string VulkanGetDeviceName(VkPhysicalDevice physicalDevice)
|
||||
{
|
||||
VkPhysicalDeviceProperties properties;
|
||||
vkGetPhysicalDeviceProperties(physicalDevice, &properties);
|
||||
return properties.deviceName;
|
||||
}
|
||||
|
||||
/// Returns true when the vendorID reported for physicalDevice equals NVIDIA_VENDOR_ID.
inline bool VulkanIsNvidiaDevice(VkPhysicalDevice physicalDevice)
{
    VkPhysicalDeviceProperties deviceProperties;
    vkGetPhysicalDeviceProperties(physicalDevice, &deviceProperties);
    return deviceProperties.vendorID == NVIDIA_VENDOR_ID;
}
|
||||
|
||||
inline uint32_t VulkanGetInstanceApiVersion()
|
||||
{
|
||||
PFN_vkEnumerateInstanceVersion pfnVkEnumerateInstanceVersion = (PFN_vkEnumerateInstanceVersion)vkGetInstanceProcAddr(VK_NULL_HANDLE, "vkEnumerateInstanceVersion");
|
||||
//This API doesn't exist on 1.0 loader
|
||||
if (!pfnVkEnumerateInstanceVersion)
|
||||
{
|
||||
return VK_API_VERSION_1_0;
|
||||
}
|
||||
|
||||
uint32_t loaderVersion;
|
||||
VkResult res = pfnVkEnumerateInstanceVersion(&loaderVersion);
|
||||
if (res != VK_SUCCESS)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "Couldn't enumerate instance version!\n");
|
||||
return 0;
|
||||
}
|
||||
return loaderVersion;
|
||||
}
|
||||
|
||||
/// Returns the apiVersion field reported by vkGetPhysicalDeviceProperties() for physicalDevice.
inline uint32_t VulkanGetPhysicalDeviceApiVersion(VkPhysicalDevice physicalDevice)
{
    VkPhysicalDeviceProperties deviceProperties;
    vkGetPhysicalDeviceProperties(physicalDevice, &deviceProperties);
    const uint32_t deviceApiVersion = deviceProperties.apiVersion;
    return deviceApiVersion;
}
|
||||
|
||||
//
|
||||
// Vulkan NvPerf Utilities
|
||||
//
|
||||
inline bool VulkanAppendInstanceRequiredExtensions(std::vector<const char*>& instanceExtensionNames)
|
||||
{
|
||||
NVPW_VK_Profiler_GetRequiredInstanceExtensions_Params getRequiredInstanceExtensionsParams = { NVPW_VK_Profiler_GetRequiredInstanceExtensions_Params_STRUCT_SIZE };
|
||||
getRequiredInstanceExtensionsParams.apiVersion = VulkanGetInstanceApiVersion();
|
||||
|
||||
NVPA_Status nvpaStatus = NVPW_VK_Profiler_GetRequiredInstanceExtensions(&getRequiredInstanceExtensionsParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_GetRequiredInstanceExtensions failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!getRequiredInstanceExtensionsParams.isOfficiallySupportedVersion)
|
||||
{
|
||||
uint32_t major = VK_VERSION_MAJOR(getRequiredInstanceExtensionsParams.apiVersion);
|
||||
uint32_t minor = VK_VERSION_MINOR(getRequiredInstanceExtensionsParams.apiVersion);
|
||||
uint32_t patch = VK_VERSION_PATCH(getRequiredInstanceExtensionsParams.apiVersion);
|
||||
// not an error - NvPerf treats any unknown version as the same as its latest known version.
|
||||
// Unknown version warnings should be reported back to the Nsight Perf team to get official support
|
||||
NV_PERF_LOG_WRN(10, "Vulkan Instance API Version: %u.%u.%u - is not an officially supported version\n", major, minor, patch);
|
||||
}
|
||||
|
||||
for (uint32_t extensionIndex=0; extensionIndex < getRequiredInstanceExtensionsParams.numInstanceExtensionNames; ++ extensionIndex)
|
||||
{
|
||||
instanceExtensionNames.push_back(getRequiredInstanceExtensionsParams.ppInstanceExtensionNames[extensionIndex]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Appends the device extensions reported by NVPW_VK_Profiler_GetRequiredDeviceExtensions to deviceExtensionNames.
///
/// instance, physicalDevice and pfnGetInstanceProcAddr are optional (they let NvPerf query advanced features).
/// When physicalDevice is a valid handle, non-NVIDIA devices are skipped (returns true, appends nothing)
/// and the device's own API version is used for the query.
/// When physicalDevice is VK_NULL_HANDLE, no Vulkan device query is made (passing a null handle to
/// vkGetPhysicalDeviceProperties is invalid usage) and the instance API version is used instead.
/// Returns false when the NvPerf query fails.
inline bool VulkanAppendDeviceRequiredExtensions(VkInstance instance, VkPhysicalDevice physicalDevice, void* pfnGetInstanceProcAddr, std::vector<const char*>& deviceExtensionNames)
{
    const bool hasPhysicalDevice = (physicalDevice != VK_NULL_HANDLE);
    if (hasPhysicalDevice && !VulkanIsNvidiaDevice(physicalDevice))
    {
        return true; // do nothing on non-NVIDIA devices
    }

    NVPW_VK_Profiler_GetRequiredDeviceExtensions_Params getRequiredDeviceExtensionsParams = { NVPW_VK_Profiler_GetRequiredDeviceExtensions_Params_STRUCT_SIZE };
    // NOTE(review): the instance version is used as a stand-in when no device is available - confirm this is the desired fallback.
    getRequiredDeviceExtensionsParams.apiVersion = hasPhysicalDevice
        ? VulkanGetPhysicalDeviceApiVersion(physicalDevice)
        : VulkanGetInstanceApiVersion();

    // optional parameters - this allows NvPerf to query if certain advanced features are available for use
    getRequiredDeviceExtensionsParams.instance = instance;
    getRequiredDeviceExtensionsParams.physicalDevice = physicalDevice;
    getRequiredDeviceExtensionsParams.pfnGetInstanceProcAddr = pfnGetInstanceProcAddr;

    NVPA_Status nvpaStatus = NVPW_VK_Profiler_GetRequiredDeviceExtensions(&getRequiredDeviceExtensionsParams);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_GetRequiredDeviceExtensions failed\n");
        return false;
    }

    if (!getRequiredDeviceExtensionsParams.isOfficiallySupportedVersion)
    {
        uint32_t major = VK_VERSION_MAJOR(getRequiredDeviceExtensionsParams.apiVersion);
        uint32_t minor = VK_VERSION_MINOR(getRequiredDeviceExtensionsParams.apiVersion);
        uint32_t patch = VK_VERSION_PATCH(getRequiredDeviceExtensionsParams.apiVersion);
        // not an error - NvPerf treats any unknown version as the same as its latest known version.
        // Unknown version warnings should be reported back to the Nsight Perf team to get official support
        NV_PERF_LOG_WRN(100, "Vulkan Device API Version: %u.%u.%u - is not an officially supported version\n", major, minor, patch);
    }

    for (size_t extensionIndex = 0; extensionIndex < getRequiredDeviceExtensionsParams.numDeviceExtensionNames; ++extensionIndex)
    {
        deviceExtensionNames.push_back(getRequiredDeviceExtensionsParams.ppDeviceExtensionNames[extensionIndex]);
    }

    return true;
}
|
||||
|
||||
inline bool VulkanAppendRequiredExtensions(std::vector<const char*>& instanceExtensionNames, std::vector<const char*>& deviceExtensionNames)
|
||||
{
|
||||
bool status = VulkanAppendInstanceRequiredExtensions(instanceExtensionNames);
|
||||
if (!status)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
status = VulkanAppendDeviceRequiredExtensions(VK_NULL_HANDLE, VK_NULL_HANDLE, nullptr, deviceExtensionNames);
|
||||
if (!status)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Calls NVPW_VK_LoadDriver for the given instance; logs and returns false when it reports failure.
inline bool VulkanLoadDriver(VkInstance instance)
{
    NVPW_VK_LoadDriver_Params params = { NVPW_VK_LoadDriver_Params_STRUCT_SIZE };
    params.instance = instance;

    const NVPA_Status status = NVPW_VK_LoadDriver(&params);
    if (!status)
    {
        return true;
    }

    NV_PERF_LOG_ERR(10, "NVPW_VK_LoadDriver failed\n");
    return false;
}
|
||||
|
||||
/// Resolves the NvPerf device index for a Vulkan device via NVPW_VK_Device_GetDeviceIndex.
/// Returns ~size_t(0) when the NvPerf call reports failure.
inline size_t VulkanGetNvperfDeviceIndex(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device, size_t sliIndex = 0)
{
    NVPW_VK_Device_GetDeviceIndex_Params params = { NVPW_VK_Device_GetDeviceIndex_Params_STRUCT_SIZE };
    params.instance = instance;
    params.physicalDevice = physicalDevice;
    params.device = device;
    params.sliIndex = sliIndex;
    // NvPerf takes the loader entry points as opaque pointers.
    params.pfnGetInstanceProcAddr = reinterpret_cast<void*>(vkGetInstanceProcAddr);
    params.pfnGetDeviceProcAddr = reinterpret_cast<void*>(vkGetDeviceProcAddr);

    const NVPA_Status status = NVPW_VK_Device_GetDeviceIndex(&params);
    if (!status)
    {
        return params.deviceIndex;
    }

    return ~size_t(0);
}
|
||||
|
||||
/// Looks up the NvPerf device index for the Vulkan device and returns GetDeviceIdentifiers() for it.
inline DeviceIdentifiers VulkanGetDeviceIdentifiers(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device, size_t sliIndex = 0)
{
    const size_t nvperfDeviceIndex = VulkanGetNvperfDeviceIndex(instance, physicalDevice, device, sliIndex);
    return GetDeviceIdentifiers(nvperfDeviceIndex);
}
|
||||
|
||||
/// Returns GetDeviceClockState() for the NvPerf device index that corresponds to the Vulkan device.
inline NVPW_Device_ClockStatus VulkanGetDeviceClockState(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device)
{
    const size_t deviceIndex = VulkanGetNvperfDeviceIndex(instance, physicalDevice, device);
    const NVPW_Device_ClockStatus clockState = GetDeviceClockState(deviceIndex);
    return clockState;
}
|
||||
|
||||
/// Forwards an NVPW_Device_ClockSetting to SetDeviceClockState() for the NvPerf device index
/// that corresponds to the Vulkan device.
inline bool VulkanSetDeviceClockState(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device, NVPW_Device_ClockSetting clockStatus)
{
    const size_t deviceIndex = VulkanGetNvperfDeviceIndex(instance, physicalDevice, device);
    return SetDeviceClockState(deviceIndex, clockStatus);
}
|
||||
|
||||
/// Forwards an NVPW_Device_ClockStatus to SetDeviceClockState() for the NvPerf device index
/// that corresponds to the Vulkan device.
inline bool VulkanSetDeviceClockState(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device, NVPW_Device_ClockStatus clockStatus)
{
    const size_t deviceIndex = VulkanGetNvperfDeviceIndex(instance, physicalDevice, device);
    return SetDeviceClockState(deviceIndex, clockStatus);
}
|
||||
|
||||
inline size_t VulkanCalculateMetricsEvaluatorScratchBufferSize(const char* pChipName)
|
||||
{
|
||||
NVPW_VK_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParams = { NVPW_VK_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE };
|
||||
calculateScratchBufferSizeParams.pChipName = pChipName;
|
||||
NVPA_Status nvpaStatus = NVPW_VK_MetricsEvaluator_CalculateScratchBufferSize(&calculateScratchBufferSizeParams);
|
||||
if (nvpaStatus)
|
||||
{
|
||||
NV_PERF_LOG_ERR(20, "NVPW_VK_MetricsEvaluator_CalculateScratchBufferSize failed\n");
|
||||
return 0;
|
||||
}
|
||||
return calculateScratchBufferSizeParams.scratchBufferSize;
|
||||
}
|
||||
|
||||
/// Initializes a metrics evaluator for pChipName in the caller-provided scratch buffer.
/// Logs and returns nullptr when NVPW_VK_MetricsEvaluator_Initialize reports failure.
inline NVPW_MetricsEvaluator* VulkanCreateMetricsEvaluator(uint8_t* pScratchBuffer, size_t scratchBufferSize, const char* pChipName)
{
    NVPW_VK_MetricsEvaluator_Initialize_Params params = { NVPW_VK_MetricsEvaluator_Initialize_Params_STRUCT_SIZE };
    params.pScratchBuffer = pScratchBuffer;
    params.scratchBufferSize = scratchBufferSize;
    params.pChipName = pChipName;

    const NVPA_Status status = NVPW_VK_MetricsEvaluator_Initialize(&params);
    if (!status)
    {
        return params.pMetricsEvaluator;
    }

    NV_PERF_LOG_ERR(20, "NVPW_VK_MetricsEvaluator_Initialize failed\n");
    return nullptr;
}
|
||||
|
||||
}}
|
||||
|
||||
namespace nv { namespace perf { namespace profiler {
|
||||
|
||||
/// Creates a raw metrics config for pChipName with activity kind NVPA_ACTIVITY_KIND_PROFILER.
/// Returns nullptr when NVPW_VK_RawMetricsConfig_Create reports failure.
inline NVPA_RawMetricsConfig* VulkanCreateRawMetricsConfig(const char* pChipName)
{
    NVPW_VK_RawMetricsConfig_Create_Params createParams = { NVPW_VK_RawMetricsConfig_Create_Params_STRUCT_SIZE };
    createParams.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
    createParams.pChipName = pChipName;

    const NVPA_Status status = NVPW_VK_RawMetricsConfig_Create(&createParams);
    if (!status)
    {
        return createParams.pRawMetricsConfig;
    }

    return nullptr;
}
|
||||
|
||||
/// Asks NvPerf whether the GPU behind the given Vulkan device can be profiled.
/// Returns false, after logging the reason, when the NvPerf query fails or reports the GPU as unsupported.
inline bool VulkanIsGpuSupported(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device, size_t sliIndex = 0)
{
    const size_t deviceIndex = VulkanGetNvperfDeviceIndex(instance, physicalDevice, device, sliIndex);

    NVPW_VK_Profiler_IsGpuSupported_Params params = { NVPW_VK_Profiler_IsGpuSupported_Params_STRUCT_SIZE };
    params.deviceIndex = deviceIndex;
    NVPA_Status nvpaStatus = NVPW_VK_Profiler_IsGpuSupported(&params);
    if (nvpaStatus)
    {
        NV_PERF_LOG_ERR(10, "NVPW_VK_Profiler_IsGpuSupported failed on %s\n", VulkanGetDeviceName(physicalDevice).c_str());
        return false;
    }

    if (!params.isSupported)
    {
        NV_PERF_LOG_ERR(10, "%s is not supported\n", VulkanGetDeviceName(physicalDevice).c_str());
        if (params.gpuArchitectureSupportLevel != NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_SUPPORTED)
        {
            const DeviceIdentifiers deviceIdentifiers = VulkanGetDeviceIdentifiers(instance, physicalDevice, device, sliIndex);
            // pChipName may be null for an unrecognized GPU; never hand a null pointer to %s.
            const char* const pChipName = deviceIdentifiers.pChipName ? deviceIdentifiers.pChipName : "Unknown";
            NV_PERF_LOG_ERR(10, "Unsupported GPU architecture %s\n", pChipName);
        }
        if (params.sliSupportLevel == NVPW_SLI_SUPPORT_LEVEL_UNSUPPORTED)
        {
            NV_PERF_LOG_ERR(10, "Devices in SLI configuration are not supported.\n");
        }
        return false;
    }

    return true;
}
|
||||
|
||||
/// Calls NVPW_VK_Profiler_CommandBuffer_PushRange for pRangeName on commandBuffer.
/// rangeNameLength is passed as 0. Logs and returns false when the NvPerf call reports failure.
inline bool VulkanPushRange(VkCommandBuffer commandBuffer, const char* pRangeName)
{
    NVPW_VK_Profiler_CommandBuffer_PushRange_Params params = { NVPW_VK_Profiler_CommandBuffer_PushRange_Params_STRUCT_SIZE };
    params.pRangeName = pRangeName;
    params.rangeNameLength = 0;
    params.commandBuffer = commandBuffer;

    const NVPA_Status status = NVPW_VK_Profiler_CommandBuffer_PushRange(&params);
    if (!status)
    {
        return true;
    }

    NV_PERF_LOG_ERR(50, "NVPW_VK_Profiler_CommandBuffer_PushRange failed\n");
    return false;
}
|
||||
/// Calls NVPW_VK_Profiler_CommandBuffer_PopRange on commandBuffer.
/// Logs and returns false when the NvPerf call reports failure.
inline bool VulkanPopRange(VkCommandBuffer commandBuffer)
{
    NVPW_VK_Profiler_CommandBuffer_PopRange_Params params = { NVPW_VK_Profiler_CommandBuffer_PopRange_Params_STRUCT_SIZE };
    params.commandBuffer = commandBuffer;

    const NVPA_Status status = NVPW_VK_Profiler_CommandBuffer_PopRange(&params);
    if (!status)
    {
        return true;
    }

    NV_PERF_LOG_ERR(50, "NVPW_VK_Profiler_CommandBuffer_PopRange failed\n");
    return false;
}
|
||||
|
||||
/// No-op stand-in for VulkanPushRange: ignores its arguments and always returns false.
inline bool VulkanPushRange_Nop(VkCommandBuffer commandBuffer, const char* pRangeName)
{
    (void)commandBuffer; // intentionally unused
    (void)pRangeName;    // intentionally unused
    return false;
}
|
||||
/// No-op stand-in for VulkanPopRange: ignores its argument and always returns false.
inline bool VulkanPopRange_Nop(VkCommandBuffer commandBuffer)
{
    (void)commandBuffer; // intentionally unused
    return false;
}
|
||||
|
||||
//
|
||||
struct VulkanRangeCommands
|
||||
{
|
||||
bool isNvidiaDevice;
|
||||
bool(*PushRange)(VkCommandBuffer commandBuffer, const char* pRangeName);
|
||||
bool(*PopRange)(VkCommandBuffer commandBuffer);
|
||||
|
||||
public:
|
||||
VulkanRangeCommands()
|
||||
: isNvidiaDevice(false)
|
||||
, PushRange(&VulkanPushRange_Nop)
|
||||
, PopRange(&VulkanPopRange_Nop)
|
||||
{
|
||||
}
|
||||
|
||||
void Initialize(bool isNvidiaDevice_)
|
||||
{
|
||||
isNvidiaDevice = isNvidiaDevice_;
|
||||
if (isNvidiaDevice_)
|
||||
{
|
||||
PushRange = &VulkanPushRange;
|
||||
PopRange = &VulkanPopRange;
|
||||
}
|
||||
else
|
||||
{
|
||||
PushRange = &VulkanPushRange_Nop;
|
||||
PopRange = &VulkanPopRange_Nop;
|
||||
}
|
||||
}
|
||||
|
||||
void Initialize(VkPhysicalDevice physicalDevice)
|
||||
{
|
||||
const bool isNvidiaDevice_ = VulkanIsNvidiaDevice(physicalDevice);
|
||||
return Initialize(isNvidiaDevice_);
|
||||
}
|
||||
};
|
||||
|
||||
}}}
|
||||
Reference in New Issue
Block a user