ugonight
ugonight

Reputation: 21

How to implement double buffering without using a swap chain in Direct3D12? (The RenderTarget is shared with CUDA via interop)

I am working on a program that uses Direct3D12 as a filter to convert image data in CUDA memory, with Direct3D12 input as a texture and output as a RenderTarget. I don’t display anything on the screen with DirectX, so I want to avoid using a swap chain. However, I want to implement double buffering to improve rendering speed. (I want to improve the processing speed per frame, even if the output is delayed by one frame.)

So, I tried incrementing the frame index manually instead of using the swap chain’s GetCurrentBackBufferIndex(), but the processing speed did not change between when the frame count was 1 and when it was 2.

// Enqueues the CUDA side of the current frame and advances to the next
// frame slot.
//
// Sequence, as issued below:
//   1. m_streamToRun is made to wait on the external semaphore reaching the
//      current slot's fence value, and on everything recorded so far on
//      m_streamInOut (via m_event1).
//   2. UpdateCudaTextureData() and UpdateCudaOutData() are called.
//   3. m_streamToRun signals the external semaphore with the current slot's
//      fence value + 1, and m_streamInOut is made to wait for that point
//      (via m_event2).
//   4. m_frameIndex is advanced round-robin over FrameCount slots; the CPU
//      blocks only if m_fence has not yet reached the value stored for the
//      new slot.
//
// NOTE(review): the cudaError_t results of the CUDA calls are not checked
// here; a failure in any of them goes unnoticed at this point. Consider
// routing them through the project's CUDA error-checking helper, if it has
// one.
void DxApplication::MoveToNextFrame()
{
    const UINT64 currentFenceValue = m_fenceValues[m_frameIndex];

    // Zero the whole parameter struct before filling in the fence value.
    cudaExternalSemaphoreWaitParams externalSemaphoreWaitParams;
    memset(&externalSemaphoreWaitParams, 0, sizeof(externalSemaphoreWaitParams));
    externalSemaphoreWaitParams.params.fence.value = currentFenceValue;
    externalSemaphoreWaitParams.flags = 0;

    // Mark the current tail of m_streamInOut so m_streamToRun can wait on it.
    cudaEventRecord(m_event1, m_streamInOut);

    // m_streamToRun does not proceed until the semaphore reaches
    // currentFenceValue ...
    cudaWaitExternalSemaphoresAsync(
        &m_externalSemaphore, &externalSemaphoreWaitParams, 1, m_streamToRun);

    // ... and until the work recorded before m_event1 has finished.
    cudaStreamWaitEvent(m_streamToRun, m_event1);

    // presumably these enqueue the per-frame CUDA work on m_streamToRun —
    // TODO confirm against their definitions.
    UpdateCudaTextureData();
    UpdateCudaOutData();

    // Signal the semaphore with the next value once m_streamToRun reaches
    // this point, and remember that value for the current slot.
    cudaExternalSemaphoreSignalParams externalSemaphoreSignalParams;
    memset(&externalSemaphoreSignalParams, 0, sizeof(externalSemaphoreSignalParams));
    m_fenceValues[m_frameIndex] = currentFenceValue + 1;
    externalSemaphoreSignalParams.params.fence.value = m_fenceValues[m_frameIndex];
    externalSemaphoreSignalParams.flags = 0;

    cudaSignalExternalSemaphoresAsync(
        &m_externalSemaphore, &externalSemaphoreSignalParams, 1, m_streamToRun);

    // Make later work on m_streamInOut wait for m_streamToRun to get here.
    cudaEventRecord(m_event2, m_streamToRun);
    cudaStreamWaitEvent(m_streamInOut, m_event2);

    // Update the frame index. There is no swap chain to query
    // (GetCurrentBackBufferIndex), so cycle through the slots manually.
    // Written as (index + 1) % FrameCount rather than
    // `m_frameIndex = ++m_frameIndex % FrameCount`, which modifies the
    // variable twice in one expression.
    m_frameIndex = (m_frameIndex + 1) % FrameCount;

    // If the next frame is not ready to be rendered yet, wait until it is ready.
    if (m_fence->GetCompletedValue() < m_fenceValues[m_frameIndex]) {
        ThrowIfFailed(m_fence->SetEventOnCompletion(m_fenceValues[m_frameIndex],
            m_fenceEvent));
        WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE);
    }

    // Set the fence value for the next frame.
    m_fenceValues[m_frameIndex] = currentFenceValue + 2;
}

The full code can be found here

I would appreciate it if you could tell me any suitable methods or sample programs that could be a reference for achieving this.

Upvotes: 2

Views: 131

Answers (0)

Related Questions