/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palQueue.h
* @brief Defines the Platform Abstraction Library (PAL) IQueue interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
// Forward declarations.
class ICmdBuffer;
class IFence;
class IGpuMemory;
class IImage;
class IPrivateScreen;
class IQueueSemaphore;
class IScreen;
class ISwapChain;
struct CmdBufInfo;
struct GpuMemSubAllocInfo;
struct GpuMemoryRef;
struct DoppRef;
enum class VirtualGpuMemAccessMode : uint32;
/// Specifies whether presents are windowed or fullscreen. This will determine whether the present is performed via a
/// BLT or flip.
enum class PresentMode : uint32
{
Windowed = 0x0,
Fullscreen = 0x1,
Count
};
/// Defines flags for describing which types of present modes are supported on a given queue.
enum PresentModeSupport : uint32
{
SupportWindowedPresent = 0x1,
SupportWindowedPriorBlitPresent = 0x2,
SupportFullscreenPresent = 0x4,
};
/// Defines submit-time bottlenecks which PAL can potentially optimize.
enum class SubmitOptMode : uint32
{
Default = 0, ///< PAL will enable optimizations when generally efficient.
Disabled = 1, ///< Disable all optimizations that could be detrimental in special cases.
MinKernelSubmits = 2, ///< Minimize the overhead of launching command buffers on the CPU and GPU.
MinGpuCmdOverhead = 3, ///< Minimize the overhead of reading command buffer commands on the GPU.
Count
};
/// Defines the execution priority for a queue, specified either at queue creation or via IQueue::SetExecutionPriority()
/// on platforms that support it.
/// QueuePriority::Low corresponds to the default priority,
/// so clients should be careful when converting API enums to this enum
/// (e.g., VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT is specified to mean default priority,
/// so should be translated to QueuePriority::Low).
enum class QueuePriority : uint32
{
Low = 0, ///< Low priority (the default).
Medium = 1, ///< Medium priority.
High = 2, ///< High priority.
VeryLow = 3, ///< The lowest priority.
Realtime = 4, ///< Real-time priority.
};
/// Specifies properties for @ref IQueue creation. Input structure to IDevice::CreateQueue().
struct QueueCreateInfo
{
QueueType queueType; ///< Selects which type of queue to create.
EngineType engineType; ///< Selects which type of engine to create.
uint32 engineIndex; ///< Which instance of the specified engine type to use. For example, there
/// can be multiple compute queues, so this parameter distinguishes between them.
SubmitOptMode submitOptMode; ///< A hint telling PAL which submit-time bottlenecks should be optimized, if any.
QueuePriority priority; ///< A hint telling PAL to create queue with proper priority.
/// It is only supported if supportQueuePriority is set in DeviceProperties.
struct
{
uint32 placeholder1 : 1; ///< Reserved field. Set to 0.
uint32 windowedPriorBlit : 1; ///< All windowed presents on this queue are notifications that the client
/// has manually done a blit present
uint32 placeholder2 : 1; ///< Reserved field. Set to 0.
#if PAL_AMDGPU_BUILD && (PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 479)
uint32 enableGpuMemoryPriorities : 1; ///< Enables support for GPU memory priorities on this Queue. This is
/// optional because enabling the feature requires a small amount of
/// memory overhead per-Queue for bookkeeping purposes.
#else
uint32 placeholder3 : 1; ///< Reserved field. Set to 0.
#endif
uint32 reserved : 28; ///< Reserved for future use.
};
uint32 numReservedCu; ///< The number of reserved compute units for RT CU queue
uint32 persistentCeRamOffset; ///< Byte offset to the beginning of the region of CE RAM which this Queue should
/// preserve across consecutive submissions. Must be a multiple of 32. It is an
/// error to specify a nonzero value here if the Device does not support
/// @ref supportPersistentCeRam for the Engine this Queue will attach to.
uint32 persistentCeRamSize; ///< Amount of CE RAM space which this Queue should preserve across consecutive
/// submissions. Units are in DWORDs, and this must be a multiple of 8. It is an
/// error to specify a nonzero value here if the Device does not support
/// @ref supportPersistentCeRam for the Engine this Queue will attach to.
};
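// Example: filling out QueueCreateInfo for a universal queue (illustrative sketch only, not part of the PAL
// interface). This assumes PAL's usual placement-new creation pattern on IDevice (GetQueueSize()/CreateQueue());
// only CreateQueue() is referenced in this file, so treat the exact signatures as assumptions. Error handling and
// allocation-failure checks are omitted.
//
//     Pal::QueueCreateInfo createInfo = {};
//     createInfo.queueType     = Pal::QueueTypeUniversal;   // Assumed QueueType value from pal.h.
//     createInfo.engineType    = Pal::EngineTypeUniversal;  // Assumed EngineType value from pal.h.
//     createInfo.engineIndex   = 0;                         // First instance of the chosen engine type.
//     createInfo.submitOptMode = Pal::SubmitOptMode::Default;
//     createInfo.priority      = Pal::QueuePriority::Low;   // Low is the default priority.
//
//     Pal::Result  result    = Pal::Result::Success;
//     const size_t queueSize = pDevice->GetQueueSize(createInfo, &result);   // Assumed sizing helper.
//     void*const   pMemory   = malloc(queueSize);                            // Client-owned placement memory.
//     Pal::IQueue* pQueue    = nullptr;
//     result = pDevice->CreateQueue(createInfo, pMemory, &pQueue);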
/// Specifies all information needed to execute a set of command buffers. Input structure to IQueue::Submit().
///
/// Some members of this structure are not supported on all platforms. The client must check the appropriate properties
/// structures to determine if the corresponding features are supported:
/// + pGpuMemoryRefs: Support is indicated by supportPerSubmitMemRefs in @ref DeviceProperties.
/// + ppBlockIfFlipping: Support is indicated by supportBlockIfFlipping in @ref PlatformProperties. If it is supported,
/// the client must not specify a blockIfFlippingCount greater than MaxBlockIfFlippingCount.
///
/// @note If this queue is running in physical submission mode (due to hardware restrictions), the gpuMemRefCount and
/// pGpuMemoryRefs arguments to this method are ignored because the command buffers themselves contain their own
/// GPU memory reference lists.
struct SubmitInfo
{
uint32 cmdBufferCount; ///< Number of command buffers to be submitted.
ICmdBuffer*const* ppCmdBuffers; ///< Array of cmdBufferCount command buffers to be submitted.
const CmdBufInfo* pCmdBufInfoList; ///< Null, or an array of cmdBufferCount structs providing additional
/// info about the command buffers being submitted. If non-null,
/// elements are ignored if their isValid flag is false.
uint32 gpuMemRefCount; ///< Number of GPU memory references for this submit.
const GpuMemoryRef* pGpuMemoryRefs; ///< Array of gpuMemRefCount GPU memory references. Can be null if
/// gpuMemRefCount is zero. The GPU memory objects will be made
/// resident for the duration of this submit.
uint32 doppRefCount; ///< Number of DOPP desktop texture references for this submit.
const DoppRef* pDoppRefs; ///< Array of doppRefCount DOPP texture references. Can be null if
/// doppRefCount is zero.
uint32 externPhysMemCount; ///< Number of entries in ppExternPhysMem.
const IGpuMemory** ppExternPhysMem; ///< Array of external physical memory allocations to be initialized
/// as part of this submit. The first submit that references a
/// particular external physical memory allocation must include
/// that allocation in this list. Subsequent submits that reference
/// the same allocation should not include it in this list, as it
/// would trigger redundant GPU page table initialization.
uint32 blockIfFlippingCount; ///< Number of GPU memory objects to protect when flipped.
const IGpuMemory*const* ppBlockIfFlipping; ///< Array of blockIfFlippingCount GPU memory objects. Can be null if
/// blockIfFlippingCount is zero. The command buffers will not be
/// scheduled to the GPU while a fullscreen (flip) present is queued
/// for any of these GPU memory allocations.
IFence* pFence; ///< Null, or a fence object to be signaled once the last command
/// buffer in the submission completes execution.
};
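// Example: submitting a single command buffer with a per-submit memory reference and a fence (illustrative sketch
// only). GpuMemoryRef is only forward-declared in this file, so its pGpuMemory member is an assumption; pCmdBuffer,
// pGpuMemory, and pFence are assumed to have been created elsewhere, and the device is assumed to report
// supportPerSubmitMemRefs. Error handling is omitted.
//
//     Pal::GpuMemoryRef memRef = {};
//     memRef.pGpuMemory = pGpuMemory;            // Made resident for the duration of this submit.
//
//     Pal::SubmitInfo submitInfo = {};
//     submitInfo.cmdBufferCount = 1;
//     submitInfo.ppCmdBuffers   = &pCmdBuffer;   // One-element ICmdBuffer*const* array.
//     submitInfo.gpuMemRefCount = 1;
//     submitInfo.pGpuMemoryRefs = &memRef;
//     submitInfo.pFence         = pFence;        // Signaled once the last command buffer completes.
//
//     Pal::Result result = pQueue->Submit(submitInfo);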
/// The value of blockIfFlippingCount in @ref SubmitInfo cannot be greater than this value.
constexpr uint32 MaxBlockIfFlippingCount = 16;
/// The value of imageCount in @ref MgpuSlsInfo cannot be greater than this value.
constexpr uint32 MaxMgpuSlsImageCount = 3;
/// MGPU SLS present info.
/// The on-screen surface of an MGPU SLS configuration consists of multiple allocations located on different GPUs.
/// Taking the simplest case (Master-Monitor0, Slave-Monitor1) as an example, Monitor0 shows part of an allocation on
/// the Master and Monitor1 shows part of another allocation on the Slave. The UMD has to notify the KMD of both
/// allocations. PAL's pSrcImage is the Master allocation, and this structure contains the Slave allocations.
struct MgpuSlsInfo
{
uint32 vidPnSourceId; ///< VidPN source ID, required by the KMD to process the SLS present.
uint32 imageCount; ///< Number of allocations to be displayed.
const IImage* pImage[MaxMgpuSlsImageCount]; ///< One allocation per visible Slave GPU, all having the same
/// properties/dimensions. MgpuSlsInfo is provided to the Master's flip
/// engine, so pImage[i] is a peer image.
};
/// Specifies properties for the presentation of an image to the screen. Input structure to IQueue::PresentDirect().
struct PresentDirectInfo
{
union
{
struct
{
uint32 fullscreenDoNotWait : 1; ///< Fail the present immediately if the present queue is full.
uint32 srcIsTypedBuffer : 1; ///< True if the source is a typed buffer instead of an image.
uint32 dstIsTypedBuffer : 1; ///< True if the destination is a typed buffer instead of an image.
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Present flags.
OsWindowHandle hWindow; ///< Native OS window handle that this image should be presented to.
PresentMode presentMode; ///< Chooses between windowed and fullscreen present.
uint32 presentInterval; ///< Must be an integer from 0 to 4. 0 indicates that the present should
/// occur immediately (may tear), and 1-4 indicates the present should
/// occur after 1 to 4 vertical syncs. Only valid for fullscreen presents.
union
{
IImage* pSrcImage; ///< Optional: The image to be presented. If null, the present will not
/// occur but PAL may still call into the OS on certain platforms that
/// expect it.
IGpuMemory* pSrcTypedBuffer; ///< The typed buffer to be presented. If null, the present will not occur
/// but PAL may still call into the OS on certain platforms that expect it.
};
union
{
IImage* pDstImage; ///< Optional: copy from the source image to this image. If null, PAL will
/// automatically copy into the appropriate platform-specific destination.
/// This is only supported for windowed mode presents.
IGpuMemory* pDstTypedBuffer; ///< The typed buffer to be presented. If null, the present will not occur
/// but PAL may still call into the OS on certain platforms that expect it.
};
MgpuSlsInfo mgpuSlsInfo; ///< Optional MGPU SLS present info. Set imageCount to 0 if this is not an MGPU SLS present.
};
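// Example: queuing a windowed direct present of an image (illustrative sketch only). Assumes hWindow and pImage are
// valid, the image has already been made resident, and the platform reports supportNonSwapChainPresents; error
// handling is omitted.
//
//     Pal::PresentDirectInfo presentInfo = {};
//     presentInfo.hWindow         = hWindow;                    // Native OS window handle.
//     presentInfo.presentMode     = Pal::PresentMode::Windowed;
//     presentInfo.presentInterval = 0;                          // Only meaningful for fullscreen presents.
//     presentInfo.pSrcImage       = pImage;
//
//     Pal::Result result = pQueue->PresentDirect(presentInfo);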
/// Specifies properties for the presentation of an image to the screen. Input structure to IQueue::PresentSwapChain().
struct PresentSwapChainInfo
{
PresentMode presentMode; ///< Chooses between windowed and fullscreen present.
IImage* pSrcImage; ///< The image to be presented.
ISwapChain* pSwapChain; ///< The swap chain associated with the source image.
uint32 imageIndex; ///< The index of the source image within the swap chain. Ownership of this image
/// index will be released back to the swap chain if this call succeeds.
union
{
struct
{
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 466
uint32 turboSyncEnabled : 1; ///< Whether TurboSync is enabled.
#endif
uint32 notifyOnly : 1; ///< True if it is a notify-only present
#if (PAL_CLIENT_INTERFACE_MAJOR_VERSION < 466)
uint32 reserved : 30; ///< Reserved for future use.
#else
uint32 reserved : 31; ///< Reserved for future use.
#endif
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< PresentSwapChainInfo flags.
};
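// Example: presenting a swap chain image (illustrative sketch only). Assumes imageIndex was obtained from the swap
// chain's acquire call (declared outside this file) and that rendering to the image has already been submitted on
// this queue; error handling is omitted.
//
//     Pal::PresentSwapChainInfo presentInfo = {};
//     presentInfo.presentMode = Pal::PresentMode::Windowed;
//     presentInfo.pSrcImage   = pSwapChainImage;
//     presentInfo.pSwapChain  = pSwapChain;
//     presentInfo.imageIndex  = imageIndex;   // Ownership returns to the swap chain if the call succeeds.
//
//     Pal::Result result = pQueue->PresentSwapChain(presentInfo);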
/// Specifies a mapping from a range of pages in a virtual GPU memory object to a range of pages in a real GPU memory
/// object. Input to IQueue::RemapVirtualMemoryPages().
///
/// When mapping pages of a virtual GPU memory object to a range of pages in a real GPU memory object on a remote GPU,
/// the client must point pRealGpuMem at a peer GPU memory object created on the input queue's device instead of the
/// actual real GPU memory object created on the remote device. This is required for two reasons:
/// 1. PAL can only view remote GPU memory using peer objects.
/// 2. PAL enforces a separation of state between different IDevice object families.
///
/// virtualStartOffset and size must be aligned to the virtualMemPageSize member of @ref DeviceProperties.
/// realStartOffset must be aligned to the realMemAllocGranularity member of @ref DeviceProperties.
struct VirtualMemoryRemapRange
{
IGpuMemory* pVirtualGpuMem; ///< Virtual GPU memory object whose mapping is being updated.
gpusize virtualStartOffset; ///< Start of the page range to be updated, in bytes.
IGpuMemory* pRealGpuMem; ///< Real GPU memory object the virtual range should point at.
gpusize realStartOffset; ///< Start of the page range in the real GPU memory object, in bytes.
gpusize size; ///< Size of the mapping range, in bytes.
VirtualGpuMemAccessMode virtualAccessMode; ///< Access mode for virtual GPU memory's unmapped pages.
/// This parameter is ignored on some platforms.
};
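// Example: mapping a page-aligned region of a virtual allocation onto a real allocation via
// IQueue::RemapVirtualMemoryPages() (illustrative sketch only). The 64 KiB size is an arbitrary assumption and must
// respect the alignment rules documented above; VirtualGpuMemAccessMode::Undefined is an assumed enumerator, since
// the enum is only forward-declared in this file. Error handling is omitted.
//
//     Pal::VirtualMemoryRemapRange range = {};
//     range.pVirtualGpuMem     = pVirtualGpuMem;   // Virtual allocation whose mapping is updated.
//     range.virtualStartOffset = 0;
//     range.pRealGpuMem        = pRealGpuMem;      // Real (or peer) allocation backing the range.
//     range.realStartOffset    = 0;
//     range.size               = 64 * 1024;
//     range.virtualAccessMode  = Pal::VirtualGpuMemAccessMode::Undefined;
//
//     Pal::Result result = pQueue->RemapVirtualMemoryPages(1, &range, false, nullptr);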
/// Specifies a set of page mappings to copy between virtual GPU memory objects. The source and destination can be the
/// same memory object and the source and destination regions may overlap. Input to IQueue::CopyVirtualMemoryPageMappings().
///
/// srcStartOffset, dstStartOffset, and size must be aligned to the virtualMemPageSize member of @ref DeviceProperties.
struct VirtualMemoryCopyPageMappingsRange
{
IGpuMemory* pSrcGpuMem; ///< Virtual GPU memory object whose mapping is being copied from.
gpusize srcStartOffset; ///< Start of the copy source range, in bytes.
IGpuMemory* pDstGpuMem; ///< Virtual GPU memory object whose mapping is being copied to.
gpusize dstStartOffset; ///< Start of the copy destination range, in bytes.
gpusize size; ///< Size of the mapping range, in bytes.
};
/// Specifies kernel level information about a context.
struct KernelContextInfo
{
union
{
struct
{
uint32 hasDebugVmid : 1; ///< True if the context has acquired the debug vmid.
uint32 hasHighPriorityVmid : 1; ///< True if the context has acquired the high priority vmid.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Context flags.
uint64 contextIdentifier; ///< Kernel scheduler context identifier.
};
/**
***********************************************************************************************************************
* @interface IQueue
* @brief Represents a queue of work for a particular GPU engine on a device.
*
* An IQueue object is a virtual representation of a hardware engine on the device. Multiple IQueue objects can be
* created and have work submitted on them in parallel. Work is submitted to a queue through @ref ICmdBuffer objects,
* and work can be synchronized between multiple queues using @ref IQueueSemaphore objects.
*
* @see IDevice::GetQueue()
***********************************************************************************************************************
*/
class IQueue : public IDestroyable
{
public:
/// Submits a group of root command buffers for execution on this queue.
///
/// @param [in] submitInfo Specifies all command buffers to execute along with other residency and synchronization
/// information. See @ref SubmitInfo for additional, important documentation.
///
/// @returns Success if the command buffer was successfully submitted. Otherwise, one of the following errors may
/// be returned:
/// + ErrorInvalidPointer if:
/// - any of the array inputs are null when their counts are non-zero.
/// - any members of non-null pointer arrays are null.
/// + ErrorTooManyMemoryReferences if the total number of memory references (device/queue global and
/// per-command buffer) is too large.
/// + ErrorInvalidValue if blockIfFlippingCount is too large.
/// + ErrorIncompleteCommandBuffer if any of the submitted command buffers are not properly constructed.
/// + ErrorIncompatibleQueue if any submitted command buffer does not match this queue's type (e.g.,
/// universal, compute, DMA).
virtual Result Submit(
const SubmitInfo& submitInfo) = 0;
/// Waits for all previous submissions on this queue to complete before control is returned to the caller.
///
/// @returns Success if the wait for prior submissions completed. Otherwise, an error indicating why the wait was
/// unsuccessful, for example due to a lost device.
virtual Result WaitIdle() = 0;
/// Inserts a semaphore signal into the GPU queue. The semaphore will be signaled once all previously submitted
/// work on this queue has completed.
///
/// @param [in] pQueueSemaphore Semaphore to signal.
/// @param [in] value Timeline semaphore point value to signal; ignored for non-timeline semaphores.
///
/// @returns Success if the semaphore signal was successfully queued. Otherwise, one of the following errors may be
/// returned:
/// + ErrorUnknown if the OS scheduler rejects the signal for unknown reasons.
virtual Result SignalQueueSemaphore(
IQueueSemaphore* pQueueSemaphore, uint64 value = 0) = 0;
/// Inserts a semaphore wait into the GPU queue. The queue will be stalled until the specified semaphore is
/// signaled.
///
/// @param [in] pQueueSemaphore Semaphore to wait on.
/// @param [in] value Timeline semaphore point value to wait on; ignored for non-timeline semaphores.
///
/// @returns Success if the semaphore wait was successfully queued. Otherwise, one of the following errors may be
/// returned:
/// + ErrorUnknown if the OS scheduler rejects the wait for unknown reasons.
virtual Result WaitQueueSemaphore(
IQueueSemaphore* pQueueSemaphore, uint64 value = 0) = 0;
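// Example: synchronizing two queues of the same device with a semaphore (illustrative sketch only). Assumes
// pSemaphore was created elsewhere (IQueueSemaphore is only forward-declared in this file); error handling is
// omitted.
//
//     // Producer: the semaphore is signaled once all work previously submitted to pComputeQueue completes.
//     Pal::Result result = pComputeQueue->SignalQueueSemaphore(pSemaphore);
//
//     // Consumer: pUniversalQueue stalls until the semaphore is signaled.
//     result = pUniversalQueue->WaitQueueSemaphore(pSemaphore);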
/// This function passes application information to the KMD for application-specific power optimizations.
/// The power configuration is restored to its default when all application queues are destroyed.
///
/// @param [in] pFileName Application executable name
/// @param [in] pPathName Path to the application
///
/// @returns Success if the information is passed successfully. Otherwise, one of the following errors may be
/// returned:
/// + Unsupported if this function is not available on this OS or if the queue context is null.
/// + ErrorUnknown if an unexpected internal error occurs.
virtual Result UpdateAppPowerProfile(
const wchar_t* pFileName,
const wchar_t* pPathName) = 0;
/// Queues the specified image for presentation on the screen. This function directly queues the presentation
/// request based on the input parameters without special synchronization considerations like a swap chain present.
/// All previous work done on this queue will complete before the image is displayed.
///
/// This function should never be called with a swap chain presentable image because it won't release ownership of
/// the presentable image index, eventually deadlocking the swap chain.
///
/// Overall support for direct presents can be queried at platform creation time via supportNonSwapChainPresents
/// in @ref PlatformProperties. Support for particular present modes is specified via supportedDirectPresentModes
/// in @ref DeviceProperties.
///
/// @note Any images specified in presentInfo must be made resident before calling this function.
///
/// @param [in] presentInfo Specifies the source image and destination window for the present as well as other
/// properties.
///
/// @returns Success if the present was successfully queued. Otherwise, one of the following errors may be
/// returned:
/// + ErrorInvalidValue if the flip interval is invalid.
/// + ErrorInvalidValue if the present mode doesn't match the capabilities of the image.
/// + ErrorInvalidFlags if the present flags don't match the capabilities of the image.
virtual Result PresentDirect(
const PresentDirectInfo& presentInfo) = 0;
/// Queues the specified image for presentation on the screen. This function uses the provided swap chain to
/// determine exactly how the image should be presented (e.g., can the user see tearing). See @ref ISwapChain for
/// more information on swap chain presentation. All previous work done on this queue will complete before the
/// image is displayed, but future work may execute before the present is completed because swap chain present
/// execution may be asynchronous to the queue that initiated the present.
///
/// Assuming the presentInfo is valid, this function will always release ownership of the presentable image index
/// even if PAL encounters an error while executing the present.
///
/// Queue support for swap chain presents is specified via supportsSwapChainPresents in @ref DeviceProperties.
/// Support for particular PresentModes is queried per SwapChainMode via IDevice::GetSwapChainInfo().
///
/// @note The source image specified in presentInfo must be made resident before calling this function.
///
/// @param [in] presentInfo Specifies the source image, swap chain, and basic presentation information.
///
/// @returns Success if the present was successfully queued. Otherwise, one of the following errors may be
/// returned:
/// + ErrorInvalidPointer if the source image or swap chain are null.
/// + ErrorInvalidValue if the present mode doesn't match the capabilities of the image or if the image
/// index isn't valid within the swap chain.
virtual Result PresentSwapChain(
const PresentSwapChainInfo& presentInfo) = 0;
/// Inserts a delay of a specified amount of time before processing more commands on this queue.
///
/// Only available on timer queues. Useful in conjunction with queue semaphores to implement frame pacing.
///
/// @param [in] delay Time, in milliseconds, to delay before processing more commands on this queue.
///
/// @returns Success if the delay was successfully queued. Otherwise, one of the following errors may be returned:
/// + ErrorInvalidValue if delay is less than 0.
virtual Result Delay(
float delay) = 0;
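// Example: one possible frame pacing scheme using a timer queue (illustrative sketch only; pairing Delay() with a
// semaphore signal/wait is an assumption, not a prescribed PAL pattern). pTimerQueue must have been created on a
// timer engine, and error handling is omitted.
//
//     pTimerQueue->Delay(16.6f);                          // ~60 Hz pacing interval, in milliseconds.
//     pTimerQueue->SignalQueueSemaphore(pPaceSemaphore);  // Signaled only after the delay elapses.
//     pRenderQueue->WaitQueueSemaphore(pPaceSemaphore);   // Work submitted after this wait is paced.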
/// Inserts a delay of a specified amount of time on this queue after a vsync on a private display object.
///
/// Only available on timer queues. Useful in conjunction with queue semaphores to implement pacing of GPU and CPU
/// operations for rendering and presentation in VR as this allows GPU commands of next frame to be sent early but
/// blocks GPU execution until after vsync.
///
/// @param [in] delayInUs Time, in microseconds, to delay before processing more commands on this queue.
/// @param [in] pScreen The private screen object on which the vsync occurs and which the delay waits on.
///
/// @returns Success if the delay was successfully queued. Otherwise, one of the following errors may be returned:
/// + ErrorInvalidValue if delay is less than 0.
virtual Result DelayAfterVsync(
float delayInUs,
const IPrivateScreen* pScreen) = 0;
/// Updates page mappings for virtual GPU memory allocations.
///
/// @param [in] rangeCount Number of ranges to remap (i.e., size of the pRanges array).
/// @param [in] pRanges Defines the set of remappings from virtual GPU memory object pages to real GPU
/// memory object pages.
/// @param [in] doNotWait If true, then this paging operation will be executed on the Queue immediately, without
/// waiting for any previous rendering to finish first. On platforms that don't support
/// this, the flag will be ignored.
/// @param [in] pFence Optional. Pointer to an IFence, which will be signaled after the VA remapping.
///
/// @returns Success if the remappings were executed successfully. It is assumed that the following conditions are
/// met for the input to this function:
/// + rangeCount is not 0.
/// + The page range for all members of pRanges are valid.
/// + pRanges is not null.
/// + pVirtualGpuMem is not null for any member of pRanges.
/// + pRanges does not specify a real GPU memory object as a virtual GPU memory object or vice versa.
virtual Result RemapVirtualMemoryPages(
uint32 rangeCount,
const VirtualMemoryRemapRange* pRanges,
bool doNotWait,
IFence* pFence) = 0;
/// Copies page mappings from one virtual GPU memory object to another.
///
/// @param [in] rangeCount Number of ranges to copy (i.e., size of the pRanges array).
/// @param [in] pRanges Defines the set of page mappings to copy between virtual GPU memory objects.
/// @param [in] doNotWait If true, then this paging operation will be executed on the Queue immediately, without
/// waiting for any previous rendering to finish first. On platforms that don't support
/// this, the flag will be ignored.
///
/// @returns Success if the mappings were copied successfully. It is assumed that the following conditions are
/// met for the input to this function:
/// + rangeCount is not 0.
/// + The page range for all members of pRanges are valid.
/// + pRanges is not null.
/// + pSrcGpuMem or pDstGpuMem is not null for any member of pRanges.
/// + pRanges does not specify a real GPU memory object as source or destination
virtual Result CopyVirtualMemoryPageMappings(
uint32 rangeCount,
const VirtualMemoryCopyPageMappingsRange* pRanges,
bool doNotWait) = 0;
/// Associates the provided Fence object with the last submission on this queue object. The Fence can be used via
/// GetStatus() to get the status of the last Submit; however, no event will be created or set for the Fence, so
/// WaitForFences() should NOT be called on the fence after this association.
///
/// @see IFence::GetStatus()
/// @see IFence::WaitForFences()
///
/// @param [in] pFence Fence object to be associated with the last Submit on this queue
///
/// @returns Success if the association was successful. ErrorUnavailable will be returned if there has not yet been
/// a Submit on this queue.
virtual Result AssociateFenceWithLastSubmit(
IFence* pFence) = 0;
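// Example: polling completion of the most recent submission (illustrative sketch only). IFence::GetStatus() is
// referenced above but declared in palFence.h; the no-argument call and the Result::NotReady busy-poll value are
// assumptions. Error handling is omitted.
//
//     Pal::Result result = pQueue->AssociateFenceWithLastSubmit(pFence);
//
//     // Poll only; WaitForFences() must not be used with a fence associated this way.
//     while (pFence->GetStatus() == Pal::Result::NotReady)
//     {
//         // Do other useful CPU work here.
//     }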
/// Sets the execution priority for the current queue. This elevates the execution priority of subsequently
/// submitted command buffers but has no effect on command buffers that have already been submitted for execution.
/// Elevating the queue priority to medium or high allows execution of a low priority queue to be temporarily
/// stalled so that this queue's work can execute as soon as the low priority queue starts draining.
///
/// @param [in] priority The priority level of the queue.
virtual void SetExecutionPriority(
QueuePriority priority) = 0;
/// Returns a list of GPU memory allocations used by this queue.
///
/// @param [in,out] pNumEntries Input value specifies the available size in pAllocInfoList; output value
/// reports the number of GPU memory allocations.
/// @param [out] pAllocInfoList If pAllocInfoList=nullptr, then pNumEntries is ignored on input. On output it
/// will reflect the number of allocations that make up this queue. If
/// pAllocInfoList!=nullptr, then on input pNumEntries is assumed to be the number
/// of entries in the pAllocInfoList array. On output, pNumEntries reflects the
/// number of entries in pAllocInfoList that are valid.
/// @returns Success if the allocation info was successfully written to the buffer.
/// + ErrorInvalidValue if the caller provides a buffer size that is different from the size needed.
/// + ErrorInvalidPointer if pNumEntries is nullptr.
virtual Result QueryAllocationInfo(
size_t* pNumEntries,
GpuMemSubAllocInfo* const pAllocInfoList) = 0;
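// Example: the typical two-call query pattern (illustrative sketch only). std::vector is used for brevity; a PAL
// client would normally use its own allocator. Error handling is omitted.
//
//     size_t numEntries = 0;
//     Pal::Result result = pQueue->QueryAllocationInfo(&numEntries, nullptr);      // First call: get the count.
//
//     std::vector<Pal::GpuMemSubAllocInfo> allocInfo(numEntries);
//     result = pQueue->QueryAllocationInfo(&numEntries, allocInfo.data());         // Second call: fill the list.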
/// Returns the QueueType for the queue
virtual QueueType Type() const = 0;
/// Returns the EngineType for the queue
virtual EngineType GetEngineType() const = 0;
/// Queries the kernel context info associated with this queue and copies it into pKernelContextInfo.
///
/// Only supported on Windows platforms.
///
/// @param [out] pKernelContextInfo Pointer to a KernelContextInfo struct to copy the information into.
/// @returns Success if the information is successfully copied into the output struct.
/// + ErrorInvalidPointer if pKernelContextInfo is nullptr.
/// + ErrorUnavailable if kernel context information is not available on the current platform.
virtual Result QueryKernelContextInfo(KernelContextInfo* pKernelContextInfo) const = 0;
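// Example: querying kernel context information (illustrative sketch only). Only meaningful on platforms where the
// query is available; error handling is omitted.
//
//     Pal::KernelContextInfo info = {};
//     Pal::Result result = pQueue->QueryKernelContextInfo(&info);
//
//     if ((result == Pal::Result::Success) && (info.flags.hasDebugVmid != 0))
//     {
//         // This queue's kernel context has acquired the debug VMID.
//     }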
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data.
PAL_INLINE void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data.
PAL_INLINE void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method.
IQueue() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Queues will be destroyed when the
/// associated device is destroyed.
virtual ~IQueue() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal