/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palCmdBuffer.h
* @brief Defines the Platform Abstraction Library (PAL) ICmdBuffer interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDevice.h"
#include "palGpuMemory.h"
#include "palImage.h"
#include "palMsaaState.h"
#include "palPipeline.h"
#include "palQueryPool.h"
/// HSA kernel dispatch packet typedef
typedef struct hsa_kernel_dispatch_packet_s hsa_kernel_dispatch_packet_t;
/// AMD kernel code typedef
typedef struct amd_kernel_code_s amd_kernel_code_t;
namespace Util { class VirtualLinearAllocator; }
namespace Pal
{
// Forward declarations.
class IBorderColorPalette;
class ICmdAllocator;
class ICmdBuffer;
class IColorBlendState;
class IColorTargetView;
class IDepthStencilState;
class IDepthStencilView;
class IGpuEvent;
class IGpuMemory;
class IIndirectCmdGenerator;
class IMsaaState;
class IPerfExperiment;
class IQueue;
class IScissorState;
class IViewportState;
class IQueryPool;
enum class PerfTraceMarkerType : uint32;
enum class PointOrigin : uint32;
struct VideoCodecInfo;
struct VideoCodecAuxInfo;
/// Specifies a pipeline bind point (i.e., compute or graphics).
enum class PipelineBindPoint : uint32
{
Compute = 0x0,
Graphics = 0x1,
Count
};
/// Fully specifies a type of graphics primitive and vertex ordering for geometry.
enum class PrimitiveTopology : uint32
{
PointList = 0x0,
LineList = 0x1,
LineStrip = 0x2,
TriangleList = 0x3,
TriangleStrip = 0x4,
RectList = 0x5,
QuadList = 0x6,
QuadStrip = 0x7,
LineListAdj = 0x8,
LineStripAdj = 0x9,
TriangleListAdj = 0xA,
TriangleStripAdj = 0xB,
Patch = 0xC,
TriangleFan = 0xD
};
/// Specifies how triangle primitives should be rasterized.
enum class FillMode : uint32
{
Points = 0x0,
Wireframe = 0x1,
Solid = 0x2
};
/// Specifies the triangle face direction that should result in culled primitives.
enum class CullMode : uint32
{
_None = 0x0, ///< All triangles are rasterized.
Front = 0x1, ///< Front facing triangles are culled.
Back = 0x2, ///< Back facing triangles are culled.
FrontAndBack = 0x3, ///< All triangles are culled.
// Unfortunately for Linux clients, X.h includes a "#define None 0" macro. Clients have their choice of either
// undefining None before including this header or using _None when dealing with PAL.
#ifndef None
None = _None, ///< All triangles are rasterized.
#endif
};
/// Specifies vertex winding order corresponding to a front facing triangle. @see CullMode.
enum class FaceOrientation : uint32
{
Ccw = 0x0, ///< Counter-clockwise vertex winding primitives are front facing.
Cw = 0x1 ///< Clockwise vertex winding primitives are front facing.
};
/// Specifies which vertex of a primitive is the _provoking vertex_. This impacts which vertex's "flat" VS outputs
/// are passed to the PS (i.e., flat shading).
enum class ProvokingVertex : uint32
{
First = 0x0,
Last = 0x1
};
/// Specifies bit size of each element in an index buffer.
enum class IndexType : uint32
{
Idx8 = 0x0,
Idx16 = 0x1,
Idx32 = 0x2,
Count
};
/// Specifies a memory atomic operation that can be performed from command buffers with ICmdBuffer::CmdMemoryAtomic().
enum class AtomicOp : uint32
{
AddInt32 = 0x00,
SubInt32 = 0x01,
MinUint32 = 0x02,
MaxUint32 = 0x03,
MinSint32 = 0x04,
MaxSint32 = 0x05,
AndInt32 = 0x06,
OrInt32 = 0x07,
XorInt32 = 0x08,
IncUint32 = 0x09,
DecUint32 = 0x0A,
AddInt64 = 0x0B,
SubInt64 = 0x0C,
MinUint64 = 0x0D,
MaxUint64 = 0x0E,
MinSint64 = 0x0F,
MaxSint64 = 0x10,
AndInt64 = 0x11,
OrInt64 = 0x12,
XorInt64 = 0x13,
IncUint64 = 0x14,
DecUint64 = 0x15,
Count
};
/// Specifies the point in the GPU pipeline where an action should take place.
///
/// Relevant operations include setting GPU events, waiting on GPU events in hardware, or writing timestamps.
///
/// @note The numeric value of these enums are ordered such that a "newState < oldState" comparison will generally yield
/// true if a stall is necessary to resolve a hazard between those two pipe points. This guideline does not
/// hold up when comparing PreRasterization or PostPs with PostCs, as CS work is not properly pipelined with
/// graphics shader work.
///
/// @see ICmdBuffer::CmdSetEvent()
/// @see ICmdBuffer::CmdResetEvent()
/// @see ICmdBuffer::CmdPredicateEvent()
/// @see ICmdBuffer::CmdBarrier()
/// @see ICmdBuffer::CmdWriteTimestamp()
/// @see ICmdBuffer::CmdWriteImmediate()
enum HwPipePoint : uint32
{
HwPipeTop = 0x0, ///< Earliest possible point in the GPU pipeline (CP PFP).
HwPipePostIndexFetch = 0x1, ///< Indirect arguments and index buffer data have been fetched for
/// all prior draws/dispatches (CP ME).
HwPipePreRasterization = 0x3, ///< All prior generated VS/HS/DS/GS waves have completed.
HwPipePostPs = 0x4, ///< All prior generated PS waves have completed.
HwPipeBottom = 0x7, ///< All prior GPU work (graphics, compute, or BLT) has completed.
// The following points apply to compute-specific work:
HwPipePreCs = HwPipePostIndexFetch, ///< As late as possible before CS waves are launched (CP ME).
HwPipePostCs = 0x5, ///< All prior generated CS waves have completed.
// The following points apply to BLT-specific work:
HwPipePreBlt = HwPipePostIndexFetch, ///< As late as possible before BLT operations are launched.
HwPipePostBlt = 0x6 ///< All prior requested BLTs have completed.
};
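// Example (illustrative sketch, not part of the PAL interface): writing a timestamp once all prior GPU work has
// completed. Assumes pCmdBuffer is a valid ICmdBuffer* and timestampMem is an IGpuMemory suitable for timestamp
// writes; see ICmdBuffer::CmdWriteTimestamp() later in this file for the exact signature.
//
//     pCmdBuffer->CmdWriteTimestamp(HwPipeBottom, timestampMem, 0);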
/// Bitmask values that can be OR'ed together to specify a synchronization scope. See srcStageMask and dstStageMask in
/// @ref AcquireReleaseInfo.
///
/// When specifying an execution dependency at a synchronization point where previous operations must *happen-before*
/// future operations, a mask of these flags specifies a *synchronization scope* that restricts which stages of prior
/// draws, dispatches, or BLTs must *happen-before* which stages of future draws, dispatches, or BLTs.
enum PipelineStageFlag : uint32
{
PipelineStageTopOfPipe = 0x00000001,
PipelineStageFetchIndirectArgs = 0x00000002,
PipelineStageFetchIndices = 0x00000004,
PipelineStageVs = 0x00000008,
PipelineStageHs = 0x00000010,
PipelineStageDs = 0x00000020,
PipelineStageGs = 0x00000040,
PipelineStagePs = 0x00000080,
PipelineStageEarlyDsTarget = 0x00000100,
PipelineStageLateDsTarget = 0x00000200,
PipelineStageColorTarget = 0x00000400,
PipelineStageCs = 0x00000800,
PipelineStageBlt = 0x00001000,
PipelineStageBottomOfPipe = 0x00002000,
PipelineStageAllStages = 0x00003FFF
};
/// Bitmask values that can be ORed together to specify all potential usages of an image at a point in time. Such a
/// mask should be specified in the usages field of ImageLayout. These combined usages can be examined by PAL to infer
/// the layout (i.e., compression state) of the image.
///
/// @note There is no layout corresponding to CmdClear*(). The layout flags passed to those functions will determine
/// the expected image layout at that time, and the CmdClear*() implementation will execute a clear that keeps the
/// layout the same.
enum ImageLayoutUsageFlags : uint32
{
LayoutUninitializedTarget = 0x00000001, ///< Initial state of any image that can be used as a color or
/// depth/stencil target. A layout transition out of this state will
/// likely result in a mask RAM initialization BLT. If this bit is
/// set, no other bits may be set.
LayoutColorTarget = 0x00000002, ///< Color target bound via CmdBindTargets(). This bit is exclusive
/// with LayoutDepthStencilTarget.
LayoutDepthStencilTarget = 0x00000004, ///< Depth/stencil target bound via CmdBindTargets(). This bit is
/// exclusive with LayoutColorTarget.
LayoutShaderRead = 0x00000008, ///< Any shader read state including texture, UAV, constant buffer,
/// vertex buffer.
LayoutShaderFmaskBasedRead = 0x00000010, ///< Images in this state support the load_fptr AMD IL instruction,
/// which will read decompressed fmask in order to access compressed
/// MSAA color data from a shader.
LayoutShaderWrite = 0x00000020, ///< Writeable UAV.
LayoutCopySrc = 0x00000040, ///< CmdCopyImage(), CmdCopyImageToMemory(), CmdScaledCopyImage or
/// CmdCopyTiledImageToMemory() source image.
LayoutCopyDst = 0x00000080, ///< CmdCopyImage(), CmdCopyMemoryToImage(), CmdScaledCopyImage or
/// CmdCopyMemoryToTiledImage() destination image.
LayoutResolveSrc = 0x00000100, ///< CmdResolveImage() source.
LayoutResolveDst = 0x00000200, ///< CmdResolveImage() destination.
LayoutPresentWindowed = 0x00000400, ///< Windowed-mode IQueue::Present().
LayoutPresentFullscreen = 0x00000800, ///< Fullscreen (flip) present. Layout must be supported by the
/// display engine.
LayoutUncompressed = 0x00001000, ///< Metadata fully decompressed/expanded layout
LayoutAllUsages = 0x00001FFF
};
/// Bitmask values that can be ORed together to specify all potential engines an image might be used on. Such a
/// mask should be specified in the engines field of ImageLayout.
///
/// If the client API is unable to determine which engines might be used, it should specify all possible engines
/// corresponding to the usage flags.
enum ImageLayoutEngineFlags : uint32
{
LayoutUniversalEngine = 0x1,
LayoutComputeEngine = 0x2,
LayoutDmaEngine = 0x4,
LayoutVideoEncodeEngine = 0x8,
LayoutVideoDecodeEngine = 0x10,
LayoutVideoJpegDecodeEngine = 0x20,
LayoutAllEngines = 0x3F
};
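// Example (illustrative sketch): composing an ImageLayout from the usage and engine masks above. ImageLayout
// (defined elsewhere in PAL's headers) is assumed here to expose "usages" and "engines" bitfields.
//
//     ImageLayout layout = {};
//     layout.usages  = LayoutShaderRead | LayoutCopySrc;            // All ways the image may be used.
//     layout.engines = LayoutUniversalEngine | LayoutComputeEngine; // All engines that may use it.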
/// Bitmask values that can be ORed together to specify previous output usage and upcoming input usages of an image or
/// GPU memory in a ICmdBuffer::CmdBarrier() call to ensure cache coherency between those usages.
enum CacheCoherencyUsageFlags : uint32
{
CoherCpu = 0x00000001, ///< Data read or written by CPU.
CoherShader = 0x00000002, ///< Data read or written by a GPU shader.
CoherCopy = 0x00000004, ///< Data read or written by a ICmdBuffer::CmdCopy*() call.
CoherColorTarget = 0x00000008, ///< Color target.
CoherDepthStencilTarget = 0x00000010, ///< Depth stencil target.
CoherResolve = 0x00000020, ///< Source or destination of a CmdResolveImage() call.
CoherClear = 0x00000040, ///< Destination of a CmdClear() call.
CoherIndirectArgs = 0x00000080, ///< Source argument data read by CmdDrawIndirect() and similar functions.
CoherIndexData = 0x00000100, ///< Index buffer data.
CoherQueueAtomic = 0x00000200, ///< Destination of a CmdMemoryAtomic() call.
CoherTimestamp = 0x00000400, ///< Destination of a CmdWriteTimestamp() call. It can be extended to
/// represent general or other types of L2 access. For example, in
/// gl2UncachedCpuCoherency it also indicates IGpuEvent write to
/// GL2 will be uncached, because we don't have a CoherEvent flag.
CoherCeLoad = 0x00000800, ///< Source of a CmdLoadCeRam() call.
CoherCeDump = 0x00001000, ///< Destination of CmdDumpCeRam() call.
CoherStreamOut = 0x00002000, ///< Data written as stream output.
CoherMemory = 0x00004000, ///< Data read or written directly from/to memory
CoherAllUsages = 0x00007FFF
};
/// Bitmask values for the flags parameter of ICmdBuffer::CmdClearColorImage().
enum ClearColorImageFlags : uint32
{
ColorClearAutoSync = 0x00000001, ///< PAL will automatically insert required CmdBarrier() synchronization before
/// and after the clear assuming all subresources to be cleared are currently
/// ready for rendering as a color target (as is required by API convention in
/// DX12). Allows reduced sync costs in some situations since PAL knows
/// the details of how the clear will be performed.
};
/// Bitmask values for the flags parameter of ICmdBuffer::CmdClearDepthStencil().
enum ClearDepthStencilFlags : uint32
{
DsClearAutoSync = 0x00000001, ///< PAL will automatically insert required CmdBarrier() synchronization before
/// and after the clear assuming all subresources to be cleared are currently
/// ready for rendering as a depth/stencil target (as is required by API convention
/// in DX12). Allows reduced sync costs in some situations since PAL knows the
/// details of how the clear will be performed.
};
/// Specifies properties for creation of an ICmdBuffer object. Input structure to IDevice::CreateCmdBuffer().
struct CmdBufferCreateInfo
{
ICmdAllocator* pCmdAllocator; ///< The command buffer will use this command allocator to allocate
/// all GPU memory. If the client specifies a null pCmdAllocator,
/// it must call ICmdBuffer::Reset with a non-null pCmdAllocator
/// before calling ICmdBuffer::Begin.
QueueType queueType; ///< Type of queue commands in this command buffer will target.
/// This defines the set of allowed actions in the command buffer.
EngineType engineType; ///< Type of engine the queue commands will run on.
EngineSubType engineSubType; ///< Sub type of engine the queue commands will run on.
union
{
struct
{
/// Indicates that this command buffer will be a "nested" command buffer, instead of a normal, "root"
/// command buffer. Nested command buffers differ from root command buffers in how they are sent to the
/// GPU for execution: root command buffers must be submitted to the hardware by calling
/// @ref IQueue::Submit, whereas nested command buffers can only be submitted by being executed by a root
/// command buffer.
///
/// Currently, only Universal and Compute command buffers can be nested. Nesting DMA command buffers is
/// meaningless and unsupported. It is an error to attempt to create a nested DMA command buffer.
///
/// @see ICmdBuffer::CmdExecuteNestedCmdBuffers.
uint32 nested : 1;
/// Dedicated CUs are reserved for this queue. Thus we have to skip CU mask programming.
uint32 realtimeComputeUnits : 1;
/// Reserved for future use.
uint32 reserved : 30;
};
/// Flags packed as 32-bit uint.
uint32 u32All;
} flags; ///< Command buffer creation flags.
};
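// Example (illustrative sketch): creating a universal command buffer, assuming the placement-creation pattern of
// IDevice::GetCmdBufferSize()/IDevice::CreateCmdBuffer() declared in palDevice.h. pDevice and pMyCmdAllocator are
// client-provided; AllocSystemMemory() stands in for any client allocation routine (hypothetical).
//
//     CmdBufferCreateInfo createInfo = {};
//     createInfo.pCmdAllocator = pMyCmdAllocator;
//     createInfo.queueType     = QueueTypeUniversal;
//     createInfo.engineType    = EngineTypeUniversal;
//
//     Result result = Result::Success;
//     void* pMemory = AllocSystemMemory(pDevice->GetCmdBufferSize(createInfo, &result));
//     ICmdBuffer* pCmdBuffer = nullptr;
//     if ((result == Result::Success) && (pMemory != nullptr))
//     {
//         result = pDevice->CreateCmdBuffer(createInfo, pMemory, &pCmdBuffer);
//     }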
/// Specifies which states will not be bound in a nested command buffer, and instead must be inherited from the calling
/// root-level command buffer.
union InheritedStateFlags
{
struct
{
/// Color and depth target views are inherited from the root-level command buffer. The nested command buffer
/// should not modify this state.
uint32 targetViewState : 1;
/// Reserved for future usage.
uint32 reserved : 31;
};
/// Flags packed as 32-bit uint.
uint32 u32All;
};
/// Specifies parameters inherited from primary command buffer into nested command buffer.
struct InheritedStateParams
{
uint32 colorTargetCount; ///< Number of color targets bound in the
/// root-level command buffer.
SwizzledFormat colorTargetSwizzledFormats[MaxColorTargets]; ///< Format and swizzle for each color
/// target.
uint32 sampleCount[MaxColorTargets]; ///< Sample count for each color target.
InheritedStateFlags stateFlags; ///< States that are inherited from the
/// calling root-level command buffer.
};
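// Example (illustrative sketch): describing state a nested command buffer inherits from its caller. The values
// are hypothetical and would mirror the root-level command buffer's currently bound targets.
//
//     InheritedStateParams inherited = {};
//     inherited.colorTargetCount              = 1;
//     inherited.colorTargetSwizzledFormats[0] = boundTargetFormat; // Hypothetical client value.
//     inherited.sampleCount[0]                = 1;
//     inherited.stateFlags.targetViewState    = 1; // Targets are inherited; the nested buffer won't rebind them.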
/// Specifies optional hints to control command buffer building optimizations.
union CmdBufferBuildFlags
{
struct
{
/// Optimize command buffer building for large sets of draw or dispatch operations that are GPU front-end
/// limited. These optimizations include removing redundant PM4 commands and reducing the VGT prim group size.
/// This flag might increase the CPU overhead of building command buffers.
uint32 optimizeGpuSmallBatch : 1;
/// Optimize command buffer building for exclusive command buffer submission. Command buffers built with this
/// flag cannot be submitted if they have already been submitted previously unless the caller guarantees that
/// they are no longer in use. This flag allows PAL to modify the contents of command buffers during
/// submission.
uint32 optimizeExclusiveSubmit : 1;
/// Optimize command buffer building for single command buffer submission. Command buffers built with this flag
/// cannot be submitted more than once. This flag allows PAL to modify the contents of command buffers during
/// submission. This flag is a stricter version of optimizeExclusiveSubmit; it is not necessary to set
/// optimizeExclusiveSubmit if this flag is set.
uint32 optimizeOneTimeSubmit : 1;
/// Attempt to prefetch shader code into cache before launching draws or dispatches with a freshly bound
/// pipeline object. This optimization might increase the CPU overhead of building command buffers and/or
/// introduce additional front-end GPU bottlenecks.
uint32 prefetchShaders : 1;
/// Attempt to prefetch the command buffer into cache to avoid bottlenecking the GPU front-end.
/// This optimization might slightly increase the overhead of some GPU copies and other front-end reads/writes.
uint32 prefetchCommands : 1;
/// Indicates the command buffer will use one or more constant engine commands: CmdLoadCeRam(), CmdDumpCeRam(),
/// or CmdWriteCeRam()
uint32 usesCeRamCmds : 1;
/// Indicates that the client prefers that this command buffer use a CPU update path for updating the contents
/// of the vertex buffer, stream-out and user-data-spill tables instead of using CE RAM. Ignored for command
/// buffers on queues or engines which don't support CE RAM.
///
/// It is expected that the CPU update path will be slightly more efficient for scenarios where these tables'
/// contents are fully updated often, while the CE RAM path is expected to be more efficient at handling sparse
/// updates.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 475
/// This flag has no effect prior to interface version 475.0.
#endif
uint32 useCpuPathForTableUpdates : 1;
/// Indicates that the client would prefer that this nested command buffer not be launched using an IB2 packet.
/// The calling command buffer will either inline this command buffer into itself or use IB chaining, based on
/// whether the optimizeExclusiveSubmit flag is also set. This flag is ignored for root command buffers.
uint32 disallowNestedLaunchViaIb2 : 1;
/// Reserved for future use.
uint32 reserved : 24;
};
/// Flags packed as 32-bit uint.
uint32 u32All;
};
/// Specifies options that direct command buffer building.
struct CmdBufferBuildInfo
{
/// Command buffer build flags, specifies optional hints to control command buffer build optimizations.
CmdBufferBuildFlags flags;
/// Command buffer inherited state and params. If non-null, the related state is assumed to be set in the
/// root-level command buffer, and the nested command buffer should not modify it. Any software params needed
/// within the nested command buffer must be provided here.
const InheritedStateParams* pInheritedState;
/// If non-null, the command buffer will begin with all states set as they are in this previously built command
/// buffer. Any state specified in pInheritedState is excluded if it is also provided.
const ICmdBuffer* pStateInheritCmdBuffer;
/// Optional allocator for PAL to use when allocating temporary memory during command buffer building. PAL will
/// stop using this allocator once command building ends. If no allocator is provided PAL will use an internally
/// managed allocator instead which may be less efficient. PAL will use this allocator in two ways:
/// + Temporary storage within a single command building call. PAL will rewind the allocator before returning to
/// free all memory allocated within the call.
/// + Temporary storage for the entire command building period. When Begin() is called, PAL will save the current
/// position of the allocator and rewind the allocator to that point when End() is called. If the client also
/// wishes to allocate temporary storage that lasts between command building function calls they must allocate it
/// before calling Begin() or PAL will accidentally free it.
Util::VirtualLinearAllocator* pMemAllocator;
};
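// Example (illustrative sketch): beginning and ending command building with a one-time-submit hint, using
// ICmdBuffer::Begin()/End() as declared later in this file.
//
//     CmdBufferBuildInfo buildInfo = {};
//     buildInfo.flags.optimizeOneTimeSubmit = 1; // This command buffer will be submitted exactly once.
//
//     Result result = pCmdBuffer->Begin(buildInfo);
//     // ... record commands ...
//     if (result == Result::Success)
//     {
//         result = pCmdBuffer->End();
//     }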
/// Specifies info on how a compute shader should use resources.
struct DynamicComputeShaderInfo
{
uint32 maxWavesPerCu; ///< Limits the number of waves in flight per compute unit. This can be used to selectively
/// throttle certain workloads that bottleneck multiqueue applications. For ease of use, a
/// value of zero means no limit is set. The remaining valid values are in the range [1, 40]
/// and specify the maximum number of waves per compute unit. If the hardware has one wave
/// limit control for multiple shader stages PAL will select the most strict limit.
uint32 maxThreadGroupsPerCu; ///< Override the maximum number of threadgroups that a particular CS can run on,
/// throttling it, to enable more graphics work to complete. 0 disables the limit.
uint32 ldsBytesPerTg; ///< Override the amount of LDS space used per thread-group for this pipeline, in bytes.
/// Zero indicates that the LDS size determined at pipeline-compilation time will be used.
};
/// Specifies info on how a graphics shader should use resources.
struct DynamicGraphicsShaderInfo
{
uint32 maxWavesPerCu; ///< Limits the number of waves in flight per compute unit. This can be used to selectively
/// throttle certain workloads that bottleneck multiqueue applications. For ease of use, a
/// value of zero means no limit is set. The remaining valid values are in the range [1, 40]
/// and specify the maximum number of waves per compute unit. If the hardware has one wave
/// limit control for multiple shader stages PAL will select the most strict limit.
uint32 cuEnableMask; ///< This mask is AND-ed with a PAL-decided CU enable mask to further limit the set of
/// enabled CUs. If the hardware has one CU enable mask for multiple shader stages PAL will
/// select the most strict limit. A value of 0 will be ignored.
};
/// Specifies info on how graphics shaders should use resources.
struct DynamicGraphicsShaderInfos
{
DynamicGraphicsShaderInfo vs; ///< Dynamic Vertex shader information.
DynamicGraphicsShaderInfo hs; ///< Dynamic Hull shader information.
DynamicGraphicsShaderInfo ds; ///< Dynamic Domain shader information.
DynamicGraphicsShaderInfo gs; ///< Dynamic Geometry shader information.
DynamicGraphicsShaderInfo ps; ///< Dynamic Pixel shader information.
};
/// Specifies parameters for binding a pipeline.
/// @see ICmdBuffer::CmdBindPipeline
struct PipelineBindParams
{
PipelineBindPoint pipelineBindPoint; ///< Specifies which type of pipeline is to be bound (compute or graphics).
const IPipeline* pPipeline; ///< New pipeline to be bound. Can be null in order to unbind a previously
/// bound pipeline without binding a new one.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 471
uint64 apiPsoHash; ///< 64-bit identifier provided by the client driver based on the Pipeline State
/// Object. Multiple apiPsoHash values may map to a single internal pipeline
/// hash (a many-to-one correlation).
#endif
union
{
DynamicComputeShaderInfo cs; ///< Dynamic Compute shader information.
DynamicGraphicsShaderInfos graphics; ///< Dynamic Graphics shader information.
};
};
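// Example (illustrative sketch): binding a compute pipeline with an occupancy limit. pComputePipeline is a
// client-created IPipeline*; myApiPsoHash is a hypothetical client-computed hash.
//
//     PipelineBindParams bindParams = {};
//     bindParams.pipelineBindPoint = PipelineBindPoint::Compute;
//     bindParams.pPipeline         = pComputePipeline;
//     bindParams.cs.maxWavesPerCu  = 8; // Optionally throttle this workload's occupancy.
// #if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 471
//     bindParams.apiPsoHash        = myApiPsoHash;
// #endif
//     pCmdBuffer->CmdBindPipeline(bindParams);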
/// Specifies per-MRT color target view and current image state. Used as input to ICmdBuffer::CmdBindTargets().
struct ColorTargetBindInfo
{
const IColorTargetView* pColorTargetView; ///< Color target view to bind.
ImageLayout imageLayout; ///< Specifies the current image layout based on bitmasks of currently
/// allowed operations and engines that may perform those operations.
/// At minimum, the LayoutColorTarget usage flag and
/// LayoutUniversalEngine engine flag must be set.
};
/// Specifies depth/stencil view and current image state of the depth and stencil aspects. Used as input to
/// ICmdBuffer::CmdBindTargets().
struct DepthStencilBindInfo
{
const IDepthStencilView* pDepthStencilView; ///< Depth/stencil target view to bind.
ImageLayout depthLayout; ///< Specifies the current image layout of the depth aspect based on
/// bitmasks of currently allowed operations and engines that may
/// perform those operations. At minimum, the
/// LayoutDepthStencilTarget usage flag and LayoutUniversalEngine
/// engine flag must be set. Ignored if the specified view does not
/// have a depth aspect.
ImageLayout stencilLayout; ///< Specifies the current image layout of the stencil aspect based on
/// bitmasks of currently allowed operations and engines that may
/// perform those operations. At minimum, the
/// LayoutDepthStencilTarget usage flag and LayoutUniversalEngine
/// engine flag must be set. Ignored if the specified view does not
/// have a stencil aspect.
};
/// Represents a GPU memory or image transition as part of a barrier.
///
/// A single transition will ensure cache coherency of dirty data in the specific set of source caches with the
/// specified set of destination caches. The source and destination designation is relative to the barrier itself
/// and does not indicate whether a particular cache is a read or write cache. The transition is making dirty data
/// in the srcCacheMask visible to the caches indicated by dstCacheMask. srcCacheMask, therefore, is always expected
/// to be a write cache. For a well-defined program, writes should only be done through one bind point, so we
/// should only expect one bit to be set in srcCacheMask, whereas dstCacheMask can have multiple bits set that may
/// be read, read/write, or write caches. If both cache masks are zero, the client is indicating that no cache
/// coherency operations are required, but PAL may still issue coherency operations to make the results of layout
/// changes available.
///
/// In addition, for images, the client can initiate a change of layout usage/engine flags, which may result in a
/// decompression BLT.
///
/// @note There is no range provided to control the range of addresses that will be flushed/invalidated in GPU caches
/// as there is no hardware feature on current GPUs to support this.
struct BarrierTransition
{
uint32 srcCacheMask; ///< Bitmask of @ref CacheCoherencyUsageFlags describing previous write operations whose
/// results need to be visible for subsequent operations.
uint32 dstCacheMask; ///< Bitmask of @ref CacheCoherencyUsageFlags describing the operations expected to read
/// data flushed from the caches indicated by the srcCacheMask.
struct
{
const IImage* pImage; ///< If non-null, indicates this transition only applies to the specified image.
/// The remaining members of this structure are ignored if this member is null.
SubresRange subresRange; ///< Subset of pImage this transition applies to. If newLayout includes @ref
/// LayoutUninitializedTarget this range must cover all subresources of pImage
/// unless the perSubresInit image create flag was specified.
ImageLayout oldLayout; ///< Specifies the current image layout based on bitmasks of allowed operations and
/// engines up to this point. These masks imply the previous compression state. No
/// usage flags should ever be set in oldLayout.usages that correspond to usages
/// that are not supported by the engine that is performing the transition. The
/// queue type performing the transition must be set in oldLayout.engines.
ImageLayout newLayout; ///< Specifies the upcoming image layout based on bitmasks of allowed operations and
/// engines after this point. These masks imply the upcoming compression state. A
/// difference between the old and new layout usage masks may result in a
/// decompression.
/// Specifies a custom sample pattern over a 2x2 pixel quad. The position for each sample is specified on a
/// grid where the pixel center is <0,0>, the top left corner of the pixel is <-8,-8>, and <7,7> is the maximum
/// valid position (not quite to the bottom/right border of the pixel). Can be left null for non-MSAA images or
/// when a valid IMsaaState is bound prior to the CmdBarrier call.
const MsaaQuadSamplePattern* pQuadSamplePattern;
} imageInfo; ///< Image-specific transition information.
};
/// Flags that modify the behavior of ICmdBuffer::CmdBarrier(). @see BarrierInfo.
union BarrierFlags
{
struct
{
uint32 splitBarrierEarlyPhase : 1; ///< Indicates that this is a split barrier, and this call should only
/// execute the "early" portion of the barrier. This usually entails
/// performing any pipelined decompress operations and issuing a pipelined
/// operation to flush destination caches and signal the GPU event
/// specified in BarrierInfo (pSplitBarrierGpuEvent) once previous work
/// has completed. Requires pSplitBarrierGpuEvent is non-null and is
/// mutually exclusive with splitBarrierLatePhase.
uint32 splitBarrierLatePhase : 1; ///< Indicates that this is a split barrier, and this call should only
/// execute the "late" portion of the barrier. This usually entails
/// waiting for the "early" portion of the barrier to complete using the
/// GPU event specified in BarrierInfo (pSplitBarrierGpuEvent), then
/// invalidating source caches as necessary. Requires
/// pSplitBarrierGpuEvent is non-null and is mutually exclusive with
/// splitBarrierEarlyPhase.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as a 32-bit uint.
};
/// Describes a barrier as inserted by a call to ICmdBuffer::CmdBarrier().
///
/// A barrier can be used to 1) stall GPU execution at a specified point to resolve a data hazard, 2) flush/invalidate
/// GPU caches to ensure data coherency, and/or 3) compress/decompress image resources as necessary when changing how
/// the GPU will use the image.
///
/// This structure directly specifies how #1 is performed. #2 and #3 are managed by the list of @ref BarrierTransition
/// structures passed in pTransitions.
struct BarrierInfo
{
BarrierFlags flags; ///< Flags controlling behavior of the barrier.
/// Determine at what point the GPU should stall until all specified waits and transitions have completed. If the
/// specified wait point is unavailable, PAL will wait at the closest available earlier point. In practice, on
/// GFX6-8, this is selecting between CP PFP and CP ME waits.
HwPipePoint waitPoint;
uint32 pipePointWaitCount; ///< Number of entries in pPipePoints.
const HwPipePoint* pPipePoints; ///< The barrier will stall until the hardware pipeline has cleared
/// up to each point specified in this array. One entry in this
/// array is typically enough, but CS and GFX operate in parallel
/// at certain stages.
uint32 gpuEventWaitCount; ///< Number of entries in ppGpuEvents.
const IGpuEvent** ppGpuEvents; ///< The barrier will stall until each GPU event in this array is
/// in the set state.
uint32 rangeCheckedTargetWaitCount; ///< Number of entries in ppTargets.
const IImage** ppTargets; ///< The barrier will stall until all previous rendering with any
/// color or depth/stencil image in this list bound as a target
/// has completed. If one of the targets is a nullptr it will
/// perform a full range sync.
uint32 transitionCount; ///< Number of entries in pTransitions.
const BarrierTransition* pTransitions; ///< List of image/memory transitions to process. See
/// @ref BarrierTransition. The same subresource should never
/// be specified more than once in the list of transitions.
/// PAL assumes that all specified subresources are unique.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 482
uint32 globalSrcCacheMask; ///< Bitmask of @ref CacheCoherencyUsageFlags describing previous write operations whose
/// results need to be visible for subsequent operations. This is a global mask and is
/// combined (bitwise logical union) with the @ref srcCacheMask field belonging to
/// every element in @ref pTransitions. If this is zero, then no global cache flags
/// are applied during every transition.
uint32 globalDstCacheMask; ///< Bitmask of @ref CacheCoherencyUsageFlags describing the operations expected to read
/// data flushed from the caches indicated by the srcCacheMask. This is a global mask
/// and is combined (bitwise logical union) with the @ref dstCacheMask field belonging
/// to every element in @ref pTransitions. If this is zero, then no global cache flags
/// are applied during every transition.
#endif
/// If non-null, this is a split barrier. A split barrier is executed by making two separate CmdBarrier() calls
/// with identical parameters with the exception that the first call sets flags.splitBarrierEarlyPhase and the
/// second calls sets flags.splitBarrierLatePhase.
///
/// The early phase will:
/// - Issue any pipelined operations that are optimally done immediately when an app is done with a resource
/// (e.g., doing a fixed function depth expand immediately after the app finished rendering to that depth
/// resource).
/// - Issue any required destination cache flushes that can be pipelined.
/// - Issue a pipelined GPU operation to signal the GPU event specified by pSplitBarrierGpuEvent when all
/// prior GPU work has completed (based on pPipePoints).
///
/// The late phase will:
/// - Wait until the GPU event specified by pSplitBarrierGpuEvent is signaled. Ideally, the app will insert
/// unrelated GPU work in between the early and late phases so that this wait is satisfied immediately - this
/// is where a performance benefit can be gained from using split barriers.
/// - Wait until all GPU events in ppGpuEvents are signaled.
/// - Perform any decompress operations that could not be pipelined for some reason.
/// - Invalidate any required source caches. These invalidations can not currently be pipelined.
///
/// @note PAL will not access these GPU events with the CPU. Clients should set the gpuAccessOnly flag when
/// creating GPU events used exclusively for this purpose.
const IGpuEvent* pSplitBarrierGpuEvent;
uint32 reason; ///< The reason that the barrier was invoked.
};
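// Example (illustrative sketch): a barrier transitioning an image from color-target writes to shader reads.
// pImage, fullRange (the image's full subresource range), and pCmdBuffer are assumed client-provided.
//
//     BarrierTransition transition = {};
//     transition.srcCacheMask                = CoherColorTarget;
//     transition.dstCacheMask                = CoherShader;
//     transition.imageInfo.pImage            = pImage;
//     transition.imageInfo.subresRange       = fullRange;
//     transition.imageInfo.oldLayout.usages  = LayoutColorTarget;
//     transition.imageInfo.oldLayout.engines = LayoutUniversalEngine;
//     transition.imageInfo.newLayout.usages  = LayoutShaderRead;
//     transition.imageInfo.newLayout.engines = LayoutUniversalEngine;
//
//     const HwPipePoint postColor = HwPipeBottom;
//     BarrierInfo barrier = {};
//     barrier.waitPoint          = HwPipeTop;
//     barrier.pipePointWaitCount = 1;
//     barrier.pPipePoints        = &postColor;
//     barrier.transitionCount    = 1;
//     barrier.pTransitions       = &transition;
//     pCmdBuffer->CmdBarrier(barrier);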
/// Specifies *availability* and/or *visibility* operations on a section of an IGpuMemory object. See @ref
/// AcquireReleaseInfo.
struct MemBarrier
{
union
{
struct
{
uint32 globallyAvailable : 1; ///< Normally, data made available is in the GPU LLC. When this bit is
/// set, available means in memory, available to all clients in the
/// system. This is useful for rare cases like mid command buffer
/// synchronization with the CPU or another external device.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as a 32-bit uint.
} flags; ///< Flags controlling the memory barrier.
GpuMemSubAllocInfo memory; ///< Specifies a portion of an IGpuMemory object this memory barrier affects.
uint32 srcAccessMask; ///< *Access scope* for the availability operation. This should be a mask of
/// all relevant CacheCoherencyUsageFlags corresponding to prior write
/// operations that should be made available (i.e., written back from local
/// caches to the LLC). This must be 0 when passed in to
/// ICmdBuffer::CmdAcquire(), which only supports visibility operations.
uint32 dstAccessMask; ///< *Access scope* for the visibility operation. This should be a mask of
/// all relevant CacheCoherencyUsageFlags corresponding to upcoming
/// read/write operations that need visibility (i.e., invalidate
/// corresponding local caches above the LLC). This must be 0 when passed
/// in to ICmdBuffer::CmdRelease(), which only supports availability
/// operations.
};
/// Specifies required layout transition, *availability*, and/or *visibility* operations on a subresource of an IImage
/// object. See @ref AcquireReleaseInfo.
struct ImgBarrier
{
const IImage* pImage; ///< Relevant image resource for this barrier.
SubresRange subresRange; ///< Selects a range of aspects/slices/mips the barrier affects. If newLayout
/// includes @ref LayoutUninitializedTarget this range must cover all subresources of
/// pImage unless the perSubresInit image create flag was specified.
Box box; ///< Restricts the barrier to a sub-section of each subresource. The Z offset/extent
/// must be 0 for 1D/2D images, and the Y offset/extent must be 0 for 1D images. A
/// box with zero extents will be ignored, and the barrier will affect the entire
/// subresource range. This box may be used to restrict ranges of cache flushes or
/// invalidations, or may restrict what data is decompressed. However, the
/// implementation may not be able to optimize particular cases and may expand the
/// barrier to cover the entire subresource range. Specifying a subregion with a box
/// when newLayout includes @ref LayoutUninitializedTarget is not supported.
uint32 srcAccessMask; ///< *Access scope* for the availability operation. This should be a mask of all
/// relevant CacheCoherencyUsageFlags corresponding to prior write operations that
/// should be made available (i.e., written back from local caches to the LLC). This
/// must be 0 when passed in to ICmdBuffer::CmdAcquire(), which only supports
/// visibility operations.
uint32 dstAccessMask; ///< *Access scope* for the visibility operation. This should be a mask of all
/// relevant CacheCoherencyUsageFlags corresponding to upcoming read/write operations
/// that need visibility (i.e., invalidate corresponding local caches above the LLC).
/// This must be 0 when passed in to ICmdBuffer::CmdRelease(), which only supports
/// availability operations.
ImageLayout oldLayout; ///< Specifies the current image layout based on bitmasks of allowed operations and
/// engines up to this point. These masks imply the previous compression state. No
/// usage flags should ever be set in oldLayout.usages that correspond to usages
/// that are not supported by the engine that is performing the transition. The
/// engine type performing the transition must be set in oldLayout.engines.
ImageLayout newLayout; ///< Specifies the upcoming image layout based on bitmasks of allowed operations and
/// engines after this point. These masks imply the upcoming compression state. A
/// difference between the old and new layout usage masks may result in a
/// decompression. PAL's implementation will ensure the results of any layout
/// operations are consistent with the requested availability and visibility
/// operations.
/// Specifies a custom sample pattern over a 2x2 pixel quad. The position for each sample is specified on a grid
/// where the pixel center is <0,0>, the top left corner of the pixel is <-8,-8>, and <7,7> is the maximum valid
/// position (not quite to the bottom/right border of the pixel). Can be left null for non-MSAA images or when a
/// valid IMsaaState is bound prior to the CmdBarrier call.
const MsaaQuadSamplePattern* pQuadSamplePattern;
};
/// Input structure to CmdRelease(), CmdAcquire(), and CmdReleaseThenAcquire(), describing the execution dependencies,
/// memory dependencies, and image layout transitions that must be resolved.
struct AcquireReleaseInfo
{
uint32 srcStageMask; ///< Bitmask of PipelineStageFlag values defining the synchronization
/// scope that must be confirmed complete as part of a release. Must be
/// 0 when passed in to ICmdBuffer::CmdAcquire().
uint32 dstStageMask; ///< Bitmask of PipelineStageFlag values defining the synchronization
/// scope of operations to be performed after the acquire. Must be
/// 0 when passed in to ICmdBuffer::CmdRelease().
uint32 srcGlobalAccessMask; ///< *Access scope* for the global availability operation. Serves the
/// same purpose as srcAccessMask in @ref MemBarrier, but will cause
/// all relevant caches to be flushed without range checking. This must
/// be 0 when passed in to ICmdBuffer::CmdAcquire(), which only supports
/// visibility operations.
uint32 dstGlobalAccessMask; ///< *Access scope* for the global visibility operation. Serves the
/// same purpose as dstAccessMask in @ref MemBarrier, but will cause
/// all relevant caches to be invalidated without range checking. This
/// must be 0 when passed in to ICmdBuffer::CmdRelease(), which only
/// supports availability operations.
uint32 memoryBarrierCount; ///< Number of entries in pMemoryBarriers.
const MemBarrier* pMemoryBarriers; ///< Describes memory dependencies specific to a range of a particular
/// IGpuMemory object.
uint32 imageBarrierCount; ///< Number of entries in pImageBarriers.
const ImgBarrier* pImageBarriers; ///< Describes memory dependencies and image layout transitions required
/// for a subresource range of a particular IImage object.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 504
uint32 reason; ///< The reason that the barrier was invoked.
/// See @ref Developer::BarrierReason for internal reason codes, though
/// clients may define their own as well.
#endif
};
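// Example (illustrative sketch): the same color-target-to-shader-read transition expressed in the
// acquire/release model via CmdReleaseThenAcquire(), declared later in this file.
//
//     ImgBarrier imgBarrier = {};
//     imgBarrier.pImage            = pImage;    // Client-provided image.
//     imgBarrier.subresRange       = fullRange; // Client-provided full subresource range.
//     imgBarrier.srcAccessMask     = CoherColorTarget;
//     imgBarrier.dstAccessMask     = CoherShader;
//     imgBarrier.oldLayout.usages  = LayoutColorTarget;
//     imgBarrier.oldLayout.engines = LayoutUniversalEngine;
//     imgBarrier.newLayout.usages  = LayoutShaderRead;
//     imgBarrier.newLayout.engines = LayoutUniversalEngine;
//
//     AcquireReleaseInfo acqRelInfo = {};
//     acqRelInfo.srcStageMask      = PipelineStageColorTarget;
//     acqRelInfo.dstStageMask      = PipelineStagePs;
//     acqRelInfo.imageBarrierCount = 1;
//     acqRelInfo.pImageBarriers    = &imgBarrier;
//     pCmdBuffer->CmdReleaseThenAcquire(acqRelInfo);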
/// Specifies parameters for a copy from one range of a source GPU memory allocation to a range of the same size in a
/// destination GPU memory allocation. Used as an input to ICmdBuffer::CmdCopyMemory().
struct MemoryCopyRegion
{
gpusize srcOffset; ///< Offset in bytes into the source GPU memory allocation to copy data from.
gpusize dstOffset; ///< Offset in bytes into the destination GPU memory allocation to copy data to.
gpusize copySize; ///< Amount of data to copy in bytes.
};
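// Example (illustrative sketch): copying 64 KiB between two client-created IGpuMemory objects with
// ICmdBuffer::CmdCopyMemory(), declared later in this file.
//
//     MemoryCopyRegion region = {};
//     region.srcOffset = 0;
//     region.dstOffset = 0;
//     region.copySize  = 64 * 1024;
//     pCmdBuffer->CmdCopyMemory(srcGpuMemory, dstGpuMemory, 1, &region);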
/// Specifies parameters for an image copy from one region in a source image subresource to a region of the same size in
/// a destination image subresource. Used as input to ICmdBuffer::CmdCopyImage().
/// If the region describes a copy between a 2D and a 3D image, extent.depth and numSlices must be equal and may be
/// larger than 1.
struct ImageCopyRegion
{
SubresId srcSubres; ///< Selects the source subresource.
Offset3d srcOffset; ///< Offset to the start of the chosen region in the source subresource.
SubresId dstSubres; ///< Selects the destination subresource.
Offset3d dstOffset; ///< Offset to the start of the chosen region in the destination
/// subresource.
Extent3d extent; ///< Size of the copy region in pixels.
uint32 numSlices; ///< Number of slices the copy will span.
};
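// Example (illustrative sketch): copying a 256x256 region of one subresource to another. The exact
// CmdCopyImage() signature, including layouts and flags, is declared later in this file; srcSubresId and
// dstSubresId are client-chosen subresources.
//
//     ImageCopyRegion region = {};
//     region.srcSubres = srcSubresId;
//     region.dstSubres = dstSubresId;
//     region.extent    = { 256, 256, 1 };
//     region.numSlices = 1;
//     pCmdBuffer->CmdCopyImage(srcImage, srcLayout, dstImage, dstLayout, 1, &region, 0);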
/// Specifies parameters for a copy between an image and a GPU memory allocation. The same structure is used regardless
/// of direction, an input for both ICmdBuffer::CmdCopyImageToMemory() and ICmdBuffer::CmdCopyMemoryToImage().
struct MemoryImageCopyRegion
{
SubresId imageSubres; ///< Selects the image subresource.
Offset3d imageOffset; ///< Pixel offset to the start of the chosen subresource region.
Extent3d imageExtent; ///< Size of the image region in pixels.
uint32 numSlices; ///< Number of slices the copy will span.
gpusize gpuMemoryOffset; ///< Offset in bytes to the start of the copy region in the GPU memory allocation.
gpusize gpuMemoryRowPitch; ///< Offset in bytes between the same X position on two consecutive lines.
gpusize gpuMemoryDepthPitch; ///< Offset in bytes between the same X,Y position of two consecutive slices.
};
/// Specifies parameters for a copy between a PRT and a GPU memory allocation. The same structure is used regardless
/// of direction, an input for both ICmdBuffer::CmdCopyTiledImageToMemory() and ICmdBuffer::CmdCopyMemoryToTiledImage().
struct MemoryTiledImageCopyRegion
{
SubresId imageSubres; ///< Selects the image subresource; must not be a part of the packed mip tail.
Offset3d imageOffset; ///< Tile offset to the start of the chosen subresource region.
Extent3d imageExtent; ///< Size of the image region in tiles.
uint32 numSlices; ///< Number of slices the copy will span.
gpusize gpuMemoryOffset; ///< Offset in bytes to the start of the copy region in the GPU memory allocation.
gpusize gpuMemoryRowPitch; ///< Offset in bytes between the same X position on two consecutive lines.
gpusize gpuMemoryDepthPitch; ///< Offset in bytes between the same X,Y position of two consecutive slices.
};
/// Used by copy operations to temporarily interpret a range of GPU memory as a "typed buffer". A typed buffer is
/// essentially a linear image with a caller-defined row pitch and depth pitch. Typed buffer copies do not require
/// the GPU memory objects to be created with the "typedBuffer" flag.
struct TypedBufferInfo
{
SwizzledFormat swizzledFormat; ///< The pixels in this buffer have this format.
gpusize offset; ///< Offset in bytes to the start of the copy region in the buffer's GPU memory
/// allocation.
gpusize rowPitch; ///< Offset in bytes between the same X position on two consecutive lines.
gpusize depthPitch; ///< Offset in bytes between the same X,Y position of two consecutive slices.
};
/// Specifies parameters for a copy from one region of a typed buffer to a region of the same size in a destination
/// typed buffer. Used as an input to ICmdBuffer::CmdCopyTypedBuffer().
struct TypedBufferCopyRegion
{
TypedBufferInfo srcBuffer; ///< How to interpret the source GPU memory allocation as a typed buffer.
TypedBufferInfo dstBuffer; ///< How to interpret the destination GPU memory allocation as a typed buffer.
Extent3d extent; ///< Size of the copy region in pixels.
};
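// Example (illustrative sketch): copying a 256x256 R8G8B8A8 region between two allocations interpreted as typed
// buffers. rgba8Format is a client-provided SwizzledFormat; CmdCopyTypedBuffer() is declared later in this file.
//
//     TypedBufferCopyRegion region = {};
//     region.srcBuffer.swizzledFormat = rgba8Format;
//     region.srcBuffer.offset         = 0;
//     region.srcBuffer.rowPitch       = 256 * 4;          // 256 pixels, 4 bytes each.
//     region.srcBuffer.depthPitch     = 256 * 256 * 4;    // One full slice.
//     region.dstBuffer                = region.srcBuffer; // Same interpretation for the destination.
//     region.extent                   = { 256, 256, 1 };
//     pCmdBuffer->CmdCopyTypedBuffer(srcGpuMemory, dstGpuMemory, 1, &region);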
/// Specifies parameters for a scaled image copy from one region in a source image subresource to a region in the
/// destination image subresource. Used as an input to ICmdBuffer::CmdScaledCopyImage.
struct ImageScaledCopyRegion
{
SubresId srcSubres; ///< Selects the source subresource.
Offset3d srcOffset; ///< Offset to the start of the chosen region in the source subresource.
SignedExtent3d srcExtent; ///< Signed size of the source region in pixels. A negative size indicates a copy
/// in the reverse direction.
SubresId dstSubres; ///< Selects the destination subresource.
Offset3d dstOffset; ///< Offset to the start of the chosen region in the destination subresource.
SignedExtent3d dstExtent; ///< Signed size of the destination region in pixels. A negative size indicates a
/// copy in the reverse direction.
uint32 numSlices; ///< Number of slices the copy will span.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 494
SwizzledFormat swizzledFormat; ///< If not Undefined, reinterpret both subresources using this format and swizzle.
/// The specified format needs to have been included in the "pViewFormats" list
/// specified at image-creation time, otherwise the result might be incorrect.
#endif
};
/// Specifies parameters for a color-space-conversion copy from one region in a source image subresource to a region in
/// a destination image subresource. Used as an input to ICmdBuffer::CmdColorSpaceConversionCopy.
struct ColorSpaceConversionRegion
{
Offset2d srcOffset; ///< Offset to the start of the chosen region in the source subresource(s).
SignedExtent2d srcExtent; ///< Signed size of the source region in pixels. A negative size indicates a copy
/// in the reverse direction.
Offset2d dstOffset; ///< Offset to the start of the chosen region in the destination subresource(s).
SignedExtent2d dstExtent; ///< Signed size of the destination region in pixels. A negative size indicates a
/// copy in the reverse direction.
SubresId rgbSubres; ///< Selects the first subresource of the RGB image where the copy will begin. This
/// can either be the source or destination of the copy, depending on whether the
/// copy is performing an RGB->YUV or YUV->RGB conversion.
uint32 yuvStartSlice; ///< Array slice of the YUV image where the copy will begin. All aspects of planar
/// YUV images will be implicitly involved in the copy. This can either be the
/// source or destination of the copy, depending on whether the copy is performing
/// an RGB->YUV or YUV->RGB conversion.
uint32 sliceCount; ///< Number of slices the copy will span.
};
/// Specifies the color-space-conversion table used when converting between YUV and RGB Image formats. Used as an input
/// to ICmdBuffer::CmdColorSpaceConversionCopy.
struct ColorSpaceConversionTable
{
float table[3][4]; ///< Values forming the conversion table matrix, which has three rows and four columns. For RGB
/// to YUV conversions, the conversion shader uses the following expressions to evaluate the
/// YUV color:
/// Y = dot( [R G B 1], [row #0] )
/// U = dot( [R G B 1], [row #1] )
/// V = dot( [R G B 1], [row #2] )
/// For YUV to RGB conversions, the conversion shader uses the following expressions to
/// evaluate the RGB color:
/// R = dot( [Y U V 1], [row #0] )
/// G = dot( [Y U V 1], [row #1] )
/// B = dot( [Y U V 1], [row #2] )
/// A fourth row is not needed because alpha is copied directly between the RGB and YUV colors.
};
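// Example (illustrative sketch): the RGB-to-YUV expressions above are equivalent to the following CPU-side math,
// where cscTable is a ColorSpaceConversionTable and (r, g, b) is a hypothetical input color.
//
//     const float rgb1[4] = { r, g, b, 1.0f };
//     float yuv[3] = {};
//     for (uint32 row = 0; row < 3; ++row)
//     {
//         for (uint32 col = 0; col < 4; ++col)
//         {
//             yuv[row] += rgb1[col] * cscTable.table[row][col];
//         }
//     }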
/// Default color-space-conversion table usable by PAL clients when calling ICmdBuffer::CmdColorSpaceConversionCopy
/// to perform a YUV to RGB color space conversion. Represents the BT.601 standard (standard-definition TV).
extern const ColorSpaceConversionTable DefaultCscTableYuvToRgb;
/// Default color-space-conversion table usable by PAL clients when calling ICmdBuffer::CmdColorSpaceConversionCopy
/// to perform a RGB to YUV color space conversion. Represents the BT.601 standard (standard-definition TV).
extern const ColorSpaceConversionTable DefaultCscTableRgbToYuv;
/// Specifies flags controlling GPU copy behavior. Format related flags are ignored by DMA queues.
enum CopyControlFlags : uint32
{
CopyFormatConversion = 0x1, ///< Requests that the copy convert between two compatible formats. This is ignored
/// unless both formats support @ref FormatFeatureFormatConversion.
CopyRawSwizzle = 0x2, ///< If possible, raw copies will swizzle from the source channel format into the
/// destination channel format (e.g., RGBA to BGRA).
};
/// Specifies parameters for a resolve of one region in an MSAA source image to a region of the same size in a single
/// sample destination image. Used as an input to ICmdBuffer::CmdResolveImage().