-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathksy_style_guide.html
1032 lines (1030 loc) · 56.8 KB
/
ksy_style_guide.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="generator" content="Asciidoctor 2.0.23">
<meta name="author" content="Kaitai Project">
<title>KSY Style Guide</title>
<link rel="stylesheet" href="styles/pygments-default.css">
<link rel="stylesheet" href="styles/bootstrap.min.css">
<link rel="stylesheet" href="styles/bootstrap-theme.min.css">
<link rel="stylesheet" href="styles/main.css">
<link rel="stylesheet" href="styles/pygments-default.css">
<link rel="stylesheet" href="./styles/colony.css">
<link rel="stylesheet" href="styles/asciidoctor-tabs.css">
</head>
<body class="article toc2 toc-left">
<nav class="navbar navbar-inverse navbar-fixed-top" id="main-navbar">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#main-navbar-collapse" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<span class="navbar-brand">Kaitai Struct</span>
</div>
<div class="collapse navbar-collapse" id="main-navbar-collapse">
<ul class="nav navbar-nav">
<li class=""><a href="//kaitai.io/#what-is-it">What is it?</a></li>
<li class=""><a href="//kaitai.io/#quick-start">Quick Start</a></li>
<li class=""><a href="//kaitai.io/#download">Download</a></li>
<li class=""><a href="//kaitai.io/news/">News</a></li>
<li class=""><a href="//formats.kaitai.io/">Format Gallery</a></li>
</ul>
<ul class="nav navbar-nav navbar-right">
<li><a href="https://ide.kaitai.io/">Try it — Web IDE</a></li>
<li class="active"><a href="index.html">Documentation</a></li>
</ul>
</div>
</div>
</nav>
<div id="header">
<h1>KSY Style Guide</h1>
<div class="details">
<span id="author" class="author">Kaitai Project</span><br>
<span id="revnumber">version 0.10</span>
</div>
<div id="toc" class="toc2">
<div id="toctitle">Table of Contents</div>
<ul class="sectlevel1">
<li><a href="#general">1. General formatting</a></li>
<li><a href="#type">2. Order of sections in a type spec</a></li>
<li><a href="#meta">3. Meta section (<code>meta</code>)</a></li>
<li><a href="#documentation">4. Documentation</a>
<ul class="sectlevel2">
<li><a href="#doc">4.1. <code>doc</code></a></li>
<li><a href="#doc-ref">4.2. <code>doc-ref</code></a></li>
</ul>
</li>
<li><a href="#seq-attr">5. Sequence attributes</a>
<ul class="sectlevel2">
<li><a href="#attr-id">5.1. Attribute identifiers (<code>id</code>)</a></li>
<li><a href="#_trailing_padding">5.2. Trailing padding</a></li>
</ul>
</li>
<li><a href="#inst-attr">6. Instance attributes</a></li>
<li><a href="#transcribing">7. Transcribing existing specs</a>
<ul class="sectlevel2">
<li><a href="#_windows_struct">7.1. Windows struct</a></li>
<li><a href="#_linux_struct">7.2. Linux struct</a></li>
<li><a href="#_c_struct_as_header">7.3. C struct as "header"</a></li>
</ul>
</li>
</ul>
</div>
</div>
<div id="content">
<div id="preamble">
<div class="sectionbody">
<div class="paragraph">
<p>Although .ksy files are treated as YAML files, and YAML syntax allows
quite a few representations of the same content, it is recommended to
maintain a certain style in .ksy files to aid collaboration. This
document serves as the official .ksy style guide. In particular, we strive
to make sure that all formats in our
<a href="https://github.com/kaitai-io/kaitai_struct_formats">formats repository</a>
are using this style.</p>
</div>
<div class="paragraph">
<p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
document are to be interpreted as described in
<a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
</div>
<div class="paragraph">
<p>This document is a work in progress; not all sections are complete yet.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="general">1. General formatting</h2>
<div class="sectionbody">
<div class="paragraph">
<p>MUST use:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>Single implicit YAML document per file (i.e. no <code>---</code> header).</p>
</li>
<li>
<p>No <code>%YAML x.y</code> version directives, no <code>%TAG</code> directives.</p>
</li>
<li>
<p>2-space indent.</p>
</li>
<li>
<p>UTF-8 encoding throughout the file.</p>
</li>
<li>
<p>LF (AKA "UNIX") line endings.</p>
</li>
<li>
<p>Trailing newline character in a .ksy file.</p>
</li>
<li>
<p>Block YAML style in most general cases, unless explicitly
specified/allowed otherwise.</p>
</li>
</ul>
</div>
<div class="paragraph">
<p>Formatting of sequences MUST include an indent, i.e.:</p>
</div>
<table class="tableblock frame-none grid-all stretch">
<colgroup>
<col style="width: 50%;">
<col style="width: 50%;">
</colgroup>
<tbody>
<tr>
<td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock">
<div class="title">Good</div>
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-nt">imports</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">foo</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">bar</span></code></pre>
</div>
</div></div></td>
<td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock">
<div class="title">Bad</div>
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-nt">imports</span><span class="tok-p">:</span>
<span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">foo</span>
<span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">bar</span></code></pre>
</div>
</div></div></td>
</tr>
</tbody>
</table>
<div class="paragraph">
<p>Formatting of maps-inside-sequences MUST have the <code>-</code> delimiter and the first
map element on the same line, i.e.:</p>
</div>
<table class="tableblock frame-none grid-all stretch">
<colgroup>
<col style="width: 50%;">
<col style="width: 50%;">
</colgroup>
<tbody>
<tr>
<td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock">
<div class="title">Good</div>
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">foo</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u1</span>
<span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">bar</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u2</span>
<span class="tok-w"> </span><span class="tok-nt">enum</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">baz</span></code></pre>
</div>
</div></div></td>
<td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock">
<div class="title">Bad</div>
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-p tok-p-Indicator">-</span>
<span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">foo</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u1</span>
<span class="tok-p tok-p-Indicator">-</span>
<span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">bar</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u2</span>
<span class="tok-w"> </span><span class="tok-nt">enum</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">baz</span></code></pre>
</div>
</div></div></td>
</tr>
</tbody>
</table>
<div class="paragraph">
<p>All identifiers, docstrings, comments and generally all human-readable
text SHOULD be kept in English, unless there’s a very good reason not
to do so.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="type">2. Order of sections in a type spec</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Use the following order of sections:</p>
</div>
<div class="olist arabic">
<ol class="arabic">
<li>
<p><code>meta</code>, if present, MUST go first</p>
</li>
<li>
<p><code>doc</code></p>
</li>
<li>
<p><code>doc-ref</code></p>
</li>
<li>
<p><code>seq</code></p>
</li>
<li>
<p><code>instances</code>, <code>types</code>, <code>enums</code> — use one’s best judgement to order
these 3 to maximize readability</p>
</li>
</ol>
</div>
</div>
</div>
<div class="sect1">
<h2 id="meta">3. Meta section (<code>meta</code>)</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Meta section is mostly used to specify additional ("meta") information
for a particular file format, so 99% of the time it is only present for
the top-level type. In some cases, a meta section might be specified in
intermediate types as well, for example, to switch default endianness
or encoding. Thus, style recommendations for "top-level" meta sections
and "intermediate" meta sections are different, as they serve slightly
different purposes.</p>
</div>
<div class="paragraph">
<p>For top-level meta section, use the following order of keys:</p>
</div>
<div class="olist arabic">
<ol class="arabic">
<li>
<p><code>id</code> — main identifier, MUST match the name of the <code>.ksy</code> file</p>
</li>
<li>
<p>Human-readable meta-information (in order of importance, at least
one MUST be present):</p>
<div class="olist loweralpha">
<ol class="loweralpha" type="a">
<li>
<p><code>title</code></p>
</li>
<li>
<p><code>application</code> — SHOULD name a particular application, if there’s
any; if there are too many to list (for example, network packet
formats or executables are used virtually everywhere), then one
SHOULD omit this field.</p>
</li>
<li>
<p><code>file-extension</code> — if there’s only one extension, MUST be a
string; if there are several, MUST be a sequence in block form
and SHOULD order extensions from most popular extension to least
popular.</p>
</li>
<li>
<p><code>xref</code> — keys inside MUST be in alphabetic order</p>
</li>
<li>
<p><code>tags</code> — values inside MUST be in alphabetic order</p>
</li>
</ol>
</div>
</li>
<li>
<p>Legal information — <code>license</code> — MUST be a valid
<a href="https://spdx.org/licenses/">SPDX license expression</a></p>
</li>
<li>
<p>Processing instructions:</p>
<div class="olist loweralpha">
<ol class="loweralpha" type="a">
<li>
<p><code>ks-version</code> — SHOULD list the lowest possible KS compiler version that
is able to compile this file.</p>
</li>
<li>
<p><code>imports</code></p>
</li>
</ol>
</div>
</li>
<li>
<p>Defaults (in alphabetic order):</p>
<div class="olist loweralpha">
<ol class="loweralpha" type="a">
<li>
<p><code>encoding</code></p>
</li>
<li>
<p><code>endian</code></p>
</li>
</ol>
</div>
</li>
</ol>
</div>
<div class="paragraph">
<p>For intermediate-level meta sections, use the following order of keys:</p>
</div>
<div class="olist arabic">
<ol class="arabic">
<li>
<p>Defaults (in alphabetic order):</p>
<div class="olist loweralpha">
<ol class="loweralpha" type="a">
<li>
<p><code>encoding</code></p>
</li>
<li>
<p><code>endian</code></p>
</li>
</ol>
</div>
</li>
</ol>
</div>
<div class="admonitionblock note">
<table>
<tr>
<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">
KS syntax allows usage of some top-level elements deep inside
the hierarchy — this can be useful during development, for example,
for the purpose of grafting one .ksy file into another quickly. However,
in production-quality .ksy files, one MUST NOT use keys like <code>title</code>,
<code>imports</code> or <code>ks-version</code> (i.e. anything not explicitly listed in
the list above) on intermediate levels.
</td>
</tr>
</table>
</div>
<div class="paragraph">
<p>The following keys are reserved for internal use (i.e. debugging and
test running) and MUST NOT be used in general-purpose .ksy files:</p>
</div>
<div class="ulist">
<ul>
<li>
<p><code>ks-debug</code></p>
</li>
<li>
<p><code>ks-opaque-types</code></p>
</li>
</ul>
</div>
</div>
</div>
<div class="sect1">
<h2 id="documentation">4. Documentation</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="doc">4.1. <code>doc</code></h3>
<div class="paragraph">
<p>Formatting:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>Single-line documentation strings SHOULD be formatted using raw
unquoted string literals.</p>
</li>
<li>
<p>Multi-line documentation strings SHOULD be formatted using a
<a href="https://yaml.org/spec/1.2/spec.html#id2795688">YAML literal style
scalar</a>, i.e. using <code>: |</code> syntax. An example:</p>
</li>
</ul>
</div>
<div class="listingblock">
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-nt">doc</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-p tok-p-Indicator">|</span>
<span class="tok-w"> </span><span class="tok-no">File index entry contains intricate details about file in the</span>
<span class="tok-w"> </span><span class="tok-no">archive: there are both meta-information attributes (such as file</span>
<span class="tok-w"> </span><span class="tok-no">names, locations, various timestamps, etc) and references to</span>
<span class="tok-w"> </span><span class="tok-no">inodes, which can be used to find file body in the container.</span>
<span class="tok-w"> </span><span class="tok-no">For networked locations, file index entry uses an optional</span>
<span class="tok-w"> </span><span class="tok-no">`remote_resource` type. Proper usage sequence is:</span>
<span class="tok-w"> </span><span class="tok-no">* check `code` to be one that requires network usage</span>
<span class="tok-w"> </span><span class="tok-no">* determine file name using `name_networked` instance and check if</span>
<span class="tok-w"> </span><span class="tok-no">it's really a file requested by the user</span>
<span class="tok-w"> </span><span class="tok-no">* proceed to query information from networked resource given by</span>
<span class="tok-w"> </span><span class="tok-no">`resource` attribute</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>Lines should be wrapped to be at most 80 columns long. If a docstring doesn’t fit into
a single line after wrapping, then it’s a multi-line docstring, so use
the proper multi-line syntax.</p>
</div>
<div class="paragraph">
<p>There is currently no formal conversion of docstrings into language-specific
docstrings in KS, but generally we SHOULD keep them close to
<a href="https://commonmark.org/">CommonMark formatting</a>, i.e.:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>paragraphs separated by an empty line</p>
</li>
<li>
<p>bullet lists created by an asterisk <code>*</code> and a space at the beginning
of the line</p>
</li>
<li>
<p>use backticks <code>`</code> to wrap identifiers and small pieces of
code</p>
</li>
</ul>
</div>
<div class="paragraph">
<p>TODO: documentation contents, what should and should not be included</p>
</div>
</div>
<div class="sect2">
<h3 id="doc-ref">4.2. <code>doc-ref</code></h3>
<div class="paragraph">
<p>TODO</p>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="seq-attr">5. Sequence attributes</h2>
<div class="sectionbody">
<div class="paragraph">
<p>When specifying an attribute, one MUST use the following order of keys:</p>
</div>
<div class="olist arabic">
<ol class="arabic">
<li>
<p>Identifier(s)</p>
<div class="olist loweralpha">
<ol class="loweralpha" type="a">
<li>
<p><code>id</code></p>
</li>
<li>
<p><code>-orig-id</code> — use to specify original ID spelling if transcribing a
structure from existing software and/or official spec</p>
</li>
</ol>
</div>
</li>
<li>
<p><code>size</code></p>
</li>
<li>
<p><code>size-eos</code></p>
</li>
<li>
<p><code>type</code></p>
</li>
<li>
<p>Type-related keys:</p>
<div class="olist loweralpha">
<ol class="loweralpha" type="a">
<li>
<p><code>enum</code></p>
</li>
<li>
<p><code>contents</code></p>
</li>
<li>
<p><code>pad-right</code></p>
</li>
<li>
<p><code>terminator</code></p>
</li>
<li>
<p><code>include</code></p>
</li>
<li>
<p><code>consume</code></p>
</li>
<li>
<p><code>eos-error</code></p>
</li>
<li>
<p><code>encoding</code></p>
</li>
</ol>
</div>
</li>
<li>
<p><code>process</code></p>
</li>
<li>
<p>Repetition-related keys:</p>
<div class="olist loweralpha">
<ol class="loweralpha" type="a">
<li>
<p><code>repeat</code></p>
</li>
<li>
<p><code>repeat-eos</code>, <code>repeat-expr</code>, <code>repeat-until</code></p>
</li>
</ol>
</div>
</li>
<li>
<p><code>if</code></p>
</li>
<li>
<p><code>doc</code></p>
</li>
<li>
<p><code>doc-ref</code></p>
</li>
</ol>
</div>
<div class="paragraph">
<p>Every key is optional. Attributes SHOULD have at least <code>id</code> and <code>doc</code>
— however, see below for notes about omitting <code>id</code>, and <code>doc</code> SHOULD
NOT be included if it’s trivial (i.e. if it is a copy of <code>id</code>, and
there is really nothing more to say about that attribute).</p>
</div>
<div class="sect2">
<h3 id="attr-id">5.1. Attribute identifiers (<code>id</code>)</h3>
<div class="paragraph">
<p>KS enforces a specific identifier style in the language —
<code>lower_underscore_case</code> (it is needed to be able to convert to other
styles of identifier spelling, like <code>UpperCamelCase</code> or
<code>lowerCamelCase</code>, which some target languages use).</p>
</div>
<div class="paragraph">
<p>KS allows omitting <code>id</code>. One MUST omit <code>id</code> to mark up attributes of
unknown/undetermined purpose, i.e. unfinished reverse engineering
work. One MUST NOT omit <code>id</code> to mark up reserved/unused attributes and
padding, i.e. placeholders that are known to be empty and unused.</p>
</div>
<div class="paragraph">
<p>One SHOULD use the following rules to maintain consistency across
various KSY files. Doing that would maintain the "principle of least
surprise" and make life easier for end users, reducing the amount of
guesswork.</p>
</div>
<div class="ulist">
<ul>
<li>
<p>For simple non-repeated fields, use a simple singular form —
e.g. <code>width</code>, <code>header</code>, <code>transaction_id</code>, <code>file</code>.</p>
</li>
<li>
<p>For an array of objects (i.e. with <code>repeat: something</code>), use plural
form — e.g. <code>files</code>, <code>transactions</code>.</p>
</li>
<li>
<p>Don’t be overly verbose: use commonly understood abbreviations
liberally, if it will improve readability — e.g. <code>src_mac</code> or
<code>src_mac_addr</code> instead of <code>source_media_access_control_address</code></p>
</li>
<li>
<p>For fields that are designed to be used to detect file type (AKA
"magic values"), use <code>magic</code> name, or, if there are several of them,
<code>magic1</code>, <code>magic2</code>, etc.</p>
</li>
<li>
<p>For reserved fields which are <strong>known</strong> to be unused, use <code>reserved</code>
name (or <code>reserved1</code>, <code>reserved2</code>, etc, if there are many of them)</p>
</li>
<li>
<p>For fields that designate <strong>number / count</strong> of something (in
particular, number of repetitions of some other structure), use
<code>num_</code> prefix and plural form — e.g. <code>num_questions</code>, <code>num_blocks</code>,
<code>num_nodes</code></p>
</li>
<li>
<p>For fields that designate <strong>offset</strong> to some particular data structure,
use <code>ofs_</code> prefix and name of that data structure (as it would appear
in the file) — e.g. <code>ofs_block</code>, <code>ofs_queries</code>, <code>ofs_path</code></p>
</li>
<li>
<p>For fields that designate <strong>size</strong> of some particular data structure
(in bytes or some other fixed units), use <code>len_</code> prefix and name of
that data structure — e.g. <code>len_block</code> (length of a single <code>block</code>
entry), <code>len_blocks</code> (total length of whole <code>blocks</code> array, made of
<code>block</code> entries).</p>
</li>
</ul>
</div>
<div class="admonitionblock note">
<table>
<tr>
<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">
See <a href="#transcribing">Transcribing existing specs</a> for more info on preserving / renaming
identifiers when transcribing existing spec into KSY.
</td>
</tr>
</table>
</div>
</div>
<div class="sect2">
<h3 id="_trailing_padding">5.2. Trailing padding</h3>
<div class="paragraph">
<p>If you’re using a size-limited substream for a structure, one MUST NOT
specify manually calculated or auto-calculated extra padding to make
the structure consume the whole substream. Just omit it — it will save memory
and CPU time on parsing.</p>
</div>
<table class="tableblock frame-none grid-all stretch">
<colgroup>
<col style="width: 33.3333%;">
<col style="width: 33.3333%;">
<col style="width: 33.3334%;">
</colgroup>
<tbody>
<tr>
<td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock">
<div class="title">Good</div>
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-nt">seq</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">header</span>
<span class="tok-w"> </span><span class="tok-nt">size</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">64</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">block</span>
<span class="tok-nt">types</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">block</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">seq</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">param1</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u4</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">param2</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u2</span></code></pre>
</div>
</div></div></td>
<td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock">
<div class="title">Bad</div>
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-nt">seq</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">header</span>
<span class="tok-w"> </span><span class="tok-nt">size</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">64</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">block</span>
<span class="tok-nt">types</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">block</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">seq</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">param1</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u4</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">param2</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u2</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">padding</span>
<span class="tok-w"> </span><span class="tok-nt">size-eos</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">true</span></code></pre>
</div>
</div></div></td>
<td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock">
<div class="title">Worst</div>
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-nt">seq</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">header</span>
<span class="tok-w"> </span><span class="tok-nt">size</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">64</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">block</span>
<span class="tok-nt">types</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">block</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">seq</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">param1</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u4</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">param2</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u2</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">padding</span>
<span class="tok-w"> </span><span class="tok-nt">size</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">58</span>
<span class="tok-w"> </span><span class="tok-c1"># 64 - 4 - 2</span></code></pre>
</div>
</div></div></td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<div class="sect1">
<h2 id="inst-attr">6. Instance attributes</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Instance attributes use a superset of the keys which are allowed in
sequence attributes (except for <code>id</code>), thus all ordering rules apply
here as well. Keys MUST appear in this order:</p>
</div>
<div class="olist arabic">
<ol class="arabic">
<li>
<p><code>-orig-id</code> (optional)</p>
</li>
<li>
<p><code>io</code> (optional)</p>
</li>
<li>
<p><code>pos</code> or <code>value</code></p>
</li>
<li>
<p>All other keys (except for <code>id</code> and <code>-orig-id</code>), in order specified
in <a href="#seq-attr">Sequence attributes</a></p>
</li>
</ol>
</div>
</div>
</div>
<div class="sect1">
<h2 id="transcribing">7. Transcribing existing specs</h2>
<div class="sectionbody">
<div class="paragraph">
<p>When transcribing structures already described in some other existing
spec or software, note that it’s not necessary to copy existing
identifiers to <code>id</code> keys (in verbatim or modified form) or even
maintain same structures as types.</p>
</div>
<div class="paragraph">
<p>The rationale of doing so is that a lot of existing specs rely on
particular standards and approaches of some target language and/or
platform. Sometimes, existing specs are burdened by some legacy
(i.e. they are obliged to maintain names of fields for compatibility
with older software, even when a field’s true purpose was extended
since its introduction). KS, on the other hand, is cross-platform and
cross-language, thus it is not necessary (and in many cases, it’s just
impossible) to stick to a single platform’s style. And the KS-provided API is
to be used by new software anyway, so you don’t usually need to be
concerned with legacy compatibility.</p>
</div>
<div class="paragraph">
<p>Use <code>-orig-id</code> key to specify original names of fields for purposes of
maintaining a reference link to parts of original spec, but,
otherwise, feel free to use a more consistent and language-neutral
approach in naming attributes and types.</p>
</div>
<div class="sect2">
<h3 id="_windows_struct">7.1. Windows struct</h3>
<div class="paragraph">
<p>For example, consider this
<a href="https://docs.microsoft.com/en-us/windows/win32/api/mmiscapi/ns-mmiscapi-mmckinfo">MMCKINFO
Windows structure</a>, as specified in MSDN:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="pygments highlight"><code data-lang="cpp"><span></span><span class="tok-k">typedef</span><span class="tok-w"> </span><span class="tok-k">struct</span><span class="tok-w"> </span><span class="tok-p">{</span>
<span class="tok-w"> </span><span class="tok-n">FOURCC</span><span class="tok-w"> </span><span class="tok-n">ckid</span><span class="tok-p">;</span>
<span class="tok-w"> </span><span class="tok-n">DWORD</span><span class="tok-w"> </span><span class="tok-n">cksize</span><span class="tok-p">;</span>
<span class="tok-w"> </span><span class="tok-n">FOURCC</span><span class="tok-w"> </span><span class="tok-n">fccType</span><span class="tok-p">;</span>
<span class="tok-w"> </span><span class="tok-n">DWORD</span><span class="tok-w"> </span><span class="tok-n">dwDataOffset</span><span class="tok-p">;</span>
<span class="tok-w"> </span><span class="tok-n">DWORD</span><span class="tok-w"> </span><span class="tok-n">dwFlags</span><span class="tok-p">;</span>
<span class="tok-p">}</span><span class="tok-w"> </span><span class="tok-n">MMCKINFO</span><span class="tok-p">;</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>It is pretty inconsistent:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>Some fields use abbreviated lower case (<code>ckid</code> — as it stands for
"chunk ID"), others use lower camel case (<code>dwDataOffset</code>).</p>
</li>
<li>
<p>Some fields use so-called "Hungarian notation", i.e. prepending type
information to the identifier (e.g. <code>fccType</code> = "type, four-CC",
<code>dwDataOffset</code> = "data offset, double word"), some don’t (<code>ckid</code>,
<code>cksize</code>).</p>
</li>
<li>
<p>Some abbreviations are very brief (<code>ckid</code>), some are pretty verbose
(<code>dwDataOffset</code>).</p>
</li>
<li>
<p>Actually, <code>dwDataOffset</code> and <code>cksize</code> specify offset and size of the
same data structure (called "chunk’s data member" in human-readable
annotation).</p>
</li>
</ul>
</div>
<div class="paragraph">
<p>Also, this definition does not specify flag values (i.e. in a C struct
union syntax), but instead relies on flag constant definitions
elsewhere, which is also pretty inconvenient.</p>
</div>
<div class="paragraph">
<p>The recommended way to lay out that structure in KS would be something like this:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-nt">seq</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">chunk_id</span>
<span class="tok-w"> </span><span class="tok-nt">-orig-id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">ckid</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u4</span>
<span class="tok-w"> </span><span class="tok-nt">enum</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">four_cc</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">len_data</span>
<span class="tok-w"> </span><span class="tok-nt">-orig-id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">cksize</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u4</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">type</span>
<span class="tok-w"> </span><span class="tok-nt">-orig-id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">fccType</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u4</span>
<span class="tok-w"> </span><span class="tok-nt">enum</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">four_cc</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">ofs_data</span>
<span class="tok-w"> </span><span class="tok-nt">-orig-id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">dwDataOffset</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u4</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">flags</span>
<span class="tok-w"> </span><span class="tok-nt">-orig-id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">dwFlags</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">flags</span>
<span class="tok-w"> </span><span class="tok-nt">size</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">4</span>
<span class="tok-nt">instances</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">data</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">pos</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">ofs_data</span>
<span class="tok-w"> </span><span class="tok-nt">size</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">len_data</span>
<span class="tok-nt">types</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">flags</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-c1"># add a comprehensive type that describes flags here</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>Note that we’ve clearly separated names and types here, used standard
<code>ofs_</code> and <code>len_</code> prefixes for referencing offset and length of a
particular structure (named "data", short for "chunk’s data member",
in this case). Also, we’ve added <code>data</code> instance to access that
structure directly.</p>
</div>
</div>
<div class="sect2">
<h3 id="_linux_struct">7.2. Linux struct</h3>
<div class="paragraph">
<p>Another example is the ELF executable header, as specified in elf.h in
Linux:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="pygments highlight"><code data-lang="c"><span></span><span class="tok-k">typedef</span><span class="tok-w"> </span><span class="tok-k">struct</span>
<span class="tok-p">{</span>
<span class="tok-w"> </span><span class="tok-kt">unsigned</span><span class="tok-w"> </span><span class="tok-kt">char</span><span class="tok-w"> </span><span class="tok-n">e_ident</span><span class="tok-p">[</span><span class="tok-n">EI_NIDENT</span><span class="tok-p">];</span><span class="tok-w"> </span><span class="tok-cm">/* Magic number and other info */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Half</span><span class="tok-w"> </span><span class="tok-n">e_type</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Object file type */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Half</span><span class="tok-w"> </span><span class="tok-n">e_machine</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Architecture */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Word</span><span class="tok-w"> </span><span class="tok-n">e_version</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Object file version */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Addr</span><span class="tok-w"> </span><span class="tok-n">e_entry</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Entry point virtual address */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Off</span><span class="tok-w"> </span><span class="tok-n">e_phoff</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Program header table file offset */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Off</span><span class="tok-w"> </span><span class="tok-n">e_shoff</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Section header table file offset */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Word</span><span class="tok-w"> </span><span class="tok-n">e_flags</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Processor-specific flags */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Half</span><span class="tok-w"> </span><span class="tok-n">e_ehsize</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* ELF header size in bytes */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Half</span><span class="tok-w"> </span><span class="tok-n">e_phentsize</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Program header table entry size */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Half</span><span class="tok-w"> </span><span class="tok-n">e_phnum</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Program header table entry count */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Half</span><span class="tok-w"> </span><span class="tok-n">e_shentsize</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Section header table entry size */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Half</span><span class="tok-w"> </span><span class="tok-n">e_shnum</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Section header table entry count */</span>
<span class="tok-w"> </span><span class="tok-n">Elf32_Half</span><span class="tok-w"> </span><span class="tok-n">e_shstrndx</span><span class="tok-p">;</span><span class="tok-w"> </span><span class="tok-cm">/* Section header string table index */</span>
<span class="tok-p">}</span><span class="tok-w"> </span><span class="tok-n">Elf32_Ehdr</span><span class="tok-p">;</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>This one is less inconsistent, but still could be improved:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>It uses its own convention for specifying "offset", "size" and
"count" attributes.</p>
</li>
<li>
<p>It prepends <code>e_</code> prefix to every element, which would serve
absolutely no purpose in KS</p>
</li>
<li>
<p>It uses its own non-standard system of types (<code>Elf32_Half</code>,
<code>Elf32_Word</code>, <code>Elf32_Addr</code>, etc)</p>
</li>
<li>
<p><code>e_ident</code> actually is a complex 16-byte multi-member structure,
which includes 4 bytes of magic number to identify a file format and
12 bytes worth of extra fields</p>
</li>
<li>
<p>Abbreviations are way too short (e.g. <code>ph</code> for "program header",
<code>sh</code> for "section header", <code>eh</code> for "ELF header") for a casual user
to understand their meaning without a documentation lookup. This can
be easily remedied by using slightly more verbose names.</p>
</li>
</ul>
</div>
<div class="paragraph">
<p>Thus, the recommended way to represent it would be:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-nt">seq</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">magic</span>
<span class="tok-w"> </span><span class="tok-nt">-orig-id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">e_ident</span>
<span class="tok-w"> </span><span class="tok-nt">contents</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">0x7f</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-s">"ELF"</span>
<span class="tok-w"> </span><span class="tok-nt">doc</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">Magic number</span>
<span class="tok-w"> </span><span class="tok-c1"># add extra members for these 12 bytes here</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">file_type</span>
<span class="tok-w"> </span><span class="tok-nt">-orig-id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">e_type</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u2</span>
<span class="tok-w"> </span><span class="tok-nt">doc</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">Object file type</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">machine</span>
<span class="tok-w"> </span><span class="tok-nt">-orig-id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">e_machine</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u2</span>
<span class="tok-w"> </span><span class="tok-nt">doc</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">Architecture</span>
<span class="tok-w"> </span><span class="tok-c1"># ...</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">ofs_program_headers</span>
<span class="tok-w"> </span><span class="tok-nt">-orig-id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">e_phoff</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u4</span>
<span class="tok-w"> </span><span class="tok-nt">doc</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">Program header table file offset</span>
<span class="tok-w"> </span><span class="tok-c1"># ...</span>
<span class="tok-nt">instances</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">program_headers</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">pos</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">ofs_program_headers</span>
<span class="tok-w"> </span><span class="tok-nt">repeat</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">expr</span>
<span class="tok-w"> </span><span class="tok-nt">repeat-expr</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">num_program_headers</span>
<span class="tok-w"> </span><span class="tok-nt">size</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">len_program_header</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">program_header</span></code></pre>
</div>
</div>
</div>
<div class="sect2">
<h3 id="_c_struct_as_header">7.3. C struct as "header"</h3>
<div class="paragraph">
<p>Sometimes existing implementations use structures where they are
actually not necessary. This is, again, frequently done to satisfy
constraints of particular implementation, like C struct being of a
fixed size. KS does not have these restrictions, so in some cases one
can embed C struct "headers" right into the type and it would be
totally ok.</p>
</div>
<div class="paragraph">
<p>For example, consider the following description:</p>
</div>
<div class="quoteblock">
<blockquote>
<div class="paragraph">
<p>Image file starts with a header, which consists of:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>4 bytes - magic number, must be 0x11335577</p>
</li>
<li>
<p>4 bytes integer - width of image in pixels</p>
</li>
<li>
<p>4 bytes integer - height of image in pixels</p>
</li>
</ul>
</div>
<div class="paragraph">
<p>Then raw image data follows, width * height bytes.</p>
</div>
</blockquote>
</div>
<div class="paragraph">
<p>Naive C implementation of this format would likely split this format
into a "header" structure and a "body", header being declared as:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="pygments highlight"><code data-lang="c"><span></span><span class="tok-k">typedef</span><span class="tok-w"> </span><span class="tok-k">struct</span><span class="tok-w"> </span><span class="tok-p">{</span>
<span class="tok-w"> </span><span class="tok-kt">uint32_t</span><span class="tok-w"> </span><span class="tok-n">magic</span><span class="tok-p">;</span>
<span class="tok-w"> </span><span class="tok-kt">uint32_t</span><span class="tok-w"> </span><span class="tok-n">width</span><span class="tok-p">;</span>
<span class="tok-w"> </span><span class="tok-kt">uint32_t</span><span class="tok-w"> </span><span class="tok-n">height</span><span class="tok-p">;</span>
<span class="tok-p">}</span><span class="tok-w"> </span><span class="tok-n">image_header_t</span><span class="tok-p">;</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>Straightforward conversion of that structure would result in:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="pygments highlight"><code data-lang="yaml"><span></span><span class="tok-nt">seq</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">header</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">image_header</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">image_data</span>
<span class="tok-w"> </span><span class="tok-nt">size</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">header.width * header.height</span>
<span class="tok-nt">types</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">image_header</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-nt">seq</span><span class="tok-p">:</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">magic</span>
<span class="tok-w"> </span><span class="tok-nt">contents</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-p tok-p-Indicator">[</span><span class="tok-nv">0x11</span><span class="tok-p tok-p-Indicator">,</span><span class="tok-w"> </span><span class="tok-nv">0x33</span><span class="tok-p tok-p-Indicator">,</span><span class="tok-w"> </span><span class="tok-nv">0x55</span><span class="tok-p tok-p-Indicator">,</span><span class="tok-w"> </span><span class="tok-nv">0x77</span><span class="tok-p tok-p-Indicator">]</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">width</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u4</span>
<span class="tok-w"> </span><span class="tok-p tok-p-Indicator">-</span><span class="tok-w"> </span><span class="tok-nt">id</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">height</span>
<span class="tok-w"> </span><span class="tok-nt">type</span><span class="tok-p">:</span><span class="tok-w"> </span><span class="tok-l tok-l-Scalar tok-l-Scalar-Plain">u4</span></code></pre>
</div>