#!/usr/bin/env ruby
# frozen_string_literal: true
begin
require 'json'
require 'tempfile'
rescue LoadError => e
puts "Load error: #{e.message}"
exit 1
end
DEBUG_LEVEL = 1
INFO_LEVEL = 2
ERROR_LEVEL = 3
LEVEL_MAPPINGS = {
'debug' => { 'numeric' => DEBUG_LEVEL, 'colored' => "\033[34mDEBUG\033[0m" },
'info' => { 'numeric' => INFO_LEVEL, 'colored' => "\033[32mINFO\033[0m" },
'error' => { 'numeric' => ERROR_LEVEL, 'colored' => "\033[31mERROR\033[0m" },
}
def numerical_level(level_str)
LEVEL_MAPPINGS.dig(level_str.downcase, 'numeric') if !level_str.nil?
end
def colored_level(level_str)
LEVEL_MAPPINGS.dig(level_str.downcase, 'colored') if !level_str.nil?
end
class Logger
def initialize
@level = numerical_level(ENV['LOG_LEVEL'] || '') || INFO_LEVEL
@is_tty = STDOUT.isatty
end
def formatted_level(level)
if @is_tty
colored_level(level)
else
level.upcase
end
end
def log(level, msg)
numeric_level = numerical_level(level)
if numeric_level >= @level
formatted_level = self.formatted_level(level)
dt = Time.now.strftime('%Y-%m-%dT%H:%M:%S')
puts "[#{dt}] #{formatted_level} #{msg}"
end
end
def debug(msg)
self.log('debug', msg)
end
def info(msg)
self.log('info', msg)
end
def error(msg)
self.log('error', msg)
end
end
@logger = Logger.new
@integer_schema_types = %w[uint int]
@number_schema_types = %w[float]
@numeric_schema_types = @integer_schema_types + @number_schema_types
# Cross-platform friendly method of finding whether a command exists on the current path.
#
# If the command is found, the full path to it is returned. Otherwise, `nil` is returned.
def find_command_on_path(command)
exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
exts.each do |ext|
maybe_command_path = File.join(path, "#{command}#{ext}")
return maybe_command_path if File.executable?(maybe_command_path) && !File.directory?(maybe_command_path)
end
end
nil
end
@cue_binary_path = find_command_on_path('cue')
# Helpers for caching resolved/expanded schemas and detecting schema resolution cycles.
@resolved_schema_cache = {}
@expanded_schema_cache = {}
# Gets the schema of the given `name` from the resolved schema cache, if it exists.
def get_cached_resolved_schema(schema_name)
@resolved_schema_cache[schema_name]
end
# Gets the schema of the given `name` from the expanded schema cache, if it exists.
def get_cached_expanded_schema(schema_name)
@expanded_schema_cache[schema_name]
end
# Generic helpers for making working with Ruby a bit easier.
def to_pretty_json(value)
if value.is_a?(Hash)
# Sort the top-level keys for stable output. `flatten(1)` (rather than a full
# `flatten`) preserves any array values in the hash.
JSON.pretty_generate(Hash[*value.sort.flatten(1)])
else
JSON.pretty_generate(value)
end
end
def deep_copy(obj)
Marshal.load(Marshal.dump(obj))
end
def mergeable?(value)
value.is_a?(Hash) || value.is_a?(Array)
end
def _nested_merge_impl(base, override, merger)
# Handle some basic cases first.
if base.nil?
return override
elsif override.nil?
return base
elsif !mergeable?(base) && !mergeable?(override)
return override
end
deep_copy(base).merge(override.to_h, &merger)
end
def nested_merge(base, override)
merger = proc { |_, v1, v2|
if v1.is_a?(Hash) && v2.is_a?(Hash)
v1.merge(v2, &merger)
elsif v1.is_a?(Array) && v2.is_a?(Array)
v1 | v2
else
[:undefined, nil, :nil].include?(v2) ? v1 : v2
end
}
_nested_merge_impl(base, override, merger)
end
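# A quick illustration (with made-up values) of the semantics above: hashes merge
# recursively, arrays are unioned, and scalar overrides win.
#
#   nested_merge({ 'a' => { 'x' => 1 }, 'tags' => ['t1'] },
#                { 'a' => { 'y' => 2 }, 'tags' => ['t2'] })
#   # => { 'a' => { 'x' => 1, 'y' => 2 }, 'tags' => ['t1', 't2'] }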
def schema_aware_nested_merge(base, override)
merger = proc { |key, v1, v2|
if v1.is_a?(Hash) && v2.is_a?(Hash)
# Special behavior for merging const schemas together so they can be properly enum-ified.
if key == 'const' && v1.has_key?('value') && v2.has_key?('value')
[v1].flatten | [v2].flatten
else
v1.merge(v2, &merger)
end
elsif v1.is_a?(Array) && v2.is_a?(Array)
v1 | v2
else
[:undefined, nil, :nil].include?(v2) ? v1 : v2
end
}
_nested_merge_impl(base, override, merger)
end
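# As a sketch (made-up values) of the special case above: two `const` schemas merge
# into a flat array of their values, which can then be enum-ified downstream.
#
#   schema_aware_nested_merge({ 'const' => { 'value' => 'gzip' } },
#                             { 'const' => { 'value' => 'zstd' } })
#   # => { 'const' => [{ 'value' => 'gzip' }, { 'value' => 'zstd' }] }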
def sort_hash_nested(input)
input.keys.sort.each_with_object({}) do |key, acc|
acc[key] = if input[key].is_a?(Hash)
sort_hash_nested(input[key])
else
input[key]
end
end
end
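# e.g. sort_hash_nested({ 'b' => 1, 'a' => { 'd' => 2, 'c' => 3 } })
#      # => { 'a' => { 'c' => 3, 'd' => 2 }, 'b' => 1 }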
def write_to_temp_file(prefix, data)
file = Tempfile.new(prefix)
file.write(data)
file.close
file.path
end
# Gets the JSON Schema-compatible type name for the given Ruby value.
def json_type_str(value)
if value.is_a?(String)
'string'
elsif value.is_a?(Integer)
'integer'
elsif value.is_a?(Float)
'number'
elsif [true, false].include?(value)
'boolean'
elsif value.is_a?(Array)
'array'
elsif value.is_a?(Hash)
'object'
else
'null'
end
end
# Gets the docs-compatible type name for the given Ruby value.
#
# This is slightly different from the JSON Schema types, and is mostly an artifact of the original
# documentation design, and not representative of anything fundamental.
def docs_type_str(value)
type_str = json_type_str(value)
type_str = 'bool' if type_str == 'boolean'
type_str
end
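# e.g. docs_type_str(true) # => 'bool' (where JSON Schema would say 'boolean')
#      docs_type_str(1.5)  # => 'number'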
# Gets the type of the resolved schema.
#
# If the resolved schema has more than one type defined, `nil` is returned.
def resolved_schema_type?(resolved_schema)
if resolved_schema['type'].length == 1
resolved_schema['type'].keys.first
end
end
# Gets the numeric type of the resolved schema.
#
# If the resolved schema has more than one type defined, or the type is not a numeric type, `nil` is
# returned.
def numeric_schema_type(resolved_schema)
schema_type = resolved_schema_type?(resolved_schema)
schema_type if @numeric_schema_types.include?(schema_type)
end
# Gets the docs type for the given value's type.
#
# When dealing with a source schema, and trying to get the "docs"-compatible schema type, we need to
# cross-reference the original schema with the type of the value we have on the Ruby side. While
# some types like a string will always have a type of "string", numbers represent an area where we
# need to do some digging.
#
# For example, we encode the specific class of number on the source schema -- unsigned, signed, or
# floating-point -- and we can discern nearly as much from the Ruby value itself (integer vs float),
# but we need to be able to discern the precise type i.e. the unsigned vs signed vs floating-point
# bit.
#
# This function cross-references the Ruby value given with the source schema it is associated with
# and returns the appropriate "docs" schema type for that value. If the value is not recognized, or
# if the source schema does not match the given value, `nil` is returned.
#
# Otherwise, the most precise "docs" schema type for the given value is returned.
def get_docs_type_for_value(schema, value)
# If there's no schema to check against, or there is but it has no type field, that means we're
# dealing with something like a complex, overlapping `oneOf` subschema, where we couldn't
# declaratively figure out the right field to dig into if we were discerning an integer/number
# value, and so on.
#
# We just use the detected value type in that case.
schema_instance_type = get_json_schema_instance_type(schema) unless schema.nil?
if schema.nil? || schema_instance_type.nil?
return docs_type_str(value)
end
# If the schema defines a type, see if it matches the value type. If it doesn't, that's a bad sign
# and we abort. Otherwise, we fall through below to make sure we're handling special cases, i.e.
# numeric types.
value_type = json_type_str(value)
if value_type != schema_instance_type
@logger.error "Schema instance type and value type are a mismatch, which should not happen."
@logger.error "Schema instance type: #{schema_instance_type}"
@logger.error "Value: #{value} (type: #{value_type})"
exit 1
end
# For any numeric type, extract the value of `docs::numeric_type`, which must always be present in
# the schema for numeric fields. If the schema is `nil`, though, then it means we're dealing with
# a complex schema (like an overlapping `oneOf` subschema, etc) and we just fallback to the
# detected type.
if ['number', 'integer'].include?(value_type)
numeric_type = get_schema_metadata(schema, 'docs::numeric_type')
if numeric_type.nil?
@logger.error "All fields with numeric types should have 'docs::numeric_type' metadata included." +
"e.g. #[configurable(metadata(docs::numeric_type = \"bytes\"))]"
@logger.error "Value: #{value} (type: #{value_type})"
exit 1
end
return numeric_type
end
# TODO: The documentation should really just use `boolean` to match JSON Schema, which would let
# us get rid of this weird `json_type_str`/`docs_type_str` dichotomy.
docs_type_str(value)
end
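# As an illustrative example: given the value 10 and a source schema along the lines of
# { 'type' => 'integer', '_metadata' => { 'docs::numeric_type' => 'uint' } }, this
# returns 'uint' rather than the generic 'integer'.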
# Gets the schema type field for the given value's type.
#
# Essentially, as we resolve a schema we end up with a hash that looks like this:
#
# { "type" => { "string" => { ... } }
#
# When we want to do something like specify a default value, or give example values, we need to set
# them on the hash that represents the actual property value type. If a schema resolves as a string
# schema, we can trivially take that default value, calculate its type, and know that we need to set
# further data under the `string` key in the above example.
#
# This gets trickier for numeric types, however, as we encode them more specifically -- unsigned
# integer, floating-point number, etc -- in the resolved schema... but can only determine (on the
# Ruby side) if a value is the `number` type. To handle this, for any value of the type `number`, we
# iteratively try and find a matching type definition in the resolved schema for any of the possible
# numeric types.
def get_json_schema_type_field_for_value(source_schema, resolved_schema, value)
value_type = get_docs_type_for_value(source_schema, value)
resolved_schema.dig('type', value_type)
end
# Tries to find the schema for an object property nested in the given schema.
#
# This function will search through either the properties of the schema itself, if it is an object
# schema, or the properties of any object subschema that is present in `oneOf`/`allOf`.
#
# If no property is found, `nil` is returned.
def find_nested_object_property_schema(schema, property_name)
# See if we're checking an object schema directly.
if !schema['properties'].nil?
return schema['properties'][property_name]
end
# The schema isn't an object schema, so check to see if it's a `oneOf`/`allOf`, and if so,
# recursively visit each of those subschemas, looking for object schemas along the way that we can
# check for the given property within.
matching_property_schemas = []
unvisited_subschemas = schema['oneOf'].dup || schema['anyOf'].dup || schema['allOf'].dup || []
until unvisited_subschemas.empty?
unvisited_subschema = unvisited_subschemas.pop
# If the subschema has object properties, it won't be `oneOf`/`allOf`, so just try and grab the
# property if it exists, and move on.
if !unvisited_subschema['properties'].nil?
subschema_property = unvisited_subschema.dig('properties', property_name)
matching_property_schemas.push(subschema_property) unless subschema_property.nil?
next
end
# If the subschema had no object properties, see if it's an `oneOf`/`allOf` subschema, and if
# so, collect any of _those_ subschemas and add them to our list of subschemas to visit.
maybe_unvisited_subschemas = unvisited_subschema['oneOf'].dup || unvisited_subschema['anyOf'].dup || unvisited_subschema['allOf'].dup || []
unvisited_subschemas.concat(maybe_unvisited_subschemas) unless maybe_unvisited_subschemas.nil?
end
# Compare all matching property schemas to each other -- in their reduced form -- to see if they're
# identical. If they're not, or there were no matches, return `nil`.
#
# Otherwise, return the first matching property schema.
reduced_matching_property_schemas = matching_property_schemas.map { |property_schema| get_reduced_schema(property_schema) }
matching_property_schemas[0] if reduced_matching_property_schemas.uniq.count == 1
end
def get_schema_metadata(schema, key)
schema.dig('_metadata', key)
end
def get_schema_ref(schema)
schema['$ref']
end
# Gets the schema type for the given schema.
def get_json_schema_type(schema)
if schema.key?('allOf')
'all-of'
elsif schema.key?('oneOf')
'one-of'
elsif schema.key?('anyOf')
'any-of'
elsif schema.key?('type')
get_json_schema_instance_type(schema)
elsif schema.key?('const')
'const'
elsif schema.key?('enum')
'enum'
end
end
def get_json_schema_instance_type(schema)
maybe_type = schema['type']
# We don't deal with null instance types at all in the documentation generation phase.
if maybe_type == 'null'
return nil
end
# If the schema specifies multiple instance types, see if `null` is one of them, and if so,
# remove it. After that, if only one value is left, return that value directly rather than
# wrapped in an array.
#
# Otherwise, return the original array.
if maybe_type.is_a?(Array)
filtered = maybe_type.reject { |instance_type| instance_type == "null" }
if filtered.length == 1
return filtered[0]
end
end
maybe_type
end
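# e.g. get_json_schema_instance_type({ 'type' => ['string', 'null'] }) # => 'string'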
# Fixes grouped enums by adjusting the schema type where necessary.
#
# For "grouped enums", these represent the sum of all possible enum values in an `enum` schema being
# grouped by their JSON type. For example, a set of enums such as `[0, 1, 2, true, false]` would be
# grouped as:
#
# { "bool": [true, false], "number": [0, 1, 2] }
#
# This is technically correct, but in the documentation output, that `number` key needs to be `uint`
# or `int` or what have you. Since `enum` schemas don't carry the "numeric type" information, we try
# and figure that out here.
#
# If we find a `number` group, we check all of its values to see if they fit neatly within the
# bounds of any of the possible numeric types `int`, `uint`, or `float`. We try and coalesce
# towards `uint` as it's by far the most common numeric type in Vector configuration, but after
# that, we aim for `int`, unless the values are too large, in which case we'll shift up to `float`.
def fix_grouped_enums_if_numeric!(grouped_enums)
['integer', 'number'].each { |type_name|
number_group = grouped_enums.delete(type_name)
if !number_group.nil?
is_integer = number_group.all? { |n| n.is_a?(Integer) }
within_uint = number_group.all? { |n| n >= 0 && n <= (2 ** 64) - 1 }
within_int = number_group.all? { |n| n >= -(2 ** 63) && n <= (2 ** 63) - 1 }
# If the values themselves are not all integers, or they are but not all of them can fit within
# a normal 64-bit signed/unsigned integer, then we use `float` as it's the only other type that
# could reasonably satisfy the constraints.
numeric_type = if !is_integer || (!within_int && !within_uint)
'float'
else
if within_uint
'uint'
elsif within_int
'int'
else
# This should never actually happen, _but_, technically Ruby integers could be a "BigNum"
# aka arbitrary-precision integer, so this protects us if somehow we get a value that is an
# integer but doesn't actually fit neatly into 64 bits.
'float'
end
end
grouped_enums[numeric_type] = number_group
end
}
end
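# A sketch of the coalescing rules above, with made-up groups:
#
#   { 'integer' => [0, 1, 2] }  becomes { 'uint' => [0, 1, 2] }
#   { 'integer' => [-1, 1] }    becomes { 'int' => [-1, 1] }
#   { 'number' => [0.5, 1.5] }  becomes { 'float' => [0.5, 1.5] }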
# Gets a schema definition from the root schema, by name.
def get_schema_by_name(root_schema, schema_name)
schema_name = schema_name.gsub(%r{#/definitions/}, '')
schema_def = root_schema.dig('definitions', schema_name)
if schema_def.nil?
@logger.error "Could not find schema definition '#{schema_name}' in given schema."
exit 1
end
schema_def
end
# Gets the dereferenced version of this schema.
#
# If the schema has no schema reference, `nil` is returned.
def dereferenced_schema(root_schema, schema)
schema_name = get_schema_ref(schema)
if !schema_name.nil?
get_schema_by_name(root_schema, schema_name)
end
end
# Applies various fields to an object property.
#
# This includes items such as any default value that is present, or whether or not the property is
# required.
def apply_object_property_fields!(parent_schema, property_schema, property_name, property)
@logger.debug "Applying object property fields for '#{property_name}'..."
required_properties = parent_schema['required'] || []
has_self_default_value = !property_schema['default'].nil?
has_parent_default_value = !parent_schema.dig('default', property_name).nil?
has_default_value = has_self_default_value || has_parent_default_value
is_required = required_properties.include?(property_name)
if has_self_default_value
@logger.debug "Property has self-defined default value: #{property_schema['default']}"
end
if has_parent_default_value
@logger.debug "Property has parent-defined default value: #{parent_schema.dig('default', property_name)}"
end
if is_required
@logger.debug "Property is marked as required."
end
# Set whether or not this property is required.
property['required'] = is_required && !has_default_value
end
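# In other words: a property that appears in the parent's `required` array but also
# carries a default value is shown as NOT required in the docs output, since the
# default always satisfies it.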
# Expands any schema references in the given schema.
#
# If the schema contains a top-level schema reference, or if any of the parts of its schema contain
# schema references (array items schema, any subschemas in `oneOf`/`allOf`, etc), then those
# references are expanded. Expansion happens recursively until no schema references remain.
#
# For any overlapping fields in the given schema and the referenced schema, the fields from the
# given schema will win.
def expand_schema_references(root_schema, unexpanded_schema)
schema = deep_copy(unexpanded_schema)
# Grab the existing title/description from our unexpanded schema, and reset them after
# merging. This avoids us adding a title where there was only a description, and so on, since
# we have special handling rules around titles vs descriptions.
#
# TODO: If we ever just get rid of the title/description dichotomy, we could clean up this
# logic.
original_title = unexpanded_schema['title']
original_description = unexpanded_schema['description']
loop do
expanded = false
# If the schema has a top level reference, we expand it.
schema_ref = schema['$ref']
if !schema_ref.nil?
expanded_schema_ref = get_cached_expanded_schema(schema_ref)
if expanded_schema_ref.nil?
@logger.debug "Expanding top-level schema ref of '#{schema_ref}'..."
unexpanded_schema_ref = get_schema_by_name(root_schema, schema_ref)
expanded_schema_ref = expand_schema_references(root_schema, unexpanded_schema_ref)
@expanded_schema_cache[schema_ref] = expanded_schema_ref
end
schema.delete('$ref')
schema = nested_merge(expanded_schema_ref, schema)
expanded = true
end
# If the schema is an array type and has a reference for its items, we expand that.
items_ref = schema.dig('items', '$ref')
if !items_ref.nil?
expanded_items_schema_ref = expand_schema_references(root_schema, schema['items'])
schema['items'].delete('$ref')
schema['items'] = nested_merge(expanded_items_schema_ref, schema['items'])
expanded = true
end
# If the schema has any object properties, we expand those.
if !schema['properties'].nil?
schema['properties'] = schema['properties'].transform_values { |property_schema|
new_property_schema = expand_schema_references(root_schema, property_schema)
if new_property_schema != property_schema
expanded = true
end
new_property_schema
}
end
# If the schema has any `allOf`/`oneOf` subschemas, we expand those, too.
if !schema['allOf'].nil?
schema['allOf'] = schema['allOf'].map { |subschema|
new_subschema = expand_schema_references(root_schema, subschema)
if new_subschema != subschema
expanded = true
end
new_subschema
}
end
if !schema['oneOf'].nil?
schema['oneOf'] = schema['oneOf'].map { |subschema|
new_subschema = expand_schema_references(root_schema, subschema)
if new_subschema != subschema
expanded = true
end
new_subschema
}
end
if !schema['anyOf'].nil?
schema['anyOf'] = schema['anyOf'].map { |subschema|
new_subschema = expand_schema_references(root_schema, subschema)
if new_subschema != subschema
expanded = true
end
new_subschema
}
end
if !expanded
break
end
end
# If the original schema had either a title or description, we forcefully reset both of them back
# to their original state, either in terms of their value or them not existing as fields.
#
# If neither were present, we allow the merged in title/description, if any, to persist, as this
# maintains the "#[configurable(derived)]" behavior of titles/descriptions for struct fields.
if !original_title.nil? || !original_description.nil?
if !original_title.nil?
schema['title'] = original_title
else
schema.delete('title')
end
if !original_description.nil?
schema['description'] = original_description
else
schema.delete('description')
end
end
schema
end
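# A rough sketch of the expansion, using a hypothetical definition name:
#
#   root = { 'definitions' => { 'Tls' => { 'type' => 'object', 'title' => 'TLS' } } }
#   expand_schema_references(root, { '$ref' => '#/definitions/Tls' })
#   # => { 'type' => 'object', 'title' => 'TLS' }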
# Gets a reduced version of a schema.
#
# The reduced version strips out extraneous fields from the given schema, such that a value should
# be returned that is suitable for comparison with other schemas, to determine if the schemas --
# specifically the values that are allowed/valid -- are the same, while ignoring things like titles
# and descriptions.
def get_reduced_schema(schema)
schema = deep_copy(schema)
allowed_properties = ['type', 'const', 'enum', 'allOf', 'oneOf', '$ref', 'items', 'properties']
schema.delete_if { |key, _value| !allowed_properties.include?(key) }
if schema.key?('items')
schema['items'] = get_reduced_schema(schema['items'])
end
if schema.key?('properties')
schema['properties'] = schema['properties'].transform_values { |property_schema| get_reduced_schema(property_schema) }
end
if schema.key?('allOf')
schema['allOf'] = schema['allOf'].map { |subschema| get_reduced_schema(subschema) }
end
if schema.key?('oneOf')
schema['oneOf'] = schema['oneOf'].map { |subschema| get_reduced_schema(subschema) }
end
schema
end
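# e.g. get_reduced_schema({ 'type' => 'string', 'title' => 'Foo', 'description' => 'Bar' })
#      # => { 'type' => 'string' }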
# Gets a reduced version of a resolved schema.
#
# This is similar in purpose to `get_reduced_schema` but only cares about fields relevant to a
# resolved schema.
def get_reduced_resolved_schema(schema)
schema = deep_copy(schema)
allowed_types = ['condition', 'object', 'array', 'enum', 'const', 'string', 'bool', 'float', 'int', 'uint']
allowed_fields = []
# Clear out anything not related to the type definitions first.
schema.delete_if { |key, _value| key != 'type' }
type_defs = schema['type']
if !type_defs.nil?
type_defs.delete_if { |key, _value| !allowed_types.include?(key) }
schema['type'].each { |type_name, type_def|
case type_name
when "object"
type_def.delete_if { |key, _value| key != 'options' }
type_def['options'].transform_values! { |property|
get_reduced_resolved_schema(property)
}
when "array"
type_def.delete_if { |key, _value| key != 'items' }
type_def['items'] = get_reduced_resolved_schema(type_def['items'])
else
type_def.delete_if { |key, _value| !allowed_types.include?(key) }
end
}
end
schema
end
# Fully resolves a schema definition, if it exists.
#
# This looks up a schema definition by the given `name` within `root_schema` and resolves it.
# If no schema definition exists for the given name, `nil` is returned. Otherwise, the schema
# definition is preprocessed (collapsing schema references, etc), and then resolved. Both the
# "source" schema (preprocessed value) and the resolved schema are returned.
#
# Resolved schemas are cached.
#
# See `resolve_schema` for more details.
def resolve_schema_by_name(root_schema, schema_name)
# If it's already cached, use that.
resolved = get_cached_resolved_schema(schema_name)
return deep_copy(resolved) unless resolved.nil?
# It wasn't already cached, so we actually have to resolve it.
schema = get_schema_by_name(root_schema, schema_name)
resolved = resolve_schema(root_schema, schema)
@resolved_schema_cache[schema_name] = resolved
deep_copy(resolved)
end
# Fully resolves the schema.
#
# This recursively resolves schema references, as well as flattening them into a single object, and
# transforming certain usages -- composite/enum (`allOf`, `oneOf`), etc -- into more human-readable
# forms.
def resolve_schema(root_schema, schema)
# If the schema we've been given is a schema reference, we expand that first. This happens
# recursively, such that the resulting expanded schema has no schema references left. We need this
# because, in further steps, we need access to the full input schema that was used to generate
# the resolved schema.
schema = expand_schema_references(root_schema, schema)
# Skip any schema that is marked as hidden.
#
# While this is already sort of obvious, one non-obvious usage is for certain types that we
# manually merge after this script runs, such as the high-level "outer" (`SinkOuter`, etc) types.
# Those types include a field that uses the Big Enum -- an enum with all possible components of
# that type -- which, if we resolved it here, would spit out a ton of nonsense.
#
# We mark that field hidden so that it's excluded when we resolve the schema for `SinkOuter`, etc,
# and then we individually resolve the component schemas, and merge the two (outer + component
# itself) schemas back together.
if get_schema_metadata(schema, 'docs::hidden')
@logger.debug 'Instructed to skip resolution for the given schema.'
return
end
# Handle schemas that have type overrides.
#
# In order to better represent specific field types in the documentation, we may opt to use a
# special type definition name, separate from the classic types like "bool" or "string" or
# "object", and so on, in order to let the documentation generation process inject more
# full-fledged output than we can curry from the Rust side, across the configuration schema.
#
# We intentionally set no actual definition for these types, relying on the documentation generation
# process to provide the actual details. We only need to specify the custom type name.
#
# To handle u8 types as ASCII characters, and not their uint representation between 0 and 255, we
# added special handling for these exact values. This means
# `#[configurable(metadata(docs::type_override = "ascii_char"))]` should only be used consciously
# for the Rust u8 type. See lib/codecs/src/encoding/format/csv.rs for an example and
# https://github.com/vectordotdev/vector/pull/20498
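#
# As a hypothetical illustration: a u8 field with a default of 44 resolves to
# { 'type' => { 'ascii_char' => { 'default' => ',' } } }, since 44.chr == ','.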
type_override = get_schema_metadata(schema, 'docs::type_override')
if !type_override.nil?
if type_override == 'ascii_char'
if !schema['default'].nil?
resolved = { 'type' => { type_override.to_s => { 'default' => schema['default'].chr } } }
else
resolved = { 'type' => { type_override.to_s => { } } }
end
else
resolved = { 'type' => { type_override.to_s => {} } }
end
description = get_rendered_description_from_schema(schema)
resolved['description'] = description unless description.empty?
return resolved
end
# Now that the schema is fully expanded and it didn't need special handling, we'll go ahead and
# fully resolve it.
resolved = resolve_bare_schema(root_schema, schema)
if resolved.nil?
return
end
# If this is an array schema, remove the description from the schema used for the items, as we
# want the description for the overall property, using this array schema, to describe everything.
items_schema = resolved.dig('type', 'array', 'items')
if !items_schema.nil?
items_schema.delete('description')
end
# Apply any necessary defaults, descriptions, etc, to the resolved schema. This must happen here
# because there could be callsite-specific overrides to defaults, descriptions, etc, for a given
# schema definition that have to be layered.
apply_schema_default_value!(schema, resolved)
apply_schema_metadata!(schema, resolved)
description = get_rendered_description_from_schema(schema)
resolved['description'] = description unless description.empty?
# Resolve the deprecated flag. An optional deprecated message can also be set in the metadata.
if schema.fetch('deprecated', false)
resolved['deprecated'] = true
message = get_schema_metadata(schema, 'deprecated_message')
if message
resolved['deprecated_message'] = message
end
end
# Reconcile the resolved schema, which essentially gives us a chance to, once the schema is
# entirely resolved, check it for logical inconsistencies, fix up anything that we reasonably can,
# and so on.
reconcile_resolved_schema!(resolved)
resolved
end
# Fully resolves a bare schema.
#
# A bare schema is one that has no references to another schema, etc.
def resolve_bare_schema(root_schema, schema)
resolved = case get_json_schema_type(schema)
when 'all-of'
@logger.debug 'Resolving composite schema.'
# Composite schemas are indeed the sum of all of their parts, so resolve each subschema and
# merge their resolved state together.
reduced = schema['allOf'].filter_map { |subschema| resolve_schema(root_schema, subschema) }
.reduce { |acc, item| nested_merge(acc, item) }
reduced['type']
when 'one-of', 'any-of'
@logger.debug 'Resolving enum schema.'
# We completely defer resolution of enum schemas to `resolve_enum_schema` because there's a
# lot of tricks and matching we need to do to suss out patterns that can be represented in more
# condensed resolved forms.
wrapped = resolve_enum_schema(root_schema, schema)
# `resolve_enum_schema` always hands back the resolved schema under the key `_resolved`, so
# that we can add extra details about the resolved schema (anything that will help us render
# it better in the docs output) on the side. That's why we have to unwrap the resolved schema
# like this.
# TODO: Do something with the extra detail (`annotations`).
wrapped.dig('_resolved', 'type')
when 'array'
@logger.debug 'Resolving array schema.'
{ 'array' => { 'items' => resolve_schema(root_schema, schema['items']) } }
when 'object'
@logger.debug 'Resolving object schema.'
# TODO: Not all objects have an actual set of properties, such as anything using
# `additionalProperties` to allow for arbitrary key/values to be set, which is why we're
# handling the case of nothing in `properties`... but we probably want to be able to better
# handle expressing this in the output.. or maybe it doesn't matter, dunno!
properties = schema['properties'] || {}
options = properties.filter_map do |property_name, property_schema|
@logger.debug "Resolving object property '#{property_name}'..."
resolved_property = resolve_schema(root_schema, property_schema)
if !resolved_property.nil?
apply_object_property_fields!(schema, property_schema, property_name, resolved_property)
@logger.debug "Resolved object property '#{property_name}'."
[property_name, resolved_property]
else
@logger.debug "Resolution failed for '#{property_name}'."
nil
end
end
# If the object schema has `additionalProperties` set, we add an additional field
# (`*`) which uses the specified schema for that field.
additional_properties = schema['additionalProperties']
if !additional_properties.nil?
@logger.debug "Handling additional properties."
# Normally, we only get here if there's a hashmap field on a struct that can act as the
# catch-all for additional properties. That field, by definition, will be required to have a
# description, and maybe will have a title.
#
# That title/description makes sense for the field itself, but when we generate this new
# wildcard property, we generally want to have something short and simple, in the singular
# form. For example, if we have a field called "labels", the title/description might talk
# about what the labels are used for, any special requirements, and so on... and then for
# the wildcard property, we might want to have the description read as "A foobar label."
# just to make the UI look nice.
#
# Rather than try and derive this from the title/description on the field, we'll require
# such a description to be provided on the Rust side via the metadata attribute shown below.
singular_description = get_schema_metadata(schema, 'docs::additional_props_description')
if singular_description.nil?
@logger.error "Missing 'docs::additional_props_description' metadata for a wildcard field.\n\n" \
"For map fields (`HashMap<...>`, etc), a description (in the singular form) must be provided by via `#[configurable(metadata(docs::additional_props_description = \"Description of the field.\"))]`.\n\n" \
"The description on the field, derived from the code comments, is shown specifically for `field`, while the description provided via `docs::additional_props_description` is shown for the special `field.*` entry that denotes that the field is actually a map."
@logger.error "Relevant schema: #{JSON.pretty_generate(schema)}"
exit 1
end
resolved_additional_properties = resolve_schema(root_schema, additional_properties)
resolved_additional_properties['required'] = true
resolved_additional_properties['description'] = singular_description
options.push(['*', resolved_additional_properties])
end
{ 'object' => { 'options' => options.to_h } }
when 'string'
@logger.debug 'Resolving string schema.'
string_def = {}
string_def['default'] = schema['default'] unless schema['default'].nil?
{ 'string' => string_def }
when 'number', 'integer'
@logger.debug 'Resolving number schema.'
numeric_type = get_schema_metadata(schema, 'docs::numeric_type') || 'number'
number_def = {}
number_def['default'] = schema['default'] unless schema['default'].nil?
@logger.debug 'Resolved number schema.'
{ numeric_type => number_def }
when 'boolean'
@logger.debug 'Resolving boolean schema.'
bool_def = {}
bool_def['default'] = schema['default'] unless schema['default'].nil?
{ 'bool' => bool_def }
when 'const'
@logger.debug 'Resolving const schema.'
# For `const` schemas, just figure out the type of the constant value so we can generate the
# resolved output.
const_type = get_docs_type_for_value(schema, schema['const'])
const_value = { 'value' => schema['const'] }
const_description = get_rendered_description_from_schema(schema)
const_value['description'] = const_description unless const_description.nil?
{ const_type => { 'const' => const_value } }
when 'enum'
@logger.debug 'Resolving enum const schema.'
# Similarly to `const` schemas, `enum` schemas are merely multiple possible constant values. Given
# that JSON Schema does allow for the constant values to differ in type, we group them all by
# type to get the resolved output.
enum_values = schema['enum']
grouped = enum_values.group_by { |value| docs_type_str(value) }
fix_grouped_enums_if_numeric!(grouped)
grouped.transform_values! { |values| { 'enum' => values } }
grouped
else
@logger.error "Failed to resolve the schema. Schema: #{schema}"
exit 1
end
{ 'type' => resolved }
end
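# As an illustration of the `enum` arm above (made-up values): an `enum` schema of
# [0, 1, true] resolves to:
#
#   { 'type' => { 'uint' => { 'enum' => [0, 1] }, 'bool' => { 'enum' => [true] } } }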
def resolve_enum_schema(root_schema, schema)
# Figure out if this is a one-of or any-of enum schema. It must have one or the other.
subschemas = if schema.key?('oneOf')
schema['oneOf']
elsif schema.key?('anyOf')
schema['anyOf']
else
@logger.error "Enum schema had both `oneOf` and `anyOf` specified. Schema: #{schema}"
exit 1
end
# Filter out all subschemas which are purely null schemas used for indicating optionality, as well
# as any subschemas that are marked as being hidden.
is_optional = get_schema_metadata(schema, 'docs::optional')
subschemas = subschemas
.reject { |subschema| subschema['type'] == 'null' }
.reject { |subschema| get_schema_metadata(subschema, 'docs::hidden') }
subschema_count = subschemas.count
# If we only have one subschema after filtering, check to see if it's an `allOf` or `oneOf` schema
# and `is_optional` is true.
#
# If it's an `allOf` subschema, then that means we originally had an `allOf` schema that we had to
# make optional, thus converting it to a `oneOf` with subschemas in the shape of `[null, allOf]`.
# In this case, we remove the `oneOf` and move the `allOf` subschema up, unwrapping it such that
# we end up with a copy of `schema` that looks like it was an `allOf` schema all along. This lets
# us properly resolve `allOf` schemas that were wrapped as `oneOf` with a null schema in order to
# establish optionality.
if is_optional && subschema_count == 1
if get_json_schema_type(subschemas[0]) == 'all-of'
@logger.debug "Detected optional all-of schema, unwrapping all-of schema to resolve..."
# Copy the current schema and drop `oneOf` and set `allOf` with the subschema, which will get us the correct
# unwrapped structure.
unwrapped_schema = deep_copy(schema)
unwrapped_schema.delete('oneOf')
unwrapped_schema['allOf'] = deep_copy(subschemas[0]['allOf'])
return { '_resolved' => resolve_schema(root_schema, unwrapped_schema) }
else
# For all other subschema types, we copy the current schema, drop the `oneOf`, and merge the
# subschema into it. This essentially unnests the schema.
unwrapped_schema = deep_copy(schema)