From 3f68c3c3df60dd92d8f43b61c878cb3b8edbc0cf Mon Sep 17 00:00:00 2001 From: Andrei Litvin Date: Fri, 18 Feb 2022 08:14:59 -0500 Subject: [PATCH] Document codegen python code (#15306) * Add comments to build-time generator logic. Add some code comments and slight cleanup to idl generator logic. * Add a readme.md * Restyled by prettier-markdown * Fix some typos * Another typo fix * More typos from misspell check * Update wordlist with IDL content ... spellcheck on code makes me sad * Remove typo of octet * add back apparently used filter. Did not realise it is used * Do not add misspelled words to wordlist * Fix one more typo * Fix typo Co-authored-by: Restyled.io --- .github/.wordlist.txt | 40 ++++- scripts/codegen.py | 7 +- scripts/idl/README.md | 190 ++++++++++++++++++++++++ scripts/idl/generators/__init__.py | 30 +++- scripts/idl/generators/filters.py | 4 + scripts/idl/generators/java/__init__.py | 62 ++++++-- scripts/idl/generators/types.py | 117 +++++++++++++-- scripts/idl/matter_idl_parser.py | 30 +++- 8 files changed, 442 insertions(+), 38 deletions(-) create mode 100644 scripts/idl/README.md diff --git a/.github/.wordlist.txt b/.github/.wordlist.txt index 9a6002b4a82d82..dca0323454552f 100644 --- a/.github/.wordlist.txt +++ b/.github/.wordlist.txt @@ -1,4 +1,3 @@ - 14 15 16 @@ -17,6 +16,7 @@ ABI ABIs ables AccessControl +AccessControlEntry accessor AccountLogin acdbc @@ -48,6 +48,7 @@ amebad amebaiot announcementReason AnnounceOTAProvider +AnnounceOtaProviderRequest APIs apk AppConfig @@ -74,6 +75,7 @@ armv ASAN asdk AssertionError +AST ASYNC atomics att @@ -89,6 +91,7 @@ ATW ATWC AudioOutput auth +AuthMode autocompletion autoconnect autocrlf @@ -226,6 +229,8 @@ CMVH cn codeaurora codebase +codegen +CodeGenerator CodeLab codelabs ColorControl @@ -246,12 +251,14 @@ ConnectIP ConnectivityManager ConnectivityManagerImpl ConnectNetwork +ConnectNetworkRequest +ConnectNetworkResponse connstring conntype const ContentApp -ContentApp's ContentAppPlatform +ContentApp's ContentLaunch ContentLauncher continuousHinting @@ -301,9 +308,11 @@ DCL DCMAKE DCONFIG debianutils +debugText DEDEDEDE deepnote DefaultOTAProviders +DefaultSuccess definedValue DehumidificationControl DelayedActionTime @@ -413,8 +422,10 @@ endsWith eno entrypoint enum +Enums env eq +errorValue esd ESPPORT Espressif @@ -434,6 +445,7 @@ exe ExecStart executables ExtendedPAN +ExtensionEntry extern extpanid FabricId @@ -496,7 +508,6 @@ getManualTests getstarted getTests GH -gh ghp githubusercontent gitignore @@ -548,6 +559,8 @@ ibb ICA ICMP IDF +IDL +IDLs idx ifdef ifdefs @@ -608,18 +621,24 @@ jre js json JTAG -jupyter Jupyter jupyterlab KA +kBusy +kCase Kconfig KeypadInput +kGroup kInvalidCommandId KitProg kNodeIdNotSpecified knownissues +kPAKEParameterError +kPase KVS +kWindowNotOpen LabelList +LabelStruct launchable LAUNCHXL ldflags @@ -764,6 +783,7 @@ MX mydir MyPASSWORD MySSID +namespacing nano natively navpad @@ -794,6 +814,7 @@ nrfdks nrfutil nrfxlib NTAG +nullable nullptr NUM nwk @@ -852,6 +873,7 @@ PacketBuffer PAI PairDevice PAKE +palletsprojects pankore param params @@ -952,6 +974,7 @@ ReadConfigValue readelf readfds README +readonly readthedocs Reag rebase @@ -966,17 +989,17 @@ Rendez RendezvousInformation RendezvousParameters RendezVousTest -repl REPL repo req Requestor -Requestor's RequestorCanConsent +Requestor's Requestors responder retargeting reusability +reviwed rfid rfids RGB @@ -1012,8 +1035,8 @@ ScriptBinding SDC SDHC SDK -SDK's sdkconfig +SDK's SDKs SDKTARGETSYSROOT sdl @@ -1071,6 +1094,7 @@ SSID 
startoffset StartScan startsWith +StatusCode stderr stdout sterm @@ -1117,6 +1141,7 @@ TCP teardown Telink TemperatureMeasurement +templating testability TestArray TestCluster @@ -1181,7 +1206,6 @@ TXD txt UART udc -UDC udcport udhcpc UDP diff --git a/scripts/codegen.py b/scripts/codegen.py index 2712495d5d4fb2..c912ec0b3623bc 100755 --- a/scripts/codegen.py +++ b/scripts/codegen.py @@ -31,6 +31,11 @@ class CodeGeneratorTypes(enum.Enum): + """ + Represents every generator type supported by codegen and maps + the simple enum value (user friendly and can be a command line input) + into underlying generators. + """ JAVA = enum.auto() def CreateGenerator(self, *args, **kargs): @@ -42,7 +47,7 @@ def CreateGenerator(self, *args, **kargs): class ListGeneratedFilesStorage(GeneratorStorage): """ - Output a list of files to be generated + A storage that prints out file names that would have content in them. """ def get_existing_data(self, relative_path: str): diff --git a/scripts/idl/README.md b/scripts/idl/README.md new file mode 100644 index 00000000000000..6ad11b511f7d2f --- /dev/null +++ b/scripts/idl/README.md @@ -0,0 +1,190 @@ +# IDL based codegen + +## What is a matter IDL + +A matter IDL is a text-file that aims to be a concise representation of data +structures, cluster definitions and bindings. It is intended for human +readability (has to be clear and concise, supports comments) as well as machine +parsing (well defined syntax, not free form). + +The actual grammar is defined as a +[Lark](https://lark-parser.readthedocs.io/en/latest/index.html) parsing grammar, +however it may be easier to understand with an example: + +``` +/* C++ comments are supported */ +// so are C-style ones + + +// Types such as enums and structs can be defined globally +// An enum has a name and must have an underlying type (a sized integer defining +// storage size and how it gets transmitted over the wire) +enum StatusCode : ENUM8 { + kBusy = 1; // every enum MUST have an integer value + kPAKEParameterError = 0x02; // throughout, either HEX or integer can be used + kWindowNotOpen = 3; +} + +// Structures just have a name +struct LabelStruct { + CHAR_STRING<16> label = 0; // structure fields have a type. Some types can be sized (strings, octet strings) + CHAR_STRING<16> value = 1; // every structure field has an index. this is the tag when encoding over the wire +} + +// Server clusters are clusters that are expected to be exposed as servers +// +// Clusters may have optional things (e.g. optional attributes). A server +// cluster only defines attributes actually exposed by the server. +// +// Every cluster has an identifier that is defined by the matter spec (31 in this case) +server cluster AccessControl = 31 { + + // Enums and structs can be defined globally or be cluster specific. + // IDL generation rules will take into account scoping (i.e. pick local defined + // name first, things defined in one cluster are not visible in another). + enum AuthMode : ENUM8 { + kPase = 1; + kCase = 2; + kGroup = 3; + } + + struct AccessControlEntry { + fabric_idx fabricIndex = 0; + Privilege privilege = 1; + AuthMode authMode = 2; + nullable INT64U subjects[] = 3; // fields in structures may be lists and + nullable Target targets[] = 4; // they may have attributes: nullable + } + + // request structures are regular structures that are used + // as command inputs. Special tagging to make the use clear. 
+ request struct AnnounceOtaProviderRequest {} + request struct ConnectNetworkRequest { + OCTET_STRING networkID = 0; + INT64U breadcrumb = 1; + } + + // Response structures are used for command outputs + response struct ConnectNetworkResponse { + CHAR_STRING debugText = 1; + INT32S errorValue = 2; + } + + + attribute AccessControlEntry acl[] = 0; // attributes are read-write by default + attribute ExtensionEntry extension[] = 1; // and require a (spec-defined) number + + // attributes may be read-only as well + // "global" specifies that this attribute number is generally available and + // reused across all clusters. + // + // TODO: it is unclear if this helps codegen or readability, so the "global" + // attribute may be removed in the future. + readonly global attribute int16u clusterRevision = 65533; + + // Commands have spec-defined numbers, which are used for over-the-wire + // invocation. + // + // Commands have input and output data types, generally encoded as structures. + command ConnectNetwork(ConnectNetworkRequest): ConnectNetworkResponse = 0; + + // An output is always available even if just for ok/failure, however + // IDLs specifically do not define a structure for DefaultSuccess, + // which is considered an internal type. + command AnnounceOtaProvider(AnnounceOtaProviderRequest): DefaultSuccess = 1; + + // Some commands may take no inputs at all + command On(): DefaultSuccess = 2; + command Off(): DefaultSuccess = 3; +} + +// A client cluster represents something that is used by an app, +// but no server is exposed. +// +// A code generator may generate any combination of client and server +// clusters for a given cluster name. +// +// Even if both client and server clusters are declared within an IDL, their +// content may differ. For example: +// - the server cluster contains only attributes that the server exposes, so +// some optional attributes may be missing. +// +// - the client cluster contains attributes that the app wants to access as a +// client, so an optional attribute may be present because the +// underlying application can make use of that attribute. +client cluster OtaSoftwareUpdateProvider = 41 { + ///.... content removed: it is very similar to a server cluster +} + + +// On every endpoint number (non-dynamic) +// a series of clusters can be exposed +endpoint 0 { + // A binding cluster is a CLIENT cluster that the application can bind to + // and make use of. + // + // As an example, a light switch can be bound to a light bulb, or + // a cluster can be bound to an OTA provider to use for updates. + binding cluster OtaSoftwareUpdateProvider; + + // A server cluster is a server that gets exposed to the world. + // + // As an example, a light bulb may expose an OnOff cluster. + server cluster OtaSoftwareUpdateRequestor; +} + +``` + +## Parsing of IDLs + +IDL parsing is done within the `idl` Python package (the current +directory of this README). Most of the heavy lifting is done by Lark using +[matter_grammar.lark](./matter_grammar.lark); the parsed content is then turned into an AST: + +- [matter_grammar.lark](./matter_grammar.lark) parses and validates textual + content +- [matter_idl_parser.py](./matter_idl_parser.py) has a transformer that + converts the tree produced by Lark into a more type-safe (and type-rich) AST as + defined in [matter_idl_types.py](./matter_idl_types.py) + +## Code generation + +Code generators are defined in `generators` and their purpose is to convert the +parsed AST into one or several output files.
In most cases the output will be +split per cluster so that large CPP files are not generated (faster and more +parallel compilation). + +### Code generator base functionality + +Generators use [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/) as a +templating language. The general `CodeGenerator` class in +[generators/__init__.py](./generators/__init__.py) provides the ability to +output files based on Jinja templates. + +To build working jinja2 templates, some further processing of the AST +data is required. Facilities for lookup namespacing (e.g. searching for named +data types within the cluster first, then globally) as well as interpretation of data +types into more concrete types are provided by `generators/types.py`. + +### Implementing generators + +Beyond default AST processing, each generator is expected to add +language-specific filters to create templates that work. This includes: + +- adding any additional filters and transforms for data +- adding any additional type processing that is language specific + +See the Java code generator in `generators/java` as an example of codegen. + +### Testing generators + +Generation tests are based on checking that a given input matches an expected +output. The [tests/available_tests](./test/available_tests.yaml) file describes +the expected output for each input and generator. + +Tests are intended to be focused and to make deltas easy to review. Input IDLs are +expected to be small and to focus on a specific functionality. Keep in mind that +the test outputs are expected to be human-reviewed when codegen logic changes. + +These generator tests are run by `test_generators.py`. diff --git a/scripts/idl/generators/__init__.py b/scripts/idl/generators/__init__.py index 6cd25fb9927d8b..308aaf603f005b 100644 --- a/scripts/idl/generators/__init__.py +++ b/scripts/idl/generators/__init__.py @@ -43,6 +43,11 @@ def write_new_data(self, relative_path: str, content: str): class FileSystemGeneratorStorage(GeneratorStorage): + """ + A generator storage that physically writes files to disk into + a given output folder. + """ + def __init__(self, output_dir: str): self.output_dir = output_dir @@ -75,17 +80,30 @@ def write_new_data(self, relative_path: str, content: str): class CodeGenerator: """ - Defines the general interface for things that can - generate code output. + Defines the general interface for things that can generate code output. + + A CodeGenerator takes an AST as input (an `Idl` type) and generates files + as output (like java/cpp/mm/other). + + Its public interface surface is reasonably small: + the 'storage' init argument specifies where generated code goes, + 'idl' is the input AST to generate, and + 'render' performs the rendering of all files. + + As a special optimization, CodeGenerators will generally try to read + existing data and will not re-write content if it has not changed (so that + file write times do not change and rebuilds are not triggered). """ def __init__(self, storage: GeneratorStorage, idl: Idl): + """ + A code generator will render a parsed IDL (an AST) into a given storage. + """ self.storage = storage self.idl = idl self.jinja_env = jinja2.Environment( loader=jinja2.FileSystemLoader(searchpath=os.path.dirname(__file__))) self.dry_run = False - self.output_file_names = [] RegisterCommonFilters(self.jinja_env.filters) @@ -98,7 +116,6 @@ def render(self, dry_run=False): dry_run: if true, outputs are not actually written to disk. if false, outputs are actually written to disk. """ - self.output_file_names = [] self.dry_run = dry_run self.internal_render_all() @@ -112,8 +129,8 @@ def internal_render_one_output(self, template_path: str, output_file_name: str, """ Method to be called by subclasses to mark that a template is to be generated. - File will either actually do a jinja2 generation or, if only output file - names are desired, will only record the output. + The file will either actually be generated via jinja2 or just logged + if dry-run was requested during `render`. NOTE: to make this method suitable for rebuilds, this file will NOT alter the timestamp of the output file if the file content would not @@ -126,7 +143,6 @@ def internal_render_one_output(self, template_path: str, output_file_name: str, vars - variables used for template generation """ logging.info("File to be generated: %s" % output_file_name) - self.output_file_names.append(output_file_name) if self.dry_run: return diff --git a/scripts/idl/generators/filters.py b/scripts/idl/generators/filters.py index 35c2293d28b765..13e84d37233be1 100644 --- a/scripts/idl/generators/filters.py +++ b/scripts/idl/generators/filters.py @@ -19,6 +19,10 @@ def RegisterCommonFilters(filtermap): """ Register filters that are NOT considered platform-generator specific. + + Codegen often needs standardized names, like "method names are CamelCase" + or "command names need-to-be-spinal-case", so these filters are + generally registered on all generators. """ # General casing for output naming diff --git a/scripts/idl/generators/java/__init__.py b/scripts/idl/generators/java/__init__.py index 76034a584efb86..6b74579ea6167f 100644 --- a/scripts/idl/generators/java/__init__.py +++ b/scripts/idl/generators/java/__init__.py @@ -71,6 +71,15 @@ def FieldToGlobalName(field: Field, context: TypeLookupContext) -> Union[str, No def CallbackName(attr: Attribute, cluster: Cluster, context: TypeLookupContext) -> str: + """ + Figure out what callback name to use when a variable requires a read callback. + + These are split into native types, like Boolean/Float/Double/CharString, where + one callback type can support anything. + + For specific types (e.g. a struct) codegen will generate its own callback name + specific to that type. + """ global_name = FieldToGlobalName(attr.definition, context) if global_name: @@ -100,12 +109,6 @@ def attributesWithSupportedCallback(attrs, context: TypeLookupContext): yield attr -def ClientClustersOnly(clusters: List[Cluster]): - for cluster in clusters: - if cluster.side == ClusterSide.CLIENT: - yield cluster - - def NamedFilter(choices: List, name: str): for choice in choices: if choice.name == name: @@ -124,7 +127,14 @@ def ToBoxedJavaType(field: Field): return 'jobject' -def LowercaseFirst(name: str): +def LowercaseFirst(name: str) -> str: + """ + Change the first letter of a string to lowercase as long as the 2nd + letter is not uppercase. + + Can be used for variable naming, e.g. inside structures, codegen will + name things "Foo foo" (notice the variable name is lowercase).
+ """ if len(name) > 1 and name[1].lower() != name[1]: # Odd workaround: PAKEVerifier should not become pAKEVerifier return name @@ -142,6 +152,12 @@ class EncodableValue: Contains helpers for encoding values, specifically lookups for optionality, lists and recursive data type lookups within the IDL and cluster + + Intended use is to be able to: + - derive types (see clone and without_* methods) such that codegen + can implement things like 'if x != null { treat non-null x}' + - Java-specific conversions: get boxed types and JNI string signatures + for the underlying types. """ def __init__(self, context: TypeLookupContext, data_type: DataType, attrs: Set[EncodableValueAttr]): @@ -280,6 +296,13 @@ def boxed_java_signature(self): def EncodableValueFrom(field: Field, context: TypeLookupContext) -> EncodableValue: + """ + Filter to convert a standard field to an EncodableValue. + + This converts the AST information (field name/info + lookup context) into + a Java-generator-specific wrapper that can be manipulated and + queried for properties like the Java native name or JNI string signature. + """ attrs = set() if field.is_optional: @@ -294,11 +317,24 @@ def EncodableValueFrom(field: Field, context: TypeLookupContext) -> EncodableVal return EncodableValue(context, field.data_type, attrs) -def CreateLookupContext(idl: Idl, cluster: Cluster): +def CreateLookupContext(idl: Idl, cluster: Cluster) -> TypeLookupContext: + """ + A filter to mark a lookup context to be within a specific cluster. + + This is used to specify how structure/enum/other names are looked up. + Generally one looks up within the specific cluster first; then, if the cluster does + not contain a definition, we look at the global namespace. + """ return TypeLookupContext(idl, cluster) -def CanGenerateSubscribe(attr: Attribute, lookup: TypeLookupContext): +def CanGenerateSubscribe(attr: Attribute, lookup: TypeLookupContext) -> bool: + """ + Filter that returns whether an attribute can be subscribed to. + + Uses the given attribute and the lookupContext to figure out the attribute + type. + """ # For backwards compatibility, we do not subscribe to structs # (although list of structs is ok ...) if attr.definition.is_list: @@ -313,12 +349,15 @@ class JavaGenerator(CodeGenerator): """ def __init__(self, storage: GeneratorStorage, idl: Idl): + """ + Initialization is specific to Java generation and adds the + filters required by the java .jinja templates to function. + """ super().__init__(storage, idl) self.jinja_env.filters['attributesWithCallback'] = attributesWithSupportedCallback self.jinja_env.filters['callbackName'] = CallbackName self.jinja_env.filters['commandCallbackName'] = CommandCallbackName - self.jinja_env.filters['clientClustersOnly'] = ClientClustersOnly self.jinja_env.filters['named'] = NamedFilter self.jinja_env.filters['toBoxedJavaType'] = ToBoxedJavaType self.jinja_env.filters['lowercaseFirst'] = LowercaseFirst @@ -327,6 +366,9 @@ def __init__(self, storage: GeneratorStorage, idl: Idl): self.jinja_env.filters['canGenerateSubscribe'] = CanGenerateSubscribe def internal_render_all(self): + """ + Renders the .cpp files required for JNI support.
+ """ # Every cluster has its own impl, to avoid # very large compilations (running out of RAM) for cluster in self.idl.clusters: diff --git a/scripts/idl/generators/types.py b/scripts/idl/generators/types.py index b67119249bb5b8..58743e865ba710 100644 --- a/scripts/idl/generators/types.py +++ b/scripts/idl/generators/types.py @@ -23,6 +23,13 @@ def ToPowerOfTwo(bits: int) -> int: + """ + Given a number, find the next power of two that is >= the given value. + + Can be used to figure out a variable size given non-standard bit sizes in + Matter: e.g. an int24 can be stored in an int32, so ToPowerOfTwo(24) == 32. + + """ # probably bit manipulation can be faster, but this should be ok as well result = 1 while result < bits: @@ -50,12 +57,18 @@ def power_of_two_bits(self): @dataclass class BasicString: + """ + Represents either a string or a binary string (blob). + """ idl_name: str is_binary: bool max_length: Union[int, None] = None class FundamentalType(enum.Enum): + """ + Native types, generally available across C++/ObjC/Java/Python/other. + """ BOOL = enum.auto() FLOAT = enum.auto() DOUBLE = enum.auto() @@ -89,6 +102,10 @@ def bits(self): @dataclass class IdlEnumType: + """ + An enumeration type. Enumerations are constants with an underlying + base type that is an integer. + """ idl_name: str base_type: BasicInteger @@ -103,6 +120,11 @@ def bits(self): @dataclass class IdlBitmapType: + """ + Bitmaps mark that each bit (or a subset of the bits) has a meaning. + + Examples include "feature maps" where bits represent whether a feature is available or not. + """ idl_name: str base_type: BasicInteger @@ -122,6 +144,16 @@ class IdlItemType(enum.Enum): @dataclass class IdlType: + """ + A type defined within the IDL. + + IDLs would generally only define structures, as all other types are + described by other means (enums/bitmaps/basic types etc.). + + However, since IDL parsing is purely syntactic and does not validate types, + we allow the option to have a type that is marked 'unknown' (likely invalid/never + defined). + """ idl_name: str item_type: IdlItemType @@ -191,6 +223,37 @@ class TypeLookupContext: Generally when looking for a struct/enum, the lookup will be first done at a cluster level, then at a global level. + + Example: + + ================ test.matter ============== + enum A {} + + server cluster X { + struct A {} + struct B {} + } + + server cluster Y { + enum C {} + } + =========================================== + + When considering a lookup context of global (i.e. cluster is not set) + "A" is defined as an enum (::A) + "B" is undefined + "C" is undefined + + When considering a lookup context of cluster X + "A" is defined as a struct (X::A) + "B" is defined as a struct (X::B) + "C" is undefined + + When considering a lookup context of cluster Y + "A" is defined as an enum (::A) + "B" is undefined + "C" is defined as an enum (Y::C) + """ def __init__(self, idl: matter_idl_types.Idl, cluster: Optional[matter_idl_types.Cluster]): @@ -198,12 +261,11 @@ def __init__(self, idl: matter_idl_types.Idl, cluster: Optional[matter_idl_types self.cluster = cluster def find_enum(self, name) -> Optional[matter_idl_types.Enum]: - if self.cluster: - for e in self.cluster.enums: - if e.name == name: - return e - - for e in self.idl.enums: + """ + Find the first enumeration matching the given name for the given + lookup rules (searches the cluster first, then globally).
+ """ + for e in self.all_enums: if e.name == name: return e @@ -225,7 +287,12 @@ def find_bitmap(self, name) -> Optional[matter_idl_types.Bitmap]: @property def all_enums(self): - """All enumerations, ordered by lookup prioroty.""" + """ + All enumerations, ordered by lookup priority. + + If an enum A is defined both in the cluster and globally, this WILL + return both instances; however, it will return the cluster version first. + """ if self.cluster: for e in self.cluster.enums: yield e @@ -234,14 +301,23 @@ def all_enums(self): @property def all_bitmaps(self): - """All structs, ordered by lookup prioroty.""" + """ + All bitmaps defined within this lookup context. + + Bitmaps are only defined at the cluster level. If the lookup context does not + include a cluster, the bitmap list will be empty. + """ if self.cluster: for b in self.cluster.bitmaps: yield b @property def all_structs(self): - """All structs, ordered by lookup prioroty.""" + """All structs, ordered by lookup priority. + + If a struct A is defined both in the cluster and globally, this WILL + return both instances; however, it will return the cluster version first. + """ if self.cluster: for e in self.cluster.structs: yield e @@ -249,14 +325,29 @@ def all_structs(self): yield e def is_enum_type(self, name: str): + """ + Determine if the given type name is an enumeration. + + Handles standard names (like enum8) as well as enumerations defined + within the current lookup context. + """ if name.lower() in ["enum8", "enum16", "enum32"]: return True return any(map(lambda e: e.name == name, self.all_enums)) def is_struct_type(self, name: str): + """ + Determine if the given type name is a type that is known to be a struct. + """ return any(map(lambda s: s.name == name, self.all_structs)) def is_bitmap_type(self, name: str): + """ + Determine if the given type name is a type that is known to be a bitmap. + + Handles both standard/ZCL names (like bitmap32) and types defined within + the current lookup context. + """ if name.lower() in ["bitmap8", "bitmap16", "bitmap24", "bitmap32", "bitmap64"]: return True @@ -265,7 +356,13 @@ def is_bitmap_type(self, name: str): def ParseDataType(data_type: DataType, lookup: TypeLookupContext) -> Union[BasicInteger, BasicString, FundamentalType, IdlType]: """ - Match the given string name to a potentially known type + Given an AST data type and a lookup context, match it to a type that can later + be used for generation. + + AST parsing is textual, so it does not understand what "foo" means. This method + looks up what "foo" actually means: it covers basic types (e.g. bool), + ZCL types (like enums or bitmaps) and does lookups to find structs/enums/bitmaps/etc. + that are defined in the given lookup context. """ lowercase_name = data_type.name.lower() diff --git a/scripts/idl/matter_idl_parser.py b/scripts/idl/matter_idl_parser.py index 7b38b3fb96b548..26977d7f71edf1 100755 --- a/scripts/idl/matter_idl_parser.py +++ b/scripts/idl/matter_idl_parser.py @@ -16,8 +16,29 @@ class MatterIdlTransformer(Transformer): - """A transformer capable to transform data - parsed by Lark according to matter_grammar.lark + """ + A transformer capable of transforming data parsed by Lark according to + matter_grammar.lark. + + Generally transforms a ".matter" file into an Abstract Syntax Tree (AST). + The end result will be a `matter_idl_types.Idl` value that represents the + entire parsed .matter file.
+ + The content of this file closely resembles the .lark input file, and its + purpose is to convert Lark tokens (that are generally identified by name) + into underlying Python types. + + Some documentation to get started is available at + https://lark-parser.readthedocs.io/en/latest/visitors.html#transformer + + The TLDR would be: + when the ".lark" grammar defines a rule like `foo: number`, the transformer + has the option to define a method called `foo` which will take the + parsed input (as strings unless transformed) and interpret it. + + The actual parameters to the methods depend on the rule's multiplicity and/or + optionality. + """ def number(self, tokens): @@ -221,10 +242,15 @@ def idl(self, items): def CreateParser(): + """ + Generates a parser that will process a ".matter" file into an IDL. + """ return Lark.open('matter_grammar.lark', rel_to=__file__, start='idl', parser='lalr', transformer=MatterIdlTransformer()) if __name__ == '__main__': + # This parser is generally not intended to be run as a stand-alone binary. + # The ability to run it directly is for debugging and to print out the parsed AST. import click import coloredlogs
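
As a quick orientation, here is a minimal, illustrative usage sketch of how the pieces in this patch fit together: parse a `.matter` file with `CreateParser()` and render the result with the Java generator. The import paths, the input file name, and the assumption that `parse()` returns the transformed `Idl` AST are not taken from the patch; `scripts/codegen.py` remains the actual command-line entry point.

```python
# Hypothetical end-to-end usage; assumes scripts/idl is on PYTHONPATH and that
# CreateParser().parse() returns the transformed `Idl` AST described above.
from matter_idl_parser import CreateParser
from generators import FileSystemGeneratorStorage
from generators.java import JavaGenerator

# Parse a .matter file into an AST (an Idl instance).
with open("example.matter") as stream:  # placeholder input file
    idl = CreateParser().parse(stream.read())

# Render the Java/JNI outputs into an output directory.
storage = FileSystemGeneratorStorage(output_dir="./out")
JavaGenerator(storage, idl).render(dry_run=False)  # dry_run=True only logs file names
```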
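The `CodeGenerator` docstrings above also imply how a new backend would be added: subclass `CodeGenerator`, register any language-specific Jinja filters in `__init__`, and emit one template per output file from `internal_render_all()`. Below is a rough sketch under those assumptions; the module paths, template path, output file names and template variables are hypothetical.

```python
# Sketch of a custom generator built on the CodeGenerator interface in this patch.
from generators import CodeGenerator, GeneratorStorage
from matter_idl_types import Idl  # assumed location of the AST types


class MarkdownSummaryGenerator(CodeGenerator):
    """Hypothetical generator writing one markdown summary per cluster."""

    def __init__(self, storage: GeneratorStorage, idl: Idl):
        super().__init__(storage, idl)
        # Common filters are registered by the base class; language-specific
        # filters would be added here, e.g.:
        # self.jinja_env.filters['someFilter'] = SomeFilter

    def internal_render_all(self):
        # One output per cluster keeps individual generated files small.
        for cluster in self.idl.clusters:
            self.internal_render_one_output(
                template_path="markdown/ClusterSummary.jinja",  # hypothetical template
                output_file_name="summary/%s.md" % cluster.name,
                vars={"cluster": cluster},
            )
```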