From 70ed01a4fb902f37ae57474aaf9aac8db60f0f2d Mon Sep 17 00:00:00 2001 From: Ilya Lakhin Date: Fri, 11 Nov 2022 19:00:03 +0700 Subject: [PATCH] Release 1.0.0 --- EULA.md | 247 +++ README.md | 56 + work/Cargo.toml | 39 + work/crates/derive/Cargo.toml | 74 + work/crates/derive/readme.md | 79 + work/crates/derive/src/lib.rs | 93 ++ .../derive/src/node/automata/conflicts.rs | 147 ++ work/crates/derive/src/node/automata/merge.rs | 170 ++ work/crates/derive/src/node/automata/mod.rs | 47 + work/crates/derive/src/node/automata/scope.rs | 73 + work/crates/derive/src/node/automata/skip.rs | 66 + .../src/node/automata/synchronization.rs | 147 ++ .../derive/src/node/automata/variables.rs | 390 +++++ .../derive/src/node/builder/constructor.rs | 223 +++ work/crates/derive/src/node/builder/kind.rs | 79 + work/crates/derive/src/node/builder/mod.rs | 660 ++++++++ work/crates/derive/src/node/builder/rule.rs | 244 +++ .../crates/derive/src/node/builder/variant.rs | 391 +++++ work/crates/derive/src/node/compiler/case.rs | 64 + .../derive/src/node/compiler/constructor.rs | 94 ++ .../derive/src/node/compiler/delimiters.rs | 146 ++ .../derive/src/node/compiler/function.rs | 863 ++++++++++ .../derive/src/node/compiler/generics.rs | 172 ++ .../derive/src/node/compiler/inserts.rs | 187 +++ work/crates/derive/src/node/compiler/mod.rs | 228 +++ .../derive/src/node/compiler/transitions.rs | 120 ++ .../derive/src/node/compiler/variables.rs | 160 ++ work/crates/derive/src/node/mod.rs | 68 + work/crates/derive/src/node/readme.md | 543 +++++++ work/crates/derive/src/node/regex/encode.rs | 151 ++ work/crates/derive/src/node/regex/inject.rs | 174 ++ work/crates/derive/src/node/regex/inline.rs | 135 ++ work/crates/derive/src/node/regex/mod.rs | 51 + work/crates/derive/src/node/regex/operand.rs | 178 ++ work/crates/derive/src/node/regex/operator.rs | 128 ++ work/crates/derive/src/node/regex/prefix.rs | 122 ++ .../derive/src/node/regex/references.rs | 158 ++ work/crates/derive/src/node/regex/skip.rs | 79 + work/crates/derive/src/node/regex/span.rs | 110 ++ work/crates/derive/src/node/regex/terminal.rs | 170 ++ work/crates/derive/src/token/characters.rs | 200 +++ work/crates/derive/src/token/compiler.rs | 281 ++++ work/crates/derive/src/token/entry.rs | 469 ++++++ work/crates/derive/src/token/mod.rs | 50 + work/crates/derive/src/token/readme.md | 284 ++++ work/crates/derive/src/token/regex.rs | 368 +++++ work/crates/derive/src/token/rule.rs | 134 ++ work/crates/derive/src/token/scope.rs | 98 ++ work/crates/derive/src/token/terminal.rs | 109 ++ work/crates/derive/src/token/transition.rs | 317 ++++ work/crates/derive/src/token/variant.rs | 246 +++ work/crates/derive/src/utils/automata.rs | 261 +++ work/crates/derive/src/utils/context.rs | 197 +++ work/crates/derive/src/utils/deterministic.rs | 158 ++ work/crates/derive/src/utils/expression.rs | 169 ++ work/crates/derive/src/utils/facade.rs | 139 ++ work/crates/derive/src/utils/map.rs | 135 ++ work/crates/derive/src/utils/mod.rs | 61 + work/crates/derive/src/utils/multimap.rs | 102 ++ work/crates/derive/src/utils/predictable.rs | 56 + work/crates/derive/src/utils/set.rs | 160 ++ work/crates/derive/src/utils/state.rs | 42 + work/crates/derive/src/utils/symbol.rs | 101 ++ work/crates/derive/src/utils/transitions.rs | 115 ++ work/crates/examples/Cargo.toml | 105 ++ work/crates/examples/benches/data.rs | 584 +++++++ work/crates/examples/benches/frameworks.rs | 163 ++ .../crates/examples/benches/frameworks/nom.rs | 235 +++ .../examples/benches/frameworks/ropey.rs | 101 ++ 
.../examples/benches/frameworks/treesitter.rs | 169 ++ work/crates/examples/benches/layer.rs | 279 ++++ work/crates/examples/benches/main.rs | 81 + work/crates/examples/readme.md | 117 ++ work/crates/examples/src/json/formatter.rs | 108 ++ work/crates/examples/src/json/lexis.rs | 87 + work/crates/examples/src/json/mod.rs | 40 + work/crates/examples/src/json/syntax.rs | 80 + work/crates/examples/src/lib.rs | 41 + work/crates/examples/tests/balance.rs | 278 ++++ work/crates/examples/tests/document.rs | 573 +++++++ work/crates/examples/tests/iteration.rs | 155 ++ work/crates/examples/tests/json.rs | 517 ++++++ work/crates/examples/tests/position.rs | 91 ++ work/crates/examples/tests/token.rs | 73 + work/crates/main/Cargo.toml | 66 + work/crates/main/readme.md | 389 +++++ work/crates/main/src/arena/id.rs | 140 ++ work/crates/main/src/arena/mod.rs | 14 + work/crates/main/src/arena/readme.md | 117 ++ work/crates/main/src/arena/reference.rs | 109 ++ work/crates/main/src/arena/repository.rs | 931 +++++++++++ work/crates/main/src/arena/sequence.rs | 290 ++++ work/crates/main/src/incremental/cursor.rs | 267 +++ work/crates/main/src/incremental/document.rs | 1173 +++++++++++++ work/crates/main/src/incremental/errors.rs | 91 ++ work/crates/main/src/incremental/lexis.rs | 469 ++++++ work/crates/main/src/incremental/mod.rs | 45 + .../main/src/incremental/storage/branch.rs | 1354 ++++++++++++++++ .../main/src/incremental/storage/cache.rs | 55 + .../main/src/incremental/storage/child.rs | 646 ++++++++ .../main/src/incremental/storage/item.rs | 620 +++++++ .../main/src/incremental/storage/mod.rs | 959 +++++++++++ .../main/src/incremental/storage/nesting.rs | 116 ++ .../main/src/incremental/storage/page.rs | 895 ++++++++++ .../src/incremental/storage/references.rs | 78 + .../main/src/incremental/storage/tree.rs | 1444 +++++++++++++++++ .../main/src/incremental/storage/utils.rs | 181 +++ work/crates/main/src/incremental/syntax.rs | 520 ++++++ work/crates/main/src/lexis/buffer.rs | 464 ++++++ work/crates/main/src/lexis/chunks.rs | 103 ++ work/crates/main/src/lexis/code.rs | 239 +++ work/crates/main/src/lexis/content.rs | 278 ++++ work/crates/main/src/lexis/cursor.rs | 455 ++++++ work/crates/main/src/lexis/mod.rs | 68 + work/crates/main/src/lexis/position.rs | 310 ++++ work/crates/main/src/lexis/readme.md | 95 ++ work/crates/main/src/lexis/session.rs | 332 ++++ work/crates/main/src/lexis/simple.rs | 118 ++ work/crates/main/src/lexis/site.rs | 317 ++++ work/crates/main/src/lexis/span.rs | 356 ++++ work/crates/main/src/lexis/token.rs | 510 ++++++ work/crates/main/src/lexis/utils.rs | 100 ++ work/crates/main/src/lib.rs | 56 + work/crates/main/src/std.rs | 198 +++ work/crates/main/src/syntax/buffer.rs | 222 +++ work/crates/main/src/syntax/cluster.rs | 332 ++++ work/crates/main/src/syntax/error.rs | 489 ++++++ work/crates/main/src/syntax/mod.rs | 61 + work/crates/main/src/syntax/no.rs | 100 ++ work/crates/main/src/syntax/node.rs | 501 ++++++ work/crates/main/src/syntax/readme.md | 81 + work/crates/main/src/syntax/session.rs | 225 +++ work/crates/main/src/syntax/simple.rs | 105 ++ work/crates/main/src/syntax/transducer.rs | 625 +++++++ work/crates/main/src/syntax/tree.rs | 117 ++ work/rustfmt.toml | 40 + 136 files changed, 33321 insertions(+) create mode 100644 EULA.md create mode 100644 README.md create mode 100644 work/Cargo.toml create mode 100644 work/crates/derive/Cargo.toml create mode 100644 work/crates/derive/readme.md create mode 100644 work/crates/derive/src/lib.rs create mode 100644 
work/crates/derive/src/node/automata/conflicts.rs create mode 100644 work/crates/derive/src/node/automata/merge.rs create mode 100644 work/crates/derive/src/node/automata/mod.rs create mode 100644 work/crates/derive/src/node/automata/scope.rs create mode 100644 work/crates/derive/src/node/automata/skip.rs create mode 100644 work/crates/derive/src/node/automata/synchronization.rs create mode 100644 work/crates/derive/src/node/automata/variables.rs create mode 100644 work/crates/derive/src/node/builder/constructor.rs create mode 100644 work/crates/derive/src/node/builder/kind.rs create mode 100644 work/crates/derive/src/node/builder/mod.rs create mode 100644 work/crates/derive/src/node/builder/rule.rs create mode 100644 work/crates/derive/src/node/builder/variant.rs create mode 100644 work/crates/derive/src/node/compiler/case.rs create mode 100644 work/crates/derive/src/node/compiler/constructor.rs create mode 100644 work/crates/derive/src/node/compiler/delimiters.rs create mode 100644 work/crates/derive/src/node/compiler/function.rs create mode 100644 work/crates/derive/src/node/compiler/generics.rs create mode 100644 work/crates/derive/src/node/compiler/inserts.rs create mode 100644 work/crates/derive/src/node/compiler/mod.rs create mode 100644 work/crates/derive/src/node/compiler/transitions.rs create mode 100644 work/crates/derive/src/node/compiler/variables.rs create mode 100644 work/crates/derive/src/node/mod.rs create mode 100644 work/crates/derive/src/node/readme.md create mode 100644 work/crates/derive/src/node/regex/encode.rs create mode 100644 work/crates/derive/src/node/regex/inject.rs create mode 100644 work/crates/derive/src/node/regex/inline.rs create mode 100644 work/crates/derive/src/node/regex/mod.rs create mode 100644 work/crates/derive/src/node/regex/operand.rs create mode 100644 work/crates/derive/src/node/regex/operator.rs create mode 100644 work/crates/derive/src/node/regex/prefix.rs create mode 100644 work/crates/derive/src/node/regex/references.rs create mode 100644 work/crates/derive/src/node/regex/skip.rs create mode 100644 work/crates/derive/src/node/regex/span.rs create mode 100644 work/crates/derive/src/node/regex/terminal.rs create mode 100644 work/crates/derive/src/token/characters.rs create mode 100644 work/crates/derive/src/token/compiler.rs create mode 100644 work/crates/derive/src/token/entry.rs create mode 100644 work/crates/derive/src/token/mod.rs create mode 100644 work/crates/derive/src/token/readme.md create mode 100644 work/crates/derive/src/token/regex.rs create mode 100644 work/crates/derive/src/token/rule.rs create mode 100644 work/crates/derive/src/token/scope.rs create mode 100644 work/crates/derive/src/token/terminal.rs create mode 100644 work/crates/derive/src/token/transition.rs create mode 100644 work/crates/derive/src/token/variant.rs create mode 100644 work/crates/derive/src/utils/automata.rs create mode 100644 work/crates/derive/src/utils/context.rs create mode 100644 work/crates/derive/src/utils/deterministic.rs create mode 100644 work/crates/derive/src/utils/expression.rs create mode 100644 work/crates/derive/src/utils/facade.rs create mode 100644 work/crates/derive/src/utils/map.rs create mode 100644 work/crates/derive/src/utils/mod.rs create mode 100644 work/crates/derive/src/utils/multimap.rs create mode 100644 work/crates/derive/src/utils/predictable.rs create mode 100644 work/crates/derive/src/utils/set.rs create mode 100644 work/crates/derive/src/utils/state.rs create mode 100644 work/crates/derive/src/utils/symbol.rs create mode 
100644 work/crates/derive/src/utils/transitions.rs create mode 100644 work/crates/examples/Cargo.toml create mode 100644 work/crates/examples/benches/data.rs create mode 100644 work/crates/examples/benches/frameworks.rs create mode 100644 work/crates/examples/benches/frameworks/nom.rs create mode 100644 work/crates/examples/benches/frameworks/ropey.rs create mode 100644 work/crates/examples/benches/frameworks/treesitter.rs create mode 100644 work/crates/examples/benches/layer.rs create mode 100644 work/crates/examples/benches/main.rs create mode 100644 work/crates/examples/readme.md create mode 100644 work/crates/examples/src/json/formatter.rs create mode 100644 work/crates/examples/src/json/lexis.rs create mode 100644 work/crates/examples/src/json/mod.rs create mode 100644 work/crates/examples/src/json/syntax.rs create mode 100644 work/crates/examples/src/lib.rs create mode 100644 work/crates/examples/tests/balance.rs create mode 100644 work/crates/examples/tests/document.rs create mode 100644 work/crates/examples/tests/iteration.rs create mode 100644 work/crates/examples/tests/json.rs create mode 100644 work/crates/examples/tests/position.rs create mode 100644 work/crates/examples/tests/token.rs create mode 100644 work/crates/main/Cargo.toml create mode 100644 work/crates/main/readme.md create mode 100644 work/crates/main/src/arena/id.rs create mode 100644 work/crates/main/src/arena/mod.rs create mode 100644 work/crates/main/src/arena/readme.md create mode 100644 work/crates/main/src/arena/reference.rs create mode 100644 work/crates/main/src/arena/repository.rs create mode 100644 work/crates/main/src/arena/sequence.rs create mode 100644 work/crates/main/src/incremental/cursor.rs create mode 100644 work/crates/main/src/incremental/document.rs create mode 100644 work/crates/main/src/incremental/errors.rs create mode 100644 work/crates/main/src/incremental/lexis.rs create mode 100644 work/crates/main/src/incremental/mod.rs create mode 100644 work/crates/main/src/incremental/storage/branch.rs create mode 100644 work/crates/main/src/incremental/storage/cache.rs create mode 100644 work/crates/main/src/incremental/storage/child.rs create mode 100644 work/crates/main/src/incremental/storage/item.rs create mode 100644 work/crates/main/src/incremental/storage/mod.rs create mode 100644 work/crates/main/src/incremental/storage/nesting.rs create mode 100644 work/crates/main/src/incremental/storage/page.rs create mode 100644 work/crates/main/src/incremental/storage/references.rs create mode 100644 work/crates/main/src/incremental/storage/tree.rs create mode 100644 work/crates/main/src/incremental/storage/utils.rs create mode 100644 work/crates/main/src/incremental/syntax.rs create mode 100644 work/crates/main/src/lexis/buffer.rs create mode 100644 work/crates/main/src/lexis/chunks.rs create mode 100644 work/crates/main/src/lexis/code.rs create mode 100644 work/crates/main/src/lexis/content.rs create mode 100644 work/crates/main/src/lexis/cursor.rs create mode 100644 work/crates/main/src/lexis/mod.rs create mode 100644 work/crates/main/src/lexis/position.rs create mode 100644 work/crates/main/src/lexis/readme.md create mode 100644 work/crates/main/src/lexis/session.rs create mode 100644 work/crates/main/src/lexis/simple.rs create mode 100644 work/crates/main/src/lexis/site.rs create mode 100644 work/crates/main/src/lexis/span.rs create mode 100644 work/crates/main/src/lexis/token.rs create mode 100644 work/crates/main/src/lexis/utils.rs create mode 100644 work/crates/main/src/lib.rs create mode 100644 
work/crates/main/src/std.rs create mode 100644 work/crates/main/src/syntax/buffer.rs create mode 100644 work/crates/main/src/syntax/cluster.rs create mode 100644 work/crates/main/src/syntax/error.rs create mode 100644 work/crates/main/src/syntax/mod.rs create mode 100644 work/crates/main/src/syntax/no.rs create mode 100644 work/crates/main/src/syntax/node.rs create mode 100644 work/crates/main/src/syntax/readme.md create mode 100644 work/crates/main/src/syntax/session.rs create mode 100644 work/crates/main/src/syntax/simple.rs create mode 100644 work/crates/main/src/syntax/transducer.rs create mode 100644 work/crates/main/src/syntax/tree.rs create mode 100644 work/rustfmt.toml diff --git a/EULA.md b/EULA.md new file mode 100644 index 0000000..35eca0a --- /dev/null +++ b/EULA.md @@ -0,0 +1,247 @@ +# End User License Agreement + +This End User License Agreement (the "Agreement") is a legal document detailing +your rights and obligations related to the use of my proprietary computer software work (the "Work"). + +By downloading or using this Work, you agree to be bound by the terms +of this Agreement, and this Agreement will be effective upon the first of those +events to occur. + +If you do not or cannot agree to the terms of this Agreement, do not use +this Work. + +"I", "me" or "my" refers to Ilya Lakhin (Илья Александрович Лахин in Russian), +the exclusive copyright holder of the Work responsible for providing this Work +to you under the terms of this Agreement. + +"You" or "your" refers to an individual or a legal entity exercising +permissions granted by this Agreement. + +## The Work. + +This Agreement licenses a work called "Lady Deirdre", a computer software +technology made for the development of programming language source code +analysis, compilation, translation and interpretation software. This technology +has a number of applications including (but not limited to) the development of +programming language compilers, script language interpreters, live coding +systems, source code editor software, and extensions for integrated development +environment software. + +The Work consists of a set of source code files, documentation files, +example files, compilation scripts, and related content files. As a whole, this +Work represents a set of computer software tools and compilable modules with +application programming interfaces to be integrated into third party end +software products. + +This Work was created and published worldwide by me through the GitHub public +repository in 2022: + +https://github.com/Eliah-Lakhin/lady-deirdre + +The Work licensed to you under this Agreement includes the foregoing published +files that are copied to your computer, any future updates and upgrades to this +Work made available to you by me, and any versions of any of the foregoing +that you make under the License. + +## The License. + +I grant you a non-exclusive, non-transferable, non-sublicensable worldwide +license to use, modify and distribute this Work solely in ways expressly +permitted by the terms of this Agreement (the "License"). + +No license or other rights in any work or intellectual property rights owned +by me other than this Work are granted under this Agreement. + +## Personal Use. + +As long as you are not violating this Agreement or applicable law, you can +privately use this Work however you want. For example, you can use this Work +to try Lady Deirdre technology, or for personal educational, training, or +research purposes.
+ +"Private use" means personal use of the Work such that you do not share +the Work or anything you make with it with the third party of this Agreement. + +## Derivative Work. + +You can make a Derivative Work from this Work if you assign to me +this Derivative Work. + +"Assign" means that without any additional terms and conditions other than the +terms of this Agreement you grant me a fully-paid, exclusive, irrevocable, +unlimited in time, sublicensable, transferable license to use, reproduce, +distribute, publicly display, publicly perform, make, sell, offer to sell, +import, modify and make derivative works based on, and otherwise exploit your +Derivative Work for all current and future methods and forms of exploitation +in any country. + +You represent and warrant that you have sufficient rights (including, but not +limited to intellectual property rights, moral rights and other personal rights) +in the Derivative Work that you provide to me to grant me the rights described +above. If any of the rights to be licensed to me may not be licensed under +applicable law, you hereby waive and agree not to assert any of those rights. + +"Derivative Work" means any work that is based on the Work and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work. + +For the purposes of this Agreement, the term "Derivative Work" shall not include +works that remain separable from the Work, that uses application programming +interfaces of programming modules of the Work only, and that do not copy, +distribute, display, sell the Work or any parts of the Work to the third +parties in source or compiled form. + +For example, if you fork my public GitHub repository to make some changes to +the Work, or if you make a pull request to my GitHub repository with the changes +to the Work, you assign those changes to me. + +But if you make separated computer software that uses the Work through +the application programming interfaces of compilable modules of the Work only, +and without any changes to the Work, and your work does not incorporate or +distribute my Work in source or compiled form, this separated computer software +is not a Derivative Work. + +## Extension Work. + +In purpose to extend functional capabilities of the Work as a computer software, +you can make a work separable from my Work that will use my Work through +the application programming interfaces of compilable modules of my Work only, +and that will not incorporate or distribute my Work or any part of my Work +in source or compiled form(the "Extension Work"). + +As between you and me, you own all rights to the Extension Work, and this +Agreement does not limit your rights to your Extension Work under applicable +law as long as distribution, selling, using, reproducing, public displaying, +public performing, modifying or making derivatives of your Extension Work +does not permit the third party of this Agreement to incorporate my Work and +does not extend my liability to the third party of this Agreement. + +## Products and Services. + +You can develop a computer software product or a service that incorporates +the Work without changes in source or compiled form(the "Product"). + +You may only publicly distribute this Product as expressively permitted by +the terms of this section of the Agreement, and you agree to pay me Quarterly +Royalties from Revenue generated by your Product under the terms of this section +of the Agreement. + + 1. 
You must inform your Product Users of the copyright attribution of + the Work by including the name of the Work, my name as the copyright holder + of the Work, and the link to the GitHub repository where my Work was + published (as described in "The Work" section of this Agreement). + + 2. You can permit your Product Users to use, reproduce, display, and publicly + perform the Work in compiled form as an inseparable part of your Product + and for the purpose of your Product's functional availability only. + + 3. You may not permit your Product Users to incorporate the Work into their + own products, projects, or other services. + + 4. In your Product's end user license agreement, you must include the + warranty terms necessary to avoid infringing the warranty terms of this + Agreement. + + 5. If the total Revenue generated by your Product during the Product lifetime + is less than or equal to $200,000 USD (or the equivalent market value in other + currencies), you will not owe me royalty payments under this + Agreement (the "Royalty-Free Limit"). + + 6. If the total Revenue generated by your Product during the Product lifetime + has exceeded the Royalty-Free Limit, the Agreement obligates you to pay me + "Quarterly Royalties", and this obligation will be effective upon the + occurrence of this event. + + 7. Quarterly Royalties are calculated by multiplying the calendar quarterly + gross Revenue by a factor of 0.02 (two percent). Within 30 days after + the end of each calendar quarter in which you earn Revenue, you must report + the quarterly Revenue to me on a per Product basis, and pay me 2% of + the quarterly Revenue. + + 8. To the extent permitted by applicable law, all Quarterly Royalty payments are + non-refundable under all circumstances. + + 9. I reserve the right to charge an additional 1% late fee per calendar quarter, + compounding, for any amount unpaid after the required due date. + + 10. You are responsible for all taxes and transactional expenses on all + payments required to be made by you under this Agreement other than + taxes on my income. + +"Product Users" means any individual or entity who receives a copy of your +Product, or to whom you provide a service (directly or through third party +distributors or service providers) using your Product. + +"Revenue" means the total gross revenue generated directly by your Product +worldwide, regardless of who receives the revenue. Revenue sources +include (but are not limited to) digital or retail sales of the +Product, revenue from paid subscriptions to the Product as a service, donations +and crowdfunding campaigns associated with the Product, and any payments from third +party distributors or third party agents distributing, selling or offering +the Product. + +## Distribution. + +You can distribute or display copies of the Work without changes through a network +to third parties to this Agreement who have the legal right to receive these +copies, as long as the distribution or display of the Work is free of +charge to the third party, and free of charge to me, and the distribution or +display will not extend my liability to third parties to this Agreement.
+ +When you distribute or display the Work to a third party, you must ensure that +the third party also receives the text of this Agreement or a link +to this Agreement on GitHub: + +https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md + +For example, GitHub or any other Internet content distribution service that I +use to publish this Work or to publish any updates and upgrades to the Work also +receives the right under this term of the Agreement to distribute or to display +my Work without changes to users who are also bound by the terms of this Agreement. + +## Warranty. + +**As far as the law allows, this software comes as is, without any warranty or +condition, and I will not be liable to anyone for any damages related to +this software or this License, under any kind of legal claim.** + +## Third Party Software. + +By entering into this Agreement, you accept the license terms of the +third party software incorporated into this Work, which take precedence over any +inconsistencies with the terms of this Agreement. You agree that the owners of +the third party software are intended third party beneficiaries of this Agreement +in relation to your use of the third party software incorporated into this Work. + +When you make a Derivative Work, make any contribution to this Work, +or otherwise use this Work in source or compiled form, you are not permitted +(and you may not permit others) to include, incorporate, combine or mix any +third party software or parts of it in source or compiled form that would +directly or indirectly require that all or a part of this Work be governed under +any terms other than those of this Agreement. + +In particular, you may not incorporate any third party software licensed under +the GNU General Public License into this Work. + +## Amendments, Continuation, Additional Terms. + +I reserve the right to change and amend the terms of this Agreement, +to replace or amend this Agreement with other public license terms, +or to stop offering this Work to the public without offering any replacement. + +Once you have agreed to the new terms and amendments, the new terms will +supersede the terms agreed previously. + +You are not required to accept new terms or amendments, and this Agreement +will continue to govern your use of the Work you already have access to. +But if you do not or cannot agree to the new terms or amendments, you will not +be allowed to use any new versions, updates, upgrades and changes in this Work +governed by the new terms and amendments. + +This Agreement does not supersede, amend or otherwise affect any other agreements +you may have with me. + +This Agreement does not obligate me to make new versions of or upgrades +to the Work, or to make available for access or download any versions or +upgrades to this Work. diff --git a/README.md b/README.md new file mode 100644 index 0000000..6c47448 --- /dev/null +++ b/README.md @@ -0,0 +1,56 @@ +# Lady Deirdre.
+ +[![Lady Deirdre Main Crate API Docs](https://img.shields.io/docsrs/lady-deirdre?label=Main%20Docs)](https://docs.rs/lady-deirdre) +[![Lady Deirdre Macro Crate API Docs](https://img.shields.io/docsrs/lady-deirdre-derive?label=Macro%20Docs)](https://docs.rs/lady-deirdre-derive) +[![Lady Deirdre Main Crate](https://img.shields.io/crates/v/lady-deirdre?label=Main%20Crate)](https://crates.io/crates/lady-deirdre) +[![Lady Deirdre Macro Crate](https://img.shields.io/crates/v/lady-deirdre-derive?label=Macro%20Crate)](https://crates.io/crates/lady-deirdre-derive) + +Compiler front-end foundation technology. + +If you want to create your own programming language with IDE support from +day one, or if you are going to develop a new IDE from scratch, or a programming +language LSP plugin, this Technology is for you! + +Lady Deirdre provides a framework for developing a Lexical Scanner, Syntax Parser and +Semantic Analyser that can work in a live coding environment, applying +user-input changes incrementally to all underlying data structures. + +This Technology represents a set of essential instruments to develop modern +programming language compilers with seamless IDE integration. + +**Features**: + + - Written entirely in Rust. + - Derive-macros to define PL Grammar directly on Enum types. + - Smart error recovery system out of the box. + - Dependency-free no-std ready API. + - Works faster than Tree Sitter. + +**Links:** + - [Main Crate API Documentation](https://docs.rs/lady-deirdre). + - [Macro Crate API Documentation](https://docs.rs/lady-deirdre-derive). + - [Repository](https://github.com/Eliah-Lakhin/lady-deirdre). + - [Examples, Tests, Benchmarks](https://github.com/Eliah-Lakhin/lady-deirdre/tree/master/work/crates/examples). + - [End User License Agreement](https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md). + +**This Work is proprietary software with source available code.** + +To copy, use, distribute, and contribute to this Work, you must agree to +the terms of the [End User License Agreement](https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md). + +The Agreement lets you use this Work for commercial and non-commercial purposes. +Commercial use of the Work is free of charge to start, but the Agreement +obligates you to pay me royalties under certain conditions. + +If you want to contribute to the source code of this Work, the Agreement +obligates you to assign me all exclusive rights to the Derivative Work made by +you (this includes GitHub forks and pull requests to my repository). + +The Agreement does not limit the rights of third party software developers as +long as the third party software uses the public API of this Work only, and the +third party software does not incorporate or distribute this Work directly. + +If you do not or cannot agree to the terms of this Agreement, do not use +this Work. + +Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). All rights reserved. diff --git a/work/Cargo.toml b/work/Cargo.toml new file mode 100644 index 0000000..325a734 --- /dev/null +++ b/work/Cargo.toml @@ -0,0 +1,39 @@ +################################################################################ +# This file is a part of the "Lady Deirdre" Work, # +# a compiler front-end foundation technology. # +# # +# This Work is a proprietary software with source available code.
# +# # +# To copy, use, distribute, and contribute into this Work you must agree to # +# the terms of the End User License Agreement: # +# # +# https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. # +# # +# The Agreement let you use this Work in commercial and non-commercial # +# purposes. Commercial use of the Work is free of charge to start, # +# but the Agreement obligates you to pay me royalties # +# under certain conditions. # +# # +# If you want to contribute into the source code of this Work, # +# the Agreement obligates you to assign me all exclusive rights to # +# the Derivative Work or contribution made by you # +# (this includes GitHub forks and pull requests to my repository). # +# # +# The Agreement does not limit rights of the third party software developers # +# as long as the third party software uses public API of this Work only, # +# and the third party software does not incorporate or distribute # +# this Work directly. # +# # +# AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY # +# OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES # +# RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. # +# # +# If you do not or cannot agree to the terms of this Agreement, # +# do not use this Work. # +# # +# Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). # +# All rights reserved. # +################################################################################ + +[workspace] +members = ["crates/*"] diff --git a/work/crates/derive/Cargo.toml b/work/crates/derive/Cargo.toml new file mode 100644 index 0000000..ac80913 --- /dev/null +++ b/work/crates/derive/Cargo.toml @@ -0,0 +1,74 @@ +################################################################################ +# This file is a part of the "Lady Deirdre" Work, # +# a compiler front-end foundation technology. # +# # +# This Work is a proprietary software with source available code. # +# # +# To copy, use, distribute, and contribute into this Work you must agree to # +# the terms of the End User License Agreement: # +# # +# https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. # +# # +# The Agreement let you use this Work in commercial and non-commercial # +# purposes. Commercial use of the Work is free of charge to start, # +# but the Agreement obligates you to pay me royalties # +# under certain conditions. # +# # +# If you want to contribute into the source code of this Work, # +# the Agreement obligates you to assign me all exclusive rights to # +# the Derivative Work or contribution made by you # +# (this includes GitHub forks and pull requests to my repository). # +# # +# The Agreement does not limit rights of the third party software developers # +# as long as the third party software uses public API of this Work only, # +# and the third party software does not incorporate or distribute # +# this Work directly. # +# # +# AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY # +# OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES # +# RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. # +# # +# If you do not or cannot agree to the terms of this Agreement, # +# do not use this Work. # +# # +# Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). # +# All rights reserved. 
# +################################################################################ + +[package] +name = "lady-deirdre-derive" +version = "1.0.0" +authors = ["Ilya Lakhin (Илья Александрович Лахин) "] +edition = "2021" +description = "Compiler front-end foundation technology. Macro crate." +keywords = ["parsing", "parser", "incremental", "compiler", "editor"] +categories = ["compilers", "data-structures", "no-std", "parsing", "text-editors"] +readme="./readme.md" +license-file="../../../EULA.md" +documentation = "https://docs.rs/lady-deirdre-derive" +repository = "https://github.com/Eliah-Lakhin/lady-deirdre" +rust-version = "1.65" +publish = true +autobins = false +autoexamples = false +autotests = false +autobenches = false + +[features] +default = ["std"] + +# Turns on the Rust Standard Library dependency. +std = [] + +[lib] +proc-macro = true + +[dependencies.proc-macro2] +version = "1.0" + +[dependencies.syn] +version = "1.0" +features = ["default", "full", "extra-traits"] + +[dependencies.quote] +version = "1.0" diff --git a/work/crates/derive/readme.md b/work/crates/derive/readme.md new file mode 100644 index 0000000..242b7c7 --- /dev/null +++ b/work/crates/derive/readme.md @@ -0,0 +1,79 @@ +# Lady Deirdre. + +[![Lady Deirdre Main Crate API Docs](https://img.shields.io/docsrs/lady-deirdre?label=Main%20Docs)](https://docs.rs/lady-deirdre) +[![Lady Deirdre Macro Crate API Docs](https://img.shields.io/docsrs/lady-deirdre-derive?label=Macro%20Docs)](https://docs.rs/lady-deirdre-derive) +[![Lady Deirdre Main Crate](https://img.shields.io/crates/v/lady-deirdre?label=Main%20Crate)](https://crates.io/crates/lady-deirdre) +[![Lady Deirdre Macro Crate](https://img.shields.io/crates/v/lady-deirdre-derive?label=Macro%20Crate)](https://crates.io/crates/lady-deirdre-derive) + +Compiler front-end foundation technology. + +If you want to create your own programming language with IDE support from +day one, or if you are going to develop a new IDE from scratch, or a programming +language LSP plugin, this Technology is for you! + +Lady Deirdre provides a framework for developing a Lexical Scanner, Syntax Parser and +Semantic Analyser that can work in a live coding environment, applying +user-input changes incrementally to all underlying data structures. + +This Technology represents a set of essential instruments to develop modern +programming language compilers with seamless IDE integration. + +**Features**: + + - Written entirely in Rust. + - Derive-macros to define PL Grammar directly on Enum types. + - Smart error recovery system out of the box. + - Dependency-free no-std ready API. + - Works faster than Tree Sitter. + +**Links:** + - [Main Crate API Documentation](https://docs.rs/lady-deirdre). + - [Macro Crate API Documentation](https://docs.rs/lady-deirdre-derive). + - [Repository](https://github.com/Eliah-Lakhin/lady-deirdre). + - [Examples, Tests, Benchmarks](https://github.com/Eliah-Lakhin/lady-deirdre/tree/master/work/crates/examples). + - [End User License Agreement](https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md). + +**This Work is proprietary software with source available code.** + +To copy, use, distribute, and contribute to this Work, you must agree to +the terms of the +[End User License Agreement](https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md). + +The Agreement lets you use this Work for commercial and non-commercial purposes.
+Commercial use of the Work is free of charge to start, but the Agreement +obligates you to pay me royalties under certain conditions. + +If you want to contribute to the source code of this Work, the Agreement +obligates you to assign me all exclusive rights to the Derivative Work made by +you (this includes GitHub forks and pull requests to my repository). + +The Agreement does not limit the rights of third party software developers as +long as the third party software uses the public API of this Work only, and the +third party software does not incorporate or distribute this Work directly. + +If you do not or cannot agree to the terms of this Agreement, do not use +this Work. + +Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). All rights reserved. + +# Macro Crate API Documentation. + +This Crate provides two optional companion macros to the +[`Main Crate`](https://docs.rs/lady-deirdre) to construct +a Lexical Scanner and Syntax Parser using Rust derive syntax on enum types. + +The +[Token](https://docs.rs/lady-deirdre-derive/latest/lady_deirdre_derive/derive.Token.html) +macro constructs a Lexical Scanner from a set of user-defined regular +expressions specified directly on enum variants using macro-attributes. +The +[Node](https://docs.rs/lady-deirdre-derive/latest/lady_deirdre_derive/derive.Node.html) +macro, in turn, constructs a Syntax Parser from a set of +user-defined LL(1) grammar rules over the Token variants. + +Both macros implement the +[Token](https://docs.rs/lady-deirdre/latest/lady_deirdre/lexis/trait.Token.html) +and +[Node](https://docs.rs/lady-deirdre/latest/lady_deirdre/syntax/trait.Node.html) +traits respectively, and are considered the primary recommended way to define +a Programming Language grammar. diff --git a/work/crates/derive/src/lib.rs b/work/crates/derive/src/lib.rs new file mode 100644 index 0000000..50fa1f7 --- /dev/null +++ b/work/crates/derive/src/lib.rs @@ -0,0 +1,93 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work.
// +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +#![doc = include_str!("../readme.md")] +//TODO check warnings regularly +#![allow(warnings)] + +#[macro_use] +extern crate quote; + +#[macro_use] +extern crate syn; + +extern crate core; +extern crate proc_macro; + +mod node; +mod token; +mod utils; + +#[doc = include_str!("./token/readme.md")] +#[proc_macro_derive(Token, attributes(define, rule, precedence, constructor, mismatch))] +pub fn token(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + // panic!( + // "{}", + // proc_macro::TokenStream::from(parse_macro_input!(input as token::Token)) + // ); + + parse_macro_input!(input as token::Token).into() + + // (quote! {}).into() +} + +#[doc = include_str!("./node/readme.md")] +#[proc_macro_derive( + Node, + attributes( + token, + error, + skip, + define, + rule, + root, + comment, + synchronization, + constructor, + default, + ) +)] +pub fn node(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + // panic!( + // "{}", + // proc_macro::TokenStream::from(parse_macro_input!(input as node::Node)) + // ); + + parse_macro_input!(input as node::Node).into() + + // (quote! {}).into() +} diff --git a/work/crates/derive/src/node/automata/conflicts.rs b/work/crates/derive/src/node/automata/conflicts.rs new file mode 100644 index 0000000..da74ac9 --- /dev/null +++ b/work/crates/derive/src/node/automata/conflicts.rs @@ -0,0 +1,147 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Ident; +use syn::{spanned::Spanned, Error, Result}; + +use crate::{ + node::{ + automata::{scope::SyntaxState, NodeAutomata}, + builder::Builder, + regex::terminal::Terminal, + }, + utils::{Map, PredictableCollection}, +}; + +impl CheckConflicts for NodeAutomata { + fn check_conflicts(&self, builder: &Builder, allow_skips: bool) -> Result<()> { + struct OutgoingView<'a> { + map: Map<&'a SyntaxState, Map<&'a Ident, &'a Terminal>>, + } + + impl<'a> OutgoingView<'a> { + fn insert( + &mut self, + from: &'a SyntaxState, + token: &'a Ident, + terminal: &'a Terminal, + ) -> Result<()> { + let map = self.map.entry(from).or_insert_with(|| Map::empty()); + + if let Some(existed) = map.insert(token, terminal) { + let mut message = String::new(); + + match terminal { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { name, .. } => { + message.push_str(&format!( + "Token matching \"${}\" conflicts with ", + name.to_string() + )); + } + + Terminal::Node { name, .. } => { + message.push_str(&format!( + "Rule {:?} with \"${}\" token in the leftmost position conflicts \ + with ", + name.to_string(), + token.to_string(), + )); + } + } + + match existed { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { .. } => { + message.push_str("matching of the same token in this expression."); + } + + Terminal::Node { name, .. } => { + message.push_str(&format!( + "rule {:?} that contains the same token matching in its leftmost \ + position.", + name.to_string(), + )); + } + } + + return Err(Error::new(terminal.span(), message)); + } + + Ok(()) + } + } + + let mut view = OutgoingView { map: Map::empty() }; + + for (from, through, _) in &self.transitions { + match through { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { name, capture } => { + if let Some(capture) = capture { + if !allow_skips && builder.skip_leftmost().tokens().contains(name) { + return Err(Error::new( + name.span(), + format!( + "Token capturing \"{}: ${}\" conflicts with Skip expression.", + capture, name, + ), + )); + } + } + + view.insert(from, name, through)?; + } + + Terminal::Node { name, .. } => { + for token in builder.variant(name).leftmost().tokens() { + view.insert(from, token, through)?; + } + } + } + } + + Ok(()) + } +} + +pub(in crate::node) trait CheckConflicts { + fn check_conflicts(&self, builder: &Builder, allow_skips: bool) -> Result<()>; +} diff --git a/work/crates/derive/src/node/automata/merge.rs b/work/crates/derive/src/node/automata/merge.rs new file mode 100644 index 0000000..9a0d9b2 --- /dev/null +++ b/work/crates/derive/src/node/automata/merge.rs @@ -0,0 +1,170 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. 
// +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::mem::take; + +use syn::{Error, Result}; + +use crate::{ + node::{ + automata::{scope::Scope, NodeAutomata}, + regex::terminal::Terminal, + }, + utils::{Map, MapImpl, MultimapImpl, PredictableCollection, SetImpl}, +}; + +impl AutomataMergeCaptures for NodeAutomata { + fn merge_captures(&mut self, scope: &mut Scope) -> Result<()> { + loop { + let mut has_changes = false; + + self.transitions = take(&mut self.transitions) + .group(|(from, through, to)| (from, (through, to))) + .try_for_each(|_, transitions| { + let count = transitions.len(); + + let mut tokens = Map::with_capacity(count); + let mut nodes = Map::with_capacity(count); + + for (terminal, to) in take(transitions) { + match &terminal { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { + name, + capture: None, + } => { + if !tokens.contains_key(name) { + let _ = tokens.insert(name.clone(), (terminal, to)); + } + } + + rule_a @ Terminal::Token { + name, + capture: Some(capture), + } => match tokens.get(name) { + None | Some((Terminal::Token { capture: None, .. }, _)) => { + let _ = tokens.insert(name.clone(), (terminal, to)); + } + + Some(( + rule_b @ Terminal::Token { + capture: Some(_), .. + }, + _, + )) => { + return Err(Error::new( + capture.span(), + format!( + "Rule \"{}\" conflicts with rule \"{}\" by capturing \ + the same Token in the same source code position into \ + two distinct variables.", + rule_a, rule_b, + ), + )) + } + _ => (), + }, + + Terminal::Node { + name, + capture: None, + } => { + if !nodes.contains_key(name) { + let _ = nodes.insert(name.clone(), (terminal, to)); + } + } + + rule_a @ Terminal::Node { + name, + capture: Some(capture), + } => match nodes.get(name) { + None | Some((Terminal::Node { capture: None, .. }, _)) => { + let _ = nodes.insert(name.clone(), (terminal, to)); + } + + Some(( + rule_b @ Terminal::Node { + capture: Some(_), .. + }, + _, + )) => { + return Err(Error::new( + capture.span(), + format!( + "Rule \"{}\" conflicts with rule \"{}\" by capturing \ + the same Node in the same source code position into \ + two distinct variables.", + rule_a, rule_b, + ), + )) + } + _ => (), + }, + } + } + + for (_, token) in tokens { + transitions.insert(token); + } + + for (_, node) in nodes { + transitions.insert(node); + } + + if count != transitions.len() { + has_changes = true; + } + + Ok(()) + })? 
+ .join(|from, (through, to)| (from, through, to)); + + if !has_changes { + break; + } + + self.canonicalize(scope); + } + + Ok(()) + } +} + +pub(in crate::node) trait AutomataMergeCaptures { + fn merge_captures(&mut self, scope: &mut Scope) -> Result<()>; +} diff --git a/work/crates/derive/src/node/automata/mod.rs b/work/crates/derive/src/node/automata/mod.rs new file mode 100644 index 0000000..0a9b5cb --- /dev/null +++ b/work/crates/derive/src/node/automata/mod.rs @@ -0,0 +1,47 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{node::automata::scope::Scope, utils::Automata}; + +pub(in crate::node) mod conflicts; +pub(in crate::node) mod merge; +pub(in crate::node) mod scope; +pub(in crate::node) mod skip; +pub(in crate::node) mod synchronization; +pub(in crate::node) mod variables; + +pub(in crate::node) type NodeAutomata = Automata; diff --git a/work/crates/derive/src/node/automata/scope.rs b/work/crates/derive/src/node/automata/scope.rs new file mode 100644 index 0000000..25bd200 --- /dev/null +++ b/work/crates/derive/src/node/automata/scope.rs @@ -0,0 +1,73 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. 
Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::ops::RangeFrom; + +use crate::{ + node::regex::terminal::Terminal, + utils::{AutomataContext, State}, +}; + +pub(in crate::node) struct Scope { + state_generator: RangeFrom, +} + +impl Default for Scope { + #[inline(always)] + fn default() -> Self { + Self { + state_generator: 1.., + } + } +} + +impl AutomataContext for Scope { + type State = SyntaxState; + type Terminal = Terminal; +} + +pub(in crate::node) type SyntaxState = usize; + +impl State for SyntaxState { + #[inline(always)] + fn gen_state(context: &mut Scope) -> Self { + context + .state_generator + .next() + .expect("Internal error. State generator exceeded.") + } +} diff --git a/work/crates/derive/src/node/automata/skip.rs b/work/crates/derive/src/node/automata/skip.rs new file mode 100644 index 0000000..21776c8 --- /dev/null +++ b/work/crates/derive/src/node/automata/skip.rs @@ -0,0 +1,66 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. 
// +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use syn::{Error, Result}; + +use crate::node::{automata::NodeAutomata, regex::terminal::Terminal}; + +impl IsSkipAutomata for NodeAutomata { + fn is_skip(&self) -> Result<()> { + for (from, through, to) in &self.transitions { + if from != &self.start || !self.finish.contains(to) { + match through { + Terminal::Null => unreachable!("Automata with null transition"), + + Terminal::Node { name, .. } | Terminal::Token { name, .. } => { + return Err(Error::new( + name.span(), + "Skip expression cannot match token sequences of more than one \ + token.", + )); + } + } + } + } + + Ok(()) + } +} + +pub(in crate::node) trait IsSkipAutomata { + fn is_skip(&self) -> Result<()>; +} diff --git a/work/crates/derive/src/node/automata/synchronization.rs b/work/crates/derive/src/node/automata/synchronization.rs new file mode 100644 index 0000000..35c43ea --- /dev/null +++ b/work/crates/derive/src/node/automata/synchronization.rs @@ -0,0 +1,147 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
//
+////////////////////////////////////////////////////////////////////////////////
+
+use proc_macro2::{Ident, Span};
+use syn::spanned::Spanned;
+
+use crate::node::{automata::NodeAutomata, regex::terminal::Terminal};
+
+pub(in crate::node) struct Synchronization {
+    variant_name: Ident,
+    attribute_span: Span,
+    open: Option<Ident>,
+    close: Option<Ident>,
+}
+
+impl Spanned for Synchronization {
+    #[inline(always)]
+    fn span(&self) -> Span {
+        self.attribute_span
+    }
+}
+
+impl Synchronization {
+    #[inline(always)]
+    pub(in crate::node) fn variant_name(&self) -> &Ident {
+        &self.variant_name
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn open(&self) -> Option<&Ident> {
+        self.open.as_ref()
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn close(&self) -> Option<&Ident> {
+        self.close.as_ref()
+    }
+}
+
+impl AutomataSynchronization for NodeAutomata {
+    fn synchronization(&self, variant_name: Ident, attribute_span: Span) -> Synchronization {
+        enum Single<'a> {
+            Vacant,
+            Found(&'a Ident),
+            Ambiguity,
+        }
+
+        let mut open = Single::Vacant;
+        let mut close = Single::Vacant;
+
+        for (from, through, to) in &self.transitions {
+            let start = from == &self.start;
+            let end = self.finish.contains(to);
+
+            if !start && !end {
+                continue;
+            }
+
+            match through {
+                Terminal::Null => unreachable!("Automata with null transition."),
+
+                Terminal::Token { name, .. } => {
+                    if start {
+                        match &open {
+                            Single::Vacant => open = Single::Found(name),
+                            Single::Found(..) => open = Single::Ambiguity,
+                            Single::Ambiguity => (),
+                        }
+                    }
+
+                    if end {
+                        match &close {
+                            Single::Vacant => close = Single::Found(name),
+                            Single::Found(token) if *token != name => close = Single::Ambiguity,
+                            _ => (),
+                        }
+                    }
+                }
+
+                Terminal::Node { .. } => {
+                    if start {
+                        open = Single::Ambiguity;
+                    }
+
+                    if end {
+                        close = Single::Ambiguity;
+                    }
+                }
+            }
+        }
+
+        let open = match open {
+            Single::Found(token) => Some(token.clone()),
+            _ => None,
+        };
+
+        let close = match close {
+            Single::Found(token) => Some(token.clone()),
+            _ => None,
+        };
+
+        Synchronization {
+            variant_name,
+            attribute_span,
+            open,
+            close,
+        }
+    }
+}
+
+pub(in crate::node) trait AutomataSynchronization {
+    fn synchronization(&self, variant_name: Ident, span: Span) -> Synchronization;
+}
diff --git a/work/crates/derive/src/node/automata/variables.rs b/work/crates/derive/src/node/automata/variables.rs
new file mode 100644
index 0000000..66ac303
--- /dev/null
+++ b/work/crates/derive/src/node/automata/variables.rs
@@ -0,0 +1,390 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use std::{
+    collections::hash_map::Keys,
+    fmt::{Display, Formatter},
+};
+
+use proc_macro2::Ident;
+use syn::{Error, Result};
+
+use crate::{
+    node::{
+        automata::{scope::SyntaxState, NodeAutomata},
+        builder::constructor::Constructor,
+        regex::terminal::Terminal,
+    },
+    utils::{Map, PredictableCollection, Set, SetImpl},
+};
+
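+// Editorial note (an explanatory sketch, not part of the original sources):
+// `variable_map` first assigns every capture a kind (TokenRef or NodeRef),
+// then classifies its multiplicity by three reachability passes. For a
+// hypothetical rule `(A as a)?` the finish state is reachable without any
+// `a`-capturing transition, so `a` is Optional; for `(A as a) (A as a)` the
+// finish is reachable after a second capturing step, so `a` is Multiple;
+// otherwise it is Single.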
+impl AutomataVariables for NodeAutomata {
+    fn variable_map(&self) -> Result<VariableMap> {
+        let mut kinds = Map::empty();
+
+        for (_, through, _) in &self.transitions {
+            match through {
+                Terminal::Token {
+                    capture: Some(capture),
+                    ..
+                } => match kinds.insert(capture.clone(), VariableKind::TokenRef) {
+                    Some(VariableKind::NodeRef) => {
+                        return Err(Error::new(
+                            capture.span(),
+                            format!(
+                                "Variable {:?} captures two distinct types: TokenRef and NodeRef.",
+                                capture.to_string(),
+                            ),
+                        ))
+                    }
+                    _ => (),
+                },
+
+                Terminal::Node {
+                    capture: Some(capture),
+                    ..
+                } => match kinds.insert(capture.clone(), VariableKind::NodeRef) {
+                    Some(VariableKind::TokenRef) => {
+                        return Err(Error::new(
+                            capture.span(),
+                            format!(
+                                "Variable {:?} captures two distinct types: TokenRef and NodeRef.",
+                                capture.to_string(),
+                            ),
+                        ))
+                    }
+                    _ => (),
+                },
+
+                _ => (),
+            }
+        }
+
+        let mut result = Map::with_capacity(kinds.len());
+
+        for (capture, kind) in kinds {
+            let mut optional = Set::new([self.start]);
+            self.spread_without(&capture, &mut optional);
+
+            let mut single = self.step_with(&capture, &optional);
+            self.spread_without(&capture, &mut single);
+
+            let mut multiple = self.step_with(&capture, &single);
+            self.spread(&mut multiple);
+
+            let mut is_optional = false;
+            let mut is_multiple = false;
+
+            for finish in &self.finish {
+                if optional.contains(finish) {
+                    is_optional = true;
+                }
+
+                if multiple.contains(finish) {
+                    is_multiple = true;
+                }
+
+                if is_optional && is_multiple {
+                    break;
+                }
+            }
+
+            let repetition = match (is_optional, is_multiple) {
+                (_, true) => VariableRepetition::Multiple,
+                (true, false) => VariableRepetition::Optional,
+                (false, false) => VariableRepetition::Single,
+            };
+
+            result.insert(
+                capture.clone(),
+                VariableMeta {
+                    name: capture,
+                    kind,
+                    repetition,
+                },
+            );
+        }
+
+        Ok(VariableMap { map: result })
+    }
+}
+
+impl AutomataPrivate for NodeAutomata {
+    #[inline]
+    fn spread(&self, states: &mut Set<SyntaxState>) {
+        loop {
+            let mut new_states = false;
+
+            for (from, _, to) in &self.transitions {
+                if !states.contains(&from) || states.contains(to) {
+                    continue;
+                }
+
+                let _ = states.insert(*to);
+                new_states = true;
+            }
+
+            if !new_states {
+                break;
+            }
+        }
+    }
+
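+    // Editorial note (assumption, not original text): `spread` above is the
+    // unconditional reachability closure; `spread_without` below closes over
+    // every transition except those capturing `variable`, and `step_with`
+    // takes exactly one capturing step. Chaining them separates paths with
+    // zero, one, and two-or-more occurrences of the capture.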
+    fn spread_without(&self, variable: &Ident, states: &mut Set<SyntaxState>) {
+        loop {
+            let mut new_states = false;
+
+            for (from, through, to) in &self.transitions {
+                if !states.contains(&from) || states.contains(to) {
+                    continue;
+                }
+
+                let transits = match through {
+                    Terminal::Token {
+                        capture: Some(capture),
+                        ..
+                    } => capture == variable,
+
+                    Terminal::Node {
+                        capture: Some(capture),
+                        ..
+                    } => capture == variable,
+
+                    _ => false,
+                };
+
+                if !transits {
+                    let _ = states.insert(*to);
+                    new_states = true;
+                }
+            }
+
+            if !new_states {
+                break;
+            }
+        }
+    }
+
+    #[inline]
+    fn step_with(&self, variable: &Ident, states: &Set<SyntaxState>) -> Set<SyntaxState> {
+        let mut result = Set::empty();
+
+        for (from, through, to) in &self.transitions {
+            if !states.contains(&from) || result.contains(to) {
+                continue;
+            }
+
+            let transits = match through {
+                Terminal::Token {
+                    capture: Some(capture),
+                    ..
+                } => capture == variable,
+
+                Terminal::Node {
+                    capture: Some(capture),
+                    ..
+                } => capture == variable,
+
+                _ => false,
+            };
+
+            if transits {
+                let _ = result.insert(*to);
+            }
+        }
+
+        result
+    }
+}
+
+pub(in crate::node) trait AutomataVariables {
+    fn variable_map(&self) -> Result<VariableMap>;
+}
+
+trait AutomataPrivate {
+    fn spread(&self, states: &mut Set<SyntaxState>);
+
+    fn spread_without(&self, variable: &Ident, states: &mut Set<SyntaxState>);
+
+    fn step_with(&self, variable: &Ident, states: &Set<SyntaxState>) -> Set<SyntaxState>;
+}
+
+#[derive(Default)]
+pub(in crate::node) struct VariableMap {
+    map: Map<Ident, VariableMeta>,
+}
+
+impl Display for VariableMap {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
+        for (key, variable) in &self.map {
+            writeln!(formatter, " {}: {}", key, variable)?;
+        }
+
+        Ok(())
+    }
+}
+
+impl<'a> IntoIterator for &'a VariableMap {
+    type Item = &'a Ident;
+    type IntoIter = Keys<'a, Ident, VariableMeta>;
+
+    #[inline(always)]
+    fn into_iter(self) -> Self::IntoIter {
+        self.map.keys()
+    }
+}
+
+impl VariableMap {
+    pub(in crate::node) fn fits(&self, constructor: &Constructor) -> Result<()> {
+        let explicit = constructor.is_explicit();
+        let parameters = constructor
+            .parameters()
+            .iter()
+            .map(|parameter| (parameter.name(), parameter))
+            .collect::<Map<_, _>>();
+
+        for (name, parameter) in &parameters {
+            if self.map.contains_key(name) {
+                if parameter.is_default() {
+                    return Err(Error::new(
+                        parameter.default_attribute().clone(),
+                        "Default attribute is not applicable here, because corresponding \
+                        variable is explicitly captured in the rule expression.",
+                    ));
+                }
+            } else {
+                if explicit {
+                    return Err(Error::new(
+                        name.span(),
+                        "This parameter is missing in the set of the rule capturing \
+                        variables.",
+                    ));
+                } else if !parameter.is_default() {
+                    return Err(Error::new(
+                        name.span(),
+                        "This parameter is missing in the set of the rule capturing \
+                        variables.\nIf this is intended, the rule needs an explicit constructor.\n\
+                        Use #[constructor(...)] attribute to specify constructor function.\n\
+                        Alternatively, associate this parameter with #[default(...)] attribute.",
+                    ));
+                }
+            }
+        }
+
+        for argument in self.map.keys() {
+            if !parameters.contains_key(argument) {
+                return if explicit {
+                    Err(Error::new(
+                        argument.span(),
+                        format!(
+                            "Capturing \"{}\" variable is missing in constructor's parameters.",
+                            argument,
+                        ),
+                    ))
+                } else {
+                    Err(Error::new(
+                        argument.span(),
+                        format!(
+                            "Capturing \"{}\" variable is missing in the list of variant fields.",
+                            argument,
+                        ),
+                    ))
+                };
+            }
+        }
+
+        Ok(())
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn get(&self, name: &Ident) -> &VariableMeta {
+        self.map
+            .get(name)
+            .expect("Internal error.
Missing variable.") + } +} + +pub(in crate::node) struct VariableMeta { + name: Ident, + kind: VariableKind, + repetition: VariableRepetition, +} + +impl Display for VariableMeta { + #[inline] + fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result { + use VariableRepetition::*; + + let kind = format!("{:?}", self.kind); + + match self.repetition { + Single => formatter.write_str(&format!("{}", kind)), + Optional => formatter.write_str(&format!("{}?", kind)), + Multiple => formatter.write_str(&format!("{}*", kind)), + } + } +} + +impl VariableMeta { + #[inline(always)] + pub(in crate::node) fn name(&self) -> &Ident { + &self.name + } + + #[inline(always)] + pub(in crate::node) fn kind(&self) -> &VariableKind { + &self.kind + } + + #[inline(always)] + pub(in crate::node) fn repetition(&self) -> &VariableRepetition { + &self.repetition + } +} + +#[derive(Debug)] +pub(in crate::node) enum VariableKind { + TokenRef, + NodeRef, +} + +pub(in crate::node) enum VariableRepetition { + Single, + Optional, + Multiple, +} diff --git a/work/crates/derive/src/node/builder/constructor.rs b/work/crates/derive/src/node/builder/constructor.rs new file mode 100644 index 0000000..25ff84e --- /dev/null +++ b/work/crates/derive/src/node/builder/constructor.rs @@ -0,0 +1,223 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
//
+////////////////////////////////////////////////////////////////////////////////
+
+use proc_macro2::{Ident, Span};
+use syn::{
+    parse::ParseStream,
+    punctuated::Punctuated,
+    spanned::Spanned,
+    AttrStyle,
+    Attribute,
+    Error,
+    Expr,
+    Fields,
+    Result,
+    Variant,
+};
+
+pub(in crate::node) struct Constructor {
+    span: Span,
+    name: Ident,
+    parameters: Vec<Parameter>,
+    explicit: bool,
+}
+
+impl Spanned for Constructor {
+    #[inline(always)]
+    fn span(&self) -> Span {
+        self.span
+    }
+}
+
+impl<'a> TryFrom<&'a Attribute> for Constructor {
+    type Error = Error;
+
+    fn try_from(attribute: &'a Attribute) -> Result<Self> {
+        let span = attribute.span();
+
+        attribute.parse_args_with(|input: ParseStream| {
+            let name = input.parse::<Ident>()?;
+
+            let content;
+            syn::parenthesized!(content in input);
+
+            let parameters = Punctuated::<Ident, syn::Token![,]>::parse_terminated(&content)?
+                .into_iter()
+                .map(|name| Parameter {
+                    name,
+                    default_value: None,
+                    default_attribute: None,
+                })
+                .collect::<Vec<_>>();
+
+            Ok(Self {
+                span,
+                name,
+                parameters,
+                explicit: true,
+            })
+        })
+    }
+}
+
+impl<'a> TryFrom<&'a Variant> for Constructor {
+    type Error = Error;
+
+    fn try_from(variant: &'a Variant) -> Result<Self> {
+        match &variant.fields {
+            Fields::Unnamed(fields) => {
+                return Err(Error::new(
+                    fields.span(),
+                    "Variants with unnamed fields require explicit constructor.\nAnnotate \
+                    this variant with #[constructor(...)] attribute.",
+                ));
+            }
+
+            _ => (),
+        }
+
+        let span = variant.span();
+        let name = variant.ident.clone();
+
+        let parameters = variant
+            .fields
+            .iter()
+            .map(|field| {
+                let mut default = None;
+
+                for attribute in &field.attrs {
+                    match attribute.style {
+                        AttrStyle::Inner(_) => continue,
+                        AttrStyle::Outer => (),
+                    }
+
+                    let name = match attribute.path.get_ident() {
+                        None => continue,
+                        Some(name) => name,
+                    };
+
+                    match name.to_string().as_str() {
+                        "default" => {
+                            if default.is_some() {
+                                return Err(Error::new(
+                                    attribute.span(),
+                                    "Duplicate Default attribute.",
+                                ));
+                            }
+
+                            default = Some((attribute.span(), attribute.parse_args::<Expr>()?));
+                        }
+
+                        _ => (),
+                    }
+                }
+
+                let name = field.ident.clone().expect("Internal error. Unnamed field.");
+
+                match default {
+                    None => Ok(Parameter {
+                        name,
+                        default_value: None,
+                        default_attribute: None,
+                    }),
+
+                    Some((span, value)) => Ok(Parameter {
+                        name,
+                        default_value: Some(value),
+                        default_attribute: Some(span),
+                    }),
+                }
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        Ok(Self {
+            span,
+            name,
+            parameters,
+            explicit: false,
+        })
+    }
+}
+
+impl Constructor {
+    #[inline(always)]
+    pub(in crate::node) fn name(&self) -> &Ident {
+        &self.name
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn is_explicit(&self) -> bool {
+        self.explicit
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn parameters(&self) -> &[Parameter] {
+        &self.parameters
+    }
+}
+
+pub(in crate::node) struct Parameter {
+    name: Ident,
+    default_value: Option<Expr>,
+    default_attribute: Option<Span>,
+}
+
+impl Parameter {
+    #[inline(always)]
+    pub(in crate::node) fn name(&self) -> &Ident {
+        &self.name
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn is_default(&self) -> bool {
+        self.default_value.is_some()
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn default_value(&self) -> &Expr {
+        self.default_value
+            .as_ref()
+            .expect("Internal error. Missing default value.")
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn default_attribute(&self) -> &Span {
+        self.default_attribute
+            .as_ref()
+            .expect("Internal error.
Missing default attribute.") + } +} diff --git a/work/crates/derive/src/node/builder/kind.rs b/work/crates/derive/src/node/builder/kind.rs new file mode 100644 index 0000000..6784920 --- /dev/null +++ b/work/crates/derive/src/node/builder/kind.rs @@ -0,0 +1,79 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Span; +use syn::{spanned::Spanned, Error, Result}; + +pub(in crate::node) enum VariantKind { + Unspecified(Span), + Root(Span), + Comment(Span), + Sentence(Span), +} + +impl Spanned for VariantKind { + #[inline] + fn span(&self) -> Span { + match self { + Self::Unspecified(span) => *span, + Self::Root(span) => *span, + Self::Comment(span) => *span, + Self::Sentence(span) => *span, + } + } +} + +impl VariantKind { + #[inline] + pub(in crate::node) fn is_vacant(&self, span: Span) -> Result<()> { + match self { + Self::Unspecified(..) => Ok(()), + Self::Root(..) => Err(Error::new( + span, + "The variant already annotated as a Root rule.", + )), + Self::Comment(..) => Err(Error::new( + span, + "The variant already annotated as a Comment rule.", + )), + Self::Sentence(..) => Err(Error::new( + span, + "The variant already annotated as a Sentence rule.", + )), + } + } +} diff --git a/work/crates/derive/src/node/builder/mod.rs b/work/crates/derive/src/node/builder/mod.rs new file mode 100644 index 0000000..477129d --- /dev/null +++ b/work/crates/derive/src/node/builder/mod.rs @@ -0,0 +1,660 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. 
//
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use std::collections::hash_map::Keys;
+
+use proc_macro2::Ident;
+use syn::{
+    parse::ParseStream,
+    spanned::Spanned,
+    AttrStyle,
+    Attribute,
+    Data,
+    DeriveInput,
+    Error,
+    Generics,
+    Result,
+    Type,
+    Variant,
+};
+
+use crate::{
+    node::{
+        automata::{conflicts::CheckConflicts, scope::Scope, skip::IsSkipAutomata, NodeAutomata},
+        builder::{kind::VariantKind, variant::NodeVariant},
+        regex::{
+            encode::Encode,
+            inline::Inline,
+            operand::RegexOperand,
+            operator::RegexOperator,
+            prefix::{Leftmost, RegexPrefix},
+            skip::IsSkipRegex,
+            Regex,
+        },
+    },
+    utils::{Map, PredictableCollection, Set},
+};
+
+pub(in crate::node) mod constructor;
+pub(in crate::node) mod kind;
+pub(in crate::node) mod rule;
+pub(in crate::node) mod variant;
+
+pub(in crate::node) struct Builder {
+    node_name: Ident,
+    generics: Generics,
+    token_type: Option<Type>,
+    error_type: Option<Type>,
+    scope: Scope,
+    skip: Option<Regex>,
+    inline_map: Map<Ident, Regex>,
+    variant_map: Map<Ident, NodeVariant>,
+    skip_leftmost: Leftmost,
+    skip_automata: Option<NodeAutomata>,
+    synchronization: Map<Ident, Ident>,
+}
+
+impl<'a> TryFrom<&'a DeriveInput> for Builder {
+    type Error = Error;
+
+    fn try_from(input: &'a DeriveInput) -> Result<Self> {
+        let node_name = input.ident.clone();
+        let generics = input.generics.clone();
+
+        let mut builder = Self {
+            node_name,
+            generics,
+            token_type: None,
+            error_type: None,
+            scope: Scope::default(),
+            skip: None,
+            inline_map: Map::empty(),
+            variant_map: Map::empty(),
+            skip_leftmost: Leftmost::default(),
+            skip_automata: None,
+            synchronization: Map::empty(),
+        };
+
+        let data = match &input.data {
+            Data::Enum(data) => data,
+
+            other => {
+                let span = match other {
+                    Data::Struct(data) => data.struct_token.span,
+                    Data::Union(data) => data.union_token.span,
+                    _ => unimplemented!(),
+                };
+
+                return Err(Error::new(
+                    span,
+                    "Node must be derived on the enum type with variants representing \
+                    syntax variants.",
+                ));
+            }
+        };
+
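+        // Editorial note (assumption, not original text): the type-level
+        // attributes dispatched below configure the grammar as a whole:
+        // #[token(...)] and #[error(...)] name the Token and Error types,
+        // #[skip(...)] defines the trivia expression, and #[define(...)]
+        // introduces a named inline expression reusable inside other rules.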
+        for attribute in &input.attrs {
+            match attribute.style {
+                AttrStyle::Inner(_) => continue,
+                AttrStyle::Outer => (),
+            }
+
+            let name = match attribute.path.get_ident() {
+                None => continue,
+                Some(name) => name,
+            };
+
+            match name.to_string().as_str() {
+                "token" => {
+                    builder.set_token_type(attribute)?;
+                }
+
+                "error" => {
+                    builder.set_error_type(attribute)?;
+                }
+
+                "skip" => {
+                    builder.set_skip(attribute)?;
+                }
+
+                "define" => {
+                    builder.add_inline(attribute)?;
+                }
+
+                _ => continue,
+            }
+        }
+
+        for variant in &data.variants {
+            builder.add_variant(variant)?;
+        }
+
+        builder.check_error_type()?;
+        builder.check_token_type()?;
+        builder.check_root()?;
+        builder.check_references()?;
+        builder.build_leftmost()?;
+        builder.build_skip()?;
+        builder.build_automata()?;
+        builder.check_conflicts()?;
+        builder.build_synchronizations()?;
+
+        Ok(builder)
+    }
+}
+
+impl<'a> IntoIterator for &'a Builder {
+    type Item = &'a Ident;
+    type IntoIter = Keys<'a, Ident, NodeVariant>;
+
+    #[inline(always)]
+    fn into_iter(self) -> Self::IntoIter {
+        self.variant_map.keys()
+    }
+}
+
+impl Builder {
+    #[inline(always)]
+    pub(in crate::node) fn node_name(&self) -> &Ident {
+        &self.node_name
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn token_type(&self) -> &Type {
+        self.token_type
+            .as_ref()
+            .expect("Internal error. Missing token type.")
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn error_type(&self) -> &Type {
+        self.error_type
+            .as_ref()
+            .expect("Internal error. Missing error type.")
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn generics(&self) -> &Generics {
+        &self.generics
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn get_inline(&self, name: &Ident) -> Option<&Regex> {
+        self.inline_map.get(name)
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn get_variant(&self, name: &Ident) -> Option<&NodeVariant> {
+        self.variant_map.get(name)
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn variant(&self, name: &Ident) -> &NodeVariant {
+        self.variant_map
+            .get(name)
+            .as_ref()
+            .expect("Internal error. Unknown variant.")
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn variants_count(&self) -> usize {
+        self.variant_map.len()
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn skip_leftmost(&self) -> &Leftmost {
+        &self.skip_leftmost
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn skip_automata(&self) -> Option<&NodeAutomata> {
+        self.skip_automata.as_ref()
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn synchronization(&self) -> &Map<Ident, Ident> {
+        &self.synchronization
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn modify(
+        &mut self,
+        name: &Ident,
+        mut function: impl FnMut(&mut Self, &mut NodeVariant) -> Result<()>,
+    ) -> Result<()> {
+        let (name, mut variant) = self
+            .variant_map
+            .remove_entry(name)
+            .expect("Internal error. Unknown variant.");
+
+        function(self, &mut variant)?;
+
+        assert!(
+            self.variant_map.insert(name, variant).is_none(),
+            "Internal error. Duplicate variant."
+        );
+
+        Ok(())
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn scope(&mut self) -> &mut Scope {
+        &mut self.scope
+    }
+
+    fn set_token_type(&mut self, attribute: &Attribute) -> Result<()> {
+        if self.token_type.is_some() {
+            return Err(Error::new(attribute.span(), "Duplicate Token attribute."));
+        }
+
+        self.token_type = Some(attribute.parse_args::<Type>()?);
+
+        Ok(())
+    }
+
+    fn set_error_type(&mut self, attribute: &Attribute) -> Result<()> {
+        if self.error_type.is_some() {
+            return Err(Error::new(attribute.span(), "Duplicate Error attribute."));
+        }
+
+        self.error_type = Some(attribute.parse_args::<Type>()?);
+
+        Ok(())
+    }
+
+    fn set_skip(&mut self, attribute: &Attribute) -> Result<()> {
+        if self.skip.is_some() {
+            return Err(Error::new(attribute.span(), "Duplicate Skip attribute."));
+        }
+
+        let mut skip = attribute.parse_args::<Regex>()?;
+
+        skip.inline(self)?;
+        skip.is_skip()?;
+
+        self.skip = Some(skip);
+
+        Ok(())
+    }
+
+    fn add_inline(&mut self, attribute: &Attribute) -> Result<()> {
+        let (name, mut regex) = attribute.parse_args_with(|input: ParseStream| {
+            let name = input.parse::<Ident>()?;
+            let _ = input.parse::<syn::Token![=]>()?;
+
+            let expression = input.parse::<Regex>()?;
+
+            Ok((name, expression))
+        })?;
+
+        self.is_vacant(&name)?;
+
+        regex.inline(self)?;
+
+        assert!(
+            self.inline_map.insert(name, regex).is_none(),
+            "Internal error. Inline redefined.",
+        );
+
+        Ok(())
+    }
+
+    fn add_variant(&mut self, variant: &Variant) -> Result<()> {
+        let mut variant = NodeVariant::try_from(variant)?;
+
+        self.is_vacant(variant.name())?;
+
+        variant.inline(self)?;
+
+        assert!(
+            self.variant_map
+                .insert(variant.name().clone(), variant)
+                .is_none(),
+            "Internal error. Variant redefined.",
+        );
+
+        Ok(())
+    }
+
+    fn check_error_type(&self) -> Result<()> {
+        if self.error_type.is_none() {
+            return Err(Error::new(
+                self.node_name.span(),
+                "Error Type must be specified explicitly.\nUse #[error(...)] \
+                attribute on the derived type to specify Error type.\nFor example you can specify \
+                default \"SyntaxError\" error type.",
+            ));
+        }
+
+        Ok(())
+    }
+
+    fn check_token_type(&self) -> Result<()> {
+        if self.token_type.is_none() {
+            return Err(Error::new(
+                self.node_name.span(),
+                "Token Type must be specified explicitly.\nUse #[token(...)] \
+                attribute on the derived type to specify Token type.",
+            ));
+        }
+
+        Ok(())
+    }
+
+    fn check_root(&self) -> Result<()> {
+        let mut found = false;
+
+        for variant in self.variant_map.values() {
+            match variant.kind() {
+                VariantKind::Root(..) if !found => found = true,
+
+                VariantKind::Root(span) if found => {
+                    return Err(Error::new(
+                        *span,
+                        "Duplicate Root rule.\nThe syntax must specify only one Root rule.",
+                    ));
+                }
+
+                _ => (),
+            }
+        }
+
+        if !found {
+            return Err(Error::new(
+                self.node_name.span(),
+                "Node syntax must specify a Root rule.\nAnnotate one of the enum variants \
+                with #[root] attribute.",
+            ));
+        }
+
+        Ok(())
+    }
+
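+    // Editorial note (assumption, not original text): reachability is checked
+    // with a plain worklist walk. Root and Comment rules seed the `pending`
+    // vector, every rule referenced from a visited rule is pushed in turn,
+    // and any Sentence rule left unvisited afterwards is reported as
+    // unreachable.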
+    fn check_references(&self) -> Result<()> {
+        let mut visited = Set::empty();
+
+        let mut pending = self
+            .variant_map
+            .iter()
+            .filter(|(_, variant)| match variant.kind() {
+                VariantKind::Root(..) | VariantKind::Comment(..) => true,
+                _ => false,
+            })
+            .map(|(name, _)| name.clone())
+            .collect::<Vec<_>>();
+
+        while let Some(next) = pending.pop() {
+            if visited.contains(&next) {
+                continue;
+            }
+
+            let variant = self.variant(&next);
+
+            let references = variant.check_references(self)?;
+
+            for reference in references {
+                pending.push(reference);
+            }
+
+            let _ = visited.insert(next);
+        }
+
+        for (_, variant) in &self.variant_map {
+            let span = match variant.kind() {
+                VariantKind::Unspecified(..) | VariantKind::Root(..) | VariantKind::Comment(..) => {
+                    continue;
+                }
+
+                VariantKind::Sentence(span) => span,
+            };
+
+            if !visited.contains(variant.name()) {
+                return Err(Error::new(
+                    *span,
+                    "Variant's rule is not referred by any other rule.\nEvery \
+                    parsable variant except the root rule or a comment rule must be referred \
+                    directly or indirectly from the root.",
+                ));
+            }
+        }
+
+        Ok(())
+    }
+
+    fn build_leftmost(&mut self) -> Result<()> {
+        for variant in &self.variant_map.keys().cloned().collect::<Vec<_>>() {
+            self.modify(variant, |builder, variant| variant.build_leftmost(builder))?
+        }
+
+        Ok(())
+    }
+
+    fn build_skip(&mut self) -> Result<()> {
+        let skip = self
+            .variant_map
+            .values()
+            .fold(self.skip.clone(), |accumulator, variant| {
+                match variant.kind() {
+                    VariantKind::Comment(..) => (),
+
+                    _ => return accumulator,
+                }
+
+                let comment = Regex::Operand(RegexOperand::Rule {
+                    name: variant.name().clone(),
+                    capture: None,
+                });
+
+                match accumulator {
+                    None => Some(comment),
+
+                    Some(accumulator) => Some(Regex::Binary {
+                        operator: RegexOperator::Union,
+                        left: Box::new(accumulator),
+                        right: Box::new(comment),
+                    }),
+                }
+            });
+
+        let skip = match skip {
+            None => return Ok(()),
+            Some(skip) => Regex::Unary {
+                operator: RegexOperator::ZeroOrMore { separator: None },
+                inner: Box::new(skip),
+            },
+        };
+
+        self.skip_leftmost = skip.leftmost();
+
+        for node in self.skip_leftmost.nodes().clone() {
+            self.skip_leftmost
+                .append(self.variant(&node).leftmost().clone());
+        }
+
+        let skip_automata = skip.encode(&mut self.scope)?;
+
+        skip_automata.is_skip()?;
+
+        self.skip_automata = Some(skip_automata);
+
+        for variant in &self.variant_map.keys().cloned().collect::<Vec<_>>() {
+            self.modify(variant, |_, variant| {
+                variant.inject_skip(&skip);
+
+                Ok(())
+            })
+            .expect("Internal error. Skip injection failure");
+        }
+
+        Ok(())
+    }
+
+    fn build_automata(&mut self) -> Result<()> {
+        for variant in &self.variant_map.keys().cloned().collect::<Vec<_>>() {
+            self.modify(variant, |builder, variant| variant.build_automata(builder))?
+        }
+
+        Ok(())
+    }
+
+    fn check_conflicts(&self) -> Result<()> {
+        for variant in self.variant_map.values() {
+            let allow_skips = match variant.kind() {
+                VariantKind::Unspecified(..) => continue,
+                VariantKind::Root(..) | VariantKind::Sentence(..) => false,
+                VariantKind::Comment(..) => true,
+            };
+
+            variant.automata().check_conflicts(self, allow_skips)?;
+        }
+
+        Ok(())
+    }
+
+    fn build_synchronizations(&mut self) -> Result<()> {
+        enum Suffix<'a> {
+            Leftmost(&'a Ident),
+            Rightmost(&'a Ident),
+        }
+
+        let mut set = Map::empty();
+
+        for variant in self.variant_map.values() {
+            match variant.kind() {
+                VariantKind::Sentence(..) if variant.is_global_synchronization() => (),
+
+                _ => continue,
+            }
+
+            let variant_synchronization = variant.synchronization();
+
+            let open = variant_synchronization
+                .open()
+                .expect("Internal error. Missing synchronization's Open token.");
+
+            let close = variant_synchronization
+                .close()
+                .expect("Internal error. Missing synchronization's Close token.");
+
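+            // Editorial note (assumption, not original text): panic-mode
+            // recovery stays unambiguous only if every synchronization rule
+            // is bracketed by tokens that no other synchronization rule uses
+            // on either side; the `set` map below records both ends of each
+            // rule and rejects any overlap.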
+            if let Some(candidate) = self.synchronization.get(open) {
+                if candidate == close {
+                    continue;
+                }
+            }
+
+            if let Some(conflict) = set.insert(
+                open,
+                Suffix::Leftmost(variant_synchronization.variant_name()),
+            ) {
+                return match &conflict {
+                    Suffix::Leftmost(conflict) => Err(Error::new(
+                        variant_synchronization.span(),
+                        format!(
+                            "Synchronization conflict.\nRule's leftmost token \"${}\" \
+                            conflicts with \"{}\" rule's leftmost token.\nThe set of all \
+                            leftmost and rightmost tokens across all synchronization rules \
+                            must be unique.",
+                            open, conflict
+                        ),
+                    )),
+
+                    Suffix::Rightmost(conflict) => Err(Error::new(
+                        variant_synchronization.span(),
+                        format!(
+                            "Synchronization conflict.\nRule's leftmost token \"${}\" \
+                            conflicts with \"{}\" rule's rightmost token.\nThe set of all \
+                            leftmost and rightmost tokens across all synchronization rules \
+                            must be unique.",
+                            open, conflict
+                        ),
+                    )),
+                };
+            }
+
+            if let Some(conflict) = set.insert(
+                close,
+                Suffix::Rightmost(variant_synchronization.variant_name()),
+            ) {
+                return match &conflict {
+                    Suffix::Leftmost(conflict) => Err(Error::new(
+                        variant_synchronization.span(),
+                        format!(
+                            "Synchronization conflict.\nRule's rightmost token \"${}\" \
+                            conflicts with \"{}\" rule's leftmost token.\nThe set of all \
+                            leftmost and rightmost tokens across all synchronization rules \
+                            must be unique.",
+                            close, conflict
+                        ),
+                    )),
+
+                    Suffix::Rightmost(conflict) => Err(Error::new(
+                        variant_synchronization.span(),
+                        format!(
+                            "Synchronization conflict.\nRule's rightmost token \"${}\" \
+                            conflicts with \"{}\" rule's rightmost token.\nThe set of all \
+                            leftmost and rightmost tokens across all synchronization rules \
+                            must be unique.",
+                            close, conflict
+                        ),
+                    )),
+                };
+            }
+
+            let _ = self.synchronization.insert(open.clone(), close.clone());
+        }
+
+        Ok(())
+    }
+
+    fn is_vacant(&self, name: &Ident) -> Result<()> {
+        if self.inline_map.contains_key(name) {
+            return Err(Error::new(
+                name.span(),
+                "An inline expression with this name is already defined.",
+            ));
+        }
+
+        if self.variant_map.contains_key(name) {
+            return Err(Error::new(
+                name.span(),
+                "An enum variant with this name is already defined.",
+            ));
+        }
+
+        Ok(())
+    }
+}
diff --git a/work/crates/derive/src/node/builder/rule.rs b/work/crates/derive/src/node/builder/rule.rs
new file mode 100644
index 0000000..a7c722e
--- /dev/null
+++ b/work/crates/derive/src/node/builder/rule.rs
@@ -0,0 +1,244 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use proc_macro2::{Ident, Span};
+use syn::{spanned::Spanned, Attribute, Error, Result};
+
+use crate::{
+    node::{
+        automata::{
+            merge::AutomataMergeCaptures,
+            synchronization::{AutomataSynchronization, Synchronization},
+            variables::{AutomataVariables, VariableMap},
+            NodeAutomata,
+        },
+        builder::{kind::VariantKind, Builder},
+        regex::{
+            encode::Encode,
+            inject::Inject,
+            inline::Inline,
+            prefix::{Leftmost, RegexPrefix},
+            references::CheckReferences,
+            Regex,
+        },
+    },
+    utils::Set,
+};
+
+pub(in crate::node) struct Rule {
+    span: Span,
+    regex: Regex,
+    leftmost: Option<Leftmost>,
+    synchronization: Option<Synchronization>,
+    automata: Option<NodeAutomata>,
+    variables: Option<VariableMap>,
+}
+
+impl Spanned for Rule {
+    #[inline(always)]
+    fn span(&self) -> Span {
+        self.span
+    }
+}
+
+impl<'a> TryFrom<&'a Attribute> for Rule {
+    type Error = Error;
+
+    fn try_from(attribute: &'a Attribute) -> Result<Self> {
+        let span = attribute.span();
+        let regex = attribute.parse_args::<Regex>()?;
+
+        Ok(Self {
+            span,
+            regex,
+            leftmost: None,
+            automata: None,
+            synchronization: None,
+            variables: None,
+        })
+    }
+}
+
+impl From<Regex> for Rule {
+    #[inline(always)]
+    fn from(regex: Regex) -> Self {
+        let span = regex.span();
+
+        Self {
+            span,
+            regex,
+            leftmost: None,
+            automata: None,
+            synchronization: None,
+            variables: None,
+        }
+    }
+}
+
+impl Rule {
+    #[inline(always)]
+    pub(in crate::node) fn get_leftmost(&self) -> Option<&Leftmost> {
+        self.leftmost.as_ref()
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn variables(&self) -> &VariableMap {
+        self.variables
+            .as_ref()
+            .expect("Internal error. Missing rule Variable Map.")
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn automata(&self) -> &NodeAutomata {
+        self.automata
+            .as_ref()
+            .expect("Internal error. Missing rule Automata.")
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn synchronization(&self) -> &Synchronization {
+        self.synchronization
+            .as_ref()
+            .expect("Internal error. Missing rule Synchronization.")
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn inline(&mut self, builder: &Builder) -> Result<()> {
+        assert!(
+            self.leftmost.is_none(),
+            "Internal error. Rule leftmost already built."
+        );
+
+        self.regex.inline(builder)
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn check_references(
+        &self,
+        context: &VariantKind,
+        builder: &Builder,
+    ) -> Result<Set<Ident>> {
+        self.regex.check_references(context, builder)
+    }
+
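+    // Editorial note (assumption, not original text): `Builder::modify`
+    // temporarily removes the variant it is processing from the variant map,
+    // so a leftmost reference that cannot be looked up below is a rule whose
+    // resolution reentered itself before consuming a token, which is exactly
+    // the left recursion being rejected.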
+    pub(in crate::node) fn build_leftmost(&mut self, builder: &mut Builder) -> Result<()> {
+        if self.leftmost.is_some() {
+            return Ok(());
+        }
+
+        let mut leftmost = self.regex.leftmost();
+
+        for node in leftmost.nodes().clone() {
+            {
+                let variant = match builder.get_variant(&node) {
+                    Some(variant) => variant,
+
+                    None => {
+                        return Err(Error::new(
+                            node.span(),
+                            format!(
+                                "Reference \"{}\" in the leftmost position leads to a left \
+                                recursion. Left recursion is forbidden.",
+                                node,
+                            ),
+                        ));
+                    }
+                };
+
+                if let Some(resolution) = variant.get_leftmost() {
+                    leftmost.append(resolution.clone());
+                    continue;
+                }
+            }
+
+            builder.modify(&node, |builder, variant| variant.build_leftmost(builder))?;
+
+            leftmost.append(builder.variant(&node).leftmost().clone());
+        }
+
+        self.leftmost = Some(leftmost);
+
+        Ok(())
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn surround(&mut self, injection: &Regex) {
+        assert!(
+            self.automata.is_none(),
+            "Internal error. Rule automata already built.",
+        );
+
+        self.regex.surround(injection);
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn inject(&mut self, injection: &Regex) {
+        assert!(
+            self.automata.is_none(),
+            "Internal error. Rule automata already built.",
+        );
+
+        self.regex.inject(injection);
+    }
+
+    pub(in crate::node) fn build_automata(
+        &mut self,
+        builder: &mut Builder,
+        variant_name: &Ident,
+        synchronization_span: &Option<Span>,
+    ) -> Result<()> {
+        assert!(
+            self.automata.is_none(),
+            "Internal error. Rule automata already built.",
+        );
+
+        let mut automata = self.regex.encode(builder.scope())?;
+
+        automata.merge_captures(builder.scope())?;
+
+        self.variables = Some(automata.variable_map()?);
+
+        self.synchronization = Some(automata.synchronization(
+            variant_name.clone(),
+            synchronization_span.clone().unwrap_or(self.span),
+        ));
+
+        self.automata = Some(automata);
+
+        Ok(())
+    }
+}
diff --git a/work/crates/derive/src/node/builder/variant.rs b/work/crates/derive/src/node/builder/variant.rs
new file mode 100644
index 0000000..e3a05ac
--- /dev/null
+++ b/work/crates/derive/src/node/builder/variant.rs
@@ -0,0 +1,391 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use proc_macro2::{Ident, Span};
+use syn::{spanned::Spanned, AttrStyle, Error, Result, Variant};
+
+use crate::{
+    node::{
+        automata::{synchronization::Synchronization, variables::VariableMap, NodeAutomata},
+        builder::{constructor::Constructor, kind::VariantKind, rule::Rule, Builder},
+        regex::{prefix::Leftmost, Regex},
+    },
+    utils::{PredictableCollection, Set},
+};
+
+pub(in crate::node) struct NodeVariant {
+    name: Ident,
+    kind: VariantKind,
+    rule: Option<Rule>,
+    synchronization: Option<Span>,
+    constructor: Option<Constructor>,
+}
+
+impl Spanned for NodeVariant {
+    #[inline(always)]
+    fn span(&self) -> Span {
+        self.name.span()
+    }
+}
+
+impl<'a> TryFrom<&'a Variant> for NodeVariant {
+    type Error = Error;
+
+    fn try_from(variant: &'a Variant) -> Result<Self> {
+        use VariantKind::*;
+
+        let name = variant.ident.clone();
+
+        let mut kind = Unspecified(variant.span());
+        let mut rule = None;
+        let mut synchronization = None;
+        let mut constructor = None;
+
+        for attribute in &variant.attrs {
+            match attribute.style {
+                AttrStyle::Inner(_) => continue,
+                AttrStyle::Outer => (),
+            }
+
+            let name = match attribute.path.get_ident() {
+                None => continue,
+                Some(name) => name,
+            };
+
+            match name.to_string().as_str() {
+                "root" => {
+                    kind.is_vacant(attribute.span())?;
+                    kind = Root(attribute.span());
+                }
+
+                "comment" => {
+                    kind.is_vacant(attribute.span())?;
+                    kind = Comment(attribute.span());
+                }
+
+                "rule" => {
+                    if rule.is_some() {
+                        return Err(Error::new(attribute.span(), "Duplicate Rule attribute."));
+                    }
+
+                    rule = Some(Rule::try_from(attribute)?);
+                }
+
+                "synchronization" => {
+                    if synchronization.is_some() {
+                        return Err(Error::new(
+                            attribute.span(),
+                            "Duplicate Synchronization attribute.",
+                        ));
+                    }
+
+                    synchronization = Some(attribute.span());
+                }
+
+                "constructor" => {
+                    if constructor.is_some() {
+                        return Err(Error::new(
+                            attribute.span(),
+                            "Duplicate Constructor attribute.",
+                        ));
+                    }
+
+                    constructor = Some(Constructor::try_from(attribute)?);
+                }
+
+                _ => (),
+            }
+        }
+
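+        // Editorial note (assumption, not original text): the collected
+        // attributes are cross-validated below. A #[rule(...)] variant with
+        // no marker becomes a Sentence; #[root] and #[comment] require a
+        // rule; #[synchronization] is only meaningful on Sentence rules; and
+        // a constructor is synthesized from the variant's named fields unless
+        // #[constructor(...)] supplied one explicitly.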
+        let kind = match (kind, &rule) {
+            (Unspecified(..), Some(rule)) => Sentence(rule.span()),
+
+            (kind @ Root(..), Some(..)) => kind,
+
+            (Root(span), None) => {
+                return Err(Error::new(
+                    span,
+                    "Root annotation is not applicable to non-parsable rules.\n\
+                    Associate this variant with #[rule(...)] attribute.",
+                ));
+            }
+
+            (kind @ Comment(..), Some(..)) => kind,
+
+            (Comment(span), None) => {
+                return Err(Error::new(
+                    span,
+                    "Comment annotation is not applicable to non-parsable rules.\n\
+                    Associate this variant with #[rule(...)] attribute.",
+                ));
+            }
+
+            (kind @ Unspecified(..), None) => kind,
+
+            (Sentence(..), _) => unreachable!("Variant kind set to Sentence."),
+        };
+
+        match (&kind, &synchronization) {
+            (Unspecified(..), Some(span)) => {
+                return Err(Error::new(
+                    *span,
+                    "Synchronization annotation is not applicable to non-parsable rules.\n\
+                    Associate this variant with #[rule(...)] attribute.",
+                ));
+            }
+
+            (Root(..), Some(span)) => {
+                return Err(Error::new(
+                    *span,
+                    "Synchronization annotation is not applicable to the Root rule.",
+                ));
+            }
+
+            (Comment(..), Some(span)) => {
+                return Err(Error::new(
+                    *span,
+                    "Synchronization annotation is not applicable to the Comment rule.",
+                ));
+            }
+
+            _ => (),
+        }
+
+        let constructor = match (&kind, constructor) {
+            (Unspecified(..), Some(constructor)) => {
+                return Err(Error::new(
+                    constructor.span(),
+                    "Explicit constructor is not applicable to non-parsable rules.\n\
+                    Associate this variant with rule type.",
+                ));
+            }
+
+            (Unspecified(..), None) => None,
+
+            (_, Some(constructor)) => Some(constructor),
+
+            (_, None) => Some(Constructor::try_from(variant)?),
+        };
+
+        Ok(Self {
+            name,
+            kind,
+            rule,
+            synchronization,
+            constructor,
+        })
+    }
+}
+
+impl NodeVariant {
+    #[inline(always)]
+    pub(in crate::node) fn name(&self) -> &Ident {
+        &self.name
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn kind(&self) -> &VariantKind {
+        &self.kind
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn get_leftmost(&self) -> Option<&Leftmost> {
+        self.rule
+            .as_ref()
+            .expect("Internal error. Missing variant rule.")
+            .get_leftmost()
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn leftmost(&self) -> &Leftmost {
+        self.get_leftmost()
+            .expect("Internal error. Missing variant rule's leftmost.")
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn variables(&self) -> &VariableMap {
+        self.rule
+            .as_ref()
+            .expect("Internal error. Missing variant rule.")
+            .variables()
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn is_global_synchronization(&self) -> bool {
+        self.synchronization.is_some()
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn synchronization(&self) -> &Synchronization {
+        self.rule
+            .as_ref()
+            .expect("Internal error. Missing variant rule.")
+            .synchronization()
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn constructor(&self) -> &Constructor {
+        self.constructor
+            .as_ref()
+            .expect("Internal error. Missing variant constructor.")
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn automata(&self) -> &NodeAutomata {
+        self.rule
+            .as_ref()
+            .expect("Internal error. Missing variant rule.")
+            .automata()
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn inline(&mut self, builder: &Builder) -> Result<()> {
+        match &mut self.rule {
+            None => Ok(()),
+
+            Some(rule) => rule.inline(builder),
+        }
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn check_references(&self, builder: &Builder) -> Result<Set<Ident>> {
+        match &self.rule {
+            None => Ok(Set::empty()),
+
+            Some(rule) => rule.check_references(&self.kind, builder),
+        }
+    }
+
+    #[inline(always)]
+    pub(in crate::node) fn build_leftmost(&mut self, builder: &mut Builder) -> Result<()> {
+        let rule = match &mut self.rule {
+            None => return Ok(()),
+
+            Some(rule) => rule,
+        };
+
+        rule.build_leftmost(builder)
+    }
+
+    pub(in crate::node) fn inject_skip(&mut self, injection: &Regex) {
+        match self.kind {
+            VariantKind::Unspecified(..) | VariantKind::Comment(..) => (),
+
+            VariantKind::Root(..) => {
+                self.rule
+                    .as_mut()
+                    .expect("Internal error. Missing Root rule.")
+                    .surround(injection);
+            }
+
+            VariantKind::Sentence(..) => {
+                self.rule
+                    .as_mut()
+                    .expect("Internal error.
Missing Sentence rule.") + .inject(injection); + } + } + } + + #[inline(always)] + pub(in crate::node) fn build_automata(&mut self, builder: &mut Builder) -> Result<()> { + let rule = match &mut self.rule { + None => return Ok(()), + + Some(rule) => rule, + }; + + let constructor = self + .constructor + .as_ref() + .expect("Internal error. Missing Variant constructor."); + + rule.build_automata(builder, &self.name, &self.synchronization)?; + + match &self.kind { + VariantKind::Root(..) => (), + + _ => { + if rule.automata().accepts_null() { + return Err(Error::new( + rule.span(), + "Variant's rule expression can match empty token sequence.\n\ + Non-root nodes of empty token sequences not allowed.", + )); + } + } + } + + match &self.synchronization { + None => (), + + Some(span) => { + let synchronization = rule.synchronization(); + + if synchronization.open().is_none() { + return Err(Error::new( + *span, + "Synchronization attribute is not applicable to this rule.\nRule's \ + leftmost token set contains more than one token, or the leftmost \ + set refers another rule.", + )); + } + + if synchronization.close().is_none() { + return Err(Error::new( + *span, + "Synchronization attribute is not applicable to this rule.\nRule's \ + rightmost token set contains more than one token, or the rightmost \ + set refers another rule.", + )); + } + + if synchronization.open() == synchronization.close() { + return Err(Error::new( + *span, + "Synchronization attribute is not applicable to this rule.\nRule's \ + leftmost token is equal to the rule's rightmost token.", + )); + } + } + } + + rule.variables().fits(constructor)?; + + Ok(()) + } +} diff --git a/work/crates/derive/src/node/compiler/case.rs b/work/crates/derive/src/node/compiler/case.rs new file mode 100644 index 0000000..099cb13 --- /dev/null +++ b/work/crates/derive/src/node/compiler/case.rs @@ -0,0 +1,64 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. 
// +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Ident; + +use crate::node::{ + builder::kind::VariantKind, + compiler::{function::Function, Compiler}, +}; + +impl<'a, 'b> Function<'a, 'b> { + pub(in crate::node) fn compile_case(compiler: &mut Compiler<'_>, variant_name: &Ident) { + let variant = compiler.builder.variant(variant_name); + + match variant.kind() { + VariantKind::Unspecified(..) => return, + _ => (), + } + + let kind = compiler.kind_of(variant_name); + let function_name = compiler.function_of(variant_name); + + compiler.add_case( + kind, + quote! { + #kind => #function_name(session) + }, + ); + } +} diff --git a/work/crates/derive/src/node/compiler/constructor.rs b/work/crates/derive/src/node/compiler/constructor.rs new file mode 100644 index 0000000..0855f37 --- /dev/null +++ b/work/crates/derive/src/node/compiler/constructor.rs @@ -0,0 +1,94 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::{Ident, TokenStream}; + +use crate::node::{builder::constructor::Constructor, compiler::Compiler}; + +impl Constructor { + pub(in crate::node) fn compile( + &self, + compiler: &Compiler<'_>, + variant_name: &Ident, + ) -> TokenStream { + let constructor_name = self.name(); + let node_name = compiler.builder().node_name(); + + let variables = compiler.builder().variant(variant_name).variables(); + + match self.is_explicit() { + true => { + let parameters = self + .parameters() + .iter() + .map(|parameter| variables.get(parameter.name()).read()); + + quote! 
{ + #node_name::#constructor_name( + #( #parameters ),* + ) + } + } + + false => { + let parameters = self.parameters().iter().map(|parameter| { + let key = parameter.name(); + + let value = match parameter.is_default() { + false => variables.get(key).read(), + + true => { + let default = parameter.default_value(); + + quote! { #default } + } + }; + + quote! { + #key: #value, + } + }); + + quote! { + #node_name::#constructor_name { + #( #parameters )* + } + } + } + } + } +} diff --git a/work/crates/derive/src/node/compiler/delimiters.rs b/work/crates/derive/src/node/compiler/delimiters.rs new file mode 100644 index 0000000..4e10eb2 --- /dev/null +++ b/work/crates/derive/src/node/compiler/delimiters.rs @@ -0,0 +1,146 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Ident; + +use crate::{ + node::builder::{kind::VariantKind, variant::NodeVariant, Builder}, + utils::{Map, PredictableCollection}, +}; + +#[derive(Clone, Default)] +pub(in crate::node) struct PanicDelimiters<'a> { + single: Option<&'a Ident>, + local: Option<(&'a Ident, &'a Ident)>, + global: Map<&'a Ident, SynchronizationAction>, +} + +impl<'a> PanicDelimiters<'a> { + #[inline(always)] + pub(in crate::node) fn single(&self) -> Option<&'a Ident> { + self.single + } + + #[inline(always)] + pub(in crate::node) fn local(&self) -> Option<(&'a Ident, &'a Ident)> { + self.local + } + + #[inline(always)] + pub(in crate::node) fn global(&self) -> &Map<&'a Ident, SynchronizationAction> { + &self.global + } + + pub(in crate::node) fn new(variant: &'a NodeVariant, builder: &'a Builder) -> Self { + let mut single; + let mut local; + + match variant.kind() { + VariantKind::Unspecified(..) | VariantKind::Root(..) => { + single = None; + local = None; + } + VariantKind::Comment(..) | VariantKind::Sentence(..) 
=> { + let variant_local = variant.synchronization(); + + single = variant_local.close(); + + local = match (variant_local.open(), variant_local.close()) { + (Some(open), Some(close)) => Some((open, close)), + _ => None, + }; + } + }; + + let mut global; + + match variant.kind() { + VariantKind::Unspecified(..) | VariantKind::Comment(..) => { + global = Map::empty(); + local = None; + } + + VariantKind::Root(..) | VariantKind::Sentence(..) => { + let synchronization_map = builder.synchronization(); + + let mut local_found = false; + let mut states = 1..; + global = Map::with_capacity(synchronization_map.len() * 2); + + for (from, to) in synchronization_map { + if Some((from, to)) == local { + local_found = true; + } + + if single == Some(from) || single == Some(to) { + single = None; + } + + let state = states + .next() + .expect("Internal error. State generator exceeded."); + + let outer = match &single { + Some(delimiter) if *delimiter == from => false, + + _ => true, + }; + + let _ = global.insert(from, SynchronizationAction::Push { state, outer }); + + let _ = global.insert(to, SynchronizationAction::Pop { state, outer: true }); + } + + if !local_found { + local = None; + } + } + }; + + Self { + single, + local, + global, + } + } +} + +#[derive(Clone)] +pub(in crate::node) enum SynchronizationAction { + Push { state: usize, outer: bool }, + Pop { state: usize, outer: bool }, +} diff --git a/work/crates/derive/src/node/compiler/function.rs b/work/crates/derive/src/node/compiler/function.rs new file mode 100644 index 0000000..835965e --- /dev/null +++ b/work/crates/derive/src/node/compiler/function.rs @@ -0,0 +1,863 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved.
// +//////////////////////////////////////////////////////////////////////////////// + +use std::{collections::VecDeque, ops::RangeFrom}; + +use proc_macro2::{Ident, TokenStream}; + +use crate::{ + node::{ + automata::{scope::SyntaxState, variables::VariableMap, NodeAutomata}, + builder::kind::VariantKind, + compiler::{ + delimiters::{PanicDelimiters, SynchronizationAction}, + inserts::InsertRecovery, + transitions::{TransitionsVector, TransitionsVectorImpl}, + Compiler, + }, + regex::terminal::Terminal, + }, + utils::{Map, PredictableCollection, Set, SetImpl}, +}; + +pub(in crate::node) struct Function<'a, 'b> { + exclude_skips: bool, + synchronization_context: bool, + context_name: &'a str, + compiler: &'a Compiler<'b>, + automata: &'a NodeAutomata, + delimiters: PanicDelimiters<'a>, + variables: &'a VariableMap, + pending: VecDeque<&'a SyntaxState>, + visited: Set<&'a SyntaxState>, + transitions: Vec<TokenStream>, + state_map: Map<&'a SyntaxState, usize>, + state_generator: RangeFrom<usize>, +} + +impl<'a, 'b> Function<'a, 'b> { + pub(in crate::node) fn compile_skip_function( + compiler: &'a mut Compiler<'b>, + ) -> Option<TokenStream> { + let core = compiler.facade().core_crate(); + let unreachable = compiler.facade().unreachable(); + + let node_type = compiler.node_type(); + let code_lifetime = compiler.generics().code_lifetime(); + let outer_lifetime = compiler.generics().outer_lifetime(); + let function_impl_generics = compiler.generics().function_impl_generics(); + let function_where_clause = compiler.generics().function_where_clause(); + + let variables = VariableMap::default(); + let delimiters = PanicDelimiters::default(); + + let transitions = { + let automata = match compiler.builder().skip_automata() { + None => return None, + Some(automata) => automata, + }; + + let mut function = Function { + exclude_skips: false, + synchronization_context: true, + context_name: "Skip", + compiler, + automata, + delimiters, + variables: &variables, + pending: VecDeque::from([&automata.start]), + visited: Set::new([&automata.start]), + transitions: Vec::with_capacity(automata.transitions.len() * 3), + state_map: Map::empty(), + state_generator: 1.., + }; + + loop { + if let Some(state) = function.pending.pop_front() { + function.handle_state(state); + + continue; + } + + break; + } + + function.transitions + }; + + Some(quote! { + #[inline] + #[allow(unused_mut)] + #[allow(unused_labels)] + #[allow(unused_assignments)] + #[allow(unused_variables)] + fn skip #function_impl_generics( + session: &mut impl #core::syntax::SyntaxSession< + #code_lifetime, + Node = #node_type, + >, + ) + #function_where_clause + { + let mut state = 1usize; + let mut start = #core::lexis::TokenCursor::site_ref(session, 0); + + #outer_lifetime: loop { + match (state, #core::lexis::TokenCursor::token(session, 0)) { + #( #transitions )* + + _ => #unreachable("Unknown state."), + } + } + } + }) + } + + pub(in crate::node) fn compile_variant_function( + compiler: &'a mut Compiler<'b>, + variant_name: &'a Ident, + ) { + let core = compiler.facade().core_crate(); + let option = compiler.facade().option(); + let convert = compiler.facade().convert(); + let unreachable = compiler.facade().unreachable(); + + let variant = compiler.builder().variant(variant_name); + + match variant.kind() { + VariantKind::Unspecified(..)
=> return, + _ => (), + } + + let kind = compiler.kind_of(variant_name); + let function_name = compiler.function_of(variant_name); + let node_type = compiler.node_type(); + let code_lifetime = compiler.generics().code_lifetime(); + let outer_lifetime = compiler.generics().outer_lifetime(); + let function_impl_generics = compiler.generics().function_impl_generics(); + let function_where_clause = compiler.generics().function_where_clause(); + let variables = variant.variables(); + + let init_variables = variables.init(compiler); + let result = variant.constructor().compile(compiler, variant_name); + + let context_name = variant_name.to_string(); + + let transitions = { + let exclude_skips = match variant.kind() { + VariantKind::Comment(..) => false, + _ => true, + }; + + let synchronization_context = variant.is_global_synchronization(); + + let automata = variant.automata(); + + let delimiters = PanicDelimiters::new(variant, compiler.builder()); + + let mut function = Function { + exclude_skips, + synchronization_context, + context_name: &context_name, + compiler, + automata, + delimiters, + variables, + pending: VecDeque::from([&automata.start]), + visited: Set::empty(), + transitions: Vec::with_capacity(automata.transitions.len() * 3), + state_map: Map::empty(), + state_generator: 1.., + }; + + loop { + if let Some(state) = function.pending.pop_front() { + function.handle_state(state); + + continue; + } + + break; + } + + function.transitions + }; + + let end_of_input_check = match variant.kind() { + VariantKind::Root(..) => { + let error_type = compiler.builder().error_type(); + + Some(quote! { + if let #option::Some(_) = #core::lexis::TokenCursor::token(session, 0) { + start = #core::lexis::TokenCursor::site_ref(session, 0); + let end = #core::lexis::TokenCursor::end_site_ref(session); + + let _ = #core::syntax::SyntaxSession::error( + session, + <#error_type as #convert::<#core::syntax::SyntaxError>>::from( + #core::syntax::SyntaxError::UnexpectedEndOfInput { + span: start..end, + context: #context_name, + } + ), + ); + } + }) + } + + _ => None, + }; + + let body = quote! { + #[allow(non_snake_case)] + #[allow(unused_mut)] + #[allow(unused_labels)] + #[allow(unused_assignments)] + #[allow(unused_variables)] + fn #function_name #function_impl_generics( + session: &mut impl #core::syntax::SyntaxSession< + #code_lifetime, + Node = #node_type, + >, + ) -> #node_type + #function_where_clause + { + let mut state = 1usize; + let mut start = #core::lexis::TokenCursor::site_ref(session, 0); + + #init_variables + + #outer_lifetime: loop { + match (state, #core::lexis::TokenCursor::token(session, 0)) { + #( #transitions )* + + _ => #unreachable("Unknown state."), + } + } + + #end_of_input_check + + #result + } + }; + + compiler.add_function(kind, body); + } + + fn handle_state(&mut self, state: &'a SyntaxState) { + let core = self.compiler.facade().core_crate(); + let option = self.compiler.facade().option(); + + let from_name = self.name_of(state); + let token_type = self.compiler.builder().token_type(); + let mut outgoing = TransitionsVector::outgoing(self.automata, state); + + for (_, through, to) in &outgoing { + let is_final = !self.has_outgoing(to); + let is_looping = state == to; + + let (finalize, set_state) = match (is_final, is_looping) { + (true, _) => (Some(quote! { break; }), None), + + (false, true) => (None, None), + + (false, false) => { + let to_name = self.name_of(to); + + (None, Some(quote! 
{ state = #to_name; })) + } + }; + + let set_start = match is_final || through.is_skip(self.compiler.builder()) { + true => None, + + false => Some(quote! { + start = #core::lexis::TokenCursor::site_ref(session, 0); + }), + }; + + match through { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { name, capture } => { + let write = match capture { + None => None, + Some(name) => Some(self.variables.get(name).write( + self.compiler.facade(), + quote! { + #core::lexis::TokenCursor::token_ref(session, 0) + }, + )), + }; + + self.transitions.push(quote! { + (#from_name, #option::Some(#token_type::#name { .. })) => { + #set_state + #write + let _ = #core::lexis::TokenCursor::advance(session); + #set_start + #finalize + } + }); + } + + Terminal::Node { name, capture } => { + let kind = self.compiler.kind_of(name); + + let descend = match capture { + None => { + quote! { + let _ = #core::syntax::SyntaxSession::descend(session, &#kind); + } + } + + Some(name) => self.variables.get(name).write( + self.compiler.facade(), + quote! { + #core::syntax::SyntaxSession::descend(session, &#kind) + }, + ), + }; + + let leftmost = self.compiler.builder().variant(name).leftmost(); + + for token in leftmost.tokens() { + self.transitions.push(quote! { + (#from_name, #option::Some(#token_type::#token { .. })) => { + #set_state + #descend + #set_start + #finalize + } + }); + } + } + } + + if !self.visited.contains(to) { + let _ = self.visited.insert(to); + + if !is_final { + self.pending.push_back(to); + } + } + } + + match self.automata.finish.contains(state) { + true => { + self.transitions.push(quote! { + (#from_name, _) => { + break; + } + }); + } + + false => { + if self.exclude_skips { + outgoing = outgoing.filter_skip(self.compiler.builder()); + } + + self.insert_recover(state, &outgoing); + self.panic_recovery(state, &outgoing); + } + } + } + + fn insert_recover(&mut self, state: &'a SyntaxState, outgoing: &TransitionsVector<'a>) { + let core = self.compiler.facade().core_crate(); + let option = self.compiler.facade().option(); + let convert = self.compiler.facade().convert(); + + let token_type = self.compiler.builder().token_type(); + let error_type = self.compiler.builder().error_type(); + let context_name = self.context_name; + + let from_name = self.name_of(state); + + let recovery = InsertRecovery::prepare(self.compiler.builder(), self.automata, &outgoing); + + for insert in recovery { + let error = match insert.expected_terminal() { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { name, .. } => { + let token = name.to_string(); + + quote! { + let _ = #core::syntax::SyntaxSession::error( + session, + <#error_type as #convert::<#core::syntax::SyntaxError>>::from( + #core::syntax::SyntaxError::MissingToken { + span: start..start, + context: #context_name, + token: #token, + } + ), + ); + } + } + + Terminal::Node { name, .. } => { + let rule = name.to_string(); + + quote! { + let _ = #core::syntax::SyntaxSession::error( + session, + <#error_type as #convert::<#core::syntax::SyntaxError>>::from( + #core::syntax::SyntaxError::MissingRule { + span: start..start, + context: #context_name, + rule: #rule, + } + ), + ); + } + } + }; + + let is_final = !self.has_outgoing(insert.destination_state()); + let is_looping = state == insert.destination_state(); + + let (finalize, set_state) = match (is_final, is_looping) { + (true, _) => (Some(quote! 
{ break; }), None), + + (false, true) => (None, None), + + (false, false) => { + let destination_name = self.name_of(insert.destination_state()); + + (None, Some(quote! { state = #destination_name; })) + } + }; + + let set_start = match is_final + || insert + .destination_terminal() + .is_skip(self.compiler.builder()) + { + true => None, + + false => Some(quote! { + start = #core::lexis::TokenCursor::site_ref(session, 0); + }), + }; + + let insertion = match insert.expected_terminal().capture() { + None => None, + + Some(capture) => self + .variables + .get(capture) + .insert(self.compiler.facade(), insert.expected_terminal()), + }; + + let reading = match insert.destination_terminal() { + Terminal::Null => unreachable!("Automata with null transition"), + + Terminal::Token { capture, .. } => { + let write = match capture { + None => None, + Some(name) => Some(self.variables.get(name).write( + self.compiler.facade(), + quote! { + #core::lexis::TokenCursor::token_ref(session, 0) + }, + )), + }; + + quote! { + #write + let _ = #core::lexis::TokenCursor::advance(session); + } + } + + Terminal::Node { name, capture } => { + let kind = self.compiler.kind_of(name); + + match capture { + None => { + quote! { + let _ = #core::syntax::SyntaxSession::descend(session, &#kind); + } + } + + Some(name) => self.variables.get(name).write( + self.compiler.facade(), + quote! { + #core::syntax::SyntaxSession::descend(session, &#kind) + }, + ), + } + } + }; + + let matching = insert.matching(); + + self.transitions.push(quote! { + (#from_name, #option::Some(#token_type::#matching { .. })) => { + #error + #insertion + #set_state + #reading + #set_start + #finalize + } + }); + } + } + + fn panic_recovery(&mut self, state: &'a SyntaxState, outgoing: &TransitionsVector<'a>) { + let core = self.compiler.facade().core_crate(); + let option = self.compiler.facade().option(); + let vec = self.compiler.facade().vec(); + let convert = self.compiler.facade().convert(); + + let token_type = self.compiler.builder().token_type(); + let error_type = self.compiler.builder().error_type(); + let outer_lifetime = self.compiler.generics().outer_lifetime(); + let context_name = self.context_name; + + let from_name = self.name_of(state); + + let error = { + let (expected_tokens, expected_rules) = outgoing.split_terminals(); + + let expected_tokens_len = expected_tokens.len(); + let expected_rules_len = expected_rules.len(); + + quote! { + let _ = #core::syntax::SyntaxSession::error( + session, + <#error_type as #convert::<#core::syntax::SyntaxError>>::from( + #core::syntax::SyntaxError::Mismatch { + span: start..end, + context: #context_name, + expected_tokens: <#vec<&'static str> as #convert::<[&'static str; #expected_tokens_len]>>::from( + [#( #expected_tokens ),*] + ), + expected_rules: <#vec<&'static str> as #convert::<[&'static str; #expected_rules_len]>>::from( + [#( #expected_rules ),*] + ), + } + ), + ); + } + }; + + let mut panic_transitions = + Vec::with_capacity(outgoing.len() + self.delimiters.global().len() + 2); + + for (_, through, _) in outgoing { + match through { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { name, .. } => { + panic_transitions.push(self.handle_panic_expected( + &self.delimiters, + &error, + name, + )); + } + + Terminal::Node { name, .. 
} => { + let leftmost = self.compiler.builder().variant(name).leftmost(); + + for token in leftmost.tokens() { + panic_transitions.push(self.handle_panic_expected( + &self.delimiters, + &error, + token, + )); + } + } + } + } + + match self.delimiters.single() { + None => (), + + Some(delimiter) => match self.delimiters.global().is_empty() { + true => panic_transitions.push(quote! { + #option::Some(#token_type::#delimiter { .. }) => { + let _ = #core::lexis::TokenCursor::advance(session); + end = #core::lexis::TokenCursor::site_ref(session, 0); + #error + break #outer_lifetime; + } + }), + + false => panic_transitions.push(quote! { + #option::Some(#token_type::#delimiter { .. }) => { + match #vec::is_empty(&synchronization_stack) { + false => { + let _ = #core::lexis::TokenCursor::advance(session); + end = #core::lexis::TokenCursor::site_ref(session, 0); + } + + true => { + let _ = #core::lexis::TokenCursor::advance(session); + end = #core::lexis::TokenCursor::site_ref(session, 0); + #error + break #outer_lifetime; + } + } + } + }), + }, + } + + for (token, action) in self.delimiters.global() { + match action { + SynchronizationAction::Push { state, outer } if *outer => { + panic_transitions.push(quote! { + #option::Some(#token_type::#token { .. }) => { + #vec::push(&mut synchronization_stack, #state); + let _ = #core::lexis::TokenCursor::advance(session); + end = #core::lexis::TokenCursor::site_ref(session, 0); + } + }); + } + + SynchronizationAction::Pop { state, outer } if *outer => { + let synchronization = + match (self.synchronization_context, self.delimiters.local()) { + (true, Some((open, close))) if close != *token => { + quote! { + let mut balance = 1usize; + + for lookahead in 1usize.. { + match #core::lexis::TokenCursor::token(session, lookahead) { + #option::Some(#token_type::#open { .. }) => { + balance += 1usize; + } + + #option::Some(#token_type::#close { .. }) => { + balance -= 1usize; + + if balance == 0usize { + break; + } + } + + #option::Some(_) => (), + + #option::None => break, + } + } + + match balance { + 0usize => { + let _ = #core::lexis::TokenCursor::advance(session); + end = #core::lexis::TokenCursor::site_ref(session, 0); + + break; + } + _ => { + #error + break #outer_lifetime; + } + } + } + } + + (true, _) => { + quote! { + let _ = #core::lexis::TokenCursor::advance(session); + end = #core::lexis::TokenCursor::site_ref(session, 0); + + break; + } + } + + (false, _) => quote! { + #error + break #outer_lifetime; + }, + }; + + panic_transitions.push(quote! { + #option::Some(#token_type::#token { .. }) => { + loop { + match synchronization_stack.pop() { + #option::None => { + #synchronization + }, + + #option::Some(top) => { + if top != #state { + continue; + } + }, + } + } + } + }); + } + + _ => (), + } + } + + panic_transitions.push(quote! { + #option::Some(_) => { + let _ = #core::lexis::TokenCursor::advance(session); + end = #core::lexis::TokenCursor::site_ref(session, 0); + } + }); + + panic_transitions.push(quote! { + #option::None => { + #error + break #outer_lifetime; + } + }); + + let init_synchronization = match self.delimiters.global().is_empty() { + true => None, + + false => Some(quote! { + let mut synchronization_stack = #vec::<usize>::new(); + }), + }; + + let skip = match self.exclude_skips { + false => None, + + true => Some(quote! { + skip(session); + }), + }; + + self.transitions.push(quote!
{ + (#from_name, _) => { + start = #core::lexis::TokenCursor::site_ref(session, 0); + let mut end = start; + + #init_synchronization + + loop { + match #core::lexis::TokenCursor::token(session, 0) { + #( #panic_transitions )* + } + + #skip + } + } + }); + } + + fn handle_panic_expected( + &self, + delimiters: &PanicDelimiters, + error: &TokenStream, + expected: &Ident, + ) -> TokenStream { + let core = self.compiler.facade().core_crate(); + let option = self.compiler.facade().option(); + let vec = self.compiler.facade().vec(); + + let token_type = self.compiler.builder().token_type(); + + match delimiters.global().get(expected) { + None => { + quote! { + #option::Some(#token_type::#expected { .. }) => { + #error + break; + } + } + } + + Some(SynchronizationAction::Push { state, .. }) => { + quote! { + #option::Some(#token_type::#expected { .. }) => { + match #vec::is_empty(&synchronization_stack) { + false => { + #vec::push(&mut synchronization_stack, #state); + let _ = #core::lexis::TokenCursor::advance(session); + end = #core::lexis::TokenCursor::site_ref(session, 0); + }, + + true => { + #error + break; + } + } + } + } + } + + Some(SynchronizationAction::Pop { state, .. }) => { + quote! { + #option::Some(#token_type::#expected { .. }) => { + match #vec::pop(&mut synchronization_stack) { + #option::Some(top) if top == #state => { + let _ = #core::lexis::TokenCursor::advance(session); + end = #core::lexis::TokenCursor::site_ref(session, 0); + }, + + _ => { + #error + break; + } + } + } + } + } + } + } + + #[inline] + fn has_outgoing(&self, state: &SyntaxState) -> bool { + for (from, _, _) in &self.automata.transitions { + if from == state { + return true; + } + } + + return false; + } + + #[inline(always)] + fn name_of(&mut self, state: &'a SyntaxState) -> usize { + *self.state_map.entry(state).or_insert_with(|| { + self.state_generator + .next() + .expect("Internal error. State generator exceeded.") + }) + } +} diff --git a/work/crates/derive/src/node/compiler/generics.rs b/work/crates/derive/src/node/compiler/generics.rs new file mode 100644 index 0000000..142159e --- /dev/null +++ b/work/crates/derive/src/node/compiler/generics.rs @@ -0,0 +1,172 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. 
// +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Span; +use syn::{ + punctuated::Punctuated, + GenericParam, + Generics, + ImplGenerics, + Lifetime, + LifetimeDef, + TypeGenerics, + WhereClause, +}; + +pub(in crate::node) struct GenericsSplit { + node_generics: Generics, + function_generics: Generics, + code_lifetime: Lifetime, + outer_lifetime: Lifetime, +} + +impl GenericsSplit { + #[inline(always)] + pub(in crate::node) fn node_impl_generics(&self) -> ImplGenerics<'_> { + let (impl_generics, _, _) = self.node_generics.split_for_impl(); + + impl_generics + } + + #[inline(always)] + pub(in crate::node) fn node_type_generics(&self) -> TypeGenerics<'_> { + let (_, type_generics, _) = self.node_generics.split_for_impl(); + + type_generics + } + + #[inline(always)] + pub(in crate::node) fn node_where_clause(&self) -> Option<&WhereClause> { + let (_, _, where_clause) = self.node_generics.split_for_impl(); + + where_clause + } + #[inline(always)] + pub(in crate::node) fn function_impl_generics(&self) -> ImplGenerics<'_> { + let (impl_generics, _, _) = self.function_generics.split_for_impl(); + + impl_generics + } + + #[inline(always)] + #[allow(unused)] + pub(in crate::node) fn function_type_generics(&self) -> TypeGenerics<'_> { + let (_, type_generics, _) = self.function_generics.split_for_impl(); + + type_generics + } + + #[inline(always)] + pub(in crate::node) fn function_where_clause(&self) -> Option<&WhereClause> { + let (_, _, where_clause) = self.function_generics.split_for_impl(); + + where_clause + } + + #[inline(always)] + pub(in crate::node) fn code_lifetime(&self) -> &Lifetime { + &self.code_lifetime + } + + #[inline(always)] + pub(in crate::node) fn outer_lifetime(&self) -> &Lifetime { + &self.outer_lifetime + } +} + +impl GenericsExt for Generics { + fn to_split(&self) -> GenericsSplit { + let node_generics = self.clone(); + + let code_lifetime = { + let mut candidate = String::from("'code"); + + 'outer: loop { + for lifetime_def in self.lifetimes() { + if candidate == lifetime_def.lifetime.ident.to_string() { + candidate.push('_'); + continue 'outer; + } + } + + break; + } + + Lifetime::new(candidate.as_str(), Span::call_site()) + }; + + let outer_lifetime = { + let mut candidate = String::from("'outer"); + + 'outer: loop { + for lifetime_def in self.lifetimes() { + if candidate == lifetime_def.lifetime.ident.to_string() { + candidate.push('_'); + continue 'outer; + } + } + + break; + } + + Lifetime::new(candidate.as_str(), Span::call_site()) + }; + + let mut function_generics = self.clone(); + + function_generics.params.insert( + 0, + GenericParam::Lifetime(LifetimeDef { + attrs: Vec::new(), + lifetime: code_lifetime.clone(), + colon_token: None, + bounds: Punctuated::new(), + }), + ); + + GenericsSplit { + node_generics, + function_generics, + code_lifetime, + outer_lifetime, + } + } +} + +pub(in crate::node) trait GenericsExt { + fn to_split(&self) -> GenericsSplit; +} diff --git a/work/crates/derive/src/node/compiler/inserts.rs b/work/crates/derive/src/node/compiler/inserts.rs new file mode 100644 
index 0000000..26c8ddc --- /dev/null +++ b/work/crates/derive/src/node/compiler/inserts.rs @@ -0,0 +1,187 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use std::vec::IntoIter; + +use proc_macro2::Ident; + +use crate::{ + node::{ + automata::{scope::SyntaxState, NodeAutomata}, + builder::Builder, + compiler::transitions::{TransitionsVector, TransitionsVectorImpl}, + regex::terminal::Terminal, + }, + utils::{PredictableCollection, Set}, +}; + +pub(in crate::node) struct Insert<'a> { + matching: &'a Ident, + expected_terminal: &'a Terminal, + destination_terminal: &'a Terminal, + destination_state: &'a SyntaxState, +} + +impl<'a> Insert<'a> { + #[inline(always)] + pub(in crate::node) fn matching(&self) -> &'a Ident { + self.matching + } + + #[inline(always)] + pub(in crate::node) fn expected_terminal(&self) -> &'a Terminal { + self.expected_terminal + } + + #[inline(always)] + pub(in crate::node) fn destination_terminal(&self) -> &'a Terminal { + self.destination_terminal + } + + #[inline(always)] + pub(in crate::node) fn destination_state(&self) -> &'a SyntaxState { + self.destination_state + } +} + +pub(in crate::node) struct InsertRecovery<'a> { + forbidden: Set<&'a Ident>, + inserts: Vec<Insert<'a>>, +} + +impl<'a> IntoIterator for InsertRecovery<'a> { + type Item = Insert<'a>; + type IntoIter = IntoIter<Insert<'a>>; + + #[inline(always)] + fn into_iter(self) -> Self::IntoIter { + self.inserts.into_iter() + } +} + +impl<'a> InsertRecovery<'a> { + pub(in crate::node) fn prepare( + builder: &'a Builder, + automata: &'a NodeAutomata, + outgoing: &TransitionsVector<'a>, + ) -> Self { + let mut recovery = Self { + forbidden: Set::with_capacity(outgoing.len()), + inserts: Vec::new(), + }; + + for (_, through, _) in outgoing { + match through { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { name, .. } => { + let _ = recovery.forbidden.insert(name); + } + + Terminal::Node { name, .. } => { + let leftmost = builder.variant(name).leftmost(); + + for token in leftmost.tokens() { + let _ = recovery.forbidden.insert(token); + } + } + } + } + + for (_, expected_terminal, expected_state) in outgoing { + let destination = + TransitionsVector::outgoing(automata, expected_state).filter_skip(builder); + + for (_, destination_terminal, destination_state) in destination { + match destination_terminal { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { name: matching, .. } => { + if !recovery.forbid(matching) { + recovery.inserts.push(Insert { + matching, + expected_terminal, + destination_terminal, + destination_state, + }); + } + } + + Terminal::Node { name, ..
} => { + let leftmost = builder.variant(name).leftmost(); + + for matching in leftmost.tokens() { + if !recovery.forbid(matching) { + recovery.inserts.push(Insert { + matching, + expected_terminal, + destination_terminal, + destination_state, + }); + } + } + } + } + } + } + + recovery + } + + fn forbid(&mut self, matching: &'a Ident) -> bool { + if self.forbidden.contains(matching) { + return true; + } + + let mut found = false; + + self.inserts.retain(|insert| { + if insert.matching == matching { + found = true; + return false; + } + + true + }); + + if found { + let _ = self.forbidden.insert(matching); + } + + found + } +} diff --git a/work/crates/derive/src/node/compiler/mod.rs b/work/crates/derive/src/node/compiler/mod.rs new file mode 100644 index 0000000..184e935 --- /dev/null +++ b/work/crates/derive/src/node/compiler/mod.rs @@ -0,0 +1,228 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +pub(in crate::node) mod case; +pub(in crate::node) mod constructor; +pub(in crate::node) mod delimiters; +pub(in crate::node) mod function; +pub(in crate::node) mod generics; +pub(in crate::node) mod inserts; +pub(in crate::node) mod transitions; +pub(in crate::node) mod variables; + +use proc_macro2::{Ident, TokenStream}; + +use crate::{ + node::{ + builder::{kind::VariantKind, Builder}, + compiler::{ + function::Function, + generics::{GenericsExt, GenericsSplit}, + }, + }, + utils::{Facade, Map, PredictableCollection}, +}; + +pub(in crate::node) struct Compiler<'a> { + facade: Facade, + builder: &'a Builder, + node_type: TokenStream, + generics: GenericsSplit, + kind_map: Map<&'a Ident, usize>, + cases: Map<usize, TokenStream>, + functions: Map<usize, TokenStream>, +} + +impl<'a> Compiler<'a> { + pub(in crate::node) fn compile(builder: &'a Builder) -> TokenStream { + let node_name = builder.node_name(); + let token_type = builder.token_type(); + let error_type = builder.error_type(); + + let generics = builder.generics().to_split(); + + let node_type = { + let node_type_generics = generics.node_type_generics(); + let turbofish = node_type_generics.as_turbofish(); + + quote! { + #node_name #turbofish + } + }; + + let variants_count = builder.variants_count(); + + let kind_map = { + let mut kind = 0; + + builder + .into_iter() + .filter_map(|name| { + let variant = builder.variant(name); + + match variant.kind() { + VariantKind::Unspecified(..) => None, + + VariantKind::Root(..) => Some((name, 0)), + + VariantKind::Comment(..) | VariantKind::Sentence(..) => { + kind += 1; + + Some((name, kind)) + } + } + }) + .collect() + }; + + let mut compiler = Compiler { + facade: Facade::new(), + builder, + node_type, + generics, + kind_map, + cases: Map::with_capacity(variants_count), + functions: Map::with_capacity(variants_count), + }; + + for variant_name in builder { + Function::compile_case(&mut compiler, variant_name); + Function::compile_variant_function(&mut compiler, variant_name); + } + + let skip = Function::compile_skip_function(&mut compiler); + + let node_impl_generics = compiler.generics.node_impl_generics(); + let node_type_generics = compiler.generics.node_type_generics(); + let node_where_clause = compiler.generics.node_where_clause(); + let code_lifetime = compiler.generics.code_lifetime(); + + let cases = { + let mut cases = compiler.cases.into_iter().collect::<Vec<_>>(); + + cases.sort_by(|a, b| a.0.cmp(&b.0)); + + cases.into_iter().map(|(_, body)| body) + }; + + let functions = { + let mut functions = compiler.functions.into_iter().collect::<Vec<_>>(); + + functions.sort_by(|a, b| a.0.cmp(&b.0)); + + functions.into_iter().map(|(_, body)| body) + }; + + let core = compiler.facade.core_crate(); + let unimplemented = compiler.facade.unimplemented(); + + quote!
{ + impl #node_impl_generics #core::syntax::Node for #node_name #node_type_generics + #node_where_clause + { + type Token = #token_type; + type Error = #error_type; + + #[inline(always)] + fn new<#code_lifetime>( + rule: #core::syntax::SyntaxRule, + session: &mut impl #core::syntax::SyntaxSession<#code_lifetime, Node = Self>, + ) -> Self + { + #( #functions )* + + #skip + + match rule { + #( #cases, )* + + other => #unimplemented("Unsupported rule {}.", other), + } + } + } + } + } + + #[inline(always)] + pub(in crate::node) fn facade(&self) -> &Facade { + &self.facade + } + + #[inline(always)] + pub(in crate::node) fn builder(&self) -> &Builder { + &self.builder + } + + #[inline(always)] + pub(in crate::node) fn generics(&self) -> &GenericsSplit { + &self.generics + } + + #[inline(always)] + pub(in crate::node) fn node_type(&self) -> &TokenStream { + &self.node_type + } + + #[inline(always)] + pub(in crate::node) fn add_case(&mut self, kind: usize, body: TokenStream) { + assert!( + self.cases.insert(kind, body).is_none(), + "internal error. Duplicate case.", + ); + } + + #[inline(always)] + pub(in crate::node) fn add_function(&mut self, kind: usize, body: TokenStream) { + assert!( + self.functions.insert(kind, body).is_none(), + "internal error. Duplicate function.", + ); + } + + #[inline(always)] + pub(in crate::node) fn kind_of(&self, variant_name: &Ident) -> usize { + *self + .kind_map + .get(variant_name) + .expect("Internal error. Missing variant kind.") + } + + #[inline(always)] + pub(in crate::node) fn function_of(&self, variant_name: &Ident) -> Ident { + Ident::new(&format!("parse_{}", variant_name), variant_name.span()) + } +} diff --git a/work/crates/derive/src/node/compiler/transitions.rs b/work/crates/derive/src/node/compiler/transitions.rs new file mode 100644 index 0000000..034868e --- /dev/null +++ b/work/crates/derive/src/node/compiler/transitions.rs @@ -0,0 +1,120 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. 
// +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::cmp::Ordering; + +use crate::node::{ + automata::{scope::SyntaxState, NodeAutomata}, + builder::{kind::VariantKind, Builder}, + regex::terminal::Terminal, +}; + +pub(in crate::node) type TransitionsVector<'a> = Vec<&'a (SyntaxState, Terminal, SyntaxState)>; + +impl<'a> TransitionsVectorImpl<'a> for TransitionsVector<'a> { + fn outgoing(automata: &'a NodeAutomata, state: &'a SyntaxState) -> Self { + let mut outgoing = automata + .transitions + .iter() + .filter(|(from, _, _)| from == state) + .collect::<Vec<_>>(); + + outgoing.sort_by(|a, b| { + if a.2 < b.2 { + return Ordering::Less; + } + + if a.2 > b.2 { + return Ordering::Greater; + } + + if a.1 < b.1 { + return Ordering::Less; + } + + if a.1 > b.1 { + return Ordering::Greater; + } + + Ordering::Equal + }); + + outgoing + } + + fn filter_skip(self, builder: &Builder) -> Self { + let skip_tokens = builder.skip_leftmost().tokens(); + + self.into_iter() + .filter(|(_, through, _)| match through { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { name, .. } => !skip_tokens.contains(name), + + Terminal::Node { name, .. } => match builder.variant(name).kind() { + VariantKind::Comment(..) => false, + _ => true, + }, + }) + .collect() + } + + fn split_terminals(&self) -> (Vec<String>, Vec<String>) { + let mut tokens = Vec::with_capacity(self.len()); + let mut nodes = Vec::with_capacity(self.len()); + + for (_, through, _) in self { + match through { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { name, .. } => tokens.push(name.to_string()), + + Terminal::Node { name, .. } => nodes.push(name.to_string()), + } + } + + (tokens, nodes) + } +} + +pub(in crate::node) trait TransitionsVectorImpl<'a> { + fn outgoing(automata: &'a NodeAutomata, state: &'a SyntaxState) -> Self; + + fn filter_skip(self, builder: &Builder) -> Self; + + fn split_terminals(&self) -> (Vec<String>, Vec<String>); +} diff --git a/work/crates/derive/src/node/compiler/variables.rs b/work/crates/derive/src/node/compiler/variables.rs new file mode 100644 index 0000000..1c18878 --- /dev/null +++ b/work/crates/derive/src/node/compiler/variables.rs @@ -0,0 +1,160 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository).
// +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::{Ident, TokenStream}; + +use crate::{ + node::{ + automata::variables::{VariableKind, VariableMap, VariableMeta, VariableRepetition}, + compiler::Compiler, + regex::terminal::Terminal, + }, + utils::Facade, +}; + +impl VariableMap { + pub(in crate::node) fn init(&self, compiler: &Compiler<'_>) -> TokenStream { + let variables = self.into_iter().map(|name| { + let meta = self.get(name); + let capture_name = meta.capture_name(); + + let core = compiler.facade().core_crate(); + + match (&meta.kind(), &meta.repetition()) { + ( + VariableKind::TokenRef, + VariableRepetition::Single | VariableRepetition::Optional, + ) => { + quote! { + let mut #capture_name = #core::lexis::TokenRef::nil(); + } + } + + ( + VariableKind::NodeRef, + VariableRepetition::Single | VariableRepetition::Optional, + ) => { + quote! { + let mut #capture_name = #core::syntax::NodeRef::nil(); + } + } + + (VariableKind::TokenRef, VariableRepetition::Multiple) => { + let vec = compiler.facade().vec(); + + quote! { + let mut #capture_name = #vec::<#core::lexis::TokenRef>::with_capacity(1); + } + } + + (VariableKind::NodeRef, VariableRepetition::Multiple) => { + let vec = compiler.facade().vec(); + + quote! { + let mut #capture_name = #vec::<#core::syntax::NodeRef>::with_capacity(1); + } + } + } + }); + + quote! { + #( #variables )* + } + } +} + +impl VariableMeta { + pub(in crate::node) fn insert( + &self, + facade: &Facade, + terminal: &Terminal, + ) -> Option<TokenStream> { + let variable = self.capture_name(); + + match self.repetition() { + VariableRepetition::Single | VariableRepetition::Optional => None, + + VariableRepetition::Multiple => { + let core = facade.core_crate(); + let vec = facade.vec(); + + match terminal { + Terminal::Null => unreachable!("Automata with null transition."), + + Terminal::Token { .. } => Some(quote! { + #vec::push(&mut #variable, #core::lexis::TokenRef::nil()); + }), + + Terminal::Node { .. } => Some(quote! { + #vec::push(&mut #variable, #core::syntax::NodeRef::nil()); + }), + } + } + } + } + + pub(in crate::node) fn write(&self, facade: &Facade, value: TokenStream) -> TokenStream { + let variable = self.capture_name(); + + match self.repetition() { + VariableRepetition::Single | VariableRepetition::Optional => { + quote! { + #variable = #value; + } + } + + VariableRepetition::Multiple => { + let vec = facade.vec(); + + quote! { + #vec::push(&mut #variable, #value); + } + } + } + } + + pub(in crate::node) fn read(&self) -> TokenStream { + let variable = self.capture_name(); + + quote!
{ #variable } + } + + #[inline(always)] + fn capture_name(&self) -> Ident { + Ident::new(&format!("capture_{}", self.name()), self.name().span()) + } +} diff --git a/work/crates/derive/src/node/mod.rs b/work/crates/derive/src/node/mod.rs new file mode 100644 index 0000000..3bc3db5 --- /dev/null +++ b/work/crates/derive/src/node/mod.rs @@ -0,0 +1,68 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +mod automata; +mod builder; +mod compiler; +mod regex; + +use syn::{ + parse::{Parse, ParseStream, Result}, + DeriveInput, +}; + +use crate::node::{builder::Builder, compiler::Compiler}; + +pub struct Node { + builder: Builder, +} + +impl Parse for Node { + #[inline(always)] + fn parse(input: ParseStream) -> Result<Self> { + Ok(Self { + builder: Builder::try_from(&input.parse::<DeriveInput>()?)?, + }) + } +} + +impl From<Node> for proc_macro::TokenStream { + #[inline(always)] + fn from(node: Node) -> Self { + Compiler::compile(&node.builder).into() + } +} diff --git a/work/crates/derive/src/node/readme.md b/work/crates/derive/src/node/readme.md new file mode 100644 index 0000000..dc1de91 --- /dev/null +++ b/work/crates/derive/src/node/readme.md @@ -0,0 +1,543 @@ +A derive macro of the Node trait that constructs a Syntax Parser from a set of +context-free LL(1) grammar rules. + +This macro implements the [Node](::lady_deirdre::syntax::Node) trait for a +Rust enum type. + +An API user specifies Node parse rules directly on enum variants through the +macro attributes. The macro analyses the validity of these rules at compile +time and constructs a run-time optimized and error-resistant +[`LL(1) parser`](https://en.wikipedia.org/wiki/LL_parser) +of the [Token](::lady_deirdre::lexis::Token) sequences as the Node trait +implementation.
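+ +As a refresher on the parsing model: an LL(1) parser decides every step from a +single token of lookahead. The following toy, hand-written parser for the +illustrative grammar `S -> 'a' S 'b' | 'c'` is unrelated to this macro's actual +output, but it shows the model the generated parsers follow: + +```rust +use std::{iter::Peekable, str::Chars}; + +// Grammar: S -> 'a' S 'b' | 'c'. One character of lookahead selects the production. +fn parse_s(input: &mut Peekable<Chars<'_>>) -> Result<(), String> { + match input.peek().copied() { + Some('a') => { + input.next(); // consume 'a' + parse_s(input)?; // descend into S + + match input.next() { + Some('b') => Ok(()), + other => Err(format!("expected 'b', found {:?}", other)), + } + } + + Some('c') => { + input.next(); // consume 'c' + Ok(()) + } + + other => Err(format!("expected 'a' or 'c', found {:?}", other)), + } +} + +fn main() { + let mut input = "aacbb".chars().peekable(); + + assert!(parse_s(&mut input).is_ok()); + assert!(input.next().is_none()); +} +```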
+ +In case of invalid definitions or misuse, the macro reports descriptive +compile-time errors to the macro programmer. + +## Grammar Specification. + +Derive macro application outline: + +```rust +#[derive(Node)] +#[token(MyToken)] // Specifies a Token type. +#[error(MyError)] // Specifies an Error type. +#[skip($Whitespace | $LineBreak)] // Tokens to be auto-skipped during parsing. +enum MyNode { + #[root] // An entry-point Rule. + #[rule(foos: Foo*)] + MyRoot { + foos: Vec<NodeRef> + }, + + // A Regular Rule. + #[rule($Foo & field_1: $Bar & field_2: AnotherRule+)] + Foo { + field_1: TokenRef, + field_2: Vec<NodeRef>, + }, + + // A Comment Rule. + #[comment] + #[rule($CommentStart & ($Whitespace | $Foo)* & $LineBreak?)] + Comment, + + // ... +} +``` + +Enum variants labeled with `#[rule(...)]` attributes specify the set +of LL(1) grammar rules, and the instances of these variants are the products of +the corresponding rules' execution. The expression language is a +regular-expression-like language that can refer to other rules (possibly +recursively). + +During the expression execution the parser is capable of tracking selected +Tokens and the Nodes of other referred rules, and of storing weak references +to them inside the Variant's product fields. This process is called Capturing. +A system of Node Variants with captured weak references builds up an Abstract +Syntax Tree of the source code. + +As the LL(1) grammar cannot express +[left recursion](https://en.wikipedia.org/wiki/Left_recursion), an API user +cannot directly express common infix expressions ("mathematical" expressions) +with operator precedence using this grammar language. To work around this +problem, it is assumed that the parser performs a raw parsing of a sequence +of operands and operators ignoring operators' priority, to be post-processed +later on using the +[Shunting Yard](https://en.wikipedia.org/wiki/Shunting_yard_algorithm) or +[Pratt](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing) +parsing algorithms (a sketch of such post-processing follows the Terms list +below). + +### Terms. + + - __Parsable Rule__. Any Enum variant labeled with `#[rule(...)]` attribute. + Such variants represent LL(1) Grammar rules and the Syntax Tree Nodes + produced by these rules. + - __Regular Rule__. A Parsable Rule. Defines a regular programming language syntax + component (e.g. a function, a class definition, a statement block, etc). + - __Root Rule__. A Parsable Rule. A grammar entry-point rule. + - __Comment Rule__. A Parsable Rule. Defines the syntax of a comment that could + implicitly appear at any site of the source code (except inside other Comments). + - __Skip Tokens__. A set of tokens to be ignored in the Regular and the Root + rules during parsing. Such tokens are not ignored inside the Comment Rules. + - __Expression__. A matcher of a sequence of Tokens. This is the body of + a Parsable Rule or an Inline Expression. + - __Inline Expression__. A named Expression to be inlined by name directly + into any other Expression. + - __Reference__. A named reference to a Parsable Rule inside an Expression. + - __Capturing__. An identifiable part of the expression that matches either + a Token or another Rule. Weak references to such Tokens or Rules' + Nodes will be stored in the Variant's named fields. + - __Leftmost Tokens__. The set of first Tokens of the sequences of tokens + that could be parsed by the specified Expression. + - __Rightmost Tokens__. The set of last Tokens of the sequences of tokens + that could be parsed by the specified Expression.
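+ +A minimal post-processing sketch of the infix-expression workaround mentioned +above: the raw rule is assumed to have captured the operand and operator +sequence into two flat vectors, and precedence climbing then rebuilds the infix +tree. The `i64` operands and `char` operators below stand in for the NodeRef +and TokenRef values a real rule would capture; everything here is illustrative +and not part of the macro output. + +```rust +// Precedence climbing over a raw, flat capture of operands and operators. +#[derive(Debug)] +enum Expr { + Num(i64), + Binary(Box<Expr>, char, Box<Expr>), +} + +fn precedence(op: char) -> u8 { + match op { + '+' | '-' => 1, + '*' | '/' => 2, + _ => 0, + } +} + +// `pos` is the index of the next unconsumed operator. The invariant is that +// `operands[*pos]` is the operand that follows the last consumed operator. +fn climb(operands: &[i64], operators: &[char], pos: &mut usize, min: u8) -> Expr { + let mut lhs = Expr::Num(operands[*pos]); + + while *pos < operators.len() && precedence(operators[*pos]) >= min { + let op = operators[*pos]; + *pos += 1; + + // Left associativity: the right-hand side binds strictly tighter. + let rhs = climb(operands, operators, pos, precedence(op) + 1); + + lhs = Expr::Binary(Box::new(lhs), op, Box::new(rhs)); + } + + lhs +} + +fn main() { + // The raw rule captured `2 + 3 * 4` as flat sequences. + let mut pos = 0; + let tree = climb(&[2, 3, 4], &['+', '*'], &mut pos, 0); + + // Prints `Binary(Num(2), '+', Binary(Num(3), '*', Num(4)))`. + println!("{:?}", tree); +} +```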
+
+### Terms.
+
+ - __Parsable Rule__. Any Enum variant labeled with the `#[rule(...)]`
+   attribute. Such variants represent LL(1) Grammar rules and the Syntax Tree
+   Nodes produced by these rules.
+ - __Regular Rule__. A Parsable Rule. Defines a regular programming language
+   syntax component (e.g. a function, a class definition, a statement block,
+   etc).
+ - __Root Rule__. A Parsable Rule. A grammar entry-point rule.
+ - __Comment Rule__. A Parsable Rule. Defines the syntax of a comment that
+   could implicitly appear at any site of the source code (except inside
+   other Comments).
+ - __Skip Tokens__. A set of tokens to be ignored in the Regular and the Root
+   rules during parsing. Such tokens are not ignored in the Comment Rules.
+ - __Expression__. A matcher of a sequence of Tokens. This is the body of
+   a Parsable Rule or an Inline Expression.
+ - __Inline Expression__. A named Expression to be inlined by name directly
+   into any other Expression.
+ - __Reference__. A named reference to a Parsable Rule inside an Expression.
+ - __Capturing__. An identifiable part of an Expression that matches either
+   a Token or another Rule. The weak references to such Tokens or Rules'
+   Nodes will be stored in the Variant's named fields.
+ - __Leftmost Tokens__. The set of first Tokens of the token sequences that
+   could be parsed by the specified Expression.
+ - __Rightmost Tokens__. The set of last Tokens of the token sequences that
+   could be parsed by the specified Expression.
+
+### Expressions.
+
+The Expression language is any combination of the following sentences that
+fully recognizes a sequence of Tokens.
+
+| Sentence                 | Example       | Description                                                                                   |
+|:-------------------------|:--------------|:----------------------------------------------------------------------------------------------|
+| Token Match.             | $Foo          | Matches a single token.                                                                       |
+| Inline.                  | Foo           | If the referred Identifier is an Inline Expression, matches this Expression.                  |
+| Reference.               | Foo           | If the referred Identifier is a Parsable Rule, descends into this Rule.                       |
+| Group.                   | (Foo & $Bar)  | In a pattern "(A)", sentence A matches.                                                       |
+| Sequence Match.          | Foo & $Bar    | In a pattern "A & B", sentence A matches, and then sentence B matches.                        |
+| Choice Match.            | Foo \| $Bar   | In a pattern "A \| B", either sentence A matches, or sentence B matches.                      |
+| Zero or More Repetition. | $Foo*         | In a pattern "A*", sentence A matches zero or more times.                                     |
+| Zero or More Repetition. | $Foo*{$Bar}   | In a pattern "A*{B}", sentences A delimited by B match zero or more times.                    |
+| One or More Repetition.  | $Foo+         | In a pattern "A+", sentence A matches one or more times.                                      |
+| One or More Repetition.  | $Foo+{$Bar}   | In a pattern "A+{B}", sentences A delimited by B match one or more times.                     |
+| Optional Match.          | $Foo?         | In a pattern "A?", sentence A fully matches or does not match at all.                         |
+| Capture.                 | field_1: $Foo | In a pattern "id: A", matches the pattern of A, and stores the matching result in field "id". |
+
+Of the binary operators, the Sequence Match (`&`) has priority over the
+Choice Match (`|`). The unary operators (`*`, `+`, `?`, capturing) have
+priority over the binary operators. And the Group operator prioritizes
+anything inside the parentheses.
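+
+The following sketch (with hypothetical tokens) shows how these priorities
+group an expression; both rules match exactly the same token sequences:
+
+```rust
+enum MyNode {
+    // `?` binds tighter than `&`, and `&` binds tighter than `|`...
+    #[rule($A & $B? | $C & $D)]
+    Implicit,
+
+    // ...so the rule above is equivalent to this explicitly grouped form.
+    #[rule(($A & ($B?)) | ($C & $D))]
+    Explicit,
+}
+```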
+
+### Restrictions.
+
+ 1. There is one and only one Root Rule in the Grammar, and it is the
+    Parser's entry point.
+ 2. The Root Rule cannot be referred to by any other Rule. As such, the Root
+    Rule is not recursive.
+ 3. Any Regular Rule must be directly or indirectly referred to by the Root
+    Rule. In other words, every Regular Rule must be reachable from the Root
+    Rule.
+ 4. A referred Parsable Rule's Leftmost Tokens cannot conflict with other
+    Tokens of the Expression in the same matching position.
+
+    ```rust
+    enum MyNode {
+        #[rule($A)]
+        Foo1,
+
+        #[rule($A | $B)]
+        Foo2,
+
+        // Conflicts, because Foo1's leftmost token is $A.
+        #[rule($A | Foo1)]
+        Conflict1,
+
+        // Conflicts, because both Foo1 and Foo2 could start with $A.
+        #[rule(Foo1 | Foo2)]
+        Conflict2,
+
+        // This is fine, because Foo1 and Foo2 are in different matching
+        // positions.
+        #[rule(Foo1 & Foo2)]
+        Ok,
+    }
+    ```
+ 5. All Inline Expression names and Parsable Rules' variant names must be
+    unique.
+ 6. A Capturing variable inside a Rule expression cannot capture values of
+    different kinds. For example, a Capturing variable cannot capture a Token
+    and a Node at the same time.
+
+    ```rust
+    enum MyNode {
+        #[rule($Foo)]
+        SomeNode,
+
+        // Conflicts, because `capt_1` tries to capture a Token and a Node
+        // at the same time.
+        #[rule((capt_1: $SomeToken) & (capt_1: SomeNode))]
+        Conflict { capt_1: TokenRef },
+
+        // No conflict: `capt_1` and `capt_2` are two distinct variables.
+        #[rule((capt_1: $SomeToken) & (capt_2: SomeNode))]
+        Ok { capt_1: TokenRef, capt_2: NodeRef },
+    }
+    ```
+ 7. A Capturing variable's type must match the variant field's type.
+
+    ```rust
+    enum MyNode {
+        // `capt_1` captures a Token, not a Node.
+        #[rule(capt_1: $Foo)]
+        Error1 { capt_1: NodeRef },
+
+        #[rule(capt_1: $Foo)]
+        Ok1 { capt_1: TokenRef },
+
+        // `capt_1` captures a Token multiple times.
+        #[rule(capt_1: $Foo+)]
+        Error2 { capt_1: TokenRef },
+
+        #[rule(capt_1: $Foo+)]
+        Ok2 { capt_1: Vec<TokenRef> },
+
+        // Even though $Foo could be matched zero times, this is still fine,
+        // because TokenRef could be a `TokenRef::nil()` reference.
+        #[rule($Bar & capt_1: $Foo?)]
+        Ok3 { capt_1: TokenRef },
+    }
+    ```
+ 8. Comment Rules cannot refer to Parsable Rules, and Parsable Rules cannot
+    refer to Comments directly.
+ 9. Skip Tokens cannot be matched explicitly in the Root Rule and the Regular
+    Rules, but they can (and should) be matched inside the Comment Rule
+    Expressions.
+ 10. Regular and Comment Rules cannot match empty sequences of Tokens.
+
+## Error Recovery.
+
+The Macro constructs a Syntax Parser with syntax error recovery capabilities.
+
+Error Recovery is a heuristic process. There are two error recovery
+strategies: Insert Mode and Panic Mode. The choice between the two is
+determined by the Macro for every possible parsing situation in advance,
+during compile-time static analysis of the specified grammar.
+
+A particular Parsable Rule enters recovery mode when the next read Token does
+not fit any possibility specified by the rule's Expression in the current
+parsing state.
+
+For example, if the rule with Expression `$Foo & $Bar & $Baz` tries to parse
+a `[$Foo, $Baz]` sequence, it would successfully match the first Token, but
+then fail on the second Token ($Bar was expected, but $Baz was found),
+entering error recovery mode to fulfill this rule's requirements.
+
+In the error recovery mode, if the Parser did not match a required Capturing
+variable, the corresponding Variant field will be set to Nil
+(`TokenRef::nil()` or `NodeRef::nil()`).
+
+### Insert Mode.
+
+If the Rule has mismatched one specific Token, this Token is expected to be
+the only possibility in the particular parsing situation, and the next read
+Token is the one expected to be matched after the missing one, then the
+Parser will ignore this mismatch as if the missing Token were in place
+(virtually "inserting" this Token).
+
+For example, for the `$Foo & $Bar & $Baz` Expression, the Parser would
+"insert" a $Bar Token in the middle of the `[$Foo, $Baz]` sequence to fulfill
+the Expression's matching requirements.
+
+When the Insert Mode is applicable, it has priority over the Panic Mode.
+
+### Panic Mode.
+
+If the syntax error cannot be recovered by the Insert Mode, the Panic
+Recovery Mode takes place instead.
+
+In Panic Mode the Parser eagerly skips all incoming mismatched Tokens until a
+possible expected Token is found.
+
+For example, for the `$Foo & ($Bar | $Baz) & $Aaa` Expression and the
+`[$Foo, $Bbb, $Ccc, $Bar, $Aaa]` input sequence, the Parser will skip the
+second and the third Tokens, and then continue the parsing process normally.
+
+In Panic Mode the token skipping process is usually limited by a set of
+heuristic contextual assumptions to prevent overuse of the recovery strategy.
+If the Panic Mode cannot fulfill the Expression's requirements, the Parser
+leaves error recovery mode early, finishing the corresponding Syntax Tree
+Node as is, and returns control flow to the ancestor rule normally.
+
+There are three situations in which the Parser could finish Panic Mode early:
+ 1. The Rule's Expression unambiguously ends with a delimiter token (e.g.
+    a semicolon token at the end of a statement). If the parser encounters
+    such a Token, the Panic Mode finishes early.
+ 2. An API user has explicitly specified a set of Synchronization Rules
+    (using the `#[synchronization]` attribute) that defines a global set of
+    synchronization Token pairs (see the sketch after this list). For
+    example, in Rust the code block tokens `{` and `}` would be a good
+    candidate for such global synchronization. In this case, during Panic
+    Recovery the Parser will count the nesting of such Tokens, and it will
+    finish Panic Mode early when it detects the termination of the outer
+    synchronization context.
+ 3. No more Tokens are left in the input sequence.
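+
+For instance, a brace-delimited code block is a typical synchronization
+context. The following sketch uses hypothetical `$BraceOpen`/`$BraceClose`
+tokens and a `Statement` rule:
+
+```rust
+enum MyNode {
+    // ...
+
+    // During Panic Recovery the parser counts the `$BraceOpen`/`$BraceClose`
+    // nesting, and finishes the recovery early when the block enclosing the
+    // error is terminated.
+    #[rule($BraceOpen & (statements: Statement)* & $BraceClose)]
+    #[synchronization]
+    Block { statements: Vec<NodeRef> },
+
+    // ...
+}
+```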
+
+## Type-level attributes.
+
+These attributes are meant to be bound to the Enum type.
+
+```rust
+#[derive(Node)]
+// Type-level attributes go here.
+enum MyNode {
+    // ..
+}
+```
+
+ - ### Token Type.
+
+   **Format:** `#[token(<type>)]`.
+
+   Specifies the type of the Source Code tokens. `<type>` must be an enum
+   type accessible from the current context, and it must implement a Token
+   trait. It is assumed that `<type>` would be derived by the
+   [Token](crate::Token) macro, but this is not a strict requirement.
+
+   This attribute is **required**.
+
+ - ### Syntax Error Type.
+
+   **Format:** `#[error(<type>)]`.
+
+   Specifies the type of the syntax errors. This type must be accessible from
+   the current context, and it must implement a `From<SyntaxError>` trait.
+   In particular, the `SyntaxError` type itself fits this requirement.
+
+   This attribute is **required**.
+
+ - ### Skip Tokens.
+
+   **Format:** `#[skip(<expression>)]`.
+
+   Specifies a set of tokens to be auto-ignored in the Root and Regular
+   parsable rules.
+
+   Whitespace or line-break tokens are good candidates for Skip Tokens.
+
+   This attribute is optional.
+
+ - ### Inline Expression.
+
+   **Format:** `#[define(<name> = <expression>)]`.
+
+   Defines a named inline expression. These expressions could be further
+   referred to by `<name>` inside other expressions (including Parsable
+   Rules and other Inline Expressions).
+
+   The macro interprets such references as direct inlines of the
+   `<expression>` value.
+
+   An inline expression must be defined before use. As such, inline
+   expressions cannot define direct or indirect recursion.
+
+   Inline expressions are a mechanism to reuse frequently repeated fragments
+   of expressions by name (see the sketch after this list).
+
+   The expression's name must be unique across the entire set of Inline
+   Expression names and enum variant names.
+
+   This attribute is optional.
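+
+The following sketch shows an Inline Expression reused by a rule. The token
+and rule names are hypothetical:
+
+```rust
+#[derive(Node)]
+#[token(MyToken)]
+#[error(SyntaxError)]
+#[define(ANY = Array | Number)]
+// `LIST_OF_ANY` is inlined verbatim wherever it is referred to below.
+#[define(LIST_OF_ANY = (items: ANY)*{$Comma})]
+enum MyNode {
+    // ...
+
+    // Equivalent to:
+    // #[rule($BracketOpen & (items: ANY)*{$Comma} & $BracketClose)]
+    #[rule($BracketOpen & LIST_OF_ANY & $BracketClose)]
+    Array { items: Vec<NodeRef> },
+
+    // ... (the Number rule, the root rule, etc. are omitted)
+}
+```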
+
+## Variant-level attributes.
+
+These attributes are meant to be bound to the Enum Variants.
+
+```rust
+#[derive(Node)]
+enum MyNode {
+    // Variant attributes go here.
+    Variant1,
+
+    // Variant attributes go here.
+    Variant2,
+
+    // ...
+}
+```
+
+ - ### Rule.
+
+   **Format:** `#[rule(<expression>)]`.
+
+   Defines a Parsable Rule of the enum variant.
+
+   This attribute is optional per variant, but an API user must define at
+   least one Parsable Rule per Grammar, and must also label one Parsable Rule
+   as the Root Rule.
+
+ - ### Root Rule.
+
+   **Format:** `#[root]`.
+
+   Specializes a Parsable Rule to be the Grammar entry-point rule.
+
+   This attribute must be bound to an Enum Variant already labeled as a
+   [Parsable Rule](#rule) that does not have any other specializations.
+
+ - ### Comment.
+
+   **Format:** `#[comment]`.
+
+   Specializes a Parsable Rule to be a Comment Rule.
+
+   This attribute must be bound to an Enum Variant already labeled as a
+   [Parsable Rule](#rule) that does not have any other specializations.
+
+   Similarly to [`Skip Tokens`](#skip-tokens), Comments could appear at any
+   place of any Root or Regular Rule. An API user doesn't have to refer to
+   them explicitly. In contrast to Skip Tokens, a Comment Rule produces a
+   Syntax Tree Node instance.
+
+ - ### Constructor.
+
+   **Format:** `#[constructor(<function name>(variable_1, variable_2, ...))]`.
+
+   Specifies an explicit construction function of the Parsable Rule's node.
+
+   The Parser will call the provided `<function name>` function to construct
+   the enum's instance when the rule's expression matches.
+
+   The function must be defined on the enum type as a static function
+   accessible from the current Rust scope, it must accept the provided set of
+   Capturing variables, and it must return an instance of this enum type.
+
+   An API user specifies this attribute when the enum's Variant has a
+   non-standard construction mechanism: for example, if the Variant has some
+   non-capturing fields with a complex initialization strategy, or if the
+   Variant has anonymous fields.
+
+   This attribute must be bound to an Enum Variant already labeled as a
+   [Parsable Rule](#rule).
+
+   ```rust
+   #[derive(Node)]
+   //...
+   enum MyNode {
+       // ...
+
+       #[rule($Foo & bar: $Bar)]
+       #[constructor(new_some_variant(bar))]
+       SomeVariant(TokenRef, usize),
+
+       // ...
+   }
+
+   impl MyNode {
+       fn new_some_variant(bar: TokenRef) -> Self {
+           Self::SomeVariant(bar, 10)
+       }
+   }
+   ```
+
+ - ### Synchronization.
+
+   **Format:** `#[synchronization]`.
+
+   Specifies a globally unique nested context for the error recovery
+   synchronization.
+
+   To improve the error recovery mechanism, it is recommended to label some
+   Regular Rules that represent nested contexts frequently appearing
+   throughout the code. For example, in Rust a system of nested code blocks
+   is a good candidate for such "synchronization", because the code blocks
+   could be nested, they frequently appear everywhere in the code, and they
+   have a simple pair of enter and leave contextual tokens (`{` and `}`).
+
+   See [Panic Mode](#panic-mode) for details.
+
+   A Synchronization Rule must satisfy the following two requirements
+   (illustrated by the sketch below):
+    1. The Expression's leftmost token and rightmost token are explicitly
+       and unambiguously defined, and distinct from each other.
+    2. There are no other Synchronization Rules with the same leftmost and
+       rightmost tokens.
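+
+A sketch of the two requirements, using hypothetical tokens:
+
+```rust
+enum MyNode {
+    // Fine: the rule unambiguously starts with `$BracketOpen`, unambiguously
+    // ends with `$BracketClose`, the two tokens are distinct, and no other
+    // synchronization rule uses this token pair.
+    #[rule($BracketOpen & (items: ANY)*{$Comma} & $BracketClose)]
+    #[synchronization]
+    Array { items: Vec<NodeRef> },
+
+    // Violates requirement 1: the leftmost and the rightmost tokens are the
+    // same `$Quote` token.
+    #[rule($Quote & (parts: $Text)* & $Quote)]
+    #[synchronization]
+    Text { parts: Vec<TokenRef> },
+}
+```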
+
+## Field-level attributes.
+
+These attributes are meant to be bound to the Enum Variants' Named Fields.
+
+```rust
+#[derive(Node)]
+enum MyNode {
+    // ...
+
+    Variant1 {
+        // Field attributes go here.
+        field_1: usize,
+    },
+
+    // ...
+}
+```
+
+ - ### Default.
+
+   **Format:** `#[default(<value>)]`.
+
+   Specifies the default value of the Variant's custom field.
+
+   When an API user relies on the default Node constructor (a constructor
+   that is not overloaded by the [Constructor](#constructor) attribute), the
+   Variant fields are assumed to exactly correspond to the Capturing
+   variables.
+
+   However, an API user can specify custom fields too by labeling them with
+   this attribute. Their values will be set to the `<value>` expression
+   during the Node construction.
+
+   ```rust
+   #[derive(Node)]
+   //...
+   enum MyNode {
+       // ...
+
+       #[rule($Foo & bar: $Bar)]
+       SomeVariant {
+           bar: TokenRef, // Will be set to the "bar" Capturing variable.
+
+           #[default(100)]
+           baz: usize, // Will be set to "100" as defined in the attribute value.
+       },
+
+       // ...
+   }
+   ```
+
+## Json Syntax Example.
+
+```rust
+#[derive(Node, Clone)]
+#[token(JsonToken)]
+#[error(SyntaxError)]
+#[skip($Whitespace)]
+#[define(ANY = Object | Array | True | False | String | Number | Null)]
+pub enum JsonNode {
+    #[root]
+    #[rule(object: Object)]
+    Root { object: NodeRef },
+
+    #[rule($BraceOpen & (entries: Entry)*{$Comma} & $BraceClose)]
+    #[synchronization]
+    Object { entries: Vec<NodeRef> },
+
+    #[rule(key: $String & $Colon & value: ANY)]
+    Entry { key: TokenRef, value: NodeRef },
+
+    #[rule($BracketOpen & (items: ANY)*{$Comma} & $BracketClose)]
+    #[synchronization]
+    Array { items: Vec<NodeRef> },
+
+    #[rule(value: $String)]
+    String { value: TokenRef },
+
+    #[rule(value: $Number)]
+    Number { value: TokenRef },
+
+    #[rule($True)]
+    True,
+
+    #[rule($False)]
+    False,
+
+    #[rule($Null)]
+    Null,
+}
+```
\ No newline at end of file
diff --git a/work/crates/derive/src/node/regex/encode.rs b/work/crates/derive/src/node/regex/encode.rs
new file mode 100644
index 0000000..2d52d1c
--- /dev/null
+++ b/work/crates/derive/src/node/regex/encode.rs
@@ -0,0 +1,151 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work,                            //
+// a compiler front-end foundation technology.                                //
+//                                                                            //
+// This Work is a proprietary software with source available code.            //
+//                                                                            //
+// To copy, use, distribute, and contribute into this Work you must agree to  //
+// the terms of the End User License Agreement:                               //
+//                                                                            //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md.          //
+//                                                                            //
+// The Agreement let you use this Work in commercial and non-commercial       //
+// purposes. Commercial use of the Work is free of charge to start,           //
+// but the Agreement obligates you to pay me royalties                        //
+// under certain conditions.                                                  //
+//                                                                            //
+// If you want to contribute into the source code of this Work,               //
+// the Agreement obligates you to assign me all exclusive rights to           //
+// the Derivative Work or contribution made by you                            //
+// (this includes GitHub forks and pull requests to my repository).           //
+//                                                                            //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only,     //
+// and the third party software does not incorporate or distribute            //
+// this Work directly.                                                        //
+//                                                                            //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY  //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES           //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                   //
+//                                                                            //
+// If you do not or cannot agree to the terms of this Agreement,              //
+// do not use this Work.                                                      //
+//                                                                            //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                 //
+// All rights reserved.                                                       //
+////////////////////////////////////////////////////////////////////////////////
+
+use syn::{Error, Result};
+
+use crate::{
+    node::{
+        automata::{
+            merge::AutomataMergeCaptures,
+            scope::Scope,
+            variables::AutomataVariables,
+            NodeAutomata,
+        },
+        regex::{operand::RegexOperand, operator::RegexOperator, terminal::Terminal, Regex},
+    },
+    utils::{AutomataContext, Set, SetImpl},
+};
+
+impl Encode for Regex {
+    #[inline(always)]
+    fn encode(&self, scope: &mut Scope) -> Result<NodeAutomata> {
+        match self {
+            Self::Operand(RegexOperand::Unresolved { ..
}) => unreachable!("Unresolved operand."), + + Self::Operand(RegexOperand::Debug { span, inner }) => { + let mut inner = inner.encode(scope)?; + + inner.merge_captures(scope)?; + + let variables = inner.variable_map()?; + + return Err(Error::new( + *span, + format!( + "This expression is a subject for debugging.\n\nCapturing variables \ + are:\n{:#}\nState machine transitions are:\n{:#}", + variables, inner, + ), + )); + } + + Self::Operand(RegexOperand::Token { name, capture }) => { + Ok(scope.terminal(Set::new([Terminal::Token { + name: name.clone(), + capture: capture.clone(), + }]))) + } + + Self::Operand(RegexOperand::Rule { name, capture }) => { + Ok(scope.terminal(Set::new([Terminal::Node { + name: name.clone(), + capture: capture.clone(), + }]))) + } + + Self::Unary { operator, inner } => { + let inner = inner.encode(scope)?; + + match operator { + RegexOperator::OneOrMore { separator: None } => { + let zero_or_more = scope.repeat(inner.clone()); + + Ok(scope.concatenate(inner, zero_or_more)) + } + + RegexOperator::OneOrMore { + separator: Some(separator), + } => { + let separator = separator.encode(scope)?; + + let rest = scope.concatenate(separator, inner.clone()); + let repeat_rest = scope.repeat(rest); + + Ok(scope.concatenate(inner, repeat_rest)) + } + + RegexOperator::ZeroOrMore { separator: None } => Ok(scope.repeat(inner)), + + RegexOperator::ZeroOrMore { + separator: Some(separator), + } => { + let separator = separator.encode(scope)?; + + let rest = scope.concatenate(separator, inner.clone()); + let repeat_rest = scope.repeat(rest); + let one_or_more = scope.concatenate(inner, repeat_rest); + + Ok(scope.optional(one_or_more)) + } + + RegexOperator::Optional => Ok(scope.optional(inner)), + + _ => unreachable!("Unsupported Unary operator."), + } + } + + Self::Binary { + operator, + left, + right, + } => { + let left = left.encode(scope)?; + let right = right.encode(scope)?; + + match operator { + RegexOperator::Union => Ok(scope.union(left, right)), + RegexOperator::Concat => Ok(scope.concatenate(left, right)), + _ => unreachable!("Unsupported Binary operator."), + } + } + } + } +} + +pub(in crate::node) trait Encode { + fn encode(&self, scope: &mut Scope) -> Result; +} diff --git a/work/crates/derive/src/node/regex/inject.rs b/work/crates/derive/src/node/regex/inject.rs new file mode 100644 index 0000000..2018382 --- /dev/null +++ b/work/crates/derive/src/node/regex/inject.rs @@ -0,0 +1,174 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). 
// +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use syn::spanned::Spanned; + +use crate::node::regex::{operand::RegexOperand, operator::RegexOperator, span::SetSpan, Regex}; + +impl Inject for Regex { + fn surround(&mut self, injection: &Self) { + match self { + Self::Operand(RegexOperand::Debug { inner, .. }) => { + inner.surround(injection); + } + + operand @ Self::Operand { .. } => { + let mut injection = injection.clone(); + + if let Self::Operand(operand) = operand { + injection.set_span(operand.span()); + } + + *operand = Self::Binary { + operator: RegexOperator::Concat, + left: Box::new(injection.clone()), + right: Box::new(Self::Binary { + operator: RegexOperator::Concat, + left: Box::new(operand.clone()), + right: Box::new(injection), + }), + }; + } + + Self::Unary { operator, inner } => { + match operator { + RegexOperator::ZeroOrMore { + separator: Some(separator), + } => { + separator.surround(injection); + } + + RegexOperator::OneOrMore { + separator: Some(separator), + } => { + separator.surround(injection); + } + + _ => (), + } + + inner.surround(injection); + } + + Self::Binary { left, right, .. } => { + left.surround(injection); + right.surround(injection); + } + } + } + + fn inject(&mut self, injection: &Self) { + match self { + Self::Operand(RegexOperand::Debug { inner, .. }) => { + inner.inject(injection); + } + + Self::Operand { .. 
} => (), + + Self::Unary { operator, inner } => { + match operator { + RegexOperator::ZeroOrMore { + separator: Some(separator), + } => { + separator.surround(injection); + } + + RegexOperator::ZeroOrMore { separator } => { + let mut injection = injection.clone(); + + injection.set_span(inner.span()); + + *separator = Some(Box::new(injection)); + } + + RegexOperator::OneOrMore { + separator: Some(separator), + } => { + separator.surround(injection); + } + + RegexOperator::OneOrMore { separator } => { + let mut injection = injection.clone(); + + injection.set_span(inner.span()); + + *separator = Some(Box::new(injection)); + } + + _ => (), + } + + inner.inject(injection); + } + + Self::Binary { + operator, + left, + right, + } => { + left.inject(injection); + right.inject(injection); + + match operator { + RegexOperator::Concat => { + let mut injection = injection.clone(); + + injection.set_span(left.span()); + + *self = Self::Binary { + operator: RegexOperator::Concat, + left: left.clone(), + right: Box::new(Self::Binary { + operator: RegexOperator::Concat, + left: Box::new(injection), + right: right.clone(), + }), + } + } + + _ => (), + } + } + } + } +} + +pub(in crate::node) trait Inject { + fn surround(&mut self, injection: &Self); + + fn inject(&mut self, injection: &Self); +} diff --git a/work/crates/derive/src/node/regex/inline.rs b/work/crates/derive/src/node/regex/inline.rs new file mode 100644 index 0000000..fb24a25 --- /dev/null +++ b/work/crates/derive/src/node/regex/inline.rs @@ -0,0 +1,135 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use std::mem::take; + +use proc_macro2::Ident; +use syn::{Error, Result}; + +use crate::node::{ + builder::Builder, + regex::{operand::RegexOperand, span::SetSpan, Regex}, +}; + +impl Inline for Regex { + fn inline(&mut self, builder: &Builder) -> Result<()> { + match self { + Self::Operand(RegexOperand::Unresolved { name, capture }) => { + match builder.get_inline(name) { + None => { + *self = Self::Operand(RegexOperand::Rule { + name: name.clone(), + capture: take(capture), + }) + } + + Some(inline) => { + let mut inline = inline.clone(); + + inline.set_span(name.span()); + + if let Some(target) = capture { + inline.capture(target)?; + } + + *self = inline; + } + }; + + Ok(()) + } + + Self::Operand(RegexOperand::Debug { inner, .. }) => inner.inline(builder), + + Self::Operand(RegexOperand::Token { .. }) => Ok(()), + + Self::Operand(RegexOperand::Rule { .. }) => Ok(()), + + Self::Unary { inner, .. } => inner.inline(builder), + + Self::Binary { left, right, .. } => { + left.inline(builder)?; + right.inline(builder)?; + + Ok(()) + } + } + } + + fn capture(&mut self, target: &Ident) -> Result<()> { + match self { + Self::Operand( + RegexOperand::Unresolved { capture, .. } + | RegexOperand::Token { capture, .. } + | RegexOperand::Rule { capture, .. }, + ) => { + if let Some(capture) = capture { + if capture != target { + return Err(Error::new( + target.span(), + format!( + "Capturing variable \"{}\" conflicts with inner capturing variable \"{}\".", + target, capture + ), + )); + } + } + + *capture = Some(target.clone()); + + Ok(()) + } + + Self::Operand(RegexOperand::Debug { inner, .. }) => inner.capture(target), + + Self::Unary { inner, .. } => inner.capture(target), + + Self::Binary { left, right, .. } => { + left.capture(target)?; + right.capture(target)?; + + Ok(()) + } + } + } +} + +pub(in crate::node) trait Inline { + fn inline(&mut self, builder: &Builder) -> Result<()>; + + fn capture(&mut self, target: &Ident) -> Result<()>; +} diff --git a/work/crates/derive/src/node/regex/mod.rs b/work/crates/derive/src/node/regex/mod.rs new file mode 100644 index 0000000..ea4c1bd --- /dev/null +++ b/work/crates/derive/src/node/regex/mod.rs @@ -0,0 +1,51 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. 
// +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{node::regex::operator::RegexOperator, utils::Expression}; + +pub(in crate::node) mod encode; +pub(in crate::node) mod inject; +pub(in crate::node) mod inline; +pub(in crate::node) mod operand; +pub(in crate::node) mod operator; +pub(in crate::node) mod prefix; +pub(in crate::node) mod references; +pub(in crate::node) mod skip; +pub(in crate::node) mod span; +pub(in crate::node) mod terminal; + +pub(in crate::node) type Regex = Expression; diff --git a/work/crates/derive/src/node/regex/operand.rs b/work/crates/derive/src/node/regex/operand.rs new file mode 100644 index 0000000..5dcb2cb --- /dev/null +++ b/work/crates/derive/src/node/regex/operand.rs @@ -0,0 +1,178 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::{Ident, Span}; +use syn::{parse::ParseStream, spanned::Spanned, token::Paren, Result}; + +use crate::{ + node::regex::{inline::Inline, operator::RegexOperator, Regex}, + utils::ExpressionOperand, +}; + +#[derive(Clone)] +pub(in crate::node) enum RegexOperand { + Unresolved { name: Ident, capture: Option }, + Debug { span: Span, inner: Box }, + Token { name: Ident, capture: Option }, + Rule { name: Ident, capture: Option }, +} + +impl Default for RegexOperand { + #[inline(always)] + fn default() -> Self { + Self::Unresolved { + name: Ident::new("_", Span::call_site()), + capture: None, + } + } +} + +impl Spanned for RegexOperand { + #[inline(always)] + fn span(&self) -> Span { + match self { + Self::Unresolved { name, .. } => name.span(), + Self::Debug { span, .. } => *span, + Self::Token { name, .. } => name.span(), + Self::Rule { name, .. } => name.span(), + } + } +} + +impl ExpressionOperand for RegexOperand { + fn parse(input: ParseStream) -> Result { + let lookahead = input.lookahead1(); + + if lookahead.peek(syn::Ident) { + let identifier_a = input.parse::()?; + let identifier_a_string = identifier_a.to_string(); + + if identifier_a_string == "debug" && input.peek(Paren) { + let content; + + parenthesized!(content in input); + + let inner = content.parse::()?; + + if !content.is_empty() { + return Err(content.error("Unexpected expression end.")); + } + + return Ok(Regex::Operand(RegexOperand::Debug { + span: identifier_a.span(), + inner: Box::new(inner), + })); + } + + if input.peek(Token![:]) { + let _ = input.parse::()?; + + let lookahead = input.lookahead1(); + + if input.peek(Token![$]) { + let _ = input.parse::()?; + + let identifier_b = input.parse::()?; + + return Ok(Regex::Operand(RegexOperand::Token { + name: identifier_b, + capture: Some(identifier_a), + })); + } + + if lookahead.peek(syn::Ident) { + let identifier_b = input.parse::()?; + + return Ok(Regex::Operand(RegexOperand::Unresolved { + name: identifier_b, + capture: Some(identifier_a), + })); + } + + if lookahead.peek(syn::token::Paren) { + let content; + + parenthesized!(content in input); + + let mut result = content.parse::()?; + + if !content.is_empty() { + return Err(content.error("Unexpected expression end.")); + } + + result.capture(&identifier_a)?; + + return Ok(result); + } + + return Err(lookahead.error()); + } + + return Ok(Regex::Operand(RegexOperand::Unresolved { + name: identifier_a, + capture: None, + })); + } + + if input.peek(Token![$]) { + let _ = input.parse::()?; + + let identifier = input.parse::()?; + + return Ok(Regex::Operand(RegexOperand::Token { + name: identifier, + capture: None, + })); + } + + if lookahead.peek(syn::token::Paren) { + let content; + + parenthesized!(content in input); + + let result = content.parse::()?; + + if !content.is_empty() { + return Err(content.error("Unexpected expression end.")); + } + + return Ok(result); + } + + Err(lookahead.error()) + } +} diff --git a/work/crates/derive/src/node/regex/operator.rs b/work/crates/derive/src/node/regex/operator.rs new file mode 100644 index 0000000..d4ae474 --- /dev/null +++ b/work/crates/derive/src/node/regex/operator.rs @@ -0,0 +1,128 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. 
// +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use syn::parse::{Lookahead1, ParseStream, Result}; + +use crate::{ + node::regex::{operand::RegexOperand, Regex}, + utils::{Applicability, ExpressionOperator}, +}; + +#[derive(Clone)] +pub(in crate::node) enum RegexOperator { + Union, + Concat, + OneOrMore { separator: Option> }, + ZeroOrMore { separator: Option> }, + Optional, +} + +impl ExpressionOperator for RegexOperator { + type Operand = RegexOperand; + + #[inline] + fn enumerate() -> Vec { + vec![ + Self::Union, + Self::Concat, + Self::OneOrMore { separator: None }, + Self::ZeroOrMore { separator: None }, + Self::Optional, + ] + } + + #[inline(always)] + fn binding_power(&self) -> u8 { + match self { + Self::Union => 10, + Self::Concat => 20, + Self::OneOrMore { .. } => 30, + Self::ZeroOrMore { .. } => 40, + Self::Optional => 50, + } + } + + #[inline] + fn peek(&self, lookahead: &Lookahead1) -> Applicability { + match self { + Self::Union if lookahead.peek(Token![|]) => Applicability::Binary, + Self::Concat if lookahead.peek(Token![&]) => Applicability::Binary, + Self::OneOrMore { .. } if lookahead.peek(Token![+]) => Applicability::Unary, + Self::ZeroOrMore { .. 
} if lookahead.peek(Token![*]) => Applicability::Unary, + Self::Optional if lookahead.peek(Token![?]) => Applicability::Unary, + + _ => Applicability::Mismatch, + } + } + + #[inline] + fn parse(&mut self, input: ParseStream) -> Result<()> { + match self { + Self::Union => drop(input.parse::()?), + + Self::Concat => drop(input.parse::()?), + + Self::OneOrMore { separator } => { + let _ = input.parse::()?; + + if input.peek(syn::token::Brace) { + let content; + + braced!(content in input); + + *separator = Some(Box::new(content.parse::()?)); + } + } + + Self::ZeroOrMore { separator } => { + let _ = input.parse::()?; + + if input.peek(syn::token::Brace) { + let content; + + braced!(content in input); + + *separator = Some(Box::new(content.parse::()?)); + } + } + + Self::Optional => drop(input.parse::()?), + }; + + Ok(()) + } +} diff --git a/work/crates/derive/src/node/regex/prefix.rs b/work/crates/derive/src/node/regex/prefix.rs new file mode 100644 index 0000000..adc4c56 --- /dev/null +++ b/work/crates/derive/src/node/regex/prefix.rs @@ -0,0 +1,122 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Ident; + +use crate::{ + node::regex::{operand::RegexOperand, operator::RegexOperator, Regex}, + utils::{PredictableCollection, Set, SetImpl}, +}; + +#[derive(Clone, Default)] +pub(in crate::node) struct Leftmost { + tokens: Set, + nodes: Set, +} + +impl Leftmost { + pub(in crate::node) fn append(&mut self, other: Self) { + self.tokens.append(other.tokens); + self.nodes.append(other.nodes); + } + + #[inline(always)] + pub(in crate::node) fn tokens(&self) -> &Set { + &self.tokens + } + + #[inline(always)] + pub(in crate::node) fn nodes(&self) -> &Set { + &self.nodes + } + + #[inline(always)] + fn new_token(token: Ident) -> Self { + Self { + tokens: Set::new([token]), + nodes: Set::empty(), + } + } + + #[inline(always)] + fn new_node(node: Ident) -> Self { + Self { + tokens: Set::empty(), + nodes: Set::new([node]), + } + } +} + +impl RegexPrefix for Regex { + fn leftmost(&self) -> Leftmost { + match self { + Self::Operand(RegexOperand::Unresolved { .. }) => unreachable!("Unresolved operand."), + + Self::Operand(RegexOperand::Debug { inner, .. }) => inner.leftmost(), + + Self::Operand(RegexOperand::Token { name, .. }) => Leftmost::new_token(name.clone()), + + Self::Operand(RegexOperand::Rule { name, .. }) => Leftmost::new_node(name.clone()), + + Self::Unary { inner, .. } => inner.leftmost(), + + Self::Binary { + operator, + left, + right, + } => { + let mut left = left.leftmost(); + + match operator { + RegexOperator::Union => { + left.append(right.leftmost()); + + left + } + + RegexOperator::Concat => left, + + _ => unreachable!("Unsupported Binary operator."), + } + } + } + } +} + +pub(in crate::node) trait RegexPrefix { + fn leftmost(&self) -> Leftmost; +} diff --git a/work/crates/derive/src/node/regex/references.rs b/work/crates/derive/src/node/regex/references.rs new file mode 100644 index 0000000..df70078 --- /dev/null +++ b/work/crates/derive/src/node/regex/references.rs @@ -0,0 +1,158 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. 
// +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Ident; +use syn::{Error, Result}; + +use crate::{ + node::{ + builder::{kind::VariantKind, Builder}, + regex::{operand::RegexOperand, Regex}, + }, + utils::{PredictableCollection, Set, SetImpl}, +}; + +impl CheckReferences for Regex { + fn check_references(&self, context: &VariantKind, builder: &Builder) -> Result> { + use VariantKind::*; + + match self { + Self::Operand(RegexOperand::Unresolved { .. }) => unreachable!("Unresolved operand."), + + Self::Operand(RegexOperand::Debug { inner, .. }) => { + inner.check_references(context, builder) + } + + Self::Operand(RegexOperand::Rule { name, capture }) => { + let reference = match builder.get_variant(name) { + Some(variant) => variant, + + None if capture.is_some() => { + return Err(Error::new( + name.span(), + format!( + "Unresolved reference \"{}\".\nTry to introduce an enum variant \ + with this name.", + name, + ), + )); + } + + _ => { + return Err(Error::new( + name.span(), + format!( + "Unresolved reference \"{}\".\nEither introduce an enum variant \ + with this name, or an inline expression using #[define(...)] \ + attribute on the enum type.", + name, + ), + )); + } + }; + + match (context, reference.kind()) { + (Unspecified(..), _) => unreachable!("Unspecified variant with rule."), + + (Comment(..), _) => { + return Err(Error::new( + name.span(), + format!( + "Reference \"{}\" points to a rule from the comment context. \ + Comments cannot refer other rules.", + name, + ), + )); + } + + (_, Root(..)) => { + return Err(Error::new( + name.span(), + format!( + "Reference \"{}\" points to the root rule. Root rule cannot be \ + referred.", + name, + ), + )); + } + + (_, Comment(..)) => { + return Err(Error::new( + name.span(), + format!( + "Reference \"{}\" points to a comment rule. Comment rule cannot be \ + referred.", + name, + ), + )); + } + + (_, Unspecified(..)) => { + return Err(Error::new( + name.span(), + format!( + "Reference \"{}\" points to an enum variant without associated \ + parsing rule.\nAssociate that variant with parsing rule using \ + #[rule(..)] attribute.", + name, + ), + )); + } + + _ => (), + } + + Ok(Set::new([name.clone()])) + } + + Self::Operand(RegexOperand::Token { .. }) => Ok(Set::empty()), + + Self::Unary { inner, .. } => inner.check_references(context, builder), + + Self::Binary { left, right, .. } => { + let left = left.check_references(context, builder)?; + let right = right.check_references(context, builder)?; + + Ok(left.merge(right)) + } + } + } +} + +pub(in crate::node) trait CheckReferences { + fn check_references(&self, context: &VariantKind, builder: &Builder) -> Result>; +} diff --git a/work/crates/derive/src/node/regex/skip.rs b/work/crates/derive/src/node/regex/skip.rs new file mode 100644 index 0000000..8480a66 --- /dev/null +++ b/work/crates/derive/src/node/regex/skip.rs @@ -0,0 +1,79 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. 
// +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use syn::{Error, Result}; + +use crate::node::regex::{operand::RegexOperand, Regex}; + +impl IsSkipRegex for Regex { + fn is_skip(&self) -> Result<()> { + match self { + Self::Operand(RegexOperand::Unresolved { .. }) => unreachable!("Unresolved operand."), + + Self::Operand(RegexOperand::Debug { inner, .. }) => inner.is_skip(), + + Self::Operand(RegexOperand::Token { + capture: Some(target), + .. + }) => Err(Error::new( + target.span(), + "Capturing is not allowed in the skip expression.", + )), + + Self::Operand(RegexOperand::Token { .. }) => Ok(()), + + Self::Operand(RegexOperand::Rule { name, .. }) => { + return Err(Error::new( + name.span(), + "Rule reference is not allowed in the skip expression.", + )); + } + + Self::Unary { inner, .. } => inner.is_skip(), + + Self::Binary { left, right, .. } => { + left.is_skip()?; + right.is_skip()?; + Ok(()) + } + } + } +} + +pub(in crate::node) trait IsSkipRegex { + fn is_skip(&self) -> Result<()>; +} diff --git a/work/crates/derive/src/node/regex/span.rs b/work/crates/derive/src/node/regex/span.rs new file mode 100644 index 0000000..e8f03ff --- /dev/null +++ b/work/crates/derive/src/node/regex/span.rs @@ -0,0 +1,110 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. 
// +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Span; + +use crate::node::regex::{operand::RegexOperand, Regex}; + +impl SetSpan for Regex { + fn set_span(&mut self, span: Span) { + match self { + Self::Operand(RegexOperand::Unresolved { + name, + capture: None, + }) => { + name.set_span(span); + } + + Self::Operand(RegexOperand::Unresolved { + name, + capture: Some(capture), + }) => { + name.set_span(span); + capture.set_span(span); + } + + Self::Operand(RegexOperand::Debug { inner, .. }) => { + inner.set_span(span); + } + + Self::Operand(RegexOperand::Token { + name, + capture: Some(capture), + .. + }) => { + name.set_span(span); + capture.set_span(span); + } + + Self::Operand(RegexOperand::Token { + name, + capture: None, + .. + }) => { + name.set_span(span); + } + + Self::Operand(RegexOperand::Rule { + name, + capture: Some(capture), + }) => { + name.set_span(span); + capture.set_span(span); + } + + Self::Operand(RegexOperand::Rule { + name, + capture: None, + }) => { + name.set_span(span); + } + + Self::Unary { inner, .. } => { + inner.set_span(span); + } + + Self::Binary { left, right, .. } => { + left.set_span(span); + right.set_span(span); + } + } + } +} + +pub(in crate::node) trait SetSpan { + fn set_span(&mut self, span: Span); +} diff --git a/work/crates/derive/src/node/regex/terminal.rs b/work/crates/derive/src/node/regex/terminal.rs new file mode 100644 index 0000000..86b8e55 --- /dev/null +++ b/work/crates/derive/src/node/regex/terminal.rs @@ -0,0 +1,170 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). 
// +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::{ + cmp::Ordering, + fmt::{Display, Formatter}, +}; + +use proc_macro2::{Ident, Span}; +use syn::spanned::Spanned; + +use crate::{node::builder::Builder, utils::AutomataTerminal}; + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(in crate::node) enum Terminal { + Null, + Token { name: Ident, capture: Option }, + Node { name: Ident, capture: Option }, +} + +impl AutomataTerminal for Terminal { + #[inline(always)] + fn null() -> Self { + Self::Null + } + + #[inline(always)] + fn is_null(&self) -> bool { + match self { + Self::Null => true, + _ => false, + } + } +} + +impl Ord for Terminal { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + use Ordering::*; + + match self.order().cmp(&other.order()) { + Less => Less, + Greater => Greater, + Equal => match self.value().cmp(&other.value()) { + Less => Less, + Greater => Greater, + Equal => self.capture().cmp(&other.capture()), + }, + } + } +} + +impl PartialOrd for Terminal { + #[inline(always)] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Display for Terminal { + #[inline] + fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Null => formatter.write_str("null"), + + Self::Token { + name, + capture: None, + } => formatter.write_fmt(format_args!("${}", name.to_string())), + + Self::Token { + name, + capture: Some(target), + } => formatter.write_fmt(format_args!( + "{}: ${}", + target.to_string(), + name.to_string() + )), + + Self::Node { + name, + capture: None, + } => formatter.write_fmt(format_args!("{}", name.to_string())), + + Self::Node { + name, + capture: Some(target), + } => formatter.write_fmt(format_args!("{}: {}", target.to_string(), name.to_string())), + } + } +} + +impl Spanned for Terminal { + #[inline(always)] + fn span(&self) -> Span { + self.value().span() + } +} + +impl Terminal { + #[inline(always)] + pub(in crate::node) fn capture(&self) -> Option<&Ident> { + match self { + Self::Null => None, + Self::Token { capture, .. } => capture.as_ref(), + Self::Node { capture, .. } => capture.as_ref(), + } + } + + #[inline(always)] + pub(in crate::node) fn is_skip(&self, builder: &Builder) -> bool { + match self { + Self::Null => false, + Self::Token { name, .. } => builder.skip_leftmost().tokens().contains(name), + Self::Node { name, .. } => builder.skip_leftmost().nodes().contains(name), + } + } + + #[inline(always)] + fn order(&self) -> u8 { + match self { + Self::Null => 0, + Self::Token { capture, .. } => 1 + (capture.is_some() as u8), + Self::Node { capture, .. } => 3 + (capture.is_some() as u8), + } + } + + #[inline(always)] + fn value(&self) -> Option<&Ident> { + match self { + Self::Null => None, + Self::Token { name, .. } => Some(name), + Self::Node { name, .. 
} => Some(name), + } + } +} diff --git a/work/crates/derive/src/token/characters.rs b/work/crates/derive/src/token/characters.rs new file mode 100644 index 0000000..fdb3959 --- /dev/null +++ b/work/crates/derive/src/token/characters.rs @@ -0,0 +1,200 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
//
+////////////////////////////////////////////////////////////////////////////////
+
+use proc_macro2::Span;
+use syn::{
+    parse::{Parse, ParseStream},
+    punctuated::Punctuated,
+    spanned::Spanned,
+    Error,
+    LitChar,
+    Result,
+};
+
+use crate::{
+    token::{scope::Scope, terminal::Terminal, NULL},
+    utils::{Automata, AutomataContext, PredictableCollection, Set, SetImpl},
+};
+
+#[derive(Clone)]
+pub(super) struct CharacterSet {
+    span: Span,
+    set: Set<char>,
+}
+
+impl Parse for CharacterSet {
+    fn parse(input: ParseStream) -> Result<Self> {
+        struct Component(Set<char>);
+
+        impl Parse for Component {
+            fn parse(input: ParseStream) -> Result<Self> {
+                let lookahead = input.lookahead1();
+
+                if lookahead.peek(syn::LitChar) {
+                    let start_literal = input.parse::<LitChar>()?;
+                    let start_character = start_literal.value();
+
+                    if start_character == NULL {
+                        return Err(Error::new(
+                            start_literal.span(),
+                            "Null characters forbidden.",
+                        ));
+                    }
+
+                    if input.peek(Token![..]) {
+                        let span = input.parse::<Token![..]>()?.span();
+
+                        let end_literal = input.parse::<LitChar>()?;
+                        let end_character = end_literal.value();
+
+                        if start_character >= end_character {
+                            return Err(Error::new(
+                                span,
+                                "Range start must be less than the range end.",
+                            ));
+                        }
+
+                        return Ok(Self(
+                            (start_character..=end_character)
+                                .into_iter()
+                                .map(|character| {
+                                    if character == NULL {
+                                        return Err(Error::new(span, "Null characters forbidden."));
+                                    }
+
+                                    Ok(character)
+                                })
+                                .collect::<Result<Set<char>>>()?,
+                        ));
+                    }
+
+                    return Ok(Self(Set::new([start_character])));
+                }
+
+                Err(lookahead.error())
+            }
+        }
+
+        let span = input.span();
+
+        let set = Punctuated::<Component, Token![,]>::parse_terminated(input)?
+            .into_iter()
+            .fold(Set::empty(), |accumulator, component| {
+                accumulator.merge(component.0)
+            });
+
+        if set.is_empty() {
+            return Err(Error::new(span, "Empty character sets are forbidden."));
+        }
+
+        Ok(Self { span, set })
+    }
+}
+
+impl Default for CharacterSet {
+    #[inline(always)]
+    fn default() -> Self {
+        Self {
+            span: Span::call_site(),
+            set: Set::empty(),
+        }
+    }
+}
+
+impl Spanned for CharacterSet {
+    #[inline(always)]
+    fn span(&self) -> Span {
+        self.span
+    }
+}
+
+impl From<LitChar> for CharacterSet {
+    fn from(literal: LitChar) -> Self {
+        Self {
+            span: literal.span(),
+            set: Set::new([literal.value()]),
+        }
+    }
+}
+
+impl CharacterSet {
+    #[inline(always)]
+    pub(super) fn merge(self, other: Self) -> Self {
+        Self {
+            span: self.span,
+            set: self.set.merge(other.set),
+        }
+    }
+
+    #[inline]
+    pub(super) fn into_inclusion(self, scope: &mut Scope) -> Automata<Scope> {
+        scope.terminal(
+            self.set
+                .into_iter()
+                .map(|character| Terminal::Character(character))
+                .collect(),
+        )
+    }
+
+    #[inline]
+    pub(super) fn into_exclusion(self, scope: &mut Scope) -> Result<Automata<Scope>> {
+        let mut alphabet = scope.alphabet().clone();
+
+        for character in self.set {
+            if !alphabet.set.remove(&character) {
+                return Err(Error::new(
+                    self.span,
+                    format!(
+                        "An exclusion character '{}' is not included in any of the parsable rules' \
+                        non-exclusion expression.",
+                        character
+                    ),
+                ));
+            }
+        }
+
+        let alphabet = alphabet.into_inclusion(scope);
+        let other = scope.other();
+
+        Ok(scope.union(alphabet, other))
+    }
+
+    #[inline(always)]
+    pub(super) fn into_inner(self) -> Set<char> {
+        self.set
+    }
+}
diff --git a/work/crates/derive/src/token/compiler.rs b/work/crates/derive/src/token/compiler.rs
new file mode 100644
index 0000000..4f246c3
--- /dev/null
+++ b/work/crates/derive/src/token/compiler.rs
@@ -0,0 +1,281 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
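+
+// An informal summary (a reading of the code below, not normative
+// documentation): `Compiler` lowers the canonical scanner DFA into the flat
+// list of `Transition` entries that the derive macro renders as `match` arms.
+// Every DFA edge `(from, character, to)` is expanded over the whole alphabet
+// (plus the NULL placeholder) into `(from, (current, peek), to)` tuples, so
+// each generated arm can dispatch on the current character and on a
+// one-character lookahead at once.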
Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Ident; +use syn::Generics; + +use crate::{ + token::{ + rule::{RuleIndex, RuleMeta}, + scope::{ScannerState, Scope}, + terminal::Terminal, + transition::Transition, + Token, + NULL, + }, + utils::{Automata, Map, PredictableCollection, Set, SetImpl, State}, +}; + +pub(super) struct Compiler { + scope: Scope, + product_map: Map, + input: Set<(ScannerState, (char, char), ScannerState)>, + names: Map, + pending: Vec, + registered: Set, + result: Vec, +} + +impl Compiler { + pub(super) fn compile( + token_name: Ident, + generics: Generics, + rules: Vec, + mismatch: Ident, + scope: Scope, + automata: Automata, + products: Map, + ) -> Token { + let input = automata + .transitions + .into_iter() + .map(|(from, through, to)| { + let incoming = match through { + Terminal::Null => unreachable!("Automata with null transition."), + Terminal::Product(..) 
+
+impl Compiler {
+    pub(super) fn compile(
+        token_name: Ident,
+        generics: Generics,
+        rules: Vec<RuleMeta>,
+        mismatch: Ident,
+        scope: Scope,
+        automata: Automata<Scope>,
+        products: Map<ScannerState, RuleIndex>,
+    ) -> Token {
+        let input = automata
+            .transitions
+            .into_iter()
+            .map(|(from, through, to)| {
+                let incoming = match through {
+                    Terminal::Null => unreachable!("Automata with null transition."),
+                    Terminal::Product(..) => unreachable!("Unfiltered production terminal."),
+                    Terminal::Character(character) => character,
+                };
+
+                let mut alphabet = scope.alphabet().clone().into_inner();
+
+                alphabet.insert(NULL);
+
+                alphabet
+                    .into_iter()
+                    .map(move |peek| (from, (incoming, peek), to))
+            })
+            .flatten()
+            .collect();
+
+        let mut compiler = Self {
+            scope,
+            product_map: Map::empty(),
+            input,
+            names: Map::empty(),
+            pending: vec![automata.start],
+            registered: Set::empty(),
+            result: Vec::new(),
+        };
+
+        for (state, product) in products {
+            compiler.insert_product(state, product);
+        }
+
+        compiler.scope.reset();
+
+        while compiler.next() {}
+
+        compiler.result.sort();
+
+        Token {
+            token_name,
+            generics,
+            rules,
+            mismatch,
+            transitions: compiler.result,
+        }
+    }
+
+    fn next(&mut self) -> bool {
+        let from = match self.pending.pop() {
+            None => return false,
+            Some(state) => state,
+        };
+
+        let _ = self.registered.insert(from);
+
+        let outgoing = self.outgoing_view(&from).group(|transition| transition);
+
+        let from = self.name_of(from);
+
+        for (to, transitions) in outgoing {
+            let product = self.product_map.get(&to).cloned();
+
+            let to = match self.is_termination(&to) {
+                true => None,
+                false => {
+                    if !self.registered.contains(&to) {
+                        self.pending.push(to);
+                    }
+
+                    Some(self.name_of(to))
+                }
+            };
+
+            let group_by_incoming = transitions.group(|lookahead| lookahead);
+
+            let mut group_both =
+                Vec::<(Set<char>, Set<char>)>::with_capacity(group_by_incoming.len());
+
+            'outer: for (incoming, peek) in group_by_incoming {
+                for (incoming_set, peek_set) in group_both.iter_mut() {
+                    if peek_set == &peek {
+                        let _ = incoming_set.insert(incoming);
+                        continue 'outer;
+                    }
+                }
+
+                group_both.push((Set::new([incoming]), peek));
+            }
+
+            if group_both.is_empty() {
+                continue;
+            }
+
+            for (incoming, peek) in group_both {
+                self.result
+                    .push(Transition::new(from, incoming, peek, to, product));
+            }
+        }
+
+        true
+    }
+
+    #[inline]
+    fn name_of(&mut self, original: ScannerState) -> ScannerState {
+        *self
+            .names
+            .entry(original)
+            .or_insert_with(|| ScannerState::gen_state(&mut self.scope))
+    }
+
+    fn insert_product(&mut self, state: ScannerState, product: RuleIndex) {
+        let outgoing = self.outgoing_view(&state);
+
+        let inner_characters = match self.inner_characters(&state, &outgoing) {
+            None => {
+                self.product_map
+                    .entry(state)
+                    .and_modify(|previous| {
+                        if *previous > product {
+                            *previous = product
+                        }
+                    })
+                    .or_insert(product);
+
+                return;
+            }
+
+            Some(symbols) => symbols,
+        };
+
+        let incoming = self.incoming_view(&state);
+
+        let new_state = ScannerState::gen_state(&mut self.scope);
+        let _ = self.product_map.insert(new_state, product);
+
+        for (_, (incoming, peek)) in outgoing {
+            if !inner_characters.contains(&peek) {
+                let _ = self.input.remove(&(state, (incoming, peek), state));
+                let _ = self.input.insert((state, (incoming, peek), new_state));
+            }
+        }
+
+        for (from, (incoming, peek)) in incoming {
+            if !inner_characters.contains(&peek) {
+                let _ = self.input.remove(&(from, (incoming, peek), state));
+                let _ = self.input.insert((from, (incoming, peek), new_state));
+            }
+        }
+    }
+
+    #[inline]
+    fn inner_characters(
+        &self,
+        from: &ScannerState,
+        outgoing: &Set<(ScannerState, (char, char))>,
+    ) -> Option<Set<char>> {
+        if outgoing.is_empty() {
+            return None;
+        }
+
+        let mut result = Set::with_capacity(outgoing.len());
+
+        for (to, (incoming, _)) in outgoing {
+            if to != from {
+                return None;
+            }
+
+            let _ = result.insert(*incoming);
+        }
+
+        Some(result)
+    }
+
+    #[inline]
+    fn outgoing_view(&self, state:
&ScannerState) -> Set<(ScannerState, (char, char))> { + self.input + .iter() + .filter_map(|(from, through, to)| { + if from != state { + return None; + } + + Some((*to, *through)) + }) + .collect() + } + + #[inline] + fn incoming_view(&self, state: &ScannerState) -> Set<(ScannerState, (char, char))> { + self.input + .iter() + .filter_map(|(from, through, to)| { + if to != state || from == to { + return None; + } + + Some((*from, *through)) + }) + .collect() + } + + #[inline] + fn is_termination(&self, state: &ScannerState) -> bool { + !self.input.iter().any(|(from, _, _)| from == state) + } +} diff --git a/work/crates/derive/src/token/entry.rs b/work/crates/derive/src/token/entry.rs new file mode 100644 index 0000000..7e20bdf --- /dev/null +++ b/work/crates/derive/src/token/entry.rs @@ -0,0 +1,469 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
//
+////////////////////////////////////////////////////////////////////////////////
+
+use proc_macro2::Ident;
+use syn::{
+    parse::{Parse, ParseStream},
+    AttrStyle,
+    Data,
+    DeriveInput,
+    Error,
+    Generics,
+    Result,
+};
+
+use crate::{
+    token::{
+        characters::CharacterSet,
+        compiler::Compiler,
+        regex::{InlineMap, Regex, RegexImpl},
+        rule::RuleMeta,
+        scope::Scope,
+        terminal::Terminal,
+        transition::Transition,
+        variant::TokenVariant,
+    },
+    utils::{
+        AutomataContext,
+        Facade,
+        Map,
+        MapImpl,
+        MultimapImpl,
+        PredictableCollection,
+        Set,
+        SetImpl,
+    },
+};
+
+pub struct Token {
+    pub(super) token_name: Ident,
+    pub(super) generics: Generics,
+    pub(super) rules: Vec<RuleMeta>,
+    pub(super) mismatch: Ident,
+    pub(super) transitions: Vec<Transition>,
+}
+
+impl Parse for Token {
+    fn parse(input: ParseStream) -> Result<Self> {
+        let input = input.parse::<DeriveInput>()?;
+
+        let token_name = input.ident;
+        let generics = input.generics;
+
+        let data = match input.data {
+            Data::Enum(data) => data,
+
+            other => {
+                let span = match other {
+                    Data::Struct(data) => data.struct_token.span,
+                    Data::Union(data) => data.union_token.span,
+                    _ => unimplemented!(),
+                };
+
+                return Err(Error::new(
+                    span,
+                    "Token must be derived on the enum type with variants representing \
+                    language lexis.",
+                ));
+            }
+        };
+
+        let mut inline_map = InlineMap::empty();
+
+        for attribute in input.attrs {
+            match attribute.style {
+                AttrStyle::Inner(_) => continue,
+                AttrStyle::Outer => (),
+            }
+
+            let name = match attribute.path.get_ident() {
+                None => continue,
+                Some(name) => name,
+            };
+
+            match name.to_string().as_str() {
+                "define" => {
+                    let (name, mut expression) =
+                        attribute.parse_args_with(|input: ParseStream| {
+                            let name = input.parse::<Ident>()?;
+                            let _ = input.parse::<Token![=]>()?;
+                            let name_string = name.to_string();
+
+                            if inline_map.contains_key(&name_string) {
+                                return Err(Error::new(
+                                    name.span(),
+                                    "Inline expression with this name already defined.",
+                                ));
+                            }
+
+                            Ok((name_string, input.parse::<Regex>()?))
+                        })?;
+
+                    expression.inline(&inline_map)?;
+
+                    if inline_map.insert(name, expression).is_some() {
+                        unreachable!("Inline expression redefined.");
+                    }
+                }
+
+                _ => continue,
+            }
+        }
+
+        let mut mismatch: Option<Ident> = None;
+        let mut rules = Vec::with_capacity(data.variants.len());
+
+        for variant in data.variants.into_iter() {
+            let variant = TokenVariant::from_variant(variant, rules.len(), &inline_map)?;
+
+            match variant {
+                TokenVariant::Mismatch { name } => {
+                    if let Some(previous) = &mismatch {
+                        return Err(Error::new(
+                            name.span(),
+                            format!(
+                                "The variant {:?} already labeled as mismatch fallback.\nToken \
+                                must specify only one mismatch variant.",
+                                previous.to_string(),
+                            ),
+                        ));
+                    }
+
+                    mismatch = Some(name);
+                }
+
+                TokenVariant::Other => (),
+
+                rule @ TokenVariant::Rule { .. } => {
+                    rules.push(rule);
+                }
+            }
+        }
+
+        let mismatch = match mismatch {
+            Some(mismatch) => mismatch,
+
+            None => {
+                return Err(Error::new(
+                    token_name.span(),
+                    "One of the variants must be labeled as a mismatch fallback.\nUse \
+                    #[mismatch] attribute to label such variant.",
+                ));
+            }
+        };
+
+        let alphabet = rules
+            .iter()
+            .fold(None, |accumulator: Option<CharacterSet>, rule| {
+                let alphabet = match rule {
+                    TokenVariant::Rule { expression, ..
} => expression.alphabet(),
+                    _ => unreachable!("Non-rule variant."),
+                };
+
+                Some(match accumulator {
+                    None => alphabet,
+                    Some(accumulator) => accumulator.merge(alphabet),
+                })
+            })
+            .ok_or(Error::new(
+                token_name.span(),
+                "The enumeration must define at least one variant with a definitive rule \
+                expression.\nUse #[rule(...)] attributes to label such variants.",
+            ))?;
+
+        let mut scope = Scope::new(alphabet);
+
+        let mut automata = rules
+            .iter()
+            .try_fold(None, |accumulator, rule| {
+                let rule_name;
+                let rule_index;
+                let rule_expression;
+
+                match rule {
+                    TokenVariant::Rule {
+                        name,
+                        index,
+                        expression,
+                        ..
+                    } => {
+                        rule_name = name;
+                        rule_index = index;
+                        rule_expression = expression;
+                    }
+
+                    _ => unreachable!("Non-rule variant."),
+                }
+
+                let mut automata = rule_expression.encode(&mut scope)?;
+
+                if automata.accepts_null() {
+                    return Err(Error::new(
+                        rule_name.span(),
+                        "Variant's rule expression can match an empty string.\nTokens of empty \
+                        strings are not allowed.",
+                    ));
+                }
+
+                let product = scope.terminal(Set::new([Terminal::Product(*rule_index)]));
+
+                automata = scope.concatenate(automata, product);
+
+                Ok(Some(match accumulator {
+                    None => automata,
+                    Some(accumulator) => scope.union(accumulator, automata),
+                }))
+            })?
+            .expect("Internal error. Empty rule set.");
+
+        loop {
+            let mut has_changes = false;
+
+            automata.transitions = automata
+                .transitions
+                .group(|(from, through, to)| ((from, to), through))
+                .try_for_each(|_, symbols| {
+                    let mut products = Map::empty();
+                    let mut conflict = None;
+
+                    symbols.retain(|through| match through {
+                        Terminal::Product(index) => {
+                            let precedence = rules[*index].rule_precedence();
+
+                            if let Some(previous) = products.insert(precedence, *index) {
+                                conflict = Some((*index, previous));
+                            }
+
+                            false
+                        }
+
+                        _ => true,
+                    });
+
+                    if let Some((a, b)) = conflict {
+                        let a = rules[a].rule_name();
+                        let b = rules[b].rule_name();
+
+                        return Err(Error::new(
+                            a.span(),
+                            format!(
+                                "This rule conflicts with {:?} rule. Both rules can match the same \
+                                substring.\nTo resolve ambiguity try to label these rules with \
+                                explicit distinct precedences using #[precedence(...)] \
+                                attribute.\nDefault precedence is 1. Rules with higher precedence \
+                                value have priority over the rules with lower precedence value.",
+                                b.to_string(),
+                            ),
+                        ));
+                    }
+
+                    if products.len() > 1 {
+                        has_changes = true;
+                    }
+
+                    let product = products.iter().max_by_key(|(precedence, _)| *precedence);
+
+                    if let Some((_, index)) = product {
+                        assert!(
+                            symbols.insert(Terminal::Product(*index)),
+                            "Internal error. Duplicate production terminal.",
+                        );
+                    }
+
+                    Ok(())
+                })?
+                .join(|(from, to), through| (from, through, to));
+
+            if !has_changes {
+                break;
+            }
+
+            automata.canonicalize(&mut scope);
+        }
+
+        let mut products = Map::empty();
+        let mut matched_products = Set::empty();
+
+        automata
+            .transitions
+            .retain(|(from, through, _)| match through {
+                Terminal::Null => unreachable!("Automata with null transition."),
+                Terminal::Character(..) => true,
+                Terminal::Product(index) => {
+                    assert!(
+                        products.insert(*from, *index).is_none(),
+                        "Internal error. Unresolved ambiguity.",
+                    );
+
+                    let _ = matched_products.insert(*index);
+
+                    false
+                }
+            });
+
+        for (index, rule) in rules.iter().enumerate() {
+            match rule {
+                TokenVariant::Rule { name, .. } if !matched_products.contains(&index) => {
+                    return Err(Error::new(
+                        name.span(),
+                        format!(
+                            "Rule {:?} is overlapped by other rules due to a low precedence. This \
+                            rule never matches.\nTry to increase the rule's precedence by labeling \
+                            it with an explicit precedence specification using the \
+                            #[precedence(...)] attribute.\nDefault precedence is 1. Rules \
+                            with higher precedence value have priority over the rules with lower \
+                            precedence value.",
+                            name.to_string(),
+                        ),
+                    ));
+                }
+
+                _ => (),
+            }
+        }
+
+        let rules = rules.into_iter().map(RuleMeta::from).collect();
+
+        Ok(Compiler::compile(
+            token_name, generics, rules, mismatch, scope, automata, products,
+        ))
+    }
+}
+
+impl From<Token> for proc_macro::TokenStream {
+    fn from(mut input: Token) -> Self {
+        let facade = Facade::new();
+        let core = facade.core_crate();
+
+        let token_name = input.token_name;
+        let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
+
+        let transitions = input
+            .transitions
+            .iter()
+            .map(|transition| transition.output(&facade, &mut input.rules))
+            .collect::<Vec<_>>();
+
+        let start = 1usize;
+        let mismatch = &input.mismatch;
+
+        let mut token_in_use = false;
+        let mut kind_in_use = false;
+
+        for rule in &input.rules {
+            token_in_use = token_in_use || rule.uses_token_variable();
+            kind_in_use = kind_in_use || rule.uses_kind_variable();
+        }
+
+        let token_init = match token_in_use {
+            false => None,
+            true => Some(quote! {
+                let mut token = Self::#mismatch;
+            }),
+        };
+
+        let kind_init = match kind_in_use {
+            false => None,
+            true => Some(quote! {
+                let mut kind = 0;
+            }),
+        };
+
+        let result = match kind_in_use {
+            false => {
+                if token_in_use {
+                    quote! { token }
+                } else {
+                    quote! { Self::#mismatch }
+                }
+            }
+
+            true => {
+                let variants = input.rules.into_iter().map(|rule| {
+                    let index = rule.index();
+                    let in_place = rule.output_in_place(&facade);
+
+                    quote! { #index => #in_place }
+                });
+
+                if token_in_use {
+                    quote! {
+                        match kind {
+                            #( #variants, )*
+                            _ => token,
+                        }
+                    }
+                } else {
+                    quote! {
+                        match kind {
+                            #( #variants, )*
+                            _ => Self::#mismatch,
+                        }
+                    }
+                }
+            }
+        };
+
+        let output = quote! {
+            impl #impl_generics #core::lexis::Token for #token_name #ty_generics
+            #where_clause
+            {
+                fn new(session: &mut impl #core::lexis::LexisSession) -> Self {
+                    #[allow(unused_mut)]
+                    let mut state = #start;
+                    #token_init;
+                    #kind_init;
+
+                    loop {
+                        let current = #core::lexis::LexisSession::character(session);
+                        #core::lexis::LexisSession::advance(session);
+                        let next = #core::lexis::LexisSession::character(session);
+
+                        match (state, current, next) {
+                            #( #transitions )*
+                            _ => break,
+                        }
+                    }
+
+                    #result
+                }
+            }
+        };
+
+        output.into()
+    }
+}
diff --git a/work/crates/derive/src/token/mod.rs b/work/crates/derive/src/token/mod.rs
new file mode 100644
index 0000000..eda1767
--- /dev/null
+++ b/work/crates/derive/src/token/mod.rs
@@ -0,0 +1,50 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+mod characters;
+mod compiler;
+mod entry;
+mod regex;
+mod rule;
+mod scope;
+mod terminal;
+mod transition;
+mod variant;
+
+const NULL: char = '\0';
+
+pub use crate::token::entry::Token;
diff --git a/work/crates/derive/src/token/readme.md b/work/crates/derive/src/token/readme.md
new file mode 100644
index 0000000..71e2ca2
--- /dev/null
+++ b/work/crates/derive/src/token/readme.md
@@ -0,0 +1,284 @@
+A derive macro of the Token trait to construct a Lexical Scanner from a set of
+regular expressions.
+
+This macro implements a [Token](::lady_deirdre::lexis::Token) trait for the
+Rust enum type.
+
+An API user specifies Token parse rules directly on enum variants through the
+macro attributes. The macro verifies the validity of these rules at compile
+time and constructs a run-time optimized
+[`Finite State Automaton`](https://en.wikipedia.org/wiki/Finite-state_machine)
+scanner of arbitrary Unicode strings as a Token trait implementation.
+
+In case of invalid definitions or misuse, the macro reports descriptive
+compile-time errors to the macro programmer.
+
+## Regular Expressions Specification
+
+The Regular Expression language is any combination of the following sentences
+that fully recognizes a sequence of Unicode characters.
+
+| Sentence                 | Example                    | Description                                                               |
+|:-------------------------|:---------------------------|:--------------------------------------------------------------------------|
+| Character Match.         | 'a'                        | Matches a single character.                                               |
+| Character Set Match.     | ['A'..'Z', '0'..'9', '_']  | Matches any single character within specified range(s).                  |
+| Inverse Match.           | ^['A'..'Z', '0'..'9', '_'] | Matches any[^1] single character except the ones from specified range(s). |
+| Inline.                  | FOO                        | Matches an [Inline Expression](#inline-expression).                       |
+| String Match.            | "foo"                      | Matches the specified set of characters in the specified order.           |
+| Group.                   | ('a' & 'b')                | In a pattern "(A)", sentence A matches.                                   |
+| Sequence Match.          | "foo" & "bar"              | In a pattern "A & B", sentence A matches, and then sentence B matches.    |
+| Choice Match.            | "foo" \| "bar"             | In a pattern "A \| B", either sentence A matches, or sentence B matches.  |
+| Zero or More Repetition. | ['A'..'Z']*                | In a pattern "A*", sentence A matches zero or more times.                 |
+| One or More Repetition.  | ['A'..'Z']+                | In a pattern "A+", sentence A matches one or more times.                  |
+| Optional Match.          | "foo"?                     | In a pattern "A?", sentence A fully matches or does not match at all.     |
+
+Of the binary operators, Sequence Match (&) has priority over Choice
+Match (|). Unary operators (*, +, ?) have priority over the binary operators,
+and the Group operator prioritizes anything inside the parentheses.
+
+E.g. the `'a' & 'b' | ('c' | 'd')+ & 'e'` expression matches either the string
+"ab", or a string that starts with repetitions of the 'c' and 'd' characters
+and ends with the character 'e'.
+
+[^1]: Note that the Inverse Match sentence matches against the set of
+characters explicitly mentioned in the entire set of the Parsable rules'
+expressions; this set defines the scanning "Alphabet". In principle, the macro
+cannot set the "Alphabet" to the full Unicode set.
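+
+For instance, the following sketch (an illustration made up for this
+documentation, not one of the crate's shipped examples) spells out the
+grouping that these priorities imply:
+
+```rust
+#[derive(Token)]
+enum PriorityExample {
+    // Parsed as ('a' & 'b') | ((('c' | 'd')+) & 'e').
+    #[rule('a' & 'b' | ('c' | 'd')+ & 'e')]
+    AbOrCdsE,
+
+    #[mismatch]
+    Mismatch,
+}
+```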
+
+## Type-level attributes.
+
+These attributes are meant to be bound to the Enum type.
+
+```rust
+#[derive(Token)]
+// Type-level attributes go here.
+enum MyToken {
+    // ..
+}
+```
+
+ - ### Inline Expression.
+
+   **Format:** `#[define(<name> = <expression>)]`.
+
+   Defines a named inline expression. These expressions could be further
+   referenced inside other regular expressions by `<name>` (including Variant
+   Rules and other Inline Expressions).
+
+   The macro interprets such references as direct inlines of the
+   `<expression>`.
+
+   An inline expression must be defined before use. As such, an inline
+   expression cannot define direct or indirect recursion.
+
+   Inline expressions are a mechanism to reuse frequently repeated fragments
+   of regular expressions by name.
+
+   This attribute is optional.
+
+   ```rust
+   #[derive(Token)]
+   #[define(POSITIVE_DIGIT = ['1'..'9'])]
+   #[define(DIGIT = '0' | POSITIVE_DIGIT)] // Referring POSITIVE_DIGIT.
+   enum MyToken {
+       #[rule(POSITIVE_DIGIT & DIGIT*)] // Referring POSITIVE_DIGIT and DIGIT.
+       Number,
+
+       // ...
+
+       #[mismatch]
+       Mismatch,
+   }
+   ```
+
+## Variant-level attributes
+
+These attributes are meant to be bound to the Enum Variants.
+
+```rust
+#[derive(Token)]
+enum MyToken {
+    // Variant attributes go here.
+    Variant1,
+
+    // Variant attributes go here.
+    Variant2,
+
+    // ...
+}
+```
+
+ - ### Rule.
+
+   **Format:** `#[rule(<expression>)]`.
+
+   Defines a Parsable token variant.
+
+   This attribute must be bound to all Parsable variants of the underlying
+   enum type except the [Mismatch](#mismatch) variant.
+
+   An API user must define at least one Parsable variant per enum type.
+
+   All Parsable variants must not conflict with each other. Two variants are
+   considered conflicting if one of them could parse a string that would be a
+   substring of another variant's parsable string. See the
+   [Precedence](#precedence) attribute for conflict resolution details.
+
+   The `<expression>` must not parse empty strings.
+
+   ```rust
+   #[derive(Token)]
+   enum MyToken {
+       #[rule(['a'..'z']+)]
+       Identifier,
+
+       #[mismatch]
+       Mismatch,
+   }
+   ```
+
+ - ### Precedence.
+
+   **Format:** `#[precedence(<numeric precedence>)]`.
+
+   Establishes execution priority between two conflicting Parsable token
+   variants.
+
+   Two Token variants are considered conflicting if one of them could parse a
+   string that would be a substring of another variant's parsable string. Such
+   conflicts are required to be resolved explicitly using this attribute.
+
+   If one Parsable token has a higher `numeric precedence` than another one,
+   the first one always shadows the second one.
+
+   The default precedence is 1. This attribute is optional, and is not
+   applicable to non-parsable Variants (the Variant must be labeled with the
+   `#[rule(...)]` attribute too).
+
+   For example, an arbitrary alphabetical identifier would conflict with the
+   programming language's alphabetical reserved words.
+
+   ```rust
+   #[derive(Token)]
+   enum MyToken {
+       #[rule(['a'..'z']+)]
+       Identifier,
+
+       // The "keyword" string could be recognized as an Identifier too, so we
+       // have to raise its precedence explicitly.
+       //
+       // Note, however, that raising the Identifier's precedence instead
+       // would lead to a compile-time error, because in this case the
+       // "keyword" string would never match as a Keyword. It would always be
+       // recognizable as an Identifier.
+       #[rule("keyword")]
+       #[precedence(2)]
+       Keyword,
+
+       #[mismatch]
+       Mismatch,
+   }
+   ```
+
+ - ### Mismatch.
+
+   **Format:** `#[mismatch]`.
+
+   One and only one non-Parsable enum variant must be labeled with this
+   attribute. This attribute is required.
+
+   All strings that cannot be recognized by any other Parsable variant will
+   be sunk into this token variant.
+
+   Such tokens are considered lexically valid tokens; however, they could be
+   recognized as syntactically incorrect at the Syntax Parsing stage.
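+
+   A minimal sketch (made up for this documentation): any input that the
+   `Number` rule rejects ends up in the `Mismatch` variant.
+
+   ```rust
+   #[derive(Token)]
+   enum MyToken {
+       #[rule(['0'..'9']+)]
+       Number,
+
+       // "abc", "?", and similar unrecognized inputs scan into Mismatch.
+       #[mismatch]
+       Mismatch,
+   }
+   ```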
+
+ - ### Constructor.
+
+   **Format:** `#[constructor(<constructor function name>)]`.
+
+   Specifies the Parsable enum variant's construction function.
+
+   The Scanner will call the provided `<constructor function name>` function
+   to construct the enum's instance when the variant's rule matches.
+
+   The function must be defined on the enum type as a static function
+   accessible from the current Rust scope; it must accept a `&str` of the
+   recognized string, and it must return an instance of this enum type.
+
+   If a Parsable enum variant has a body, this attribute must be specified
+   explicitly; otherwise this attribute is optional. The attribute is
+   applicable to Parsable variants only (the Variant must be labeled with the
+   `#[rule(...)]` attribute too).
+
+   ```rust
+   #[derive(Token)]
+   enum MyToken {
+       #[rule(['1'..'9'] & ['0'..'9']* | '0')]
+       #[constructor(parse_num)]
+       Num(usize),
+
+       #[mismatch]
+       Mismatch,
+   }
+
+   impl MyToken {
+       fn parse_num(input: &str) -> Self {
+           Self::Num(input.parse().unwrap())
+       }
+   }
+   ```
+
+## Json Lexis Example.
+
+```rust
+#[derive(Token)]
+#[define(DEC = ['0'..'9'])]
+#[define(HEX = DEC | ['A'..'F'])]
+#[define(POSITIVE = ['1'..'9'] & DEC*)]
+#[define(ESCAPE = '\\' & (
+    ['"', '\\', '/', 'b', 'f', 'n', 'r', 't']
+    | ('u' & HEX & HEX & HEX & HEX)
+))]
+enum JsonToken {
+    #[rule("true")]
+    True,
+
+    #[rule("false")]
+    False,
+
+    #[rule("null")]
+    Null,
+
+    #[rule('{')]
+    BraceOpen,
+
+    #[rule('}')]
+    BraceClose,
+
+    #[rule('[')]
+    BracketOpen,
+
+    #[rule(']')]
+    BracketClose,
+
+    #[rule(',')]
+    Comma,
+
+    #[rule(':')]
+    Colon,
+
+    #[rule('"' & (ESCAPE | ^['"', '\\'])* & '"')]
+    String,
+
+    #[rule('-'? & ('0' | POSITIVE) & ('.' & DEC+)? & (['e', 'E'] & ['-', '+']? & DEC+)?)]
+    Number,
+
+    #[rule([' ', '\t', '\n', '\x0c', '\r']+)]
+    Whitespace,
+
+    #[mismatch]
+    Mismatch,
+}
+```
diff --git a/work/crates/derive/src/token/regex.rs b/work/crates/derive/src/token/regex.rs
new file mode 100644
index 0000000..7aac214
--- /dev/null
+++ b/work/crates/derive/src/token/regex.rs
@@ -0,0 +1,368 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
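+
+// An informal summary (a reading of the code below, not normative
+// documentation): `Regex` is the expression tree of the macro's surface
+// language, built from `Operand` leaves and `Operator` nodes. `inline`
+// substitutes named `#[define(...)]` expressions into the tree, `alphabet`
+// collects every explicitly mentioned character, and `encode` lowers the
+// tree into an `Automata` within a `Scope`.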
+
+use proc_macro2::{Ident, Span};
+use syn::{
+    parse::{Lookahead1, ParseStream},
+    spanned::Spanned,
+    token::Paren,
+    Error,
+    LitChar,
+    LitStr,
+    Result,
+};
+
+use crate::{
+    token::{characters::CharacterSet, scope::Scope, NULL},
+    utils::{
+        Applicability,
+        Automata,
+        AutomataContext,
+        Expression,
+        ExpressionOperand,
+        ExpressionOperator,
+        Map,
+    },
+};
+
+pub(super) type Regex = Expression<Operator>;
+
+impl RegexImpl for Regex {
+    fn inline(&mut self, inline_map: &InlineMap) -> Result<()> {
+        match self {
+            Self::Operand(Operand::Inline { name }) => {
+                match inline_map.get(&name.to_string()) {
+                    None => {
+                        return Err(Error::new(
+                            name.span(),
+                            "Unknown inline expression.\nEach inline expression name is \
+                            case-sensitive and should be defined before use.\nTo define an inline \
+                            expression use #[define(name = <expression>)] attribute on the derived \
+                            type.",
+                        ));
+                    }
+
+                    Some(inline) => {
+                        *self = inline.clone();
+                    }
+                };
+            }
+
+            Self::Operand(Operand::Debug { inner, .. }) => {
+                inner.inline(inline_map)?;
+            }
+
+            Self::Unary { inner, .. } => inner.inline(inline_map)?,
+
+            Self::Binary { left, right, .. } => {
+                left.inline(inline_map)?;
+                right.inline(inline_map)?;
+            }
+
+            _ => (),
+        }
+
+        Ok(())
+    }
+
+    fn alphabet(&self) -> CharacterSet {
+        match self {
+            Self::Operand(Operand::Inclusion { character_set }) => character_set.clone(),
+            Self::Operand(Operand::Debug { inner, .. }) => inner.alphabet(),
+            Self::Binary { left, right, .. } => left.alphabet().merge(right.alphabet()),
+            Self::Unary { inner, .. } => inner.alphabet(),
+            _ => CharacterSet::default(),
+        }
+    }
+
+    fn encode(&self, scope: &mut Scope) -> Result<Automata<Scope>> {
+        Ok(match self {
+            Self::Operand(Operand::Any) => scope.any(),
+
+            Self::Operand(Operand::Inline { .. }) => unreachable!("Unresolved inline."),
+
+            Self::Operand(Operand::Debug { span, inner }) => {
+                let inner = inner.encode(scope)?;
+
+                return Err(Error::new(
+                    *span,
+                    format!(
+                        "This expression is a subject for debugging.\n\nState machine transitions \
+                        are:\n{:#}",
+                        inner,
+                    ),
+                ));
+            }
+
+            Self::Operand(Operand::Inclusion { character_set }) => {
+                character_set.clone().into_inclusion(scope)
+            }
+
+            Self::Operand(Operand::Exclusion { character_set }) => {
+                character_set.clone().into_exclusion(scope)?
+            }
+
+            Self::Binary {
+                operator: Operator::Concat,
+                left,
+                right,
+            } => {
+                let left = left.encode(scope)?;
+                let right = right.encode(scope)?;
+
+                scope.concatenate(left, right)
+            }
+
+            Self::Binary {
+                operator: Operator::Union,
+                left,
+                right,
+            } => {
+                let left = left.encode(scope)?;
+                let right = right.encode(scope)?;
+
+                scope.union(left, right)
+            }
+
+            Self::Unary {
+                operator: Operator::ZeroOrMore,
+                inner,
+            } => {
+                let inner = inner.encode(scope)?;
+
+                scope.repeat(inner)
+            }
+
+            Self::Unary {
+                operator: Operator::OneOrMore,
+                inner,
+            } => {
+                let inner = inner.encode(scope)?;
+
+                let left = inner.clone();
+                let right = scope.repeat(inner);
+
+                scope.concatenate(left, right)
+            }
+
+            Self::Unary {
+                operator: Operator::Optional,
+                inner,
+            } => {
+                let inner = inner.encode(scope)?;
+
+                scope.optional(inner)
+            }
+
+            _ => unreachable!("Unsupported operation."),
+        })
+    }
+}
+
+pub(super) trait RegexImpl {
+    fn inline(&mut self, inline_map: &InlineMap) -> Result<()>;
+
+    fn alphabet(&self) -> CharacterSet;
+
+    fn encode(&self, scope: &mut Scope) -> Result<Automata<Scope>>;
+}
+
+pub(super) type InlineMap = Map<String, Regex>;
+
+#[derive(Clone)]
+pub(super) enum Operand {
+    Any,
+    Inline { name: Ident },
+    Debug { span: Span, inner: Box<Regex> },
+    Inclusion { character_set: CharacterSet },
+    Exclusion { character_set: CharacterSet },
+}
+
+impl ExpressionOperand for Operand {
+    fn parse(input: ParseStream) -> Result<Regex> {
+        let lookahead = input.lookahead1();
+
+        if lookahead.peek(syn::LitChar) {
+            let literal = input.parse::<LitChar>()?;
+
+            if literal.value() == NULL {
+                return Err(Error::new(literal.span(), "Null characters forbidden."));
+            }
+
+            return Ok(Expression::Operand(Operand::Inclusion {
+                character_set: CharacterSet::from(literal),
+            }));
+        }
+
+        if lookahead.peek(syn::LitStr) {
+            let literal = input.parse::<LitStr>()?;
+            let string = literal.value();
+
+            return string
+                .chars()
+                .try_fold(None, |accumulator, character| {
+                    if character == NULL {
+                        return Err(Error::new(literal.span(), "Null characters forbidden."));
+                    }
+
+                    let right = Expression::Operand(Operand::Inclusion {
+                        character_set: CharacterSet::from(LitChar::new(character, literal.span())),
+                    });
+
+                    Ok(Some(match accumulator {
+                        None => right,
+                        Some(left) => Expression::Binary {
+                            operator: Operator::Concat,
+                            left: Box::new(left),
+                            right: Box::new(right),
+                        },
+                    }))
+                })?
+                .ok_or_else(|| Error::new(literal.span(), "Empty strings are forbidden."));
+        }
+
+        if lookahead.peek(Token![.]) {
+            let _ = input.parse::<Token![.]>()?;
+
+            return Ok(Expression::Operand(Operand::Any));
+        }
+
+        if lookahead.peek(syn::token::Bracket) {
+            let content;
+
+            bracketed!(content in input);
+
+            let character_set = content.parse::<CharacterSet>()?;
+
+            return Ok(Expression::Operand(Operand::Inclusion { character_set }));
+        }
+
+        if lookahead.peek(Token![^]) {
+            let _ = input.parse::<Token![^]>()?;
+
+            let content;
+
+            bracketed!(content in input);
+
+            let character_set = content.parse::<CharacterSet>()?;
+
+            return Ok(Expression::Operand(Operand::Exclusion { character_set }));
+        }
+
+        if lookahead.peek(syn::Ident) {
+            let identifier = input.parse::<Ident>()?;
+
+            if identifier.to_string() == "debug" && input.peek(Paren) {
+                let content;
+
+                parenthesized!(content in input);
+
+                return Ok(Expression::Operand(Operand::Debug {
+                    span: identifier.span(),
+                    inner: Box::new(content.parse::<Regex>()?),
+                }));
+            }
+
+            return Ok(Expression::Operand(Operand::Inline { name: identifier }));
+        }
+
+        if lookahead.peek(syn::token::Paren) {
+            let content;
+
+            parenthesized!(content in input);
+
+            return content.parse::<Regex>();
+        }
+
+        Err(lookahead.error())
+    }
+}
+
+#[derive(Clone, Copy)]
+pub(super) enum Operator {
+    Union = 10,
+    Concat = 20,
+    OneOrMore = 30,
+    ZeroOrMore = 40,
+    Optional = 50,
+}
+
+impl ExpressionOperator for Operator {
+    type Operand = Operand;
+
+    #[inline]
+    fn enumerate() -> Vec<Self> {
+        vec![
+            Self::Union,
+            Self::Concat,
+            Self::OneOrMore,
+            Self::ZeroOrMore,
+            Self::Optional,
+        ]
+    }
+
+    #[inline(always)]
+    fn binding_power(&self) -> u8 {
+        *self as u8
+    }
+
+    #[inline]
+    fn peek(&self, lookahead: &Lookahead1) -> Applicability {
+        match self {
+            Self::Union if lookahead.peek(Token![|]) => Applicability::Binary,
+            Self::Concat if lookahead.peek(Token![&]) => Applicability::Binary,
+            Self::OneOrMore if lookahead.peek(Token![+]) => Applicability::Unary,
+            Self::ZeroOrMore if lookahead.peek(Token![*]) => Applicability::Unary,
+            Self::Optional if lookahead.peek(Token![?]) => Applicability::Unary,
+
+            _ => Applicability::Mismatch,
+        }
+    }
+
+    #[inline]
+    fn parse(&mut self, input: ParseStream) -> Result<()> {
+        match self {
+            Self::Union => drop(input.parse::<Token![|]>()?),
+            Self::Concat => drop(input.parse::<Token![&]>()?),
+            Self::OneOrMore => drop(input.parse::<Token![+]>()?),
+            Self::ZeroOrMore => drop(input.parse::<Token![*]>()?),
+            Self::Optional => drop(input.parse::<Token![?]>()?),
+        };
+
+        Ok(())
+    }
+}
diff --git a/work/crates/derive/src/token/rule.rs b/work/crates/derive/src/token/rule.rs
new file mode 100644
index 0000000..40411e7
--- /dev/null
+++ b/work/crates/derive/src/token/rule.rs
@@ -0,0 +1,134 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
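+
+// An informal summary (a reading of the code below): `RuleMeta` carries the
+// per-rule data needed during code generation. `output_in_place` expands to an
+// immediate token construction (or to a `#[constructor(...)]` call on the
+// scanned substring), while `output_derive` records the decision in the
+// scanner's `token`/`kind` variables to be resolved when scanning stops.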
+
+use proc_macro2::{Ident, TokenStream};
+
+use crate::{token::variant::TokenVariant, utils::Facade};
+
+pub(super) type RuleIndex = usize;
+pub(super) type RulePrecedence = usize;
+
+pub(super) struct RuleMeta {
+    name: Ident,
+    index: RuleIndex,
+    derive_in_use: bool,
+    constructor: Option<Ident>,
+}
+
+impl From<TokenVariant> for RuleMeta {
+    #[inline]
+    fn from(variant: TokenVariant) -> Self {
+        match variant {
+            TokenVariant::Rule {
+                name,
+                index,
+                constructor,
+                ..
+            } => Self {
+                name,
+                index,
+                derive_in_use: false,
+                constructor,
+            },
+
+            _ => unreachable!("Non-rule variant."),
+        }
+    }
+}
+
+impl RuleMeta {
+    #[inline]
+    pub(super) fn index(&self) -> &RuleIndex {
+        &self.index
+    }
+
+    #[inline]
+    pub(super) fn uses_token_variable(&self) -> bool {
+        self.derive_in_use && self.constructor.is_none()
+    }
+
+    #[inline]
+    pub(super) fn uses_kind_variable(&self) -> bool {
+        self.derive_in_use && self.constructor.is_some()
+    }
+
+    #[inline]
+    pub(super) fn output_in_place(&self, facade: &Facade) -> TokenStream {
+        match &self.constructor {
+            None => {
+                let name = &self.name;
+
+                quote! {
+                    Self::#name
+                }
+            }
+
+            Some(constructor) => {
+                let core = facade.core_crate();
+
+                let span = constructor.span();
+
+                quote_spanned! {span=>
+                    Self::#constructor(#core::lexis::LexisSession::substring(session))
+                }
+            }
+        }
+    }
+
+    #[inline]
+    pub(super) fn output_derive(&mut self) -> TokenStream {
+        self.derive_in_use = true;
+
+        match &self.constructor {
+            None => {
+                let name = &self.name;
+
+                quote! {
+                    token = Self::#name
+                }
+            }
+
+            Some(..) => {
+                let index = &self.index;
+
+                quote! {
+                    kind = #index
+                }
+            }
+        }
+    }
+}
diff --git a/work/crates/derive/src/token/scope.rs b/work/crates/derive/src/token/scope.rs
new file mode 100644
index 0000000..6e0944c
--- /dev/null
+++ b/work/crates/derive/src/token/scope.rs
@@ -0,0 +1,98 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use std::ops::RangeFrom;
+
+use crate::{
+    token::{characters::CharacterSet, terminal::Terminal, NULL},
+    utils::{Automata, AutomataContext, Set, SetImpl, State},
+};
+
+pub(super) struct Scope {
+    alphabet: CharacterSet,
+    generator: RangeFrom<ScannerState>,
+}
+
+impl AutomataContext for Scope {
+    type State = ScannerState;
+    type Terminal = Terminal;
+}
+
+impl Scope {
+    #[inline(always)]
+    pub(super) fn new(alphabet: CharacterSet) -> Self {
+        Self {
+            alphabet,
+            generator: 1..,
+        }
+    }
+
+    #[inline(always)]
+    pub(super) fn alphabet(&self) -> &CharacterSet {
+        &self.alphabet
+    }
+
+    #[inline]
+    pub(super) fn any(&mut self) -> Automata<Self> {
+        let alphabet = self.alphabet.clone().into_inclusion(self);
+        let other = self.other();
+
+        self.union(alphabet, other)
+    }
+
+    #[inline]
+    pub(super) fn other(&mut self) -> Automata<Self> {
+        self.terminal(Set::new([Terminal::Character(NULL)]))
+    }
+
+    #[inline(always)]
+    pub(super) fn reset(&mut self) {
+        self.generator = 1..;
+    }
+}
+
+pub(super) type ScannerState = usize;
+
+impl State for ScannerState {
+    #[inline(always)]
+    fn gen_state(context: &mut Scope) -> Self {
+        context
+            .generator
+            .next()
+            .expect("Internal error. State generator exceeded.")
+    }
+}
diff --git a/work/crates/derive/src/token/terminal.rs b/work/crates/derive/src/token/terminal.rs
new file mode 100644
index 0000000..705fcb5
--- /dev/null
+++ b/work/crates/derive/src/token/terminal.rs
@@ -0,0 +1,109 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use std::{
+    cmp::Ordering,
+    fmt::{Display, Formatter},
+};
+
+use crate::{
+    token::{rule::RuleIndex, NULL},
+    utils::AutomataTerminal,
+};
+
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub(super) enum Terminal {
+    Null,
+    Character(char),
+    Product(RuleIndex),
+}
+
+impl AutomataTerminal for Terminal {
+    #[inline(always)]
+    fn null() -> Self {
+        Self::Null
+    }
+
+    #[inline(always)]
+    fn is_null(&self) -> bool {
+        match self {
+            Self::Null => true,
+            _ => false,
+        }
+    }
+}
+
+impl Ord for Terminal {
+    #[inline]
+    fn cmp(&self, other: &Self) -> Ordering {
+        match (self, other) {
+            (Self::Null, Self::Null) => Ordering::Equal,
+            (Self::Null, _) => Ordering::Less,
+            (_, Self::Null) => Ordering::Greater,
+            (Self::Character(..), Self::Product(..)) => Ordering::Less,
+            (Self::Product(..), Self::Character(..)) => Ordering::Greater,
+            (Self::Character(a), Self::Character(b)) => a.cmp(b),
+            (Self::Product(a), Self::Product(b)) => a.cmp(b),
+        }
+    }
+}
+
+impl PartialOrd for Terminal {
+    #[inline(always)]
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Display for Terminal {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Null => formatter.write_str("null"),
+
+            Self::Character(character) => {
+                if character == &NULL {
+                    return formatter.write_str("_");
+                }
+
+                formatter.write_fmt(format_args!("{:?}", character))
+            }
+
+            Self::Product(index) => formatter.write_fmt(format_args!("P({})", index)),
+        }
+    }
+}
diff --git a/work/crates/derive/src/token/transition.rs b/work/crates/derive/src/token/transition.rs
new file mode 100644
index 0000000..b6fb4d5
--- /dev/null
+++ b/work/crates/derive/src/token/transition.rs
@@ -0,0 +1,317 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
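+
+// An informal summary (a reading of the code below): each `Transition` is one
+// generated `match` arm of the scanner loop. It matches the tuple
+// `(state, current, peek)` and then moves to another state, submits a token
+// product, or both. Character sets are compressed into `Group`/`Subgroup`
+// range patterns further below.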
+
+use std::{cmp::Ordering, ops::RangeInclusive};
+
+use proc_macro2::{Span, TokenStream};
+use syn::LitChar;
+
+use crate::{
+    token::{
+        rule::{RuleIndex, RuleMeta},
+        scope::ScannerState,
+        NULL,
+    },
+    utils::{Facade, Set},
+};
+
+#[derive(PartialEq, Eq, PartialOrd)]
+pub(super) struct Transition {
+    from: ScannerState,
+    incoming: Group,
+    peek: Group,
+    to: Option<ScannerState>,
+    product: Option<RuleIndex>,
+}
+
+impl Ord for Transition {
+    #[inline]
+    fn cmp(&self, other: &Self) -> Ordering {
+        if self.from < other.from {
+            return Ordering::Less;
+        }
+
+        if self.from > other.from {
+            return Ordering::Greater;
+        }
+
+        match self.incoming.cmp(&other.incoming) {
+            Ordering::Less => Ordering::Less,
+            Ordering::Greater => Ordering::Greater,
+            Ordering::Equal => self.peek.cmp(&other.peek),
+        }
+    }
+}
+
+impl Transition {
+    #[inline(always)]
+    pub(super) fn new(
+        from: ScannerState,
+        incoming: Set<char>,
+        peek: Set<char>,
+        to: Option<ScannerState>,
+        product: Option<RuleIndex>,
+    ) -> Self {
+        Self {
+            from,
+            incoming: incoming.into(),
+            peek: peek.into(),
+            to,
+            product,
+        }
+    }
+
+    pub(super) fn output(&self, facade: &Facade, rules: &mut Vec<RuleMeta>) -> TokenStream {
+        let core = facade.core_crate();
+
+        let from = &self.from;
+
+        let pattern = match (self.incoming.is_placeholder(), self.peek.is_placeholder()) {
+            (true, true) => {
+                quote! {
+                    (#from, current, _) if current != '\0'
+                }
+            }
+
+            (false, true) => {
+                let incoming = self.incoming.output();
+
+                quote! {
+                    (#from, #incoming, _)
+                }
+            }
+
+            (true, false) => {
+                let peek = self.peek.output();
+
+                quote! {
+                    (#from, #peek, _)
+                }
+            }
+
+            (false, false) => {
+                let incoming = self.incoming.output();
+                let peek = self.peek.output();
+
+                quote! {
+                    (#from, #incoming, #peek)
+                }
+            }
+        };
+
+        let to = match (&self.to, &self.product) {
+            (None, None) => unreachable!("Dead state."),
+
+            (None, Some(index)) => {
+                let in_place = rules[*index].output_in_place(facade);
+
+                quote! {
+                    {
+                        #core::lexis::LexisSession::submit(session);
+                        return #in_place;
+                    }
+                }
+            }
+
+            (Some(state), None) => {
+                if state == from {
+                    quote! { (), }
+                } else {
+                    quote! {
+                        state = #state,
+                    }
+                }
+            }
+
+            (Some(state), Some(index)) => {
+                let core = facade.core_crate();
+                let derived = rules[*index].output_derive();
+
+                if state == from {
+                    quote! {
+                        {
+                            #core::lexis::LexisSession::submit(session);
+                            #derived;
+                        }
+                    }
+                } else {
+                    quote! {
+                        {
+                            #core::lexis::LexisSession::submit(session);
+                            #derived;
+                            state = #state;
+                        }
+                    }
+                }
+            }
+        };
+
+        quote! {
+            #pattern => #to
+        }
+    }
+}
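+
+// `Group` is the pattern side of an arm: `Placeholder` stands for a catch-all
+// pattern (the NULL sentinel marks characters outside the explicit alphabet),
+// while `Subgroups` keeps the sorted character list together with its
+// compression into `'a'..='z'`-style range patterns.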
{ + #pattern => #to + } + } +} + +#[derive(PartialEq, Eq)] +enum Group { + Placeholder, + Subgroups { + sequential: Vec, + grouped: Vec, + }, +} + +impl PartialOrd for Group { + #[inline(always)] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Group { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + match (self, other) { + (Self::Placeholder, Self::Placeholder) => Ordering::Equal, + (Self::Placeholder, _) => Ordering::Greater, + (_, Self::Placeholder) => Ordering::Less, + (Self::Subgroups { sequential: a, .. }, Self::Subgroups { sequential: b, .. }) => { + a.cmp(b) + } + } + } +} + +impl Group { + #[inline(always)] + fn is_placeholder(&self) -> bool { + match self { + Self::Placeholder => true, + _ => false, + } + } + + fn output(&self) -> TokenStream { + match self { + Self::Placeholder => unreachable!("An attempt to output placeholder"), + Self::Subgroups { grouped, .. } => { + let grouped = grouped.iter().map(Subgroup::output); + quote! { #( #grouped )|* } + } + } + } +} + +#[derive(Debug, PartialEq, Eq)] +enum Subgroup { + Single(char), + Range(RangeInclusive), +} + +impl From> for Group { + fn from(set: Set) -> Self { + if set.contains(&NULL) { + return Self::Placeholder; + } + + let mut sequential = set.into_iter().collect::>(); + + sequential.sort(); + + let grouped = sequential + .iter() + .fold(None, |accumulator, character| match accumulator { + None => Some(vec![Subgroup::Single(*character)]), + Some(mut grouped) => { + let last = grouped + .pop() + .expect("Internal error. Empty subgroup sequence."); + + match last { + Subgroup::Single(single) => { + if single as u32 + 1 == *character as u32 { + grouped.push(Subgroup::Range(single..=*character)) + } else { + grouped.push(Subgroup::Single(single)); + grouped.push(Subgroup::Single(*character)); + } + } + + Subgroup::Range(range) => { + if *range.end() as u32 + 1 == *character as u32 { + grouped.push(Subgroup::Range(*range.start()..=*character)) + } else { + grouped.push(Subgroup::Range(range)); + grouped.push(Subgroup::Single(*character)); + } + } + } + + Some(grouped) + } + }) + .expect("Internal error. Empty character set."); + + Self::Subgroups { + sequential, + grouped, + } + } +} + +impl Subgroup { + fn output(&self) -> TokenStream { + match self { + Self::Single(character) => { + let literal = LitChar::new(*character, Span::call_site()); + + quote! { + #literal + } + } + + Self::Range(range) => { + let start = LitChar::new(*range.start(), Span::call_site()); + let end = LitChar::new(*range.end(), Span::call_site()); + + quote! { + #start..=#end + } + } + } + } +} diff --git a/work/crates/derive/src/token/variant.rs b/work/crates/derive/src/token/variant.rs new file mode 100644 index 0000000..3f0ce59 --- /dev/null +++ b/work/crates/derive/src/token/variant.rs @@ -0,0 +1,246 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. 
Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Ident; +use syn::{spanned::Spanned, AttrStyle, Error, ExprLit, Lit, Result, Variant}; + +use crate::token::{ + regex::{InlineMap, Regex, RegexImpl}, + rule::{RuleIndex, RulePrecedence}, +}; + +pub(super) enum TokenVariant { + Rule { + name: Ident, + index: RuleIndex, + precedence: Option, + constructor: Option, + expression: Regex, + }, + Mismatch { + name: Ident, + }, + Other, +} + +impl TokenVariant { + pub(super) fn from_variant( + variant: Variant, + index: RuleIndex, + inline_map: &InlineMap, + ) -> Result { + let name = variant.ident; + let trivial = variant.fields.is_empty(); + + let mut precedence = None; + let mut constructor = None; + let mut mismatch = false; + let mut expression = None; + + for attribute in variant.attrs { + match attribute.style { + AttrStyle::Inner(_) => continue, + AttrStyle::Outer => (), + } + + let name = match attribute.path.get_ident() { + None => continue, + Some(name) => name, + }; + + match name.to_string().as_str() { + "precedence" => { + if precedence.is_some() { + return Err(Error::new(name.span(), "Duplicate Precedence attribute.")); + } + + if mismatch { + return Err(Error::new( + name.span(), + "Mismatch rules cannot have precedence.", + )); + } + + let expression = attribute.parse_args::()?; + + match expression.lit { + Lit::Int(literal) => { + let value = literal.base10_parse::()?; + + if value == 0 { + return Err(Error::new( + literal.span(), + "Rule precedence value must be positive. 
Default \
+                                    precedence is \"1\".",
+                                ));
+                            }
+
+                            precedence = Some(value);
+                        }
+
+                        other => {
+                            return Err(Error::new(
+                                other.span(),
+                                "Expected usize numeric literal.",
+                            ));
+                        }
+                    }
+                }
+
+                "constructor" => {
+                    if constructor.is_some() {
+                        return Err(Error::new(
+                            attribute.span(),
+                            "Duplicate Constructor attribute.",
+                        ));
+                    }
+
+                    constructor = Some(attribute.parse_args::<Ident>()?);
+                }
+
+                "mismatch" => {
+                    if mismatch {
+                        return Err(Error::new(name.span(), "Duplicate Mismatch attribute."));
+                    }
+
+                    if expression.is_some() {
+                        return Err(Error::new(
+                            name.span(),
+                            "Explicit rules cannot serve as a mismatch fallback.",
+                        ));
+                    }
+
+                    if precedence.is_some() {
+                        return Err(Error::new(
+                            name.span(),
+                            "Variants with precedence cannot be labeled as a mismatch fallback.",
+                        ));
+                    }
+
+                    if !attribute.tokens.is_empty() {
+                        return Err(Error::new(name.span(), "Unexpected attribute parameters."));
+                    }
+
+                    if !trivial {
+                        return Err(Error::new(
+                            name.span(),
+                            "Variants with a defined body cannot be labeled as a mismatch fallback.",
+                        ));
+                    }
+
+                    mismatch = true;
+                }
+
+                "rule" => {
+                    if expression.is_some() {
+                        return Err(Error::new(name.span(), "Duplicate Rule attribute."));
+                    }
+
+                    if mismatch {
+                        return Err(Error::new(
+                            name.span(),
+                            "A mismatch token variant cannot have an explicit rule.",
+                        ));
+                    }
+
+                    let mut regex = attribute.parse_args::<Regex>()?;
+
+                    regex.inline(inline_map)?;
+
+                    expression = Some(regex);
+                }
+
+                _ => continue,
+            }
+        }
+
+        match expression {
+            None => {
+                if let Some(name) = constructor {
+                    return Err(Error::new(
+                        name.span(),
+                        "Constructor attributes cannot be defined on non-parsable \
+                        variants.\nTo make the variant parsable, label it with the \
+                        #[rule(...)] attribute.",
+                    ));
+                }
+
+                Ok(match mismatch {
+                    true => Self::Mismatch { name },
+                    false => Self::Other,
+                })
+            }
+
+            Some(expression) => {
+                if !trivial && constructor.is_none() {
+                    return Err(Error::new(
+                        name.span(),
+                        "Parsable variants with a non-empty body must specify a dedicated \
+                        constructor function.\nUse the #[constructor(...)] attribute to \
+                        refer to the constructor function.\nThe constructor function must \
+                        be implemented for the derived type manually.\nThe expected \
+                        function signature is \"fn(matched_substring: &str) -> Self\".",
+                    ));
+                }
+
+                Ok(Self::Rule {
+                    name,
+                    index,
+                    precedence,
+                    constructor,
+                    expression,
+                })
+            }
+        }
+    }
+
+    #[inline(always)]
+    pub(super) fn rule_name(&self) -> &Ident {
+        match self {
+            TokenVariant::Rule { name, .. } => name,
+            _ => unreachable!("Non-rule variant."),
+        }
+    }
+
+    #[inline(always)]
+    pub(super) fn rule_precedence(&self) -> RulePrecedence {
+        match self {
+            TokenVariant::Rule { precedence, .. } => precedence.clone().unwrap_or(0),
+            _ => unreachable!("Non-rule variant."),
+        }
+    }
+}
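+
+// An illustrative sketch (hypothetical user code, with the token regex
+// elided) of the attribute surface that `from_variant` recognizes:
+//
+//     #[derive(Token)]
+//     enum MyToken {
+//         #[rule(/* token regex */)]
+//         #[precedence(2)]
+//         #[constructor(parse_number)]
+//         Number(usize),
+//
+//         #[mismatch]
+//         Unknown,
+//     }
+//
+// `#[rule(...)]` is parsed as a `Regex`; `#[precedence(...)]` requires a
+// positive integer literal (the default is 1); `#[constructor(...)]` names a
+// manually implemented `fn(matched_substring: &str) -> Self`; `#[mismatch]`
+// marks a single field-less fallback variant for otherwise unmatched input.
diff --git a/work/crates/derive/src/utils/automata.rs b/work/crates/derive/src/utils/automata.rs new file mode 100644 index 0000000..f6444a4 --- /dev/null +++ b/work/crates/derive/src/utils/automata.rs @@ -0,0 +1,261 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes.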
Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::{ + cmp::Ordering, + collections::VecDeque, + fmt::{Display, Formatter}, + mem::{replace, swap, take}, + ops::RangeFrom, +}; + +use crate::utils::{ + deterministic::Deterministic, + transitions::{Transitions, TransitionsImpl}, + AutomataContext, + Map, + PredictableCollection, + Set, + SetImpl, + State, +}; + +pub struct Automata { + pub start: C::State, + pub finish: Set, + pub transitions: Transitions, +} + +impl Clone for Automata +where + C::State: Clone, + C::Terminal: Clone, +{ + #[inline(always)] + fn clone(&self) -> Self { + Self { + start: self.start.clone(), + finish: self.finish.clone(), + transitions: self.transitions.clone(), + } + } +} + +impl Display for Automata +where + C::State: Ord, + C::Terminal: Display + Ord, +{ + fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result { + struct Visitor<'a, 'f, C: AutomataContext> { + original: &'a Automata, + formatter: &'a mut Formatter<'f>, + pending: VecDeque<&'a C::State>, + visited: Set<&'a C::State>, + names: Map<&'a C::State, usize>, + generator: RangeFrom, + } + + impl<'a, 'f, C> Visitor<'a, 'f, C> + where + C: AutomataContext, + C::State: Ord, + C::Terminal: Display + Ord, + { + fn next(&mut self) -> std::fmt::Result { + if let Some(state) = self.pending.pop_front() { + let mut transitions = self + .original + .transitions + .iter() + .filter(|(from, _, _)| from == state) + .collect::>(); + + transitions.sort_by(|a, b| { + if a.2 < b.2 { + return Ordering::Less; + } + + if a.2 > b.2 { + return Ordering::Greater; + } + + if a.1 < b.1 { + return Ordering::Less; + } + + if a.1 > b.1 { + return Ordering::Greater; + } + + Ordering::Equal + }); + + let mut string_from = format!("{}", self.name_of(state)); + + if self.original.finish.contains(state) { + string_from = format!("{}\u{2192}", string_from); + } + + if state == &self.original.start { + string_from = format!("\u{2192}{}", string_from); + } + + for (_, through, to) in transitions { + let mut string_to = format!("{}", self.name_of(to)); + + if self.original.finish.contains(to) { + string_to = format!("{}\u{2192}", string_to); + } + + if to == &self.original.start { + string_to = format!("\u{2192}{}", string_to); + } + + writeln!( + self.formatter, + " {} \u{21D2} {:} \u{21D2} {}", + string_from, through, string_to, + )?; + + if !self.visited.contains(to) { + let _ = 
self.visited.insert(to); + self.pending.push_back(to); + } + } + } + + Ok(()) + } + + #[inline] + fn name_of(&mut self, state: &'a C::State) -> usize { + *self.names.entry(state).or_insert_with(|| { + self.generator + .next() + .expect("Internal error. Display state generator exceeded.") + }) + } + } + + let mut visitor = Visitor { + original: self, + formatter, + pending: VecDeque::from([&self.start]), + visited: Set::new([&self.start]), + names: Map::empty(), + generator: 1.., + }; + + while !visitor.pending.is_empty() { + visitor.next()? + } + + Ok(()) + } +} + +impl Automata { + #[inline(always)] + pub fn accepts_null(&self) -> bool { + self.finish.contains(&self.start) || self.transitions.is_empty() + } + + pub fn canonicalize(&mut self, context: &mut C) { + self.reverse(context); + self.determine(context); + self.reverse(context); + self.determine(context); + } + + #[cfg(test)] + pub(super) fn test(&self, input: Vec) -> bool { + use crate::utils::context::AutomataTerminal; + + let mut state = &self.start; + + 'outer: for terminal in &input { + for (from, through, to) in &self.transitions { + if from != state { + continue; + } + + assert!(!through.is_null(), "Automata with null-transition."); + + if through == terminal { + state = to; + continue 'outer; + } + } + + return false; + } + + self.finish.contains(state) + } + + #[inline(always)] + fn determine(&mut self, context: &mut C) { + *self = Deterministic::build(context, self); + } + + fn reverse(&mut self, context: &mut C) { + self.transitions = take(&mut self.transitions) + .into_iter() + .map(|mut transition| { + swap(&mut transition.0, &mut transition.2); + + transition + }) + .collect(); + + match self.finish.single() { + Some(mut finish) => { + swap(&mut self.start, &mut finish); + self.finish = Set::new([finish]); + } + + None => { + let finish = replace(&mut self.start, State::gen_state(context)); + + for start in replace(&mut self.finish, Set::new([finish])) { + self.transitions.through_null(self.start, start); + } + } + } + } +} diff --git a/work/crates/derive/src/utils/context.rs b/work/crates/derive/src/utils/context.rs new file mode 100644 index 0000000..8121a3a --- /dev/null +++ b/work/crates/derive/src/utils/context.rs @@ -0,0 +1,197 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. 
// +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::{hash::Hash, mem::replace}; + +use crate::utils::{ + automata::Automata, + transitions::{Transitions, TransitionsImpl}, + PredictableCollection, + Set, + SetImpl, + State, +}; + +pub trait AutomataContext: Sized { + type State: State; + type Terminal: AutomataTerminal; + + fn terminal(&mut self, terminals: Set) -> Automata { + if terminals.is_empty() { + unreachable!("An attempt to create a terminal of empty set."); + } + + let start = Self::State::gen_state(self); + let finish = Self::State::gen_state(self); + + let mut transitions = Transitions::with_capacity(terminals.len()); + + for terminal in terminals { + transitions.through(start, terminal, finish); + } + + Automata { + start, + finish: Set::new([finish]), + transitions, + } + } + + fn union(&mut self, mut a: Automata, b: Automata) -> Automata { + let start = Self::State::gen_state(self); + + a.transitions.append(b.transitions); + + a.transitions.through_null(start, a.start); + a.transitions.through_null(start, b.start); + + a.start = start; + a.finish.append(b.finish); + + a.canonicalize(self); + + a + } + + fn concatenate(&mut self, mut a: Automata, b: Automata) -> Automata { + for a_finish in replace(&mut a.finish, b.finish) { + a.transitions.through_null(a_finish, b.start); + } + + a.transitions.append(b.transitions); + + a.canonicalize(self); + + a + } + + fn repeat(&mut self, mut inner: Automata) -> Automata { + for finish in &inner.finish { + inner.transitions.through_null(*finish, inner.start); + inner.transitions.through_null(inner.start, *finish); + } + + inner.canonicalize(self); + + inner + } + + fn optional(&mut self, mut inner: Automata) -> Automata { + let start = Self::State::gen_state(self); + + inner.finish.insert(start); + inner + .transitions + .through_null(start, replace(&mut inner.start, start)); + + inner.canonicalize(self); + + inner + } +} + +pub trait AutomataTerminal: Clone + Eq + Hash { + fn null() -> Self; + + fn is_null(&self) -> bool; +} + +#[cfg(test)] +mod tests { + use std::ops::RangeFrom; + + use crate::utils::{AutomataContext, AutomataTerminal, Set, SetImpl, State}; + + struct TestContext(RangeFrom); + + impl AutomataContext for TestContext { + type State = TestState; + type Terminal = TestTerminal; + } + + type TestTerminal = &'static str; + + impl AutomataTerminal for TestTerminal { + #[inline(always)] + fn null() -> Self { + "" + } + + #[inline(always)] + fn is_null(&self) -> bool { + self.is_empty() + } + } + + type TestState = usize; + + impl State for TestState { + fn gen_state(context: &mut TestContext) -> Self { + context.0.next().unwrap() + } + } + + #[test] + fn test_automata() { + let mut context = TestContext(1..); + + let foo = context.terminal(Set::new(["foo"])); + let bar = context.terminal(Set::new(["bar"])); + let comma = context.terminal(Set::new([","])); + + assert!(foo.test(vec!["foo"])); + assert!(!foo.test(vec!["bar"])); + assert!(!foo.test(vec![])); + + let foo_or_bar = context.union(foo, bar); + let comma_foo_or_bar = context.concatenate(comma, 
foo_or_bar.clone());
+        let repeat_comma_foo_or_bar = context.repeat(comma_foo_or_bar);
+        let one_or_more = context.concatenate(foo_or_bar, repeat_comma_foo_or_bar);
+
+        assert!(!one_or_more.test(vec![]));
+
+        let zero_or_more = context.optional(one_or_more);
+
+        assert!(zero_or_more.test(vec![]));
+        assert!(zero_or_more.test(vec!["foo"]));
+        assert!(!zero_or_more.test(vec!["foo", "bar"]));
+        assert!(zero_or_more.test(vec!["foo", ",", "bar"]));
+        assert!(!zero_or_more.test(vec!["foo", ",", "bar", "foo"]));
+        assert!(zero_or_more.test(vec!["foo", ",", "bar", ",", "foo"]));
+        assert!(zero_or_more.test(vec!["foo", ",", "bar", ",", "foo", ",", "foo"]));
+    }
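+
+    // An illustrative companion check (a sketch): `Automata::accepts_null`
+    // reports empty-input acceptance directly from the start/finish states,
+    // without driving the automaton through `test`.
+    #[test]
+    fn test_accepts_null() {
+        let mut context = TestContext(1..);
+
+        let foo = context.terminal(Set::new(["foo"]));
+        assert!(!foo.accepts_null());
+
+        let optional_foo = context.optional(foo);
+        assert!(optional_foo.accepts_null());
+    }
+}
diff --git a/work/crates/derive/src/utils/deterministic.rs b/work/crates/derive/src/utils/deterministic.rs new file mode 100644 index 0000000..e7864ab --- /dev/null +++ b/work/crates/derive/src/utils/deterministic.rs @@ -0,0 +1,158 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved.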
// +//////////////////////////////////////////////////////////////////////////////// + +use crate::utils::{ + transitions::{Transitions, TransitionsImpl}, + Automata, + AutomataContext, + AutomataTerminal, + PredictableCollection, + Set, + SetImpl, + State, +}; + +pub(super) struct Deterministic<'a, C: AutomataContext> { + context: &'a mut C, + original: &'a Automata, + alphabet: Set, + pending: Vec<(C::State, Set)>, + registered: Vec<(C::State, Set)>, + transitions: Transitions, +} + +impl<'a, C: AutomataContext> Deterministic<'a, C> { + pub(super) fn build(context: &'a mut C, original: &'a Automata) -> Automata { + let mut alphabet = original.transitions.alphabet(); + alphabet.remove(&C::Terminal::null()); + + let start = original + .transitions + .closure_of(original.start, C::Terminal::null()); + + let mut pending = Vec::with_capacity(original.transitions.len()); + let registered = Vec::with_capacity(original.transitions.len()); + + pending.push((original.start, start)); + + let mut deterministic = Self { + context, + original, + alphabet, + pending, + registered, + transitions: Transitions::empty(), + }; + + while deterministic.pop() {} + + let finish = deterministic + .registered + .iter() + .filter_map(|(state, closure)| { + if closure.intersection(&original.finish).next().is_some() { + Some(*state) + } else { + None + } + }) + .collect(); + + Automata { + start: original.start, + finish, + transitions: deterministic.transitions, + } + } + + fn pop(&mut self) -> bool { + let (from, closure) = match self.pending.pop() { + None => return false, + Some(pair) => pair, + }; + + self.registered.push((from, closure.clone())); + + for symbol in self.alphabet.clone() { + let mut target = Set::empty(); + + for state in closure.iter().cloned() { + target.append(self.original.transitions.closure_of(state, symbol.clone())); + } + + if target.is_empty() { + continue; + } + + let to = self.push(target); + + self.transitions.insert((from, symbol, to)); + } + + true + } + + fn push(&mut self, closure: Set) -> C::State { + for (state, registered) in self.registered.iter() { + if registered == &closure { + return *state; + } + } + + for (state, pending) in self.pending.iter() { + if pending == &closure { + return *state; + } + } + + match closure.single() { + None => { + let state = C::State::gen_state(&mut self.context); + + self.pending.push((state, closure)); + + state + } + + Some(state) => { + self.pending.push((state, closure)); + + state + } + } + } +} diff --git a/work/crates/derive/src/utils/expression.rs b/work/crates/derive/src/utils/expression.rs new file mode 100644 index 0000000..5dcb259 --- /dev/null +++ b/work/crates/derive/src/utils/expression.rs @@ -0,0 +1,169 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. 
// +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::Span; +use syn::{ + parse::{Lookahead1, Parse, ParseStream, Result}, + spanned::Spanned, +}; + +#[derive(Clone)] +pub enum Expression { + Operand(O::Operand), + Binary { + operator: O, + left: Box, + right: Box, + }, + Unary { + operator: O, + inner: Box, + }, +} + +impl Default for Expression +where + O: ExpressionOperator, + ::Operand: Default, +{ + #[inline(always)] + fn default() -> Self { + Self::Operand(::Operand::default()) + } +} + +impl Spanned for Expression +where + O: ExpressionOperator, + ::Operand: Spanned, +{ + fn span(&self) -> Span { + match self { + Self::Operand(operand) => operand.span(), + Self::Binary { left, .. } => left.span(), + Self::Unary { inner, .. } => inner.span(), + } + } +} + +impl Parse for Expression { + #[inline(always)] + fn parse(input: ParseStream) -> Result { + Self::binding_parse(input, 0) + } +} + +impl Expression { + fn binding_parse(input: ParseStream, right_binding_power: u8) -> Result { + let mut result = O::Operand::parse(input)?; + + 'outer: loop { + if input.is_empty() || input.peek(Token![,]) { + break; + } + + let lookahead = input.lookahead1(); + + for mut operator in O::enumerate() { + let binding_power = operator.binding_power(); + + match operator.peek(&lookahead) { + Applicability::Mismatch => (), + + Applicability::Unary => { + if binding_power <= right_binding_power { + break 'outer; + } + + operator.parse(input)?; + + result = Expression::Unary { + operator, + inner: Box::new(result), + }; + + continue 'outer; + } + + Applicability::Binary => { + if binding_power <= right_binding_power { + break 'outer; + } + + operator.parse(input)?; + + let right = Self::binding_parse(input, binding_power - 1)?; + + result = Expression::Binary { + operator, + left: Box::new(result), + right: Box::new(right), + }; + + continue 'outer; + } + } + } + + return Err(lookahead.error()); + } + + Ok(result) + } +} + +pub trait ExpressionOperator: Sized { + type Operand: ExpressionOperand; + + fn enumerate() -> Vec; + + fn binding_power(&self) -> u8; + + fn peek(&self, lookahead: &Lookahead1) -> Applicability; + + fn parse(&mut self, input: ParseStream) -> Result<()>; +} + +pub trait ExpressionOperand { + fn parse(input: ParseStream) -> Result>; +} + +pub enum Applicability { + Mismatch, + Unary, + Binary, +} diff --git a/work/crates/derive/src/utils/facade.rs b/work/crates/derive/src/utils/facade.rs new file mode 100644 index 0000000..f93a267 --- /dev/null +++ b/work/crates/derive/src/utils/facade.rs 
@@ -0,0 +1,139 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use proc_macro2::TokenStream; + +pub struct Facade { + core_crate: TokenStream, + option: TokenStream, + vec: TokenStream, + into: TokenStream, + unreachable: TokenStream, + unimplemented: TokenStream, +} + +impl Facade { + pub fn new() -> Self { + let core_crate = quote! { + ::lady_deirdre + }; + + #[cfg(feature = "std")] + let option = quote! { + ::std::option::Option + }; + #[cfg(not(feature = "std"))] + let option = quote! { + ::core::option::Option + }; + + #[cfg(feature = "std")] + let vec = quote! { + ::std::vec::Vec + }; + #[cfg(not(feature = "std"))] + let vec = quote! { + ::alloc::vec::Vec + }; + + #[cfg(feature = "std")] + let into = quote! { + ::std::convert::From + }; + #[cfg(not(feature = "std"))] + let into = quote! { + ::core::convert::From + }; + + #[cfg(feature = "std")] + let unreachable = quote! { + ::std::unreachable! + }; + #[cfg(not(feature = "std"))] + let unreachable = quote! { + ::core::unreachable! + }; + + #[cfg(feature = "std")] + let unimplemented = quote! { + ::std::unimplemented! + }; + #[cfg(not(feature = "std"))] + let unimplemented = quote! { + ::core::unimplemented! 
+ }; + + Self { + core_crate, + option, + vec, + into, + unreachable, + unimplemented, + } + } + + #[inline(always)] + pub fn core_crate(&self) -> &TokenStream { + &self.core_crate + } + + #[inline(always)] + pub fn option(&self) -> &TokenStream { + &self.option + } + + #[inline(always)] + pub fn vec(&self) -> &TokenStream { + &self.vec + } + + #[inline(always)] + pub fn convert(&self) -> &TokenStream { + &self.into + } + + #[inline(always)] + pub fn unreachable(&self) -> &TokenStream { + &self.unreachable + } + + #[inline(always)] + pub fn unimplemented(&self) -> &TokenStream { + &self.unimplemented + } +} diff --git a/work/crates/derive/src/utils/map.rs b/work/crates/derive/src/utils/map.rs new file mode 100644 index 0000000..287999a --- /dev/null +++ b/work/crates/derive/src/utils/map.rs @@ -0,0 +1,135 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::{collections::HashMap, hash::Hash}; + +use syn::Result; + +use crate::utils::{predictable::PredictableHasher, PredictableCollection}; + +pub type Map = HashMap; + +impl PredictableCollection for Map { + #[inline(always)] + fn empty() -> Self { + Self::with_hasher(PredictableHasher) + } + + #[inline(always)] + fn with_capacity(capacity: usize) -> Self { + Self::with_capacity_and_hasher(capacity, PredictableHasher) + } +} + +impl MapImpl for Map { + type Key = Key; + type Value = Value; + + #[inline(always)] + fn new(array: [(Self::Key, Self::Value); N]) -> Self + where + Self::Key: Eq + Hash, + { + Self::from_iter(array) + } + + #[inline(always)] + fn append(&mut self, other: Self) + where + Self::Key: Eq + Hash, + { + for (key, value) in other { + assert!( + self.insert(key, value).is_none(), + "Internal error. Duplicate keys in append." 
+ ); + } + } + + #[inline(always)] + fn single_key(&self) -> Option<&Self::Key> { + if self.len() != 1 { + return None; + } + + self.keys().next() + } + + #[inline] + fn for_each(mut self, mut iterator: impl FnMut(&Self::Key, &mut Self::Value)) -> Self { + for (key, value) in &mut self { + iterator(key, value); + } + + self + } + + fn try_for_each( + mut self, + mut iterator: impl FnMut(&Self::Key, &mut Self::Value) -> Result<()>, + ) -> Result { + for (key, value) in &mut self { + iterator(key, value)?; + } + + Ok(self) + } +} + +pub trait MapImpl { + type Key; + type Value; + + fn new(array: [(Self::Key, Self::Value); N]) -> Self + where + Self::Key: Eq + Hash; + + fn append(&mut self, other: Self) + where + Self::Key: Eq + Hash; + + fn single_key(&self) -> Option<&Self::Key>; + + fn for_each(self, iterator: impl FnMut(&Self::Key, &mut Self::Value)) -> Self; + + fn try_for_each( + self, + iterator: impl FnMut(&Self::Key, &mut Self::Value) -> Result<()>, + ) -> Result + where + Self: Sized; +} diff --git a/work/crates/derive/src/utils/mod.rs b/work/crates/derive/src/utils/mod.rs new file mode 100644 index 0000000..cebdeb6 --- /dev/null +++ b/work/crates/derive/src/utils/mod.rs @@ -0,0 +1,61 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +mod automata; +mod context; +mod deterministic; +mod expression; +mod facade; +mod map; +mod multimap; +mod predictable; +mod set; +mod state; +mod transitions; + +pub use crate::utils::{ + automata::Automata, + context::{AutomataContext, AutomataTerminal}, + expression::{Applicability, Expression, ExpressionOperand, ExpressionOperator}, + facade::Facade, + map::{Map, MapImpl}, + multimap::{Multimap, MultimapImpl}, + predictable::PredictableCollection, + set::{Set, SetImpl}, + state::State, + transitions::Transitions, +}; diff --git a/work/crates/derive/src/utils/multimap.rs b/work/crates/derive/src/utils/multimap.rs new file mode 100644 index 0000000..53ff093 --- /dev/null +++ b/work/crates/derive/src/utils/multimap.rs @@ -0,0 +1,102 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use std::{collections::HashMap, hash::Hash}; + +use crate::utils::{predictable::PredictableHasher, PredictableCollection, Set, SetImpl}; + +pub type Multimap = HashMap, PredictableHasher>; + +impl MultimapImpl for Multimap { + type Key = Key; + type Value = Value; + + fn fold(self, mut map: impl FnMut(Self::Key) -> K) -> Multimap + where + K: Eq + Hash, + Self::Key: Eq + Hash, + Self::Value: Eq + Hash + Clone, + { + let mut multimap = Multimap::empty(); + + for (key, value) in self { + let key = map(key); + + match multimap.get_mut(&key) { + None => { + let _ = multimap.insert(key, value); + } + + Some(accumulator) => accumulator.append(value), + } + } + + multimap + } + + fn join(self, mut join: impl FnMut(Self::Key, Self::Value) -> V) -> Set + where + V: Eq + Hash, + Self::Key: Eq + Hash + Clone, + { + let mut set = Set::empty(); + + for (key, subset) in self { + for value in subset { + let _ = set.insert(join(key.clone(), value)); + } + } + + set + } +} + +pub trait MultimapImpl { + type Key; + type Value; + + fn fold(self, map: impl FnMut(Self::Key) -> K) -> Multimap + where + K: Eq + Hash, + Self::Key: Eq + Hash, + Self::Value: Eq + Hash + Clone; + + fn join(self, concatenation: impl FnMut(Self::Key, Self::Value) -> V) -> Set + where + V: Eq + Hash, + Self::Key: Eq + Hash + Clone; +} diff --git a/work/crates/derive/src/utils/predictable.rs b/work/crates/derive/src/utils/predictable.rs new file mode 100644 index 0000000..d984540 --- /dev/null +++ b/work/crates/derive/src/utils/predictable.rs @@ -0,0 +1,56 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use std::{collections::hash_map::DefaultHasher, hash::BuildHasher}; + +#[derive(Default, Clone)] +pub struct PredictableHasher; + +impl BuildHasher for PredictableHasher { + type Hasher = DefaultHasher; + + #[inline(always)] + fn build_hasher(&self) -> Self::Hasher { + DefaultHasher::new() + } +} + +pub trait PredictableCollection { + fn empty() -> Self; + + fn with_capacity(capacity: usize) -> Self; +} diff --git a/work/crates/derive/src/utils/set.rs b/work/crates/derive/src/utils/set.rs new file mode 100644 index 0000000..db6548f --- /dev/null +++ b/work/crates/derive/src/utils/set.rs @@ -0,0 +1,160 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use std::{collections::HashSet, hash::Hash}; + +use crate::utils::{predictable::PredictableHasher, Multimap, PredictableCollection}; + +pub type Set = HashSet; + +impl PredictableCollection for Set { + #[inline(always)] + fn empty() -> Self { + Self::with_hasher(PredictableHasher) + } + + #[inline(always)] + fn with_capacity(capacity: usize) -> Self { + Self::with_capacity_and_hasher(capacity, PredictableHasher) + } +} + +impl SetImpl for Set { + type Value = Value; + + #[inline(always)] + fn new(array: [Self::Value; N]) -> Self + where + Self::Value: Eq + Hash, + { + Self::from_iter(array) + } + + #[inline(always)] + fn append(&mut self, other: Self) + where + Self::Value: Eq + Hash + Clone, + { + *self = HashSet::union(self, &other).cloned().collect() + } + + #[inline(always)] + fn merge(self, other: Self) -> Self + where + Self::Value: Eq + Hash + Clone, + { + HashSet::union(&self, &other).cloned().collect() + } + + #[inline(always)] + fn is_single(&self) -> bool { + self.len() == 1 + } + + #[inline] + fn single(&self) -> Option + where + Self::Value: Clone, + { + if self.len() != 1 { + return None; + } + + self.iter().next().cloned() + } + + #[inline] + fn group(self, mut division: impl FnMut(Self::Value) -> (K, V)) -> Multimap + where + K: Eq + Hash, + V: Eq + Hash + Clone, + { + let mut multimap = Multimap::empty(); + + for value in self { + let (key, value) = division(value); + + multimap + .entry(key) + .and_modify(|values: &mut Set| { + let _ = values.insert(value.clone()); + }) + .or_insert_with(|| Set::new([value.clone()])); + } + + multimap + } + + #[inline] + fn as_ref(&self) -> Set<&Self::Value> + where + Self::Value: Eq + Hash, + { + self.iter().collect() + } +} + +pub trait SetImpl { + type Value; + + fn new(array: [Self::Value; N]) -> Self + where + Self::Value: Eq + Hash; + + fn append(&mut self, other: Self) + where + Self::Value: Eq + Hash + Clone; + + fn merge(self, other: Self) -> Self + where + Self::Value: Eq + Hash + Clone; + + fn is_single(&self) -> bool; + + fn single(&self) -> Option + where + Self::Value: Clone; + + fn group(self, division: impl FnMut(Self::Value) -> (K, V)) -> Multimap + where + K: Eq + Hash, + V: Eq + Hash + Clone; + + fn as_ref(&self) -> Set<&Self::Value> + where + Self::Value: Eq + Hash; +} diff --git a/work/crates/derive/src/utils/state.rs b/work/crates/derive/src/utils/state.rs new file mode 100644 index 0000000..972c827 --- /dev/null +++ b/work/crates/derive/src/utils/state.rs @@ -0,0 +1,42 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. 
// +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::hash::Hash; + +pub trait State: Eq + Hash + Copy { + fn gen_state(context: &mut C) -> Self; +} diff --git a/work/crates/derive/src/utils/symbol.rs b/work/crates/derive/src/utils/symbol.rs new file mode 100644 index 0000000..1b6de59 --- /dev/null +++ b/work/crates/derive/src/utils/symbol.rs @@ -0,0 +1,101 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use std::{ + cmp::Ordering, + fmt::{Display, Formatter}, +}; + +use crate::utils::Set; + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(super) enum Symbol { + Null, + Terminal(T), +} + +impl Display for Symbol +where + T: Display, +{ + #[inline] + fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Null => formatter.write_str("Null"), + Self::Terminal(t) => Display::fmt(t, formatter), + } + } +} + +impl Ord for Symbol +where + T: Ord, +{ + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + match (self, other) { + (Self::Null, Self::Null) => Ordering::Equal, + (Self::Null, Self::Terminal(_)) => Ordering::Less, + (Self::Terminal(_), Self::Null) => Ordering::Greater, + (Self::Terminal(a), Self::Terminal(b)) => a.cmp(b), + } + } +} + +impl PartialOrd for Symbol +where + T: Ord, +{ + #[inline(always)] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Symbol { + #[inline] + pub(super) fn is_null(&self) -> bool { + match self { + Self::Null => true, + _ => false, + } + } +} + +pub type TerminalSet = Set; + +pub(super) type Alphabet = Set>; diff --git a/work/crates/derive/src/utils/transitions.rs b/work/crates/derive/src/utils/transitions.rs new file mode 100644 index 0000000..d0dad63 --- /dev/null +++ b/work/crates/derive/src/utils/transitions.rs @@ -0,0 +1,115 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use crate::utils::{AutomataTerminal, PredictableCollection, Set, SetImpl, State}; + +pub type Transitions<S, T> = Set<(S, T, S)>; + +impl<S, T> TransitionsImpl for Transitions<S, T> +where + S: State, + T: AutomataTerminal, +{ + type State = S; + type Terminal = T; + + #[inline(always)] + fn through(&mut self, from: Self::State, symbol: Self::Terminal, to: Self::State) { + let _ = self.insert((from, symbol, to)); + } + + #[inline(always)] + fn through_null(&mut self, from: Self::State, to: Self::State) { + self.through(from, <Self::Terminal as AutomataTerminal>::null(), to); + } + + #[inline] + fn alphabet(&self) -> Set<Self::Terminal> { + self.iter().map(|(_, symbol, _)| symbol).cloned().collect() + } + + fn closure_of(&self, state: Self::State, symbol: Self::Terminal) -> Set<Self::State> { + let mut closure = Set::empty(); + + if symbol.is_null() { + self.closure_of_null(state, &mut closure); + + return closure; + } + + for (from, through, to) in self { + if from == &state && through == &symbol { + let mut null_closure = Set::empty(); + + self.closure_of_null(*to, &mut null_closure); + + closure.append(null_closure); + } + } + + closure + } + + fn closure_of_null(&self, state: Self::State, closure: &mut Set<Self::State>) { + let _ = closure.insert(state); + + for (from, through, to) in self { + if from == &state && through.is_null() { + let to = *to; + + if closure.insert(to) { + self.closure_of_null(to, closure); + } + } + } + } +} + +pub(super) trait TransitionsImpl { + type State: State; + type Terminal: AutomataTerminal; + + fn through(&mut self, from: Self::State, symbol: Self::Terminal, to: Self::State); + + fn through_null(&mut self, from: Self::State, to: Self::State); + + fn alphabet(&self) -> Set<Self::Terminal>; + + fn closure_of(&self, state: Self::State, symbol: Self::Terminal) -> Set<Self::State>; + + fn closure_of_null(&self, state: Self::State, closures: &mut Set<Self::State>); +} diff --git a/work/crates/examples/Cargo.toml b/work/crates/examples/Cargo.toml new file mode 100644 index 0000000..a006940 --- /dev/null +++ b/work/crates/examples/Cargo.toml @@ -0,0 +1,105 @@ +################################################################################ +# This file is a part of the "Lady Deirdre" Work, # +# a compiler front-end foundation technology. # +# # +# This Work is a proprietary software with source available code. # +# # +# To copy, use, distribute, and contribute into this Work you must agree to # +# the terms of the End User License Agreement: # +# # +# https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. # +# # +# The Agreement let you use this Work in commercial and non-commercial # +# purposes. Commercial use of the Work is free of charge to start, # +# but the Agreement obligates you to pay me royalties # +# under certain conditions. # +# # +# If you want to contribute into the source code of this Work, # +# the Agreement obligates you to assign me all exclusive rights to # +# the Derivative Work or contribution made by you # +# (this includes GitHub forks and pull requests to my repository). # +# # +# The Agreement does not limit rights of the third party software developers # +# as long as the third party software uses public API of this Work only, # +# and the third party software does not incorporate or distribute # +# this Work directly. # +# # +# AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY # +# OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES # +# RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.
# +# # +# If you do not or cannot agree to the terms of this Agreement, # +# do not use this Work. # +# # +# Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). # +# All rights reserved. # +################################################################################ + +[package] +name = "lady-deirdre-examples" +version = "0.0.0" +authors = ["Ilya Lakhin (Илья Александрович Лахин) "] +edition = "2021" +description = "Compiler front-end foundation technology. Examples crate." +keywords = ["parsing", "parser", "incremental", "compiler", "editor"] +categories = ["compilers", "data-structures", "no-std", "parsing", "text-editors"] +readme="./readme.md" +license-file="../../../EULA.md" +documentation = "https://docs.rs/lady-deirdre" +repository = "https://github.com/Eliah-Lakhin/lady-deirdre" +rust-version = "1.65" +publish = false +autobins = false +autoexamples = false +autotests = false +autobenches = false + +[[bench]] +name = "main" +harness = false + +[[test]] +name = "document" +path = "tests/document.rs" + +[[test]] +name = "iteration" +path = "tests/iteration.rs" + +[[test]] +name = "position" +path = "tests/position.rs" + +[[test]] +name = "token" +path = "tests/token.rs" + +[[test]] +name = "json" +path = "tests/json.rs" + +[[test]] +name = "balance" +path = "tests/balance.rs" + +[dependencies.lady-deirdre] +version = "1.0" +path = "../main" + +[dev-dependencies.rand] +version = "0.8" + +[dev-dependencies.ropey] +version = "1.5" + +[dev-dependencies.nom] +version = "7.1" + +[dev-dependencies.tree-sitter] +version = "0.20" + +[dev-dependencies.tree-sitter-json] +version = "0.19" + +[dev-dependencies.criterion] +version = "0.3" diff --git a/work/crates/examples/benches/data.rs b/work/crates/examples/benches/data.rs new file mode 100644 index 0000000..5210ba5 --- /dev/null +++ b/work/crates/examples/benches/data.rs @@ -0,0 +1,584 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).
// +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::ops::Range; + +use lady_deirdre::{ + lexis::{CodeContent, Length, Site, SiteSpan}, + syntax::SyntaxTree, + Document, +}; +use lady_deirdre_examples::json::syntax::JsonNode; +use rand::{ + distributions::{Distribution, WeightedIndex}, + Rng, +}; + +const BRANCHING: usize = 8; +const NESTING_MIN: usize = 1; +const NESTING_MAX: usize = 13; +const MB: usize = KB * KB; +const KB: usize = 1024; + +#[derive(Clone)] +pub struct BenchData { + pub init: SourceSample, + current: SourceSample, + pub steps: Vec<SourceSample>, +} + +impl BenchData { + pub fn new(init: SourceSample) -> Self { + Self { + init: init.clone(), + current: init, + steps: Vec::default(), + } + } + + pub fn edit_short(&mut self, random: &mut impl Rng, mut edit: SourceSample) { + match random.gen_range(1..6) == 1 { + true => { + self.current.replace(random, &mut edit); + self.steps.push(edit); + } + + false => { + self.current.insert(random, &mut edit); + self.steps.push(edit); + } + }; + } + + pub fn edit_long(&mut self, random: &mut impl Rng, mut edit: SourceSample) { + match random.gen_range(1..2) == 1 { + true => { + self.current.replace(random, &mut edit); + self.steps.push(edit); + } + + false => { + self.current.insert(random, &mut edit); + self.steps.push(edit); + } + }; + } + + pub fn reset(&mut self) { + self.current = self.init.clone(); + } + + pub fn verify_sequential(&self) { + let mut document = Document::<JsonNode>::from(self.init.source.as_str()); + + assert!(document.errors().next().is_none()); + + for step in &self.steps { + document.write(step.span.clone(), &step.source); + + assert!(document.errors().next().is_none()); + } + + assert_eq!(document.substring(..), self.current.source); + } + + #[allow(dead_code)] + pub fn verify_independent(&self) { + if self.steps.is_empty() { + let document = Document::<JsonNode>::from(self.init.source.as_str()); + + assert!(document.errors().next().is_none()); + + return; + } + + for step in &self.steps { + let mut document = Document::<JsonNode>::from(self.init.source.as_str()); + + assert!(document.errors().next().is_none()); + + document.write(step.span.clone(), &step.source); + + assert!(document.errors().next().is_none()); + } + } + + pub fn describe_init(&self) -> String { + let size = self.init.source.len(); + let lines = self.init.lines; + + if size >= MB { + format!("{} Mb. ({} bytes, {} lines)", size / MB, size, lines) + } else if size >= KB { + format!("{} Kb. ({} bytes, {} lines)", size / KB, size, lines) + } else { + format!("{} bytes ({} lines)", size, lines) + } + } + + pub fn describe_average_edit(&self) -> String { + let size = self.average_edit_size(); + + if size >= MB { + format!("{} Mb. (~{} bytes)", size / MB, size) + } else if size >= KB { + format!("{} Kb. (~{} bytes)", size / KB, size) + } else { + format!("~{} bytes", size) + } + } + + pub fn describe_total_edits(&self) -> String { + let size = self.total_edit_size(); + + if size >= MB { + format!("({}) {} Mb. ({} bytes)", self.steps.len(), size / MB, size) + } else if size >= KB { + format!("({}) {} Kb. ({} bytes)", self.steps.len(), size / KB, size)
 + } else { + format!("({}) {} bytes", self.steps.len(), size) + } + } + + fn average_edit_size(&self) -> Length { + if self.steps.is_empty() { + return 0; + } + + self.total_edit_size() / self.steps.len() + } + + fn total_edit_size(&self) -> Length { + let mut total = 0; + + for step in &self.steps { + total += step.source.len(); + } + + total + } +} + +#[derive(Clone)] +pub struct SourceSample { + pub source: String, + pub span: SiteSpan, + lines: usize, +} + +impl SourceSample { + pub fn gen_init(random: &mut impl Rng, lines_range: Range<usize>) -> Self { + loop { + let nesting = random.gen_range(NESTING_MIN..NESTING_MAX); + + let source = gen_inner(random, BRANCHING, nesting); + + if source.chars().next().unwrap() != '{' { + continue; + } + + let lines = source.split('\n').count(); + + if lines < lines_range.start { + continue; + } + + if lines >= lines_range.end { + continue; + } + + return Self { + source, + lines, + span: 0..0, + }; + } + } + + pub fn gen_long(random: &mut impl Rng, lines_range: Range<usize>) -> Self { + loop { + let nesting = random.gen_range(NESTING_MIN..NESTING_MAX); + + let source = gen_inner(random, BRANCHING, nesting); + + let lines = source.split('\n').count(); + + if lines < lines_range.start { + continue; + } + + if lines >= lines_range.end { + continue; + } + + return Self { + source, + lines, + span: 0..0, + }; + } + } + + pub fn gen_short(random: &mut impl Rng, size_limit: usize) -> Self { + let mut source; + + let nesting = match random.gen::<f64>() < 4.0 / (size_limit as f64) { + true => 3, + false => 1, + }; + + loop { + source = gen_inner(random, BRANCHING, nesting); + + if source.split('\n').count() > 1 { + continue; + } + + if source.len() >= size_limit { + continue; + } + + break; + } + + return Self { + source, + lines: 1, + span: 0..0, + }; + } + + pub fn insert(&mut self, random: &mut impl Rng, edit: &mut Self) { + enum Candidate { + ArrayStart(Site), + ObjectStart(Site), + ArrayItem(Site), + ObjectItem(Site), + ArrayEmpty(Site), + ObjectEmpty(Site), + } + + enum Context { + Array, + Object, + } + + let mut stack = Vec::with_capacity(self.source.len() / 5 + 1); + let mut candidates = Vec::with_capacity(self.source.len() / 3 + 1); + + let mut site = 0; + let mut characters = self.source.chars().peekable(); + + loop { + let character = match characters.next() { + Some(character) => character, + + None => break, + }; + + match character { + '[' => { + stack.push(Context::Array); + + match characters.peek().unwrap() == &']' { + false => { + candidates.push(Candidate::ArrayStart(site + 1)); + } + + true => { + candidates.push(Candidate::ArrayEmpty(site + 1)); + } + } + } + + '{' => { + stack.push(Context::Object); + + match characters.peek().unwrap() == &'}' { + false => { + candidates.push(Candidate::ObjectStart(site + 1)); + } + + true => { + candidates.push(Candidate::ObjectEmpty(site + 1)); + } + } + } + + ']' | '}' => { + let _ = stack.pop().unwrap(); + } + + ',' => match stack.last().unwrap() { + Context::Array => { + candidates.push(Candidate::ArrayItem(site)); + } + Context::Object => { + candidates.push(Candidate::ObjectItem(site)); + } + }, + + _ => (), + } + + site += 1; + } + + let candidate = &candidates[random.gen_range(0..candidates.len())]; + + match *candidate { + Candidate::ArrayStart(site) => { + edit.span = site..site; + edit.source.push_str(", "); + self.source.insert_str(site, edit.source.as_str()) + } + + Candidate::ObjectStart(site) => { + edit.source = format!("\"key\": {}, ", edit.source); + 
edit.span = site..site; + self.source.insert_str(site, edit.source.as_str()) + } + + Candidate::ArrayItem(site) => { + edit.source = format!(", {}", edit.source); + edit.span = site..site; + self.source.insert_str(site, edit.source.as_str()) + } + + Candidate::ObjectItem(site) => { + edit.source = format!(", \"key\": {}", edit.source); + edit.span = site..site; + self.source.insert_str(site, edit.source.as_str()) + } + Candidate::ArrayEmpty(site) => { + edit.span = site..site; + self.source.insert_str(site, edit.source.as_str()) + } + + Candidate::ObjectEmpty(site) => { + edit.source = format!("\"key\": {}", edit.source); + edit.span = site..site; + self.source.insert_str(site, edit.source.as_str()) + } + } + } + + pub fn replace(&mut self, random: &mut impl Rng, edit: &mut Self) { + enum Context { + Array(Site), + Object(Site), + } + + impl Context { + fn start(self) -> Site { + match self { + Self::Array(site) => site, + Self::Object(site) => site, + } + } + } + + let deletion_limit = edit.source.len() * 3; + + let mut stack = Vec::with_capacity(self.source.len() / 5 + 1); + let mut candidates = Vec::with_capacity(self.source.len() / 3 + 1); + + let mut site = 0; + let mut characters = self.source.chars().peekable(); + + loop { + let character = match characters.next() { + Some(character) => character, + + None => break, + }; + + match character { + '[' => { + stack.push(Context::Array(site)); + } + + '{' => { + stack.push(Context::Object(site)); + } + + ']' | '}' => { + let start = stack.pop().unwrap().start(); + + if start > 0 && site - start < deletion_limit { + candidates.push(start..(site + 1)) + } + } + + '"' if characters.peek().unwrap() == &'s' => candidates.push(site..(site + 8)), + + 't' if characters.peek().unwrap() == &'r' => candidates.push(site..(site + 4)), + + 'n' if characters.peek().unwrap() == &'u' => candidates.push(site..(site + 4)), + + 'f' | '1' => candidates.push(site..(site + 5)), + + _ => (), + } + + site += 1; + } + + let candidate = candidates[random.gen_range(0..candidates.len())].clone(); + + edit.span = candidate.clone(); + + self.source = format!( + "{}{}{}", + &self.source[0..candidate.start], + edit.source.as_str(), + &self.source[candidate.end..] 
+ ); + } +} + +fn gen_inner(random: &mut impl Rng, branching: usize, nesting: usize) -> String { + let distribution = match nesting == 0 { + true => WeightedIndex::new(&[1usize, 1, 1, 1, 1]).unwrap(), + false => WeightedIndex::new(&[1, 1, 1, 1, 1, 7, 7]).unwrap(), + }; + + match distribution.sample(random) + 1 { + 1 => String::from(r#"true"#), + 2 => String::from(r#"false"#), + 3 => String::from(r#"null"#), + 4 => String::from(r#"12345"#), + 5 => String::from(r#""STRING""#), + + 6 => match random.gen_range(0..branching) { + 0 => String::from(r#"[]"#), + 1 => format!("[{}]", gen_inner(random, branching, nesting - 1)), + + other => { + let mut result = String::from('['); + + match nesting == 1 { + true => { + for index in 0..other { + if index > 0 { + result.push_str(", "); + } + + result.push_str(&gen_inner(random, branching, nesting - 1)) + } + + result.push_str("]"); + } + + false => { + for index in 0..other { + match index == 0 { + true => result.push_str("\n "), + false => result.push_str(",\n "), + } + + result.push_str(&shift(gen_inner(random, branching, nesting - 1))) + } + + result.push_str("\n]"); + } + } + + result + } + }, + + 7 => match random.gen_range(0..branching) { + 0 => String::from(r#"{}"#), + 1 => format!( + r#"{{"key": {}}}"#, + gen_inner(random, branching, nesting - 1) + ), + + other => { + let mut result = String::from('{'); + + match nesting == 1 { + true => { + for index in 0..other { + if index > 0 { + result.push_str(", "); + } + + result.push_str(r#""key": "#); + result.push_str(&gen_inner(random, branching, nesting - 1)) + } + + result.push_str("}"); + } + + false => { + for index in 0..other { + match index == 0 { + true => result.push_str("\n "), + false => result.push_str(",\n "), + } + + result.push_str(r#""key": "#); + result.push_str(&shift(gen_inner(random, branching, nesting - 1))) + } + + result.push_str("\n}"); + } + } + + result + } + }, + + _ => unreachable!(), + } +} + +fn shift(text: String) -> String { + text.split('\n') + .enumerate() + .map(|(index, line)| { + if index == 0 { + return line.to_string(); + } + + return format!(" {}", line); + }) + .collect::<Vec<String>>() + .join("\n") +} diff --git a/work/crates/examples/benches/frameworks.rs b/work/crates/examples/benches/frameworks.rs new file mode 100644 index 0000000..02bb96d --- /dev/null +++ b/work/crates/examples/benches/frameworks.rs @@ -0,0 +1,163 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository).
// +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +pub mod nom; +pub mod ropey; +pub mod treesitter; + +use std::{ + marker::PhantomData, + time::{Duration, Instant}, +}; + +use criterion::black_box; +use lady_deirdre::{lexis::SiteSpan, syntax::Node, Document}; + +use crate::BenchDataLayer; + +pub trait FrameworkCase { + fn name(&self) -> &'static str; + + #[allow(unused)] + fn configuration(&self, layer: &BenchDataLayer) -> FrameworkConfiguration { + FrameworkConfiguration { + sample_size: match layer.index == 0 { + false => 10, + true => 100, + }, + + ..FrameworkConfiguration::default() + } + } + + fn bench_load(&self, text: &str) -> Duration; + + fn bench_single_edit<'a>(&self, text: &'a str, span: SiteSpan, edit: &'a str) -> Duration; + + fn bench_sequential_edits<'a>( + &self, + text: &'a str, + edits: Vec<(SiteSpan, &'a str)>, + ) -> Duration; +} + +pub struct FrameworkConfiguration { + pub sample_size: usize, + pub data_load: bool, + pub short_edits: bool, + pub long_edits: bool, + pub many_edits: bool, +} + +impl Default for FrameworkConfiguration { + fn default() -> Self { + Self { + data_load: true, + short_edits: true, + long_edits: true, + many_edits: true, + sample_size: 100, + } + } +} + +pub struct SelfCase<N: Node> { + name: &'static str, + syntax: PhantomData<N>, +} + +impl<N: Node> FrameworkCase for SelfCase<N> { + fn name(&self) -> &'static str { + self.name + } + + #[inline(never)] + fn bench_load(&self, text: &str) -> Duration { + let start = Instant::now(); + let result = Document::<N>::from(text); + let time = start.elapsed(); + + black_box(result); + + time + } + + #[inline(never)] + fn bench_single_edit<'a>(&self, text: &'a str, span: SiteSpan, edit: &'a str) -> Duration { + let mut result = Document::<N>::from(text); + + let start = Instant::now(); + result.write(span, edit); + let time = start.elapsed(); + + black_box(result); + + time + } + + #[inline(never)] + fn bench_sequential_edits<'a>( + &self, + text: &'a str, + edits: Vec<(SiteSpan, &'a str)>, + ) -> Duration { + let mut result = Document::<N>::from(text); + + let mut total = Duration::ZERO; + + for (span, edit) in edits { + let start = Instant::now(); + result.write(span, edit); + let time = start.elapsed(); + + total += time; + } + + black_box(result); + + total + } +} + +impl<N: Node> SelfCase<N> { + pub fn new(name: &'static str) -> Self { + Self { + name, + syntax: Default::default(), + } + } +} diff --git a/work/crates/examples/benches/frameworks/nom.rs b/work/crates/examples/benches/frameworks/nom.rs new file mode 100644 index 0000000..59456f1 --- /dev/null +++ b/work/crates/examples/benches/frameworks/nom.rs @@ -0,0 +1,235 @@ +/* +A part of this file is copied from +https://github.com/Geal/nom/blob/761ab0a24fccb4c560367b583b608fbae5f31647/benchmarks/benches/json.r +as is under the terms and conditions of the MIT License: + 
+Copyright (c) 2014-2019 Geoffroy Couprie + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +use std::{ + collections::HashMap, + time::{Duration, Instant}, +}; + +use criterion::black_box; +use lady_deirdre::lexis::SiteSpan; +use nom::{ + branch::alt, + bytes::complete::{tag, take}, + character::complete::{anychar, char, multispace0, none_of}, + combinator::{map, map_opt, map_res, value, verify}, + error::ParseError, + multi::{fold_many0, separated_list0}, + number::complete::double, + sequence::{delimited, preceded, separated_pair}, + IResult, + Parser, +}; + +use crate::{frameworks::FrameworkConfiguration, BenchDataLayer, FrameworkCase}; + +#[derive(Debug, PartialEq, Clone)] +pub enum JsonValue { + Null, + Bool(bool), + Str(String), + Num(f64), + Array(Vec<JsonValue>), + Object(HashMap<String, JsonValue>), +} + +fn boolean(input: &str) -> IResult<&str, bool> { + alt((value(false, tag("false")), value(true, tag("true"))))(input) +} + +fn u16_hex(input: &str) -> IResult<&str, u16> { + map_res(take(4usize), |s| u16::from_str_radix(s, 16))(input) +} + +fn unicode_escape(input: &str) -> IResult<&str, char> { + map_opt( + alt(( + // Not a surrogate + map(verify(u16_hex, |cp| !(0xD800..0xE000).contains(cp)), |cp| { + cp as u32 + }), + // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details + map( + verify( + separated_pair(u16_hex, tag("\\u"), u16_hex), + |(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low), + ), + |(high, low)| { + let high_ten = (high as u32) - 0xD800; + let low_ten = (low as u32) - 0xDC00; + (high_ten << 10) + low_ten + 0x10000 + }, + ), + )), + // Could probably be replaced with .unwrap() or _unchecked due to the verify checks + std::char::from_u32, + )(input) +} + +fn character(input: &str) -> IResult<&str, char> { + let (input, c) = none_of("\"")(input)?; + if c == '\\' { + alt(( + map_res(anychar, |c| { + Ok(match c { + '"' | '\\' | '/' => c, + 'b' => '\x08', + 'f' => '\x0C', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + _ => return Err(()), + }) + }), + preceded(char('u'), unicode_escape), + ))(input) + } else { + Ok((input, c)) + } +} + +fn string(input: &str) -> IResult<&str, String> { + delimited( + char('"'), + fold_many0(character, String::new, |mut string, c| { + string.push(c); + string + }), + char('"'), + )(input) +} + +fn ws<'a, O, E: ParseError<&'a str>, F: Parser<&'a str, O, E>>(f: F) -> impl Parser<&'a str, O, E> { + delimited(multispace0, f, multispace0) +} + +fn array(input: &str) -> IResult<&str, Vec<JsonValue>> { + delimited( + char('['), + 
ws(separated_list0(ws(char(',')), json_value)), + char(']'), + )(input) +} + +fn object(input: &str) -> IResult<&str, HashMap<String, JsonValue>> { + map( + delimited( + char('{'), + ws(separated_list0( + ws(char(',')), + separated_pair(string, ws(char(':')), json_value), + )), + char('}'), + ), + |key_values| key_values.into_iter().collect(), + )(input) +} + +fn json_value(input: &str) -> IResult<&str, JsonValue> { + use JsonValue::*; + + alt(( + value(Null, tag("null")), + map(boolean, Bool), + map(string, Str), + map(double, Num), + map(array, Array), + map(object, Object), + ))(input) +} + +fn json(input: &str) -> IResult<&str, JsonValue> { + ws(json_value).parse(input) +} + +pub struct NomCase(pub &'static str); + +impl FrameworkCase for NomCase { + fn name(&self) -> &'static str { + self.0 + } + + fn configuration(&self, layer: &BenchDataLayer) -> FrameworkConfiguration { + FrameworkConfiguration { + sample_size: match layer.index == 0 { + false => 10, + true => 100, + }, + + many_edits: layer.index == 0, + + ..FrameworkConfiguration::default() + } + } + + #[inline(never)] + fn bench_load(&self, text: &str) -> Duration { + let start = Instant::now(); + let result = json(text).unwrap(); + let time = start.elapsed(); + + black_box(result); + + time + } + + #[inline(never)] + fn bench_single_edit<'a>(&self, text: &'a str, span: SiteSpan, edit: &'a str) -> Duration { + let start = Instant::now(); + let text = format!("{}{}{}", &text[0..span.start], edit, &text[span.end..]); + let result = json(text.as_str()).unwrap(); + let time = start.elapsed(); + + black_box(result); + + time + } + + #[inline(never)] + fn bench_sequential_edits<'a>( + &self, + text: &'a str, + edits: Vec<(SiteSpan, &'a str)>, + ) -> Duration { + let mut text = text.to_string(); + + let mut total = Duration::ZERO; + + for (span, edit) in edits { + let start = Instant::now(); + text = format!("{}{}{}", &text[0..span.start], edit, &text[span.end..]); + let result = json(text.as_str()).unwrap(); + let time = start.elapsed(); + + total += time; + + black_box(result); + } + + total + } +} diff --git a/work/crates/examples/benches/frameworks/ropey.rs b/work/crates/examples/benches/frameworks/ropey.rs new file mode 100644 index 0000000..25b3d4a --- /dev/null +++ b/work/crates/examples/benches/frameworks/ropey.rs @@ -0,0 +1,101 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository).
// +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::time::{Duration, Instant}; + +use criterion::black_box; +use lady_deirdre::lexis::SiteSpan; +use ropey::Rope; + +use crate::FrameworkCase; + +pub struct RopeyCase(pub &'static str); + +impl FrameworkCase for RopeyCase { + fn name(&self) -> &'static str { + self.0 + } + + #[inline(never)] + fn bench_load(&self, text: &str) -> Duration { + let start = Instant::now(); + let result = Rope::from_str(text); + let time = start.elapsed(); + + black_box(result); + + time + } + + #[inline(never)] + fn bench_single_edit<'a>(&self, text: &'a str, span: SiteSpan, edit: &'a str) -> Duration { + let mut result = Rope::from_str(text); + + let start = Instant::now(); + result.remove(span.clone()); + result.insert(span.start, edit); + let time = start.elapsed(); + + black_box(result); + + time + } + + #[inline(never)] + fn bench_sequential_edits<'a>( + &self, + text: &'a str, + edits: Vec<(SiteSpan, &'a str)>, + ) -> Duration { + let mut result = Rope::from_str(text); + + let mut total = Duration::ZERO; + + for (span, edit) in edits { + let start = Instant::now(); + result.remove(span.clone()); + result.insert(span.start, edit); + let time = start.elapsed(); + + total += time; + } + + black_box(result); + + total + } +} diff --git a/work/crates/examples/benches/frameworks/treesitter.rs b/work/crates/examples/benches/frameworks/treesitter.rs new file mode 100644 index 0000000..2316d52 --- /dev/null +++ b/work/crates/examples/benches/frameworks/treesitter.rs @@ -0,0 +1,169 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. 
// +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use std::time::{Duration, Instant}; + +use criterion::black_box; +use lady_deirdre::lexis::SiteSpan; +use tree_sitter::Point; + +use crate::FrameworkCase; + +fn find_position(text: &str, mut byte: usize) -> Point { + let mut row = 0; + let mut column = 0; + + for char in text.chars() { + if byte == 0 { + break; + } + + match char { + '\n' => { + column = 0; + row += 1; + } + + _ => { + column += 1; + } + } + + byte -= 1; + } + + Point { row, column } +} + +pub struct TreeSitterCase(pub &'static str); + +impl FrameworkCase for TreeSitterCase { + fn name(&self) -> &'static str { + self.0 + } + + #[inline(never)] + fn bench_load(&self, text: &str) -> Duration { + let mut parser = tree_sitter::Parser::new(); + parser.set_language(tree_sitter_json::language()).unwrap(); + + let start = Instant::now(); + let result = parser.parse(text, None).unwrap(); + let time = start.elapsed(); + + black_box(result); + black_box(parser); + + time + } + + #[inline(never)] + fn bench_single_edit<'a>(&self, text: &'a str, span: SiteSpan, edit: &'a str) -> Duration { + let mut parser = tree_sitter::Parser::new(); + parser.set_language(tree_sitter_json::language()).unwrap(); + + let mut result = parser.parse(text, None).unwrap(); + + // The full text after the edit; Tree-Sitter reparses this string + // incrementally against the previous tree. + let new_text = format!("{}{}{}", &text[0..span.start], edit, &text[span.end..]); + + let start_position = find_position(text, span.start); + let old_end_position = find_position(text, span.end); + let new_end_position = find_position(new_text.as_str(), span.start + edit.len()); + + let start = Instant::now(); + result.edit(&tree_sitter::InputEdit { + start_byte: span.start, + old_end_byte: span.end, + new_end_byte: span.start + edit.len(), + start_position, + old_end_position, + new_end_position, + }); + result = parser.parse(new_text.as_str(), Some(&result)).unwrap(); + let time = start.elapsed(); + + black_box(result); + black_box(parser); + + time + } + + #[inline(never)] + fn bench_sequential_edits<'a>( + &self, + text: &'a str, + edits: Vec<(SiteSpan, &'a str)>, + ) -> Duration { + let mut parser = tree_sitter::Parser::new(); + parser.set_language(tree_sitter_json::language()).unwrap(); + + let mut text = text.to_string(); + let mut result = parser.parse(text.as_str(), None).unwrap(); + + let mut total = Duration::ZERO; + + for (span, edit) in edits { + let new_text = format!("{}{}{}", &text[0..span.start], edit, &text[span.end..]); + + let start_position = find_position(text.as_str(), span.start); + let old_end_position = find_position(text.as_str(), span.end); + let new_end_position = find_position(new_text.as_str(), span.start + edit.len()); + + let start = Instant::now(); + result.edit(&tree_sitter::InputEdit { + start_byte: span.start, + old_end_byte: span.end, + new_end_byte: span.start + edit.len(), + start_position, + old_end_position, + new_end_position, + }); + result = parser.parse(new_text.as_str(), Some(&result)).unwrap(); + let time = start.elapsed(); + + total += time; + + text = new_text; + } + + black_box(result); + black_box(parser); + + total + } +} diff --git a/work/crates/examples/benches/layer.rs 
b/work/crates/examples/benches/layer.rs new file mode 100644 index 0000000..bc4fcf1 --- /dev/null +++ b/work/crates/examples/benches/layer.rs @@ -0,0 +1,279 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use std::{ops::Range, time::Duration}; + +use criterion::{BenchmarkId, Criterion}; +use rand::{rngs::StdRng, seq::SliceRandom, Rng, SeedableRng}; + +use crate::{BenchData, FrameworkCase, SourceSample}; + +const SEED: u64 = 154656; +static LAYERS: [Range<usize>; 3] = [1500..2000, 60000..80000, 160000..200000]; +static SHORT_EDITS: usize = 1000; +static LONG_EDITS: usize = 100; + +pub struct BenchDataLayer { + pub index: usize, + pub load: BenchData, + pub short_edits: BenchData, + pub long_edits: BenchData, + pub many_edits: BenchData, +} + +impl BenchDataLayer { + pub fn new() -> Vec<Self> { + println!("Preparing test data..."); + + let mut random = StdRng::seed_from_u64(SEED); + let mut layers = Vec::new(); + + let mut short_edits = Vec::with_capacity(SHORT_EDITS); + + for _ in 0..SHORT_EDITS { + short_edits.push(SourceSample::gen_short(&mut random, 50)); + } + + println!("Short edits ready."); + + let mut long_edits = Vec::with_capacity(LONG_EDITS); + + for _ in 0..LONG_EDITS { + long_edits.push(SourceSample::gen_long(&mut random, 10..100)); + } + + println!("Long edits ready."); + + let mut many_edits = Vec::with_capacity(short_edits.len() + long_edits.len()); + many_edits.append( + &mut short_edits + .iter() + .map(|edit| (1, edit)) + .collect::<Vec<_>>(), + ); + many_edits.append( + &mut long_edits + .iter() + .map(|edit| (2, edit)) + .collect::<Vec<_>>(), + ); + many_edits.shuffle(&mut random); + + println!("Sequential edits ready."); + + for (index, layer) in LAYERS.iter().enumerate() { + let load_data = BenchData::new(SourceSample::gen_init(&mut random, layer.clone())); + + let mut short_edits_data = load_data.clone(); + + for edit in &short_edits { + short_edits_data.edit_short(&mut random, edit.clone()); + short_edits_data.reset(); + } + + let mut long_edits_data = load_data.clone(); + + for edit in &long_edits { + long_edits_data.edit_long(&mut random, edit.clone()); + long_edits_data.reset(); + } + + let mut many_edits_data = load_data.clone(); + + for (kind, edit) in many_edits.clone() { + match kind { + 1 => many_edits_data.edit_short(&mut random, edit.clone()), + 2 => many_edits_data.edit_long(&mut random, edit.clone()), + _ => unreachable!(), + } + } + + println!("Layer {} complete.", load_data.describe_init()); + + layers.push(BenchDataLayer { + index, + load: load_data, + short_edits: short_edits_data, + long_edits: long_edits_data, + many_edits: many_edits_data, + }); + } + + println!("Verifying test data..."); + + for layer in &layers { + // layer.load.verify_independent(); + // println!("Layer {} load data OK.", layer.load.describe_init()); + + // layer.short_edits.verify_independent(); + // println!("Layer {} short edits data OK.", layer.load.describe_init()); + + // layer.long_edits.verify_independent(); + // println!("Layer {} long edits data OK.", layer.load.describe_init()); + + layer.many_edits.verify_sequential(); + println!("Layer {} many edits data OK.", layer.load.describe_init(),); + } + + println!("Test data ready."); + + layers + } + + pub fn run(&self, criterion: &mut Criterion, frameworks: &[Box<dyn FrameworkCase>]) { + let mut group = criterion.benchmark_group(self.load.describe_init()); + + for framework in frameworks { + let configuration = framework.configuration(self); + + group.sample_size(configuration.sample_size); + + if configuration.data_load { + group.bench_with_input( + BenchmarkId::new("Data Load", framework.name()), + &self.load, + |bencher, sample| { + bencher.iter_custom(|iterations| 
{ + let mut total = Duration::ZERO; + + for _ in 0..iterations { + total += framework.bench_load(&sample.init.source); + } + + total + }) + }, + ); + } + + if configuration.short_edits { + group.bench_with_input( + BenchmarkId::new( + format!("Short edits {}", self.short_edits.describe_average_edit()), + framework.name(), + ), + &self.short_edits, + |bencher, sample| { + let init = sample.init.source.as_str(); + let mut random = StdRng::seed_from_u64(SEED); + + bencher.iter_custom(|iterations| { + let mut total = Duration::ZERO; + + for _ in 0..iterations { + let step = &sample.steps[random.gen_range(0..sample.steps.len())]; + + total += framework.bench_single_edit( + init, + step.span.clone(), + step.source.as_str(), + ); + } + + total + }) + }, + ); + } + + if configuration.long_edits { + group.bench_with_input( + BenchmarkId::new( + format!("Long edits {}", self.long_edits.describe_average_edit()), + framework.name(), + ), + &self.long_edits, + |bencher, sample| { + let init = sample.init.source.as_str(); + let mut random = StdRng::seed_from_u64(SEED); + + bencher.iter_custom(|iterations| { + let mut total = Duration::ZERO; + + for _ in 0..iterations { + let step = + sample.steps[random.gen_range(0..sample.steps.len())].clone(); + + total += framework.bench_single_edit( + init, + step.span.clone(), + step.source.as_str(), + ); + } + + total + }) + }, + ); + } + + if configuration.many_edits { + group.bench_with_input( + BenchmarkId::new( + format!("Many edits {}", self.many_edits.describe_total_edits(),), + framework.name(), + ), + &self.many_edits, + |bencher, sample| { + let init = sample.init.source.as_str(); + + bencher.iter_custom(|iterations| { + let mut total = Duration::ZERO; + + for _ in 0..iterations { + total += framework.bench_sequential_edits( + init, + sample + .steps + .iter() + .map(|sample| (sample.span.clone(), sample.source.as_str())) + .collect::<Vec<_>>(), + ); + } + + total + }) + }, + ); + } + } + + group.finish(); + } +} diff --git a/work/crates/examples/benches/main.rs b/work/crates/examples/benches/main.rs new file mode 100644 index 0000000..197648e --- /dev/null +++ b/work/crates/examples/benches/main.rs @@ -0,0 +1,81 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly.
// +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +//TODO check warnings regularly +#![allow(warnings)] + +mod data; +mod frameworks; +mod layer; + +use criterion::Criterion; +use lady_deirdre::syntax::NoSyntax; +use lady_deirdre_examples::json::{lexis::JsonToken, syntax::JsonNode}; + +use crate::{ + data::{BenchData, SourceSample}, + frameworks::{ + nom::NomCase, + ropey::RopeyCase, + treesitter::TreeSitterCase, + FrameworkCase, + SelfCase, + }, + layer::BenchDataLayer, +}; + +fn main() { + let layers = BenchDataLayer::new(); + + print!("{esc}[2J{esc}[1;1H", esc = 27 as char); + + let frameworks = Vec::from([ + Box::new(SelfCase::<NoSyntax<JsonToken>>::new("Self Lexis")) as Box<dyn FrameworkCase>, + Box::new(SelfCase::<JsonNode>::new("Self Syntax")) as Box<dyn FrameworkCase>, + Box::new(RopeyCase("Ropey")) as Box<dyn FrameworkCase>, + Box::new(NomCase("Nom")) as Box<dyn FrameworkCase>, + Box::new(TreeSitterCase("Tree Sitter")) as Box<dyn FrameworkCase>, + ]); + + let mut criterion = Criterion::default().configure_from_args(); + + for layer in layers { + layer.run(&mut criterion, &frameworks); + } + + criterion.final_summary(); +} diff --git a/work/crates/examples/readme.md b/work/crates/examples/readme.md new file mode 100644 index 0000000..fcafcb0 --- /dev/null +++ b/work/crates/examples/readme.md @@ -0,0 +1,117 @@ +# Lady Deirdre Examples, Benchmarks, Integration Tests. + +This subproject of the Lady Deirdre technology contains a simple example of the +[Json incremental parser](https://github.com/Eliah-Lakhin/lady-deirdre/tree/master/work/crates/examples/src/json), +[performance Benchmarks](#benchmarks), and +[Integration Tests](https://github.com/Eliah-Lakhin/lady-deirdre/tree/master/work/crates/examples/tests). + +## Benchmarks. + +### Setup. + +The code of the Benchmark tests is under the +[benches](https://github.com/Eliah-Lakhin/lady-deirdre/tree/master/work/crates/examples/benches) +directory. + +These tests generate a set of random +[JSON](https://en.wikipedia.org/wiki/JSON) files of different sizes and nesting +complexity, and a series of random edits (insertions, deletions, and +replacements), also of different sizes and nesting complexity, for each +JSON file. + +The JSON files and the results of each edit are always valid JSON, and this +validity is verified automatically beforehand. + +The Benchmarks measure computational performance on each series of edits, +comparing three well-known Rust frameworks of different functional capabilities +and two different instances of Lady Deirdre (a short usage sketch follows +the list): + - [Nom](https://crates.io/crates/nom). A parser combinator library. This + combinator library is widely recognized as one of the best in performance + for non-incremental parsing, but Nom does not have any incremental reparsing + capabilities. + - [Tree-Sitter](https://crates.io/crates/tree-sitter). An incremental parser + generator tool. This library is one of the most widely recognized solutions + for incremental parsing. + - [Ropey](https://crates.io/crates/ropey). A mutable text storage library. + This library does not provide any syntax or lexis parsing capabilities, but + Ropey has mutable text storage functionality similar to some Lady Deirdre + functions. + - "Self Syntax" is an instance of the JSON syntax and lexis incremental parser + that uses Lady Deirdre under the hood. + - "Self Lexis" is an instance of the JSON lexis-only incremental parser + that uses Lady Deirdre under the hood.
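Roughly speaking, each "Self" case above boils down to constructing a Lady Deirdre `Document` and applying edits to it through `write`. The sketch below is a minimal illustration of the measured operations, not the actual harness (that lives in `benches/frameworks.rs` in this patch); the sample text, edit span, and timing printout here are arbitrary:

```rust
use std::time::Instant;

use lady_deirdre::Document;
use lady_deirdre_examples::json::syntax::JsonNode;

fn main() {
    // Initial load: the whole input is parsed from scratch.
    let load = Instant::now();
    let mut document = Document::<JsonNode>::from(r#"{"key": [1, 2, 3]}"#);
    println!("load: {:?}", load.elapsed());

    // Incremental edit: replace `2` (sites 12..13) with `true`. Only the
    // affected fragment of the lexis and syntax structure is reparsed.
    let edit = Instant::now();
    document.write(12..13, "true");
    println!("edit: {:?}", edit.elapsed());
}
```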
+There are three series of tests on three independent JSON files of different +sizes: + - ~10Mb file (10791089 bytes, 178508 lines). + - ~4Mb file (4346095 bytes, 72072 lines). + - ~82Kb file (84638 bytes, 1957 lines). + +For each file, the benchmarks measure the initial loading time, the time of +independent edits, and a series of edits (1100 edits in total) applied +sequentially. + +The series of edits is the most interesting performance indicator, because it +models live editing of the text, mimicking an end user's sequential edit +actions. + +I used my mobile Intel NUC machine to perform the benchmark tests: +Intel Core i7-8809G CPU @ 3.10GHz × 8, 16Gb RAM. + +### Results. + +You can find the complete Criterion report +[here](https://6314c0d3ffd9447cb096168e--cheerful-malasada-35b65a.netlify.app/report/). + +1. **Incremental Reparsing.** + + Lady Deirdre shows almost the same performance as Tree-Sitter on sequential + edits of the small file (82Kb): 12.1ms vs 11.25ms. + + But Lady Deirdre demonstrates significantly better results than Tree-Sitter + on the medium (4Mb) and large (10Mb) files: 18ms vs 58ms and 39.1ms vs 124.5ms + respectively. + +2. **Non-Incremental Parsing.** + + Nom performs significantly better than both Tree-Sitter and Lady Deirdre on + initial parsing. Nom parsed the small file (82Kb) in 0.87ms, + Lady Deirdre in 2.48ms, and Tree-Sitter in 5.91ms. + + For the larger file (10Mb) the ratio is similar: + 87.25ms (Nom) vs 304ms (Lady Deirdre) vs 624ms (Tree-Sitter). + + Even though the non-incremental parser combinator Nom shows significantly + better results than the incremental parsers, Lady Deirdre works up to 2 times + faster than Tree-Sitter in these tests. + + On the series of edits over the small JSON file, where Nom has to reparse the + whole file on every edit, it demonstrates the expected performance degradation + compared to both Lady Deirdre and Tree-Sitter: ~2155ms for complete reparsing + of all 1100 edits. + +3. **Text Mutations.** + + Ropey demonstrates significantly better results on all text-edit tests + than both Tree-Sitter and Lady Deirdre (these results are not applicable to + Nom). For comparison, on the large JSON file (10Mb) a series of edits took + 1.26ms (Ropey) vs 11.1ms (Lady Deirdre's lexis-only JSON parser). + + For a fair comparison I would have to opt out of Lady Deirdre's lexis parser + in these tests, but this is currently not possible. + +### Conclusion. + +Lady Deirdre demonstrates better performance than Tree-Sitter on initial data +loading in all tests, comparable incremental reparsing performance on small +files, and better incremental reparsing performance on medium to large files. + +These results allow me to conclude that in certain applications Lady Deirdre +could be a competitive replacement for Tree-Sitter, a widely used +production-ready incremental parsing solution. However, it is worth mentioning +that the two solutions have different and sometimes incomparable functional +capabilities and different goals. Moreover, these benchmarks were performed on +purely artificial snippets and a relatively simple JSON syntax.
+ +For non-incremental parsing, Nom and solutions of the same class offer better +performance to application developers; however, both Tree-Sitter and +Lady Deirdre remain applicable to this type of parsing as well.
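As a closing illustration, here is roughly how the JSON example crate is driven from application code. This is a minimal sketch assembled from the APIs that appear in this patch's own tests and benches (`Document`, `CodeContent::substring`, `SyntaxTree::errors`); the sample text and edit span are arbitrary:

```rust
use lady_deirdre::{lexis::CodeContent, syntax::SyntaxTree, Document};
use lady_deirdre_examples::json::syntax::JsonNode;

fn main() {
    // Parse an incrementally managed JSON document.
    let mut document = Document::<JsonNode>::from("[true, null]");

    // The sample is valid JSON, so the syntax tree contains no errors.
    assert!(document.errors().next().is_none());

    // Replace `null` (sites 7..11) with a number, then check the result.
    document.write(7..11, "123");
    assert_eq!(document.substring(..), "[true, 123]");
    assert!(document.errors().next().is_none());
}
```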
 diff --git a/work/crates/examples/src/json/formatter.rs b/work/crates/examples/src/json/formatter.rs new file mode 100644 index 0000000..8c3c8b6 --- /dev/null +++ b/work/crates/examples/src/json/formatter.rs @@ -0,0 +1,108 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use lady_deirdre::{ + lexis::SourceCode, + syntax::{ParseContext, TransduceRef, Transducer}, +}; + +use crate::json::{lexis::JsonToken, syntax::JsonNode}; + +pub struct JsonFormatter; + +impl<S: SourceCode<Token = JsonToken>> Transducer<JsonNode, S, String> for JsonFormatter { + fn map(&mut self, context: &mut ParseContext<JsonNode, S, String>) -> String { + match context.node() { + JsonNode::Root { object } => object + .get(context) + .map(|string| string.as_str()) + .unwrap_or("?") + .to_string(), + + JsonNode::Object { entries } => { + format!( + "{{{}}}", + entries + .into_iter() + .map(|node_ref| node_ref + .get(context) + .map(|string| string.as_str()) + .unwrap_or("?") + .to_string()) + .collect::<Vec<String>>() + .join(", "), + ) + } + + JsonNode::Entry { key, value } => { + format!( + "{:#}: {}", + key.string(context).unwrap_or("?"), + value + .get(context) + .map(|string| string.as_str()) + .unwrap_or("?"), + ) + } + + JsonNode::Array { items } => { + format!( + "[{}]", + items + .into_iter() + .map(|node_ref| node_ref + .get(context) + .map(|string| string.as_str()) + .unwrap_or("?") + .to_string()) + .collect::<Vec<String>>() + .join(", "), + ) + } + + JsonNode::String { value } | JsonNode::Number { value } => { + value.string(context).unwrap_or("?").to_string() + } + + JsonNode::True => String::from("true"), + + JsonNode::False => String::from("false"), + + JsonNode::Null => String::from("null"), + } + } +} diff --git a/work/crates/examples/src/json/lexis.rs b/work/crates/examples/src/json/lexis.rs new file mode 100644 index 0000000..28f7f91 --- /dev/null +++ b/work/crates/examples/src/json/lexis.rs @@ -0,0 +1,87 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved.
// +//////////////////////////////////////////////////////////////////////////////// + +use lady_deirdre::lexis::Token; + +#[derive(Token, Debug, Clone, Copy)] +#[define(DEC = ['0'..'9'])] +#[define(HEX = DEC | ['A'..'F'])] +#[define(POSITIVE = ['1'..'9'] & DEC*)] +#[define(ESCAPE = '\\' & ( + ['"', '\\', '/', 'b', 'f', 'n', 'r', 't'] + | ('u' & HEX & HEX & HEX & HEX) +))] +pub enum JsonToken { + #[rule("true")] + True, + + #[rule("false")] + False, + + #[rule("null")] + Null, + + #[rule('{')] + BraceOpen, + + #[rule('}')] + BraceClose, + + #[rule('[')] + BracketOpen, + + #[rule(']')] + BracketClose, + + #[rule(',')] + Comma, + + #[rule(':')] + Colon, + + #[rule('"' & (ESCAPE | ^['"', '\\'])* & '"')] + String, + + #[rule('-'? & ('0' | POSITIVE) & ('.' & DEC+)? & (['e', 'E'] & ['-', '+']? & DEC+)?)] + Number, + + #[rule([' ', '\t', '\n', '\x0c', '\r']+)] + Whitespace, + + #[mismatch] + Mismatch, +} diff --git a/work/crates/examples/src/json/mod.rs b/work/crates/examples/src/json/mod.rs new file mode 100644 index 0000000..4266a2c --- /dev/null +++ b/work/crates/examples/src/json/mod.rs @@ -0,0 +1,40 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +pub mod formatter; +pub mod lexis; +pub mod syntax; diff --git a/work/crates/examples/src/json/syntax.rs b/work/crates/examples/src/json/syntax.rs new file mode 100644 index 0000000..f7cdd02 --- /dev/null +++ b/work/crates/examples/src/json/syntax.rs @@ -0,0 +1,80 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. 
//
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use lady_deirdre::{
+    lexis::TokenRef,
+    syntax::{Node, NodeRef, SyntaxError},
+};
+
+use crate::json::lexis::JsonToken;
+
+#[derive(Node, Clone)]
+#[token(JsonToken)]
+#[error(SyntaxError)]
+#[skip($Whitespace)]
+#[define(ANY = Object | Array | True | False | String | Number | Null)]
+pub enum JsonNode {
+    #[root]
+    #[rule(object: Object)]
+    Root { object: NodeRef },
+
+    #[rule($BraceOpen & (entries: Entry)*{$Comma} & $BraceClose)]
+    #[synchronization]
+    Object { entries: Vec<NodeRef> },
+
+    #[rule(key: $String & $Colon & value: ANY)]
+    Entry { key: TokenRef, value: NodeRef },
+
+    #[rule($BracketOpen & (items: ANY)*{$Comma} & $BracketClose)]
+    #[synchronization]
+    Array { items: Vec<NodeRef> },
+
+    #[rule(value: $String)]
+    String { value: TokenRef },
+
+    #[rule(value: $Number)]
+    Number { value: TokenRef },
+
+    #[rule($True)]
+    True,
+
+    #[rule($False)]
+    False,
+
+    #[rule($Null)]
+    Null,
+}
diff --git a/work/crates/examples/src/lib.rs b/work/crates/examples/src/lib.rs
new file mode 100644
index 0000000..f960edd
--- /dev/null
+++ b/work/crates/examples/src/lib.rs
@@ -0,0 +1,41 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions.
// +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +//TODO check warnings regularly +#![allow(warnings)] + +pub mod json; diff --git a/work/crates/examples/tests/balance.rs b/work/crates/examples/tests/balance.rs new file mode 100644 index 0000000..45c64d3 --- /dev/null +++ b/work/crates/examples/tests/balance.rs @@ -0,0 +1,278 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +//TODO check warnings regularly +#![allow(warnings)] + +use lady_deirdre::{ + lexis::{CodeContent, SimpleToken, ToSpan}, + syntax::{Node, NodeRef, SyntaxError, SyntaxTree}, + Document, +}; + +#[test] +fn test_balance() { + static mut VERSION: usize = 0; + + #[derive(Node, Clone, Debug, PartialEq, Eq)] + #[token(SimpleToken)] + #[error(SyntaxError)] + #[skip($Number | $Symbol | $Identifier | $String | $Char | $Whitespace | $Mismatch)] + #[define(ANY = Parenthesis | Brackets | Braces)] + enum DebugNode { + #[root] + #[rule(inner: ANY*)] + Root { + #[default(unsafe { VERSION })] + version: usize, + inner: Vec, + }, + + #[rule($ParenOpen & inner: ANY* & $ParenClose)] + #[synchronization] + Parenthesis { + #[default(unsafe { VERSION })] + version: usize, + inner: Vec, + }, + + #[rule($BracketOpen & inner: ANY* & $BracketClose)] + #[synchronization] + Brackets { + #[default(unsafe { VERSION })] + version: usize, + inner: Vec, + }, + + #[rule($BraceOpen & inner: ANY* & $BraceClose)] + #[synchronization] + Braces { + #[default(unsafe { VERSION })] + version: usize, + inner: Vec, + }, + } + + trait DebugPrint { + fn debug_print(&self) -> String; + + fn debug_errors(&self) -> String; + } + + impl DebugPrint for Document { + fn debug_print(&self) -> String { + fn traverse(document: &Document, node_ref: &NodeRef) -> String { + let node = match node_ref.deref(document) { + None => return format!("?"), + Some(node) => node, + }; + + let errors = match document.get_cluster(&node_ref.cluster_ref) { + None => 0, + + Some(cluster) => (&cluster.errors).into_iter().count(), + }; + + match node { + DebugNode::Root { version, inner } => { + format!( + "{}:{}<{}>", + version, + errors, + inner + .iter() + .map(|node_ref| traverse(document, node_ref)) + .collect::>() + .join("") + ) + } + + DebugNode::Parenthesis { version, inner } => { + format!( + "{}:{}({})", + version, + errors, + inner + .iter() + .map(|node_ref| traverse(document, node_ref)) + .collect::>() + .join("") + ) + } + + DebugNode::Brackets { version, inner } => { + format!( + "{}:{}[{}]", + version, + errors, + inner + .iter() + .map(|node_ref| traverse(document, node_ref)) + .collect::>() + .join("") + ) + } + + DebugNode::Braces { version, inner } => { + format!( + "{}:{}{{{}}}", + version, + errors, + inner + .iter() + .map(|node_ref| traverse(document, node_ref)) + .collect::>() + .join("") + ) + } + } + } + + traverse(self, self.root()) + } + + fn debug_errors(&self) -> String { + self.errors() + .map(|error| format!("{}: {}", error.span().format(self), error)) + .collect::>() + .join("\n") + } + } + + let mut document = Document::::from("foo bar baz"); + + assert_eq!(document.debug_print(), "0:0<>"); + assert_eq!(document.substring(..), "foo bar baz"); + assert_eq!(document.debug_errors(), ""); + + unsafe { VERSION = 1 }; + + document.write(0..0, "("); + assert_eq!(document.debug_print(), "1:1<1:1()>"); + assert_eq!(document.substring(..), "(foo bar baz"); + assert_eq!(document.debug_errors(), "[1:13]: Parenthesis format mismatch. Expected Braces, Brackets, Parenthesis, or $ParenClose."); + + unsafe { VERSION = 2 }; + + document.write(1..1, "[{"); + assert_eq!(document.debug_print(), "2:1<2:1(2:1[2:1{}])>"); + assert_eq!(document.substring(..), "([{foo bar baz"); + assert_eq!( + document.debug_errors(), + r#"[1:15]: Parenthesis format mismatch. Expected Braces, Brackets, Parenthesis, or $ParenClose. +[1:15]: Brackets format mismatch. 
Expected Braces, Brackets, Parenthesis, or $BracketClose. +[1:15]: Braces format mismatch. Expected Braces, Brackets, Parenthesis, or $BraceClose."# + ); + + unsafe { VERSION = 3 }; + + document.write(6..6, ")"); + assert_eq!(document.debug_print(), "3:0<3:0(3:1[3:1{}])>"); + assert_eq!(document.substring(..), "([{foo) bar baz"); + assert_eq!( + document.debug_errors(), + r#"[1:7]: Brackets format mismatch. Expected Braces, Brackets, Parenthesis, or $BracketClose. +[1:7]: Braces format mismatch. Expected Braces, Brackets, Parenthesis, or $BraceClose."# + ); + + unsafe { VERSION = 4 }; + + document.write(6..7, ""); + assert_eq!(document.debug_print(), "4:1<4:1(4:1[4:1{}])>"); + assert_eq!(document.substring(..), "([{foo bar baz"); + assert_eq!( + document.debug_errors(), + r#"[1:15]: Parenthesis format mismatch. Expected Braces, Brackets, Parenthesis, or $ParenClose. +[1:15]: Brackets format mismatch. Expected Braces, Brackets, Parenthesis, or $BracketClose. +[1:15]: Braces format mismatch. Expected Braces, Brackets, Parenthesis, or $BraceClose."# + ); + + unsafe { VERSION = 5 }; + document.write(6..6, "}()[]"); + unsafe { VERSION = 6 }; + document.write(15..15, "]"); + unsafe { VERSION = 7 }; + + document.write(20..20, ")"); + assert_eq!(document.debug_print(), "7:0<7:0(6:0[5:0{}5:0()5:0[]])>"); + assert_eq!(document.substring(..), "([{foo}()[] bar] baz)"); + assert_eq!(document.debug_errors(), r#""#); + + unsafe { VERSION = 8 }; + + document.write(7..8, ""); + assert_eq!(document.debug_print(), "7:0<7:0(8:1[8:0{}5:0[]])>"); + assert_eq!(document.substring(..), "([{foo})[] bar] baz)"); + assert_eq!( + document.debug_errors(), + r#"[1:8]: Brackets format mismatch. Expected Braces, Brackets, Parenthesis, or $BracketClose."# + ); + + unsafe { VERSION = 9 }; + + document.write(12..12, "X"); + assert_eq!(document.debug_print(), "7:0<7:0(9:1[8:0{}5:0[]])>"); + assert_eq!(document.substring(..), "([{foo})[] bXar] baz)"); + assert_eq!( + document.debug_errors(), + r#"[1:8]: Brackets format mismatch. Expected Braces, Brackets, Parenthesis, or $BracketClose."# + ); + + unsafe { VERSION = 10 }; + + document.write(2..2, "("); + assert_eq!(document.debug_print(), "7:0<7:0(10:0[10:0(8:0{})5:0[]])>"); + assert_eq!(document.substring(..), "([({foo})[] bXar] baz)"); + assert_eq!(document.debug_errors(), r#""#); + + unsafe { VERSION = 11 }; + + document.write(7..8, ""); + assert_eq!(document.debug_print(), "7:0<7:0(10:0[10:0(11:1{})5:0[]])>"); + assert_eq!(document.substring(..), "([({foo)[] bXar] baz)"); + assert_eq!( + document.debug_errors(), + r#"[1:8]: Braces format mismatch. Expected Braces, Brackets, Parenthesis, or $BraceClose."# + ); + + unsafe { VERSION = 12 }; + + document.write(7..7, "}"); + assert_eq!(document.debug_print(), "7:0<7:0(10:0[10:0(12:0{})5:0[]])>"); + assert_eq!(document.substring(..), "([({foo})[] bXar] baz)"); + assert_eq!(document.debug_errors(), r#""#); +} diff --git a/work/crates/examples/tests/document.rs b/work/crates/examples/tests/document.rs new file mode 100644 index 0000000..4d9dcab --- /dev/null +++ b/work/crates/examples/tests/document.rs @@ -0,0 +1,573 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. 
// +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +//TODO check warnings regularly +#![allow(warnings)] + +use std::fmt::{Debug, Display, Formatter}; + +use lady_deirdre::{ + lexis::{CodeContent, LexisSession, SourceCode, Token, TokenCursor}, + syntax::{NoSyntax, SimpleNode}, + Document, +}; + +#[test] +fn test_document_lexis() { + #[derive(Clone, Debug)] + pub enum CustomToken { + A, + B, + C, + F, + } + + impl Token for CustomToken { + #[inline] + fn new(session: &mut impl LexisSession) -> Self { + let mut kind = 0; + + loop { + let current = session.character(); + session.advance(); + + if current == '\0' { + break; + } + + let next = session.character(); + + match (kind, current, next) { + // Token A + (0, '1', '1') => { + kind = 1; + } + + (0, '1', _) => { + kind = 1; + session.submit(); + break; + } + + (1, '1', '1') => (), + + (1, '1', _) => { + session.submit(); + break; + } + + // Token B + (0, '2', '2') => { + kind = 2; + } + + (0, '2', _) => { + kind = 2; + session.submit(); + break; + } + + (2, '2', '2') => (), + + (2, '2', _) => { + session.submit(); + break; + } + + // Token C + (0, '3', '3') => { + kind = 2; + } + + (0, '3', _) => { + kind = 2; + session.submit(); + break; + } + + (2, '3', '3') => (), + + (2, '3', _) => { + session.submit(); + break; + } + + _ => break, + } + } + + match kind { + 0 => CustomToken::F, + 1 => CustomToken::A, + 2 => CustomToken::B, + 3 => CustomToken::C, + _ => unreachable!(), + } + } + } + + impl Display for CustomToken { + fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result { + Debug::fmt(self, formatter) + } + } + + let mut document = Document::>::default(); + + document.write(.., "111222111"); + + assert_eq!(document.length(), 9); + assert_eq!( + "111|222|111", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|3|6", + document + .chunks(..) 
+ .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "111|222", + document + .chunks(0..5) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "111|222|111", + document + .chunks(3..6) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "222", + document + .chunks(4..4) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + + document.write(0..0, "1"); + + assert_eq!( + "1111|222|111", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|4|7", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + + document.write(4..4, "1"); + + assert_eq!( + "11111|222|111", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|5|8", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + + document.write(5..5, "2"); + + assert_eq!( + "11111|2222|111", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|5|9", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + + document.write(5..5, "$"); + + assert_eq!( + "11111|$|2222|111", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|F|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|5|6|10", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + + document.write(5..5, "1"); + + assert_eq!( + "111111|$|2222|111", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|F|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|6|7|11", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + + document.write(7..7, "@"); + + assert_eq!( + "111111|$@|2222|111", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|F|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|6|8|12", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + + document.write(1..5, "2"); + + assert_eq!( + "1|2|1|$@|2222|111", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|B|A|F|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|1|2|3|5|9", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + + document.write(2..5, ""); + + assert_eq!( + "1|22222|111", + document + .chunks(..) 
+ .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|1|6", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + + document.write(100..100, "11"); + + assert_eq!( + "1|22222|11111", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|1|6", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + + document.write(8..11, ""); + + assert_eq!( + "1|22222|11", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|B|A", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|1|6", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); + + document.write(8..8, "2"); + + assert_eq!( + "1|22222|11|2", + document + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "A|B|A|B", + document + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|"), + ); + assert_eq!( + "0|1|6|8", + document + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|"), + ); +} + +#[test] +fn test_document_write() { + let mut document = Document::::default(); + + assert_eq!(document.substring(..), ""); + + document.write(.., "foo bar"); + + assert_eq!(document.substring(..), "foo bar"); + + document.write(.., "foo Xbar"); + + assert_eq!(document.substring(..), "foo Xbar"); + + document.write(0..0, "123 "); + + assert_eq!(document.substring(..), "123 foo Xbar"); + + document.write(100.., "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"); + + assert_eq!( + document.substring(..), + "123 foo Xbar1 2 3 4 5 6 7 8 9 10 11 12 13 14 15", + ); + + assert_eq!(document.length(), 47); + assert_eq!(document.token_count(), 33); + assert_eq!(document.cursor(..).string(0).unwrap(), "123"); + assert_eq!(document.cursor(..).string(1).unwrap(), " "); + assert_eq!(document.cursor(..).string(2).unwrap(), "foo"); + assert_eq!(document.cursor(..).string(3).unwrap(), " "); + assert_eq!(document.cursor(..).string(4).unwrap(), "Xbar1"); + assert_eq!(document.cursor(..).string(5).unwrap(), " "); + assert_eq!(document.cursor(..).string(6).unwrap(), "2"); + + document.write(6..10, ""); + + assert_eq!( + document.substring(..), + "123 foar1 2 3 4 5 6 7 8 9 10 11 12 13 14 15", + ); + + document.write(9..10, ""); + + assert_eq!( + document.substring(..), + "123 foar12 3 4 5 6 7 8 9 10 11 12 13 14 15", + ); + + assert_eq!(document.length(), 42); + assert_eq!(document.token_count(), 29); + assert_eq!(document.cursor(..).string(0).unwrap(), "123"); + assert_eq!(document.cursor(..).string(1).unwrap(), " "); + assert_eq!(document.cursor(..).string(2).unwrap(), "foar12"); + assert_eq!(document.cursor(..).string(3).unwrap(), " "); + assert_eq!(document.cursor(..).string(4).unwrap(), "3"); + assert_eq!(document.cursor(..).string(5).unwrap(), " "); + assert_eq!(document.cursor(..).string(6).unwrap(), "4"); + assert_eq!(document.cursor(6..7).string(0).unwrap(), "foar12"); + assert!(document.cursor(6..7).string(1).is_none()); + + document.write(4..36, ""); + + 
assert_eq!(document.length(), 10); + assert_eq!(document.token_count(), 5); + assert_eq!(document.substring(..), "123 14 15"); + assert_eq!(document.cursor(..).string(0).unwrap(), "123"); + assert_eq!(document.cursor(..).string(1).unwrap(), " "); + assert_eq!(document.cursor(..).string(2).unwrap(), "14"); + assert_eq!(document.cursor(..).string(3).unwrap(), " "); + assert_eq!(document.cursor(..).string(4).unwrap(), "15"); + assert!(document.cursor(..).string(5).is_none()); +} diff --git a/work/crates/examples/tests/iteration.rs b/work/crates/examples/tests/iteration.rs new file mode 100644 index 0000000..a2a82b0 --- /dev/null +++ b/work/crates/examples/tests/iteration.rs @@ -0,0 +1,155 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use lady_deirdre::{ + lexis::{CodeContent, SimpleToken, SourceCode, ToSite, TokenBuffer, TokenCursor}, + syntax::NoSyntax, + Document, +}; + +#[test] +fn test_chunk_iterator() { + tests(TokenBuffer::::from( + "public foo() { x = 100 + 2.0 - 'a'[\"bar\"]; }", + )); + tests(Document::>::from( + "public foo() { x = 100 + 2.0 - 'a'[\"bar\"]; }", + )); + + fn tests(code: impl SourceCode) { + assert_eq!( + "public| |foo|(|)| |{| |x| |=| |100| |+| |2.0| |-| |'a'|[|\"bar\"|]|;| |}", + code.chunks(..) 
+ .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + + assert_eq!( + "public| |foo", + code.chunks(2..7) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + + let mut cursor = code.cursor(2..7); + + assert_eq!(cursor.site_ref(0).to_site(&code).unwrap(), 0); + assert_eq!(cursor.token_ref(0).string(&code).unwrap(), "public"); + assert!(matches!(cursor.string(0), Some(string) if string == "public")); + + assert_eq!(cursor.site_ref(1).to_site(&code).unwrap(), 6); + assert_eq!(cursor.token_ref(1).string(&code).unwrap(), " "); + assert!(matches!(cursor.string(1), Some(string) if string == " ")); + + assert_eq!(cursor.site_ref(2).to_site(&code).unwrap(), 7); + assert_eq!(cursor.token_ref(2).string(&code).unwrap(), "foo"); + assert!(matches!(cursor.string(2), Some(string) if string == "foo")); + + assert_eq!(cursor.site_ref(3).to_site(&code).unwrap(), 10); + assert!(!cursor.token_ref(3).is_valid_ref(&code)); + assert!(matches!(cursor.string(3), None)); + + assert_eq!(cursor.site_ref(1).to_site(&code).unwrap(), 6); + assert_eq!(cursor.token_ref(1).string(&code).unwrap(), " "); + assert!(matches!(cursor.string(1), Some(string) if string == " ")); + + assert!(cursor.advance()); + + assert_eq!(cursor.site_ref(0).to_site(&code).unwrap(), 6); + assert_eq!(cursor.token_ref(0).string(&code).unwrap(), " "); + assert!(matches!(cursor.string(0), Some(string) if string == " ")); + + assert_eq!(cursor.site_ref(1).to_site(&code).unwrap(), 7); + assert_eq!(cursor.token_ref(1).string(&code).unwrap(), "foo"); + assert!(matches!(cursor.string(1), Some(string) if string == "foo")); + + assert_eq!(cursor.site_ref(2).to_site(&code).unwrap(), 10); + assert!(!cursor.token_ref(2).is_valid_ref(&code)); + assert!(matches!(cursor.string(2), None)); + + assert!(cursor.advance()); + assert!(cursor.advance()); + + assert_eq!(cursor.site_ref(0).to_site(&code).unwrap(), 10); + assert!(!cursor.token_ref(0).is_valid_ref(&code)); + assert!(matches!(cursor.string(0), None)); + } +} + +#[test] +fn test_empty_chunk_iterator() { + tests(Document::>::default()); + tests(TokenBuffer::::default()); + + fn tests(code: impl SourceCode) { + assert!(code.chunks(..).collect::>().is_empty()); + assert!(code.chunks(2..7).collect::>().is_empty()); + + let mut cursor = code.cursor(2..7); + + assert_eq!(cursor.site_ref(0).to_site(&code).unwrap(), 0); + assert_eq!(cursor.site_ref(1).to_site(&code).unwrap(), 0); + assert!(!cursor.token_ref(0).is_valid_ref(&code)); + assert!(!cursor.token_ref(1).is_valid_ref(&code)); + assert!(matches!(cursor.string(0), None)); + assert!(matches!(cursor.string(1), None)); + + assert!(!cursor.advance()); + + assert_eq!(cursor.site_ref(0).to_site(&code).unwrap(), 0); + assert_eq!(cursor.site_ref(1).to_site(&code).unwrap(), 0); + assert!(!cursor.token_ref(0).is_valid_ref(&code)); + assert!(!cursor.token_ref(1).is_valid_ref(&code)); + assert!(matches!(cursor.string(0), None)); + assert!(matches!(cursor.string(1), None)); + } +} + +#[test] +fn test_char_iterator() { + tests(TokenBuffer::::from("foo bar baz")); + tests(Document::>::from("foo bar baz")); + + fn tests(code: impl SourceCode) { + assert_eq!("foo bar baz", code.substring(..)); + assert_eq!("oo bar b", code.substring(1..9)); + assert_eq!("", code.substring(100..)); + assert_eq!("", code.substring(2..2)); + } +} diff --git a/work/crates/examples/tests/json.rs b/work/crates/examples/tests/json.rs new file mode 100644 index 0000000..8aabd2d --- /dev/null +++ b/work/crates/examples/tests/json.rs @@ -0,0 +1,517 @@ 
+//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +//TODO check warnings regularly +#![allow(warnings)] + +use lady_deirdre::{ + lexis::{CodeContent, SourceCode, ToSpan, TokenBuffer, TokenRef}, + syntax::{Node, NodeRef, SyntaxError, SyntaxTree}, + Document, +}; +use lady_deirdre_examples::json::{formatter::JsonFormatter, lexis::JsonToken, syntax::JsonNode}; + +#[test] +fn test_json_success() { + static SNIPPET: &'static str = + r#"{"foo": [1, 3, true, false, null, {"a": "xyz", "b": null}], "baz": {}}"#; + + let code = TokenBuffer::::from(SNIPPET); + + assert_eq!(SNIPPET, code.transduce(JsonFormatter)); + + let tree = JsonNode::parse(code.cursor(..)); + + assert!(tree.errors().collect::>().is_empty()); +} + +#[test] +fn test_json_errors_1() { + let code = TokenBuffer::::from( + r#"{FOO "foo": [1, 3, true, false, null, {"a": "xyz", "b": null}], "baz": {}}"#, + ); + + assert_eq!( + r#"{"foo": [1, 3, true, false, null, {"a": "xyz", "b": null}], "baz": {}}"#, + code.transduce(JsonFormatter) + ); + + let tree = JsonNode::parse(code.cursor(..)); + + assert_eq!( + "[1:2] - [1:4]: Object format mismatch. 
Expected Entry or $BraceClose.", + tree.errors() + .map(|error| format!("{}: {}", error.span().format(&code), error)) + .collect::>() + .join("\n") + ); +} + +#[test] +fn test_json_errors_2() { + let code = TokenBuffer::::from( + r#"{"foo": [1, 3 true, false, null, {"a": "xyz", "b": null}], "baz": {}}"#, + ); + + assert_eq!( + r#"{"foo": [1, 3, true, false, null, {"a": "xyz", "b": null}], "baz": {}}"#, + code.transduce(JsonFormatter) + ); + + let tree = JsonNode::parse(code.cursor(..)); + + assert_eq!( + "[1:14]: Missing $Comma in Array.", + tree.errors() + .map(|error| format!("{}: {}", error.span().format(&code), error)) + .collect::>() + .join("\n") + ); +} + +#[test] +fn test_json_errors_3() { + let code = TokenBuffer::::from( + r#"{"foo": [1, 3,, false, null, {"a": "xyz", "b": null}], "baz": {}}"#, + ); + + assert_eq!( + r#"{"foo": [1, 3, false, null, {"a": "xyz", "b": null}], "baz": {}}"#, + code.transduce(JsonFormatter) + ); + + let tree = JsonNode::parse(code.cursor(..)); + + assert_eq!( + "[1:15]: Array format mismatch. Expected Array, False, Null, Number, Object, String, or True.", + tree.errors() + .map(|error| format!("{}: {}", error.span().format(&code), error)) + .collect::>() + .join("\n") + ); +} + +#[test] +fn test_json_errors_4() { + let code = TokenBuffer::::from( + r#"{"foo": [1, 3, true, false, null, "a": "xyz", "b": null}], "baz": {}}"#, + ); + + assert_eq!( + r#"{"foo": [1, 3, true, false, null, "a", "b"], "baz": {}}"#, + code.transduce(JsonFormatter) + ); + + let tree = JsonNode::parse(code.cursor(..)); + + assert_eq!( + "[1:38] - [1:44]: Array format mismatch. Expected $BracketClose or $Comma.\n\ + [1:50] - [1:56]: Array format mismatch. Expected $BracketClose or $Comma.", + tree.errors() + .map(|error| format!("{}: {}", error.span().format(&code), error)) + .collect::>() + .join("\n") + ); +} + +#[test] +fn test_json_errors_5() { + let code = TokenBuffer::::from(r#"{"outer": [{"a": "xyz",] "b": null}, "baz"]}"#); + + assert_eq!( + r#"{"outer": [{"a": "xyz", "b": null}, "baz"]}"#, + code.transduce(JsonFormatter) + ); + + let tree = JsonNode::parse(code.cursor(..)); + + assert_eq!( + "[1:24]: Object format mismatch. Expected Entry.", + tree.errors() + .map(|error| format!("{}: {}", error.span().format(&code), error)) + .collect::>() + .join("\n") + ); +} + +#[test] +fn test_json_errors_6() { + let code = TokenBuffer::::from(r#"{"outer": [{"a": ], "b": null}, "baz"]}"#); + + assert_eq!( + r#"{"outer": [{"a": ?, "b": null}, "baz"]}"#, + code.transduce(JsonFormatter) + ); + + let tree = JsonNode::parse(code.cursor(..)); + + assert_eq!( + "[1:18]: Entry format mismatch. Expected Array, False, Null, Number, Object, String, or True.\n\ + [1:18]: Object format mismatch. Expected $BraceClose or $Comma.", + tree.errors() + .map(|error| format!("{}: {}", error.span().format(&code), error)) + .collect::>() + .join("\n") + ); +} + +#[test] +fn test_json_errors_7() { + let code = TokenBuffer::::from( + r#"{"outer": [{"a": [, "b": null}, "baz"], "outer2", "outer3": 12345}"#, + ); + + assert_eq!( + r#"{"outer": [{"a": ["b", "baz"], "outer2": 12345}]}"#, + code.transduce(JsonFormatter) + ); + + let tree = JsonNode::parse(code.cursor(..)); + + assert_eq!( + "[1:19]: Array format mismatch. Expected Array, False, Null, Number, Object, String, True, or $BracketClose.\n\ + [1:24] - [1:30]: Array format mismatch. Expected $BracketClose or $Comma.\n\ + [1:49] - [1:58]: Entry format mismatch. Expected $Colon.\n\ + [1:67]: Array format mismatch. 
Expected $BracketClose or $Comma.\n[1:67]: Object format mismatch. Expected $BraceClose or $Comma.", + tree.errors() + .map(|error| format!("{}: {}", error.span().format(&code), error)) + .collect::>() + .join("\n") + ); +} + +#[test] +fn test_json_incremental() { + static mut VERSION: usize = 0; + + #[derive(Node, Clone)] + #[token(JsonToken)] + #[error(SyntaxError)] + #[skip($Whitespace)] + #[define(ANY = Object | Array | True | False | String | Number | Null)] + pub enum DebugNode { + #[root] + #[rule(object: Object)] + Root { + #[default(unsafe { VERSION })] + version: usize, + object: NodeRef, + }, + + #[rule($BraceOpen & (entries: Entry)*{$Comma} & $BraceClose)] + #[synchronization] + Object { + #[default(unsafe { VERSION })] + version: usize, + entries: Vec, + }, + + #[rule(key: $String & $Colon & value: ANY)] + Entry { + #[default(unsafe { VERSION })] + version: usize, + key: TokenRef, + value: NodeRef, + }, + + #[rule($BracketOpen & (items: ANY)*{$Comma} & $BracketClose)] + #[synchronization] + Array { + #[default(unsafe { VERSION })] + version: usize, + items: Vec, + }, + + #[rule(value: $String)] + String { + #[default(unsafe { VERSION })] + version: usize, + value: TokenRef, + }, + + #[rule(value: $Number)] + Number { + #[default(unsafe { VERSION })] + version: usize, + value: TokenRef, + }, + + #[rule($True)] + True { + #[default(unsafe { VERSION })] + version: usize, + }, + + #[rule($False)] + False { + #[default(unsafe { VERSION })] + version: usize, + }, + + #[rule($Null)] + Null { + #[default(unsafe { VERSION })] + version: usize, + }, + } + + trait DebugPrint { + fn debug_print(&self) -> String; + + fn debug_errors(&self) -> String; + } + + impl DebugPrint for Document { + fn debug_print(&self) -> String { + fn traverse(document: &Document, node_ref: &NodeRef) -> String { + let node = match node_ref.deref(document) { + None => return format!("?"), + Some(node) => node, + }; + + match node { + DebugNode::Root { version, object } => { + format!("{}({})", version, traverse(document, object)) + } + + DebugNode::Object { version, entries } => { + format!( + "{}({{{}}})", + version, + entries + .into_iter() + .map(|node_ref| traverse(document, node_ref)) + .collect::>() + .join(", "), + ) + } + + DebugNode::Array { version, items } => { + format!( + "{}([{}])", + version, + items + .into_iter() + .map(|node_ref| traverse(document, node_ref)) + .collect::>() + .join(", "), + ) + } + + DebugNode::Entry { + version, + key, + value, + } => { + format!( + "{}({:#}: {})", + version, + key.string(document).unwrap_or("?"), + traverse(document, value), + ) + } + + DebugNode::String { version, value } | DebugNode::Number { version, value } => { + format!("{}({})", version, value.string(document).unwrap_or("?")) + } + + DebugNode::True { version } => format!("{}(true)", version), + + DebugNode::False { version } => format!("{}(false)", version), + + DebugNode::Null { version } => format!("{}(null)", version), + } + } + + traverse(self, self.root()) + } + + fn debug_errors(&self) -> String { + self.errors() + .map(|error| format!("{}: {}", error.span().format(self), error)) + .collect::>() + .join("\n") + } + } + + unsafe { VERSION = 0 } + + let mut document = Document::::from(""); + assert_eq!(document.substring(..), r#""#); + assert_eq!( + document.debug_errors(), + "[1:1]: Root format mismatch. 
Expected Object.", + ); + assert_eq!(document.transduce(JsonFormatter), r#"?"#); + assert_eq!(document.debug_print(), r#"0(?)"#); + + unsafe { VERSION = 1 } + + document.write(0..0, "{"); + assert_eq!(document.substring(..), r#"{"#); + assert_eq!( + document.debug_errors(), + "[1:2]: Object format mismatch. Expected Entry or $BraceClose.", + ); + assert_eq!(document.transduce(JsonFormatter), r#"{}"#); + assert_eq!(document.debug_print(), r#"1(1({}))"#); + + unsafe { VERSION = 2 } + + document.write(1..1, "}"); + assert_eq!(document.substring(..), r#"{}"#); + assert_eq!(document.transduce(JsonFormatter), r#"{}"#); + assert_eq!(document.debug_print(), r#"2(2({}))"#); + + unsafe { VERSION = 3 } + + document.write(1..1, r#""foo""#); + assert_eq!(document.substring(..), r#"{"foo"}"#); + assert_eq!( + document.debug_errors(), + "[1:7]: Entry format mismatch. Expected $Colon." + ); + assert_eq!(document.transduce(JsonFormatter), r#"{"foo": ?}"#); + assert_eq!(document.debug_print(), r#"3(3({3("foo": ?)}))"#); + + unsafe { VERSION = 4 } + + document.write( + 6..6, + r#"[1, 3, true, false, null, {"a": "xyz", "b": null}]"#, + ); + assert_eq!( + document.substring(..), + r#"{"foo"[1, 3, true, false, null, {"a": "xyz", "b": null}]}"# + ); + assert_eq!(document.debug_errors(), "[1:7]: Missing $Colon in Entry."); + assert_eq!( + document.transduce(JsonFormatter), + r#"{"foo": [1, 3, true, false, null, {"a": "xyz", "b": null}]}"# + ); + assert_eq!( + document.debug_print(), + r#"3(3({4("foo": 4([4(1), 4(3), 4(true), 4(false), 4(null), 4({4("a": 4("xyz")), 4("b": 4(null))})]))}))"# + ); + + unsafe { VERSION = 5 } + + document.write(6..6, r#" :"#); + assert_eq!( + document.substring(..), + r#"{"foo" :[1, 3, true, false, null, {"a": "xyz", "b": null}]}"# + ); + assert_eq!(document.debug_errors(), ""); + assert_eq!( + document.transduce(JsonFormatter), + r#"{"foo": [1, 3, true, false, null, {"a": "xyz", "b": null}]}"# + ); + assert_eq!( + document.debug_print(), + r#"3(3({5("foo": 4([4(1), 4(3), 4(true), 4(false), 4(null), 4({4("a": 4("xyz")), 4("b": 4(null))})]))}))"# + ); + + unsafe { VERSION = 6 } + + document.write(6..8, r#": "#); + assert_eq!( + document.substring(..), + r#"{"foo": [1, 3, true, false, null, {"a": "xyz", "b": null}]}"# + ); + assert_eq!( + document.transduce(JsonFormatter), + r#"{"foo": [1, 3, true, false, null, {"a": "xyz", "b": null}]}"# + ); + assert_eq!( + document.debug_print(), + r#"3(3({6("foo": 4([4(1), 4(3), 4(true), 4(false), 4(null), 4({4("a": 4("xyz")), 4("b": 4(null))})]))}))"# + ); + + unsafe { VERSION = 7 } + + document.write(8..34, r#""#); + assert_eq!( + document.substring(..), + r#"{"foo": {"a": "xyz", "b": null}]}"# + ); + assert_eq!( + document.debug_errors(), + "[1:32]: Object format mismatch. Expected $BraceClose or $Comma." 
+ ); + assert_eq!( + document.transduce(JsonFormatter), + r#"{"foo": {"a": "xyz", "b": null}}"# + ); + assert_eq!( + document.debug_print(), + r#"7(7({7("foo": 4({4("a": 4("xyz")), 4("b": 4(null))}))}))"# + ); + + unsafe { VERSION = 8 } + + document.write(31..32, r#""#); + assert_eq!( + document.substring(..), + r#"{"foo": {"a": "xyz", "b": null}}"# + ); + assert_eq!(document.debug_errors(), ""); + assert_eq!( + document.transduce(JsonFormatter), + r#"{"foo": {"a": "xyz", "b": null}}"# + ); + assert_eq!( + document.debug_print(), + r#"8(8({8("foo": 8({4("a": 4("xyz")), 4("b": 4(null))}))}))"# + ); + + unsafe { VERSION = 9 } + + document.write(14..14, r#"111, "c": "#); + assert_eq!( + document.substring(..), + r#"{"foo": {"a": 111, "c": "xyz", "b": null}}"# + ); + assert_eq!(document.debug_errors(), ""); + assert_eq!( + document.transduce(JsonFormatter), + r#"{"foo": {"a": 111, "c": "xyz", "b": null}}"# + ); + assert_eq!( + document.debug_print(), + r#"8(8({8("foo": 9({9("a": 9(111)), 9("c": 4("xyz")), 4("b": 4(null))}))}))"# + ); +} diff --git a/work/crates/examples/tests/position.rs b/work/crates/examples/tests/position.rs new file mode 100644 index 0000000..3243c29 --- /dev/null +++ b/work/crates/examples/tests/position.rs @@ -0,0 +1,91 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
// +//////////////////////////////////////////////////////////////////////////////// + +use lady_deirdre::{ + lexis::{Position, SimpleToken, SourceCode, ToPosition, ToSite, TokenBuffer}, + syntax::NoSyntax, + Document, +}; + +#[test] +fn test_position_to_site() { + tests(TokenBuffer::::from("foo \n bar \r\nbaz")); + tests(Document::>::from( + "foo \n bar \r\nbaz", + )); + + fn tests(code: impl SourceCode) { + assert_eq!(0, Position::new(0, 10).to_site(&code).unwrap()); + assert_eq!(0, Position::new(1, 1).to_site(&code).unwrap()); + assert_eq!(1, Position::new(1, 2).to_site(&code).unwrap()); + assert_eq!(4, Position::new(1, 10).to_site(&code).unwrap()); + assert_eq!(5, Position::new(2, 1).to_site(&code).unwrap()); + assert_eq!(9, Position::new(2, 5).to_site(&code).unwrap()); + assert_eq!(10, Position::new(2, 10).to_site(&code).unwrap()); + assert_eq!(12, Position::new(3, 0).to_site(&code).unwrap()); + assert_eq!(12, Position::new(3, 1).to_site(&code).unwrap()); + assert_eq!(13, Position::new(3, 2).to_site(&code).unwrap()); + assert_eq!(15, Position::new(3, 4).to_site(&code).unwrap()); + } +} + +#[test] +fn test_site_to_position() { + tests(TokenBuffer::::from("foo \n bar \r\nbaz")); + tests(Document::>::from( + "foo \n bar \r\nbaz", + )); + + fn tests(code: impl SourceCode) { + assert_eq!(Position::new(1, 1), 0.to_position(&code).unwrap()); + assert_eq!(Position::new(1, 2), 1.to_position(&code).unwrap()); + assert_eq!(Position::new(1, 4), 3.to_position(&code).unwrap()); + assert_eq!(Position::new(1, 5), 4.to_position(&code).unwrap()); + assert_eq!(Position::new(2, 1), 5.to_position(&code).unwrap()); + assert_eq!(Position::new(2, 2), 6.to_position(&code).unwrap()); + assert_eq!(Position::new(2, 3), 7.to_position(&code).unwrap()); + assert_eq!(Position::new(2, 4), 8.to_position(&code).unwrap()); + assert_eq!(Position::new(2, 5), 9.to_position(&code).unwrap()); + assert_eq!(Position::new(2, 6), 10.to_position(&code).unwrap()); + assert_eq!(Position::new(2, 7), 11.to_position(&code).unwrap()); + assert_eq!(Position::new(3, 1), 12.to_position(&code).unwrap()); + assert_eq!(Position::new(3, 2), 13.to_position(&code).unwrap()); + assert_eq!(Position::new(3, 3), 14.to_position(&code).unwrap()); + assert_eq!(Position::new(3, 4), 15.to_position(&code).unwrap()); + assert_eq!(Position::new(3, 4), 16.to_position(&code).unwrap()); + } +} diff --git a/work/crates/examples/tests/token.rs b/work/crates/examples/tests/token.rs new file mode 100644 index 0000000..1f74b93 --- /dev/null +++ b/work/crates/examples/tests/token.rs @@ -0,0 +1,73 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. 
// +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use lady_deirdre::lexis::{CodeContent, SimpleToken, TokenBuffer}; + +#[test] +fn test_default_token() { + let buffer = TokenBuffer::::from("public foo() { x = 100 + 2.0 - 'a'[\"bar\"]; }"); + + assert_eq!( + "public| |foo|(|)| |{| |x| |=| |100| |+| |2.0| |-| |'a'|[|\"bar\"|]|;| |}", + buffer + .chunks(..) + .map(|chunk| chunk.string.to_string()) + .collect::>() + .join("|"), + ); + + assert_eq!( + "Identifier|Whitespace|Identifier|ParenOpen|ParenClose|Whitespace|BraceOpen|\ + Whitespace|Identifier|Whitespace|Symbol|Whitespace|Number|Whitespace|Symbol|\ + Whitespace|Number|Whitespace|Symbol|Whitespace|Char|BracketOpen|String|BracketClose|\ + Symbol|Whitespace|BraceClose", + buffer + .chunks(..) + .map(|chunk| chunk.token.to_string()) + .collect::>() + .join("|") + ); + + assert_eq!( + "0|6|7|10|11|12|13|14|15|16|17|18|19|22|23|24|25|28|29|30|31|34|35|40|41|42|43", + buffer + .chunks(..) + .map(|chunk| chunk.site.to_string()) + .collect::>() + .join("|") + ); +} diff --git a/work/crates/main/Cargo.toml b/work/crates/main/Cargo.toml new file mode 100644 index 0000000..5aa0c10 --- /dev/null +++ b/work/crates/main/Cargo.toml @@ -0,0 +1,66 @@ +################################################################################ +# This file is a part of the "Lady Deirdre" Work, # +# a compiler front-end foundation technology. # +# # +# This Work is a proprietary software with source available code. # +# # +# To copy, use, distribute, and contribute into this Work you must agree to # +# the terms of the End User License Agreement: # +# # +# https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. # +# # +# The Agreement let you use this Work in commercial and non-commercial # +# purposes. Commercial use of the Work is free of charge to start, # +# but the Agreement obligates you to pay me royalties # +# under certain conditions. # +# # +# If you want to contribute into the source code of this Work, # +# the Agreement obligates you to assign me all exclusive rights to # +# the Derivative Work or contribution made by you # +# (this includes GitHub forks and pull requests to my repository). # +# # +# The Agreement does not limit rights of the third party software developers # +# as long as the third party software uses public API of this Work only, # +# and the third party software does not incorporate or distribute # +# this Work directly. 
#
+# #
+# AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY #
+# OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES #
+# RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. #
+# #
+# If you do not or cannot agree to the terms of this Agreement, #
+# do not use this Work. #
+# #
+# Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). #
+# All rights reserved. #
+################################################################################
+
+[package]
+name = "lady-deirdre"
+version = "1.0.0"
+authors = ["Ilya Lakhin (Илья Александрович Лахин) "]
+edition = "2021"
+description = "Compiler front-end foundation technology. Main Crate."
+keywords = ["parsing", "parser", "incremental", "compiler", "editor"]
+categories = ["compilers", "data-structures", "no-std", "parsing", "text-editors"]
+readme = "./readme.md"
+license-file = "../../../EULA.md"
+documentation = "https://docs.rs/lady-deirdre"
+repository = "https://github.com/Eliah-Lakhin/lady-deirdre"
+rust-version = "1.65"
+publish = true
+autobins = false
+autoexamples = false
+autotests = false
+autobenches = false
+
+[features]
+default = ["std"]
+
+# Turns on the Rust Standard Library dependency.
+std = ["lady-deirdre-derive/std"]
+
+[dependencies.lady-deirdre-derive]
+version = "1.0"
+path = "../derive"
+default-features = false
diff --git a/work/crates/main/readme.md b/work/crates/main/readme.md
new file mode 100644
index 0000000..862c5d0
--- /dev/null
+++ b/work/crates/main/readme.md
@@ -0,0 +1,389 @@
+# Lady Deirdre.
+
+[![Lady Deirdre Main Crate API Docs](https://img.shields.io/docsrs/lady-deirdre?label=Main%20Docs)](https://docs.rs/lady-deirdre)
+[![Lady Deirdre Macro Crate API Docs](https://img.shields.io/docsrs/lady-deirdre-derive?label=Macro%20Docs)](https://docs.rs/lady-deirdre-derive)
+[![Lady Deirdre Main Crate](https://img.shields.io/crates/v/lady-deirdre?label=Main%20Crate)](https://crates.io/crates/lady-deirdre)
+[![Lady Deirdre Macro Crate](https://img.shields.io/crates/v/lady-deirdre-derive?label=Macro%20Crate)](https://crates.io/crates/lady-deirdre-derive)
+
+Compiler front-end foundation technology.
+
+If you want to create your own programming language with IDE support from
+day one, to develop a new IDE from scratch, or to build a programming
+language LSP plugin, this Technology is for you!
+
+Lady Deirdre provides a framework to develop a Lexical Scanner, a Syntax
+Parser, and a Semantic Analyser that can work in a live coding environment,
+applying user-input changes incrementally to all underlying data structures.
+
+This Technology represents a set of essential instruments to develop modern
+programming language compilers with seamless IDE integration.
+
+**Features**:
+
+ - Written entirely in Rust.
+ - Derive macros to define a PL's Grammar directly on enum types.
+ - Smart error recovery system out of the box.
+ - Dependency-free, no-std-ready API.
+ - Works faster than Tree-Sitter.
+
+**Links:**
+ - [Main Crate API Documentation](https://docs.rs/lady-deirdre).
+ - [Macro Crate API Documentation](https://docs.rs/lady-deirdre-derive).
+ - [Repository](https://github.com/Eliah-Lakhin/lady-deirdre).
+ - [Examples, Tests, Benchmarks](https://github.com/Eliah-Lakhin/lady-deirdre/tree/master/work/crates/examples).
+ - [End User License Agreement](https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md).
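+
+For instance, the two levels of a trivial grammar could look like the
+following compressed sketch, modeled on the JSON example shipped in the
+repository's `examples` crate (the `BoolToken` and `BoolNode` names are
+made up for illustration):
+
+```rust
+use lady_deirdre::{
+    lexis::Token,
+    syntax::{Node, NodeRef, SyntaxError},
+};
+
+// The lexical grammar: scanning rules are regular expressions attached
+// to the enum variants.
+#[derive(Token, Clone, Copy)]
+pub enum BoolToken {
+    #[rule("true")]
+    True,
+
+    #[rule("false")]
+    False,
+
+    #[rule([' ', '\t', '\n', '\x0c', '\r']+)]
+    Whitespace,
+
+    #[mismatch]
+    Mismatch,
+}
+
+// The syntax grammar: parsing rules reference the token type, and the
+// captured child nodes are stored in the variant fields.
+#[derive(Node, Clone)]
+#[token(BoolToken)]
+#[error(SyntaxError)]
+#[skip($Whitespace)]
+#[define(ANY = True | False)]
+pub enum BoolNode {
+    #[root]
+    #[rule(values: ANY*)]
+    Root { values: Vec<NodeRef> },
+
+    #[rule($True)]
+    True,
+
+    #[rule($False)]
+    False,
+}
+```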
+
+**This Work is a proprietary software with source available code.**
+
+To copy, use, distribute, and contribute into this Work you must agree to
+the terms of the
+[End User License Agreement](https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md).
+
+The Agreement let you use this Work in commercial and non-commercial purposes.
+Commercial use of the Work is free of charge to start, but the Agreement
+obligates you to pay me royalties under certain conditions.
+
+If you want to contribute into the source code of this Work, the Agreement
+obligates you to assign me all exclusive rights to the Derivative Work made by
+you (this includes GitHub forks and pull requests to my repository).
+
+The Agreement does not limit rights of the third party software developers as
+long as the third party software uses public API of this Work only, and the
+third party software does not incorporate or distribute this Work directly.
+
+If you do not or cannot agree to the terms of this Agreement, do not use
+this Work.
+
+Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). All rights reserved.
+
+# Main Crate API Documentation.
+
+This is the Main API Crate of the Lady Deirdre Technology.
+
+This Crate, together with the
+[`Macro Crate`](https://docs.rs/lady-deirdre-derive), provides a sufficient
+set of tools to construct an incremental compilation system for a Programming
+Language.
+
+## Architecture overview.
+
+### Programming Language Grammar.
+
+The Technology deals with the syntax and lexis grammar analysis of the language
+independently. An API user should either define both levels of the grammar
+separately, or define just the lexical grammar, bypassing the syntax parsing
+stage. The [`Macro Crate`](https://docs.rs/lady-deirdre-derive) provides two
+derive macros to define lexis and syntax on custom enum types by specifying
+parsing rules on the enum variants directly through the macro attributes.
+Alternatively, you can implement syntax and/or lexis parsers manually by
+implementing the corresponding trait functions.
+
+The [Lexis module](crate::lexis) contains everything related to the lexical
+analysis, and the [Syntax module](crate::syntax), in turn, contains everything
+related to the syntax analysis.
+
+### Parsing Process.
+
+The parsing process (of both the lexis and the syntax grammar) is driven by
+two loosely coupled API layers that interact with each other: one layer is
+responsible for input read and output write operations but does not know
+anything about the grammar, and the other layer performs the actual parsing of
+the local parts of the input data. An API user can customize any of these
+layers depending on the end compilation system design needs. The Crate API
+provides default implementations of these layers that cover most of the
+practical use cases.
+
+For example, the [LexisSession](crate::lexis::LexisSession) trait is a cursor
+over the source code input data of the lexical parsing stage. Its counterpart
+is the [Token](crate::lexis::Token) trait that implements the actual lexical
+scanner for a particular programming language. The
+[Token::new](crate::lexis::Token::new) function accepts a reference to the
+LexisSession, reads some data from the session, moving the session's internal
+cursor forward, and in the end returns an instance of a single token parsed by
+the scanner. An API user normally doesn't need to implement the LexisSession
+trait manually, unless working on an extension of the crate's API.
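+
+The layering can be pictured with a small self-contained sketch. Note that
+this is an illustration of the idea only, not this Crate's real API; all of
+the names below are made up for the example:
+
+```rust
+// The "session" layer: walks the input, and knows nothing about the grammar.
+struct ToySession<'a> {
+    input: &'a [u8],
+    cursor: usize,
+}
+
+impl<'a> ToySession<'a> {
+    fn peek(&self) -> Option<u8> {
+        self.input.get(self.cursor).copied()
+    }
+
+    fn advance(&mut self) {
+        self.cursor += 1;
+    }
+}
+
+// The "grammar" layer: reads from the session, moving its cursor forward,
+// and returns a single scanned token.
+#[derive(Debug, PartialEq)]
+enum ToyToken {
+    Number,
+    Other,
+}
+
+impl ToyToken {
+    fn scan(session: &mut ToySession) -> Self {
+        match session.peek() {
+            Some(byte) if byte.is_ascii_digit() => {
+                while matches!(session.peek(), Some(byte) if byte.is_ascii_digit()) {
+                    session.advance();
+                }
+
+                Self::Number
+            }
+
+            _ => {
+                session.advance();
+
+                Self::Other
+            }
+        }
+    }
+}
+
+let mut session = ToySession { input: b"42+", cursor: 0 };
+
+assert_eq!(ToyToken::scan(&mut session), ToyToken::Number);
+assert_eq!(ToyToken::scan(&mut session), ToyToken::Other);
+```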
+
+For example, the [Document](crate::Document) object under the hood has its own
+implementation of the LexisSession trait that properly interacts with the
+Document's internal data. Usually an API user only needs to implement the
+Token trait (on an enum type) to specify the PL's lexical grammar. The user is
+encouraged to do so using the corresponding
+[Token](::lady_deirdre_derive::Token) derive macro, or, in some unusual cases,
+you can implement this trait manually too.
+
+### Incremental Reparsing and Error Recovery.
+
+The Crate provides objects to parse and to store parsed data in incremental
+and non-incremental ways. The [Document](crate::Document) object is one such
+object that provides incremental reparsing capabilities. The Document instance
+caches parsed data, and is capable of continuing the parsing process from any
+point where the end user wants to update the source code text.
+
+In particular, you can use this object to represent a file opened in a code
+editor.
+
+The parsing infrastructure is resilient to source code errors. The parsing
+process is able to recover from errors efficiently, reconstructing and always
+keeping the syntax tree up to date.
+
+### Data Structures.
+
+Finally, the Technology utilizes a concept of versioned arena memory
+management to provide a framework for such data structures as directional
+graphs where the nodes of the graph reside in a common arena memory. The nodes
+refer to each other through weak, type- and lifetime-independent references
+into this arena. In particular, this framework is used by the Crate to
+organize the mutable Syntax Tree data structure that, depending on the end
+compilation system design, could serve as an Abstract Syntax Tree, and could
+serve as a semantic resolution data structure as well. Read more about this
+concept in the [Arena module documentation](crate::arena).
+
+## Tutorial.
+
+This Tutorial demonstrates how to implement a parser and an interpreter of a
+simple calculator language with
+[S-expressions](https://en.wikipedia.org/wiki/S-expression) syntax.
+
+For the sake of simplicity this calculator allows Sum and Mult operations
+on integer values and their combinations only.
+
+ - `(123)` resolves to `123`.
+ - `(+ 5, 10, 4)` resolves to `19`.
+ - `(* 5, 10)` resolves to `50`.
+ - `(* (+ 7, 2), (+ 4, 8))` resolves to `108`.
+
+```rust
+// First of all, we need to define the programming language's lexical grammar,
+// and a data structure type to store individual token instances.
+
+// Token is an enum type with variants representing the token types.
+// Lexical parsing rules are specified through regular expressions on these
+// variants. The Token derive macro compiles these regular expressions, and
+// implements a Token trait that, in turn, implements a lexical scanner under
+// the hood.
+
+use lady_deirdre::lexis::Token;
+
+#[derive(Token, Debug, PartialEq)]
+enum CalcToken {
+    #[rule("(")]
+    Open,
+
+    #[rule(")")]
+    Close,
+
+    #[rule("+")]
+    Plus,
+
+    #[rule("*")]
+    Mult,
+
+    #[rule(",")]
+    Comma,
+
+    #[rule(['1'..'9'] & ['0'..'9']* | '0')]
+    #[constructor(parse_num)] // This variant contains a custom field,
+                              // so we need a dedicated constructor.
+    Num(usize),
+
+    // Any `char::is_ascii_whitespace()` character.
+    #[rule([' ', '\t', '\n', '\x0c', '\r']+)]
+    Whitespace,
+
+    // The lexer sinks all unrecognizable tokens into this special kind of
+    // "mismatch" token.
+    #[mismatch]
+    Mismatch,
+}
+
+impl CalcToken {
+    fn parse_num(input: &str) -> Self {
+        Self::Num(input.parse().unwrap())
+    }
+}
+
+// Let's try our lexer.
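+
+// (A quick illustrative aside first: the custom constructor above is plain
+// Rust, so it can be sanity-checked directly. `parse_num` receives the
+// substring matched by the `Num` rule.)
+assert_eq!(CalcToken::parse_num("42"), CalcToken::Num(42));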
+
+// To test the lexer we need to load the source code into a SourceCode
+// storage. We are going to use a Document object, which is an incremental
+// storage.
+
+// Since we have not defined the syntax grammar yet, we are going to use
+// a special type of grammar called "NoSyntax" that bypasses the syntax
+// parsing stage.
+
+use lady_deirdre::{Document, syntax::NoSyntax};
+
+let mut doc = Document::<NoSyntax<CalcToken>>::default();
+
+// Document is an incremental storage with random write access operations.
+// Filling the entire document (specified by the `..` span range) with the
+// initial text.
+doc.write(.., "(+ 5, 10)");
+
+// Now let's check our tokens using the chunk iterator.
+
+use lady_deirdre::lexis::CodeContent;
+
+assert_eq!(
+    doc.chunks(..).map(|chunk| chunk.token).collect::<Vec<_>>(),
+    vec![
+        &CalcToken::Open,
+        &CalcToken::Plus,
+        &CalcToken::Whitespace,
+        &CalcToken::Num(5),
+        &CalcToken::Comma,
+        &CalcToken::Whitespace,
+        &CalcToken::Num(10),
+        &CalcToken::Close,
+    ],
+);
+
+// Now let's define our syntax parser.
+
+// Similarly to Token, we are going to define a Syntax Tree node type as a Rust
+// enum type with LL(1) grammar rules directly on the enum variants.
+
+use lady_deirdre::{
+    syntax::{Node, SyntaxError, NodeRef, SyntaxTree},
+    lexis::TokenRef,
+};
+
+#[derive(Node)]
+#[token(CalcToken)] // We need to specify a Token type explicitly.
+#[error(SyntaxError)] // An object that will store syntax errors.
+                      // SyntaxError is the default implementation, but you
+                      // can use any custom type that implements
+                      // From<SyntaxError>.
+#[skip($Whitespace)] // Tokens to be ignored in the syntax rule expressions.
+enum CalcNode {
+    #[root] // The entry-point Rule of the Syntax Tree root node.
+    #[rule(expr: Expression)]
+    Root {
+        // A weak reference to the top captured Expression node.
+        expr: NodeRef,
+    },
+
+    #[rule(
+        $Open
+        & operator: ($Plus | $Mult)?
+        & (operands: (Number | Expression))+{$Comma}
+        & $Close
+    )]
+    #[synchronization] // The "synchronization" directive tells the parser to
+                       // recover from syntax errors by balancing the "(" and
+                       // ")" tokens.
+    Expression {
+        operator: TokenRef,
+        operands: Vec<NodeRef>,
+    },
+
+    #[rule(value: $Num)]
+    Number { value: TokenRef }
+}
+
+// A helper function that prints the syntax structure for debugging purposes.
+fn show_tree(node_ref: &NodeRef, tree: &Document<CalcNode>) -> String {
+    // Turns a weak NodeRef reference into a strong Node reference using the
+    // SyntaxTree instance.
+    let node = match node_ref.deref(tree) {
+        Some(node) => node,
+        // If the referred node does not exist in the syntax
+        // tree (e.g. due to syntax errors), return the
+        // "?" string.
+        None => return String::from("?"),
+    };
+
+    match node {
+        CalcNode::Root { expr } => format!("Root({})", show_tree(expr, tree)),
+
+        CalcNode::Expression { operator, operands } => {
+            let mut result = String::new();
+
+            match operator.deref(tree) {
+                Some(CalcToken::Plus) => result.push_str("Plus("),
+                Some(CalcToken::Mult) => result.push_str("Mult("),
+                Some(_) => unreachable!(),
+                None => result.push_str("?("),
+            }
+
+            result.push_str(
+                operands
+                    .iter()
+                    .map(|op| show_tree(op, tree))
+                    .collect::<Vec<_>>()
+                    .join(", ")
+                    .as_str(),
+            );
+
+            result.push_str(")");
+
+            result
+        }
+
+        CalcNode::Number { value } => {
+            match value.deref(tree) {
+                Some(CalcToken::Num(num)) => num.to_string(),
+                Some(_) => unreachable!(),
+                None => String::from("?"),
+            }
+        }
+    }
+}
+
+// Let's try to run our grammar again. This time with the syntax parser.
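+
+// (Before we do, an illustrative aside, relying on the grammar above: thanks
+// to the `#[synchronization]` directive, parsing is resilient to errors, so
+// even a broken input still produces a syntax tree. Unresolvable references
+// are rendered as "?" by `show_tree`. The exact shape of a recovered tree
+// depends on the error-recovery heuristics, so we don't assert on it here.)
+
+let mut broken = Document::<CalcNode>::default();
+
+broken.write(.., "(* (+ 3, 4,");
+
+// Parsing does not panic, and the root node is still available for traversal.
+let _ = show_tree(broken.root(), &broken);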
+
+let mut doc = Document::<CalcNode>::default();
+
+doc.write(.., "(* (+ 3, 4, 5), 10)");
+
+assert_eq!(show_tree(doc.root(), &doc), "Root(Mult(Plus(3, 4, 5), 10))");
+
+// Now, let's implement an interpreter of our expression language by traversing
+// the Syntax Tree.
+
+fn interpret(node_ref: &NodeRef, doc: &Document<CalcNode>) -> usize {
+    let node = match node_ref.deref(doc) {
+        Some(node) => node,
+        None => return 0,
+    };
+
+    match node {
+        CalcNode::Root { expr } => interpret(expr, doc),
+
+        CalcNode::Expression { operator, operands } => {
+            match operator.deref(doc) {
+                Some(CalcToken::Mult) => {
+                    let mut result = 1;
+
+                    for operand in operands {
+                        result *= interpret(operand, doc);
+                    }
+
+                    result
+                }
+
+                Some(CalcToken::Plus) => {
+                    let mut result = 0;
+
+                    for operand in operands {
+                        result += interpret(operand, doc);
+                    }
+
+                    result
+                }
+
+                Some(_) => unreachable!(),
+
+                None => 0,
+            }
+        }
+
+        CalcNode::Number { value } => {
+            match value.deref(doc) {
+                Some(CalcToken::Num(num)) => *num,
+                Some(_) => unreachable!(),
+                None => 0,
+            }
+        }
+    }
+}
+
+assert_eq!(interpret(doc.root(), &doc), 120);
+```
diff --git a/work/crates/main/src/arena/id.rs b/work/crates/main/src/arena/id.rs
new file mode 100644
index 0000000..64fea97
--- /dev/null
+++ b/work/crates/main/src/arena/id.rs
@@ -0,0 +1,140 @@
+use crate::std::*;
+
+/// A globally unique identifier of the data container.
+///
+/// ```rust
+/// use lady_deirdre::arena::Id;
+///
+/// let id_a = Id::new();
+/// let id_b = Id::new();
+///
+/// // Id equals itself.
+/// assert_eq!(id_a, id_a);
+///
+/// // Id equals its copy.
+/// assert_eq!(id_a, *(&id_a));
+///
+/// // Id never equals another Id.
+/// assert_ne!(id_a, id_b);
+///
+/// // Id never equals the Nil Id.
+/// assert_ne!(&id_a, Id::nil());
+///
+/// ```
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+#[repr(transparent)]
+pub struct Id {
+    inner: u64,
+}
+
+impl Ord for Id {
+    #[inline(always)]
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.inner.cmp(&other.inner)
+    }
+}
+
+impl PartialOrd for Id {
+    #[inline(always)]
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Debug for Id {
+    #[inline(always)]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        Debug::fmt(&self.inner, formatter)
+    }
+}
+
+impl Display for Id {
+    #[inline(always)]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        Display::fmt(&self.inner, formatter)
+    }
+}
+
+impl AsRef<u64> for Id {
+    #[inline(always)]
+    fn as_ref(&self) -> &u64 {
+        &self.inner
+    }
+}
+
+impl Borrow<u64> for Id {
+    #[inline(always)]
+    fn borrow(&self) -> &u64 {
+        &self.inner
+    }
+}
+
+impl Id {
+    /// Returns the next non-nil instance of [Id].
+    ///
+    /// There can be up to half of [u64::MAX] unique instances of [Id] per process.
+    /// An attempt to allocate more instances will panic.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Id;
+    ///
+    /// assert!(!Id::new().is_nil());
+    ///
+    /// ```
+    #[inline(always)]
+    pub fn new() -> Self {
+        static COUNTER: AtomicU64 = AtomicU64::new(1);
+        const HALF: u64 = u64::MAX / 2;
+
+        let next = COUNTER.fetch_add(1, AtomicOrdering::SeqCst);
+
+        if next > HALF {
+            COUNTER.fetch_sub(1, AtomicOrdering::SeqCst);
+
+            panic!("Id internal counter overflow.");
+        }
+
+        Self { inner: next }
+    }
+
+    /// Returns a static reference to the [Id] instance that is considered invalid.
+    ///
+    /// Nil identifiers normally don't refer to valid data.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Id;
+    ///
+    /// assert!(Id::nil().is_nil());
+    ///
+    /// ```
+    #[inline(always)]
+    pub const fn nil() -> &'static Self {
+        const NIL: Id = Id { inner: 0 };
+
+        &NIL
+    }
+
+    /// Returns `true` if the [Id] instance refers to invalid data.
+    #[inline(always)]
+    pub const fn is_nil(&self) -> bool {
+        self.inner == 0
+    }
+
+    /// Returns the [u64] inner representation of [Id].
+    ///
+    /// A zero value corresponds to the [nil](Id::nil) identifier.
+    #[inline(always)]
+    pub const fn into_inner(self) -> u64 {
+        self.inner
+    }
+}
+
+/// A convenient interface for objects that persist or refer to globally unique data.
+///
+/// This interface should normally be implemented for collections of globally unique data, and for
+/// weak references into such collections.
+pub trait Identifiable {
+    /// Returns a reference to the globally unique identifier of the data container this object
+    /// belongs to.
+    fn id(&self) -> &Id;
+}
diff --git a/work/crates/main/src/arena/mod.rs b/work/crates/main/src/arena/mod.rs
new file mode 100644
index 0000000..92068e7
--- /dev/null
+++ b/work/crates/main/src/arena/mod.rs
@@ -0,0 +1,14 @@
+#![doc = include_str!("readme.md")]
+
+mod id;
+mod reference;
+mod repository;
+mod sequence;
+
+pub(crate) use crate::arena::repository::RepositoryIterator;
+pub use crate::arena::{
+    id::{Id, Identifiable},
+    reference::{Ref, RefIndex, RefVersion},
+    repository::Repository,
+    sequence::Sequence,
+};
diff --git a/work/crates/main/src/arena/readme.md b/work/crates/main/src/arena/readme.md
new file mode 100644
index 0000000..2177cd0
--- /dev/null
+++ b/work/crates/main/src/arena/readme.md
@@ -0,0 +1,117 @@
+# Versioned arena memory management.
+
+This module contains a set of collections to organize versioned arena memory
+management, and an interface to weakly refer to items inside this memory.
+
+A long-lived, but frequently updated set of interconnected data can be stored
+in a common Container with a globally unique identifier [Id] to distinguish
+between the Container instances. This container, in turn, can be stored in
+static memory, or in some other long-lived, easy to access place.
+
+Inside this Container you can use [Repository] and [Sequence] instances to
+store the actual mutable and immutable data items respectively.
+
+Finally, you can implement a domain-specific reference object for your end
+users to access data inside this Container. This object will use the base
+[Ref] object under the hood to refer to data inside the underlying Repository
+and Sequence collections.
+
+As a rule of thumb you can implement the [Identifiable] trait for your
+Container, so it would be easier for your users to distinguish between
+Container instances.
+
+```rust
+use lady_deirdre::arena::{Id, Identifiable, Repository, Sequence, Ref};
+
+pub struct IntStorage {
+    id: Id,
+    inner: Repository<usize>,
+}
+
+impl Identifiable for IntStorage {
+    fn id(&self) -> &Id { &self.id }
+}
+
+impl IntStorage {
+    pub fn new() -> Self {
+        Self {
+            // Id::new() returns a globally unique value.
+            id: Id::new(),
+            // Alternatively use Repository::with_capacity(...).
+            inner: Repository::default(),
+        }
+    }
+
+    pub fn add(&mut self, item: usize) -> IntRef {
+        // This is an "O(1)" time operation.
+        //
+        // The returned value uniquely identifies the Item among the other
+        // "self.inner" items.
+        let inner_ref = self.inner.insert(item);
+
+        // This is a "weak" reference to the corresponding Item stored in the
+        // `IntStorage` collection.
+        //
+        // If the user loses this reference, there is no way to access this
+        // Item anymore. However, the lifetime of the stored Item is independent
+        // of the reference lifetime (reference counting is not involved here).
+        // Moreover, the returned weak references are not guaranteed to be
+        // valid (as they are "weak").
+        //
+        // It is up to your system design (to you) to decide on the memory
+        // cleanup approach.
+        IntRef {
+            id: self.id,
+            inner_ref,
+        }
+    }
+}
+
+// It is cheap and safe to copy both Id and Ref. And this object is Send+Sync
+// by default.
+#[derive(Clone, Copy)]
+pub struct IntRef {
+    id: Id,
+    inner_ref: Ref,
+}
+
+impl Identifiable for IntRef {
+    fn id(&self) -> &Id { &self.id }
+}
+
+impl IntRef {
+    // The end user dereferences this weak reference by providing the
+    // IntStorage instance the referred Item belongs to.
+    //
+    // If the end users have a set of such IntStorage instances, they can
+    // look up the corresponding instance using e.g. the IntRef::id() value.
+    pub fn deref<'a>(&self, storage: &'a IntStorage) -> Option<&'a usize> {
+        if self.id != storage.id {
+            // The end user provided an incorrect IntStorage instance; the
+            // dereference has failed.
+            return None;
+        }
+
+        // Returns "Some" if the referred Item still exists in this Repository,
+        // otherwise returns "None" (the IntRef weak reference is considered
+        // obsolete).
+        storage.inner.get(&self.inner_ref)
+    }
+
+    // Returns the removed Item from the provided IntStorage instance if the
+    // referred Item exists in this storage.
+    //
+    // Otherwise returns None (the IntRef instance is obsolete or invalid).
+    pub fn remove(&self, storage: &mut IntStorage) -> Option<usize> {
+        if self.id != storage.id {
+            // The end user provided an incorrect IntRef instance.
+            return None;
+        }
+
+        storage.inner.remove(&self.inner_ref)
+    }
+}
+```
+
+This is a common pattern used across the entire crate API. For example,
+[Document](crate::Document) uses [Repository] under the hood to resolve
+[TokenRef](crate::lexis::TokenRef) weak references to the stored Tokens of
+the Source Code.
diff --git a/work/crates/main/src/arena/reference.rs b/work/crates/main/src/arena/reference.rs
new file mode 100644
index 0000000..c5633ae
--- /dev/null
+++ b/work/crates/main/src/arena/reference.rs
@@ -0,0 +1,109 @@
+use crate::std::*;
+
+/// An index into the inner array of items of the [Sequence](crate::arena::Sequence) collection, or
+/// into the inner array of entries of the [Repository](crate::arena::Repository) collection.
+pub type RefIndex = usize;
+
+/// A revision version of an entry inside the [Repository](crate::arena::Repository) collection.
+pub type RefVersion = usize;
+
+/// A generic homogeneous weak reference into the Arena collection items.
+///
+/// This is a low-level interface. An API user normally does not need to construct or inspect
+/// this interface manually unless working on an extension of this Crate.
+///
+/// Ref instances are initially constructed by the [Repository](crate::arena::Repository), by
+/// the [Sequence](crate::arena::Sequence), or by a top-level API.
+///
+/// The reference is considered to be either valid or invalid. The integrity of references is not
+/// guaranteed by the underlying collections or by the wrapper containers. For example, a
+/// Repository collection can produce a valid reference to an item inside that collection, but
+/// later on the data could become obsolete (e.g. by removing the item from the collection).
+/// In this case, the Ref instance becomes invalid, and it can no longer be dereferenced to a
+/// valid item from that collection. In this sense, Ref is a "weak" reference.
+///
+/// The Ref instance is collection-independent; as such, it could be interpreted in different ways
+/// depending on the applied collection, and in case of misinterpretation it could be dereferenced
+/// to a wrong Item. Misinterpretation of a Ref instance (within the safe API) is a logical error,
+/// not undefined behavior.
+///
+/// See the [module documentation](crate::arena) for details on how to avoid this problem in the
+/// end API design.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum Ref {
+    /// Indicates an invalid reference.
+    ///
+    /// This type of reference cannot be dereferenced.
+    Nil,
+
+    /// Indicates a reference to a single data item that resides outside of the main collection.
+    ///
+    /// Such references are considered to be always valid. This type of variant cannot be
+    /// dereferenced by the collection functions of the [arena](crate::arena) module. They are
+    /// supposed to be dereferenced by the top-level wrapper container functions.
+    ///
+    /// Primary is a helper variant to refer to an inner single selected item that logically
+    /// belongs to the specified top-level wrapper container, but resides outside of the
+    /// container's main collection.
+    ///
+    /// An example of such a container is the [Cluster](crate::syntax::Cluster) container that has
+    /// a [Cluster::primary](crate::syntax::Cluster::primary) field that resides near the
+    /// [Cluster::nodes](crate::syntax::Cluster::nodes) collection field. A [Ref::Primary] variant
+    /// would refer to the "primary" field value of the Cluster instance in this case.
+    Primary,
+
+    /// Indicates a reference to an Item inside the [Sequence](crate::arena::Sequence) collection.
+    Sequence {
+        /// An index into the inner array of the [Sequence](crate::arena::Sequence) collection.
+        ///
+        /// If the index is outside of the array's bounds, the reference is considered invalid,
+        /// and is interpreted as a [Ref::Nil] variant. Otherwise the reference is considered
+        /// valid.
+        index: RefIndex,
+    },
+
+    /// Indicates a reference to an Item inside the [Repository](crate::arena::Repository)
+    /// collection.
+    ///
+    /// The reference is valid if and only if it refers to an Occupied entry inside the
+    /// corresponding Repository, and the version of the reference equals the version of the
+    /// indexed entry.
+    ///
+    /// For details, see the [Repository documentation](crate::arena::Repository).
+    Repository {
+        /// An index into the inner array of entries inside the
+        /// [Repository](crate::arena::Repository) collection.
+        ///
+        /// If the index is outside of the Repository's inner array bounds, the reference is
+        /// considered invalid, and is interpreted as a [Ref::Nil] variant.
+        index: RefIndex,
+
+        /// A version of the entry indexed by this variant in the inner array of entries inside
+        /// the [Repository](crate::arena::Repository) collection.
+        ///
+        /// If the version held by this variant differs from the version of the occupied entry in
+        /// the specified Repository instance, the reference is considered invalid, and is
+        /// interpreted as a [Ref::Nil] variant.
+        version: RefVersion,
+    },
+}
+
+impl Debug for Ref {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        match &self {
+            Ref::Nil => formatter.write_str("Ref(Nil)"),
+
+            _ => formatter.write_str("Ref"),
+        }
+    }
+}
+
+impl Ref {
+    /// Returns `true` if the reference enum is a [Ref::Nil] variant.
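+    ///
+    /// A minimal illustrative check (it relies only on the variants defined above):
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Ref;
+    ///
+    /// assert!(Ref::Nil.is_nil());
+    /// assert!(!Ref::Primary.is_nil());
+    /// assert!(!Ref::Sequence { index: 0 }.is_nil());
+    /// ```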
+    #[inline(always)]
+    pub const fn is_nil(&self) -> bool {
+        match &self {
+            Ref::Nil => true,
+            _ => false,
+        }
+    }
+}
diff --git a/work/crates/main/src/arena/repository.rs b/work/crates/main/src/arena/repository.rs
new file mode 100644
index 0000000..0c78b5d
--- /dev/null
+++ b/work/crates/main/src/arena/repository.rs
@@ -0,0 +1,931 @@
+use crate::{
+    arena::{Ref, RefIndex, RefVersion},
+    std::*,
+};
+
+/// A mutable versioned data collection.
+///
+/// The interface provides a way to store, remove, update and mutate items in allocated memory, and
+/// to access stored items through weak [versioned references](crate::arena::Ref::Repository).
+///
+/// All operations are performed in "O(1)" constant time.
+///
+/// Under the hood this data structure holds a Rust standard [vector](Vec) of entries. Each entry
+/// exists in one of three states: Occupied, Reserved, or Vacant.
+///
+/// When an API user adds a data item to the collection, it is either put into the next Vacant
+/// entry, turning this entry into the Occupied state, or pushed onto the top of the vector as a
+/// new Occupied entry. Vacant entries are managed as a queue in a linked list. When the user
+/// removes a data item, the corresponding entry turns into Vacant and is scheduled for the next
+/// insertion event in the queue of Vacant entries.
+///
+/// Each Occupied (or Reserved) entry holds the [version number](crate::arena::RefVersion) of the
+/// occupied (or possibly occupied) data, and the corresponding Ref object that refers to this
+/// entry also holds this version value. If an API user removes an item from this collection, and
+/// later occupies the entry with a different data item, the new entry will hold a different
+/// version value, so a Ref to the old version of the item would fail to resolve.
+///
+/// In other words, references to this collection's items are always unique in the history of
+/// the collection's changes.
+///
+/// Also, an API user can reserve entries inside this collection for late initialization. While
+/// an entry is in the Reserved state, it does not hold any data, but it could have weak
+/// references, and it will not be Occupied by any other data item. These references are not valid
+/// for dereferencing until the entry is fully initialized. Once the Reserved entry turns into
+/// Occupied, it can be dereferenced by the initially created reference.
+///
+/// The collection provides a high-level safe interface, and a lower-level unsafe interface
+/// that avoids some minor check overhead to benefit performance.
+///
+/// ```rust
+/// use lady_deirdre::arena::{Repository, Ref};
+///
+/// let mut repo = Repository::<&'static str>::default();
+///
+/// let string_a_ref: Ref = repo.insert("foo");
+/// let string_b_ref: Ref = repo.insert("bar");
+///
+/// assert_eq!(repo.get(&string_a_ref).unwrap(), &"foo");
+/// assert_eq!(repo.get(&string_b_ref).unwrap(), &"bar");
+///
+/// repo.remove(&string_b_ref);
+///
+/// assert_eq!(repo.get(&string_a_ref).unwrap(), &"foo");
+/// assert!(!repo.contains(&string_b_ref));
+///
+/// let string_c_ref: Ref = repo.insert("baz");
+///
+/// assert_eq!(repo.get(&string_a_ref).unwrap(), &"foo");
+/// assert!(!repo.contains(&string_b_ref));
+/// assert_eq!(repo.get(&string_c_ref).unwrap(), &"baz");
+///
+/// *(repo.get_mut(&string_a_ref).unwrap()) = "foo2";
+///
+/// assert_eq!(repo.get(&string_a_ref).unwrap(), &"foo2");
+/// assert!(!repo.contains(&string_b_ref));
+/// assert_eq!(repo.get(&string_c_ref).unwrap(), &"baz");
+/// ```
+pub struct Repository<T> {
+    entries: Vec<RepositoryEntry<T>>,
+    next: RefIndex,
+    revision: RefVersion,
+    modified: bool,
+}
+
+impl<T> Default for Repository<T> {
+    #[inline]
+    fn default() -> Self {
+        Self {
+            entries: Vec::new(),
+            next: 0,
+            revision: 0,
+            modified: false,
+        }
+    }
+}
+
+impl<T> Debug for Repository<T> {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        formatter.write_str("Repository")
+    }
+}
+
+pub type RepositoryIterator<'a, T> =
+    FilterMap<Iter<'a, RepositoryEntry<T>>, fn(&'a RepositoryEntry<T>) -> Option<&'a T>>;
+
+impl<'a, T> IntoIterator for &'a Repository<T> {
+    type Item = &'a T;
+    type IntoIter = RepositoryIterator<'a, T>;
+
+    #[inline]
+    fn into_iter(self) -> Self::IntoIter {
+        self.entries.iter().filter_map(|entry| match entry {
+            RepositoryEntry::Occupied { data, .. } => Some(data),
+            _ => None,
+        })
+    }
+}
+
+pub type RepositoryIntoIterator<T> =
+    FilterMap<IntoIter<RepositoryEntry<T>>, fn(RepositoryEntry<T>) -> Option<T>>;
+
+impl<T> IntoIterator for Repository<T> {
+    type Item = T;
+    type IntoIter = RepositoryIntoIterator<T>;
+
+    #[inline]
+    fn into_iter(self) -> Self::IntoIter {
+        self.entries.into_iter().filter_map(|entry| match entry {
+            RepositoryEntry::Occupied { data, .. } => Some(data),
+            _ => None,
+        })
+    }
+}
+
+impl<T> FromIterator<T> for Repository<T> {
+    #[inline(always)]
+    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
+        let entries = iter
+            .into_iter()
+            .map(|data| RepositoryEntry::Occupied { data, revision: 0 })
+            .collect::<Vec<_>>();
+
+        let next = entries.len();
+
+        Self {
+            entries,
+            next,
+            revision: 0,
+            modified: false,
+        }
+    }
+}
+
+impl<T> Repository<T> {
+    /// Creates a new collection instance with pre-allocated memory for at least `capacity` items
+    /// to be stored in.
+    #[inline(always)]
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            entries: Vec::with_capacity(capacity),
+            next: 0,
+            revision: 0,
+            modified: false,
+        }
+    }
+
+    /// Adds an item into this collection returning a valid weak reference to the item.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_ref = repo.insert(10);
+    ///
+    /// assert_eq!(repo.get(&item_ref).unwrap(), &10);
+    /// ```
+    #[inline]
+    pub fn insert(&mut self, data: T) -> Ref {
+        let index = self.insert_index(data);
+
+        unsafe { self.make_ref(index) }
+    }
+
+    /// Adds an item into this collection returning a valid [RefIndex](crate::arena::RefIndex) to
+    /// access the corresponding item in the inner array of this Repository.
+    ///
+    /// This is a low-level API.
+    ///
+    /// An API user can utilize this index with care to perform low-level unsafe operations with
+    /// lesser overhead.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_ref_index = repo.insert_index(10);
+    ///
+    /// // This is safe because `insert_index` returns a valid index.
+    /// unsafe {
+    ///     assert_eq!(repo.get_unchecked(item_ref_index), &10);
+    /// }
+    ///
+    /// // This is safe because `insert_index` returns a valid index.
+    /// let item_ref = unsafe {
+    ///     repo.make_ref(item_ref_index)
+    /// };
+    ///
+    /// assert_eq!(repo.get(&item_ref).unwrap(), &10);
+    ///
+    /// // This is safe because `insert_index` returns a valid index, and the item is still
+    /// // in the `repo`.
+    /// unsafe {
+    ///     repo.remove_unchecked(item_ref_index);
+    /// };
+    ///
+    /// // From now on it would be unsafe to call e.g. `repo.get_unchecked(item_ref_index)`,
+    /// // because the item no longer exists in the `repo`.
+    /// ```
+    pub fn insert_index(&mut self, data: T) -> RefIndex {
+        let index = self.next;
+
+        if self.modified {
+            self.revision += 1;
+            self.modified = false;
+        }
+
+        match self.entries.get_mut(self.next) {
+            None => {
+                self.entries.push(RepositoryEntry::Occupied {
+                    data,
+                    revision: self.revision,
+                });
+
+                self.next += 1;
+            }
+
+            Some(vacant) => {
+                debug_assert!(
+                    matches!(vacant, RepositoryEntry::Vacant(..)),
+                    "Internal error. Occupied entry in the next position.",
+                );
+
+                self.next = match replace(
+                    vacant,
+                    RepositoryEntry::Occupied {
+                        data,
+                        revision: self.revision,
+                    },
+                ) {
+                    RepositoryEntry::Vacant(next) => next,
+                    _ => unsafe { unreachable_unchecked() },
+                }
+            }
+        }
+
+        index
+    }
+
+    /// Reserves an entry inside this collection for late initialization.
+    ///
+    /// This is a low-level API.
+    ///
+    /// An API user can utilize the low-level API to initialize the referred entry later. In
+    /// particular, the user can create a [Ref](crate::arena::Ref) from the received index. This
+    /// reference will be considered invalid, but once the entry is initialized it will become
+    /// valid to dereference.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_ref_index = repo.reserve();
+    ///
+    /// // This is safe because `reserve` returns a valid index.
+    /// let item_ref = unsafe {
+    ///     repo.make_ref(item_ref_index)
+    /// };
+    ///
+    /// // The referred item is not yet initialized, so it cannot be dereferenced, but it is safe
+    /// // to try to dereference.
+    /// assert!(repo.get(&item_ref).is_none());
+    ///
+    /// // This is safe because `reserve` returns a valid index.
+    /// unsafe {
+    ///     repo.set_unchecked(item_ref_index, 10);
+    /// }
+    ///
+    /// // Since the item is already initialized, from now on it is fine to dereference it.
+    /// assert_eq!(repo.get(&item_ref).unwrap(), &10);
+    /// ```
+    pub fn reserve(&mut self) -> RefIndex {
+        let index = self.next;
+
+        if self.modified {
+            self.revision += 1;
+            self.modified = false;
+        }
+
+        match self.entries.get_mut(self.next) {
+            None => {
+                self.entries.push(RepositoryEntry::Reserved {
+                    revision: self.revision,
+                });
+
+                self.next += 1;
+            }
+
+            Some(vacant) => {
+                debug_assert!(
+                    matches!(vacant, RepositoryEntry::Vacant(..)),
+                    "Internal error. Occupied entry in the next position.",
+                );
+
+                self.next = match replace(
+                    vacant,
+                    RepositoryEntry::Reserved {
+                        revision: self.revision,
+                    },
+                ) {
+                    RepositoryEntry::Vacant(next) => next,
+                    _ => unsafe { unreachable_unchecked() },
+                }
+            }
+        }
+
+        index
+    }
+
+    /// Removes an item from this collection by reference.
+    ///
+    /// If the referred item exists, returns the value. Otherwise returns [None].
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_ref = repo.insert(10);
+    ///
+    /// assert_eq!(repo.get(&item_ref).unwrap(), &10);
+    ///
+    /// assert_eq!(repo.remove(&item_ref).unwrap(), 10);
+    ///
+    /// // The referred value no longer exists in the `repo`.
+    /// assert!(!repo.contains(&item_ref));
+    /// ```
+    #[inline]
+    pub fn remove(&mut self, reference: &Ref) -> Option<T> {
+        match reference {
+            Ref::Repository { index, version } => {
+                let entry = self.entries.get_mut(*index)?;
+
+                match entry {
+                    RepositoryEntry::Occupied { revision, .. } if revision == version => (),
+
+                    RepositoryEntry::Reserved { .. } => {
+                        panic!("An attempt to remove reserved entry.")
+                    }
+
+                    _ => return None,
+                }
+
+                let occupied = replace(entry, RepositoryEntry::Vacant(self.next));
+
+                let data = match occupied {
+                    RepositoryEntry::Occupied { data, .. } => {
+                        self.modified = true;
+                        data
+                    }
+                    _ => unsafe { unreachable_unchecked() },
+                };
+
+                self.next = *index;
+
+                Some(data)
+            }
+
+            _ => None,
+        }
+    }
+
+    /// Forcefully raises the repository's internal version.
+    ///
+    /// This is a low-level API. Normally an API user does not need to call this function
+    /// manually, as the versions are managed automatically.
+    ///
+    /// This function is supposed to be used together with the "upgrade" function.
+    /// See the [Upgrade function documentation](Repository::upgrade) for details.
+    ///
+    /// Note that raising the Repository version does not affect existing entries. It only
+    /// affects newly inserted items, or the items upgraded by the Upgrade function.
+    #[inline(always)]
+    pub fn commit(&mut self) {
+        self.revision += 1;
+        self.modified = false;
+    }
+
+    /// Removes all items from this collection preserving allocated memory.
+    ///
+    /// All references belonging to this collection implicitly turn invalid.
+    #[inline(always)]
+    pub fn clear(&mut self) {
+        self.modified = true;
+        self.next = 0;
+        self.entries.clear();
+    }
+
+    /// Returns `true` if the referred item exists in this collection in an Occupied entry.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_ref = repo.insert(10);
+    ///
+    /// assert!(repo.contains(&item_ref));
+    ///
+    /// let _ = repo.remove(&item_ref);
+    ///
+    /// assert!(!repo.contains(&item_ref));
+    /// ```
+    #[inline]
+    pub fn contains(&self, reference: &Ref) -> bool {
+        match reference {
+            Ref::Repository { index, version } => match self.entries.get(*index) {
+                Some(RepositoryEntry::Occupied { revision, .. }) => version == revision,
+                _ => false,
+            },
+
+            _ => false,
+        }
+    }
+
+    /// Tries to dereference the referred item.
+    ///
+    /// Returns [None] if the referred item does not exist in this collection in an Occupied
+    /// entry.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_ref = repo.insert(10);
+    ///
+    /// assert_eq!(repo.get(&item_ref), Some(&10));
+    ///
+    /// let _ = repo.remove(&item_ref);
+    ///
+    /// assert_eq!(repo.get(&item_ref), None);
+    /// ```
+    #[inline]
+    pub fn get(&self, reference: &Ref) -> Option<&T> {
+        match reference {
+            Ref::Repository { index, version } => match self.entries.get(*index) {
+                Some(RepositoryEntry::Occupied { data, revision, .. }) if version == revision => {
+                    Some(data)
+                }
+                _ => None,
+            },
+
+            _ => None,
+        }
+    }
+
+    /// Tries to mutably dereference the referred item.
+    ///
+    /// Returns [None] if the referred item does not exist in this collection in an Occupied
+    /// entry.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_ref = repo.insert(10);
+    ///
+    /// *(repo.get_mut(&item_ref).unwrap()) = 20;
+    ///
+    /// assert_eq!(repo.get(&item_ref), Some(&20));
+    /// ```
+    #[inline]
+    pub fn get_mut(&mut self, reference: &Ref) -> Option<&mut T> {
+        match reference {
+            Ref::Repository { index, version } => match self.entries.get_mut(*index) {
+                Some(RepositoryEntry::Occupied { data, revision, .. }) if version == revision => {
+                    Some(data)
+                }
+                _ => None,
+            },
+
+            _ => None,
+        }
+    }
+
+    /// Returns an item's weak reference by internal index.
+    ///
+    /// This is a low-level API.
+    ///
+    /// This index could be received, for example, from the
+    /// [insert_index](Repository::insert_index) function.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_index = repo.insert_index(10);
+    ///
+    /// let item_ref = unsafe {
+    ///     repo.make_ref(item_index)
+    /// };
+    ///
+    /// assert_eq!(repo.get(&item_ref), Some(&10));
+    /// ```
+    ///
+    /// Note that unlike [Ref](crate::arena::Ref), [RefIndex](crate::arena::RefIndex) is a
+    /// version-independent "reference" into this collection. An API user should take care not to
+    /// misuse indices.
+    ///
+    /// ```rust
+    ///
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_a_index = repo.insert_index(10);
+    ///
+    /// // This is safe because `insert_index` returns a valid index.
+    /// let item_a_ref = unsafe {
+    ///     repo.make_ref(item_a_index)
+    /// };
+    ///
+    /// assert_eq!(repo.get(&item_a_ref), Some(&10));
+    ///
+    /// // Removing all items from this collection.
+    /// repo.clear();
+    ///
+    /// // Inserting a new item inside this collection.
+    /// let item_b_index = repo.insert_index(20);
+    ///
+    /// // `item_a_ref` is history-dependent.
+    /// // An item previously referred to by `item_a_ref` is considered missing in this collection.
+    /// assert!(!repo.contains(&item_a_ref));
+    ///
+    /// // However, due to prior collection changes, Item B has the same index as the removed
+    /// // Item A.
+    /// assert_eq!(item_a_index, item_b_index);
+    ///
+    /// // Making a reference from `item_a_index` would return a reference to Item B.
+    /// let item_a_ref = unsafe {
+    ///     repo.make_ref(item_a_index)
+    /// };
+    ///
+    /// // The new `item_a_ref` actually refers to Item B.
+    /// assert_eq!(repo.get(&item_a_ref), Some(&20));
+    /// ```
+    ///
+    /// **Safety:**
+    /// - An entry indexed by `index` exists in this collection either in the Occupied, or in the
+    ///   Reserved state.
+    #[inline(always)]
+    pub unsafe fn make_ref(&self, index: RefIndex) -> Ref {
+        debug_assert!(
+            index < self.entries.len(),
+            "Internal error. Index out of bounds."
+        );
+
+        #[allow(unreachable_code)]
+        let entry = unsafe { self.entries.get_unchecked(index) };
+
+        let version = match entry {
+            RepositoryEntry::Occupied { revision, .. }
+            | RepositoryEntry::Reserved { revision, .. } => *revision,
+
+            RepositoryEntry::Vacant(..) => {
+                #[cfg(debug_assertions)]
+                {
+                    unreachable!(
+                        "Internal error. An attempt to make a reference from index pointing to \
+                        vacant entry."
+                    );
+                }
+
+                #[allow(unreachable_code)]
+                unsafe {
+                    unreachable_unchecked()
+                }
+            }
+        };
+
+        Ref::Repository { index, version }
+    }
+
+    /// Immutably dereferences a collection's item by internal index.
+    ///
+    /// This is a low-level API.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_index = repo.insert_index(10);
+    ///
+    /// // This is safe because `insert_index` occupies the collection's entry.
+    /// assert_eq!(unsafe { repo.get_unchecked(item_index) }, &10);
+    /// ```
+    ///
+    /// **Safety:**
+    /// - An entry indexed by `index` exists in this collection in the Occupied state.
+    #[inline(always)]
+    pub unsafe fn get_unchecked(&self, index: RefIndex) -> &T {
+        debug_assert!(
+            index < self.entries.len(),
+            "Internal error. Index out of bounds."
+        );
+
+        let entry = unsafe { self.entries.get_unchecked(index) };
+
+        match entry {
+            RepositoryEntry::Occupied { data, .. } => data,
+
+            _ => {
+                #[cfg(debug_assertions)]
+                {
+                    unreachable!("Internal error. An attempt to index into non-occupied entry.");
+                }
+
+                #[allow(unreachable_code)]
+                unsafe {
+                    unreachable_unchecked()
+                }
+            }
+        }
+    }
+
+    /// Mutably dereferences a collection's item by internal index.
+    ///
+    /// This is a low-level API.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_index = repo.insert_index(10);
+    ///
+    /// // This is safe because `insert_index` occupies the collection's entry.
+    /// unsafe { *repo.get_unchecked_mut(item_index) = 20; }
+    ///
+    /// assert_eq!(unsafe { repo.get_unchecked(item_index) }, &20);
+    /// ```
+    ///
+    /// **Safety:**
+    /// - An entry indexed by `index` exists in this collection in the Occupied state.
+    #[inline(always)]
+    pub unsafe fn get_unchecked_mut(&mut self, index: RefIndex) -> &mut T {
+        debug_assert!(
+            index < self.entries.len(),
+            "Internal error. Index out of bounds."
+        );
+
+        let entry = unsafe { self.entries.get_unchecked_mut(index) };
+
+        match entry {
+            RepositoryEntry::Occupied { data, .. } => data,
+
+            _ => {
+                #[cfg(debug_assertions)]
+                {
+                    unreachable!("Internal error. An attempt to index into non-occupied entry.");
+                }
+
+                #[allow(unreachable_code)]
+                unsafe {
+                    unreachable_unchecked()
+                }
+            }
+        }
+    }
+
+    /// Replaces an Occupied item's value by the collection's internal index, or initializes
+    /// a Reserved item by index.
+    ///
+    /// This is a low-level API.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_index = repo.insert_index(10);
+    ///
+    /// // This is safe because `insert_index` occupies the collection's entry.
+    /// unsafe { repo.set_unchecked(item_index, 20); }
+    ///
+    /// assert_eq!(unsafe { repo.get_unchecked(item_index) }, &20);
+    /// ```
+    ///
+    /// If the indexed entry is a Reserved entry, this function initializes this item, turning the
+    /// entry state to Occupied.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_index = repo.reserve();
+    ///
+    /// // This is safe because `reserve` returns a valid index.
+    /// let item_ref = unsafe { repo.make_ref(item_index) };
+    ///
+    /// // The referred item is not initialized yet (is not "Occupied").
+    /// assert!(!repo.contains(&item_ref));
+    ///
+    /// // Initializing the reserved entry.
+    /// unsafe { repo.set_unchecked(item_index, 10); }
+    ///
+    /// // From now on the referred Item "exists" in this collection.
+    /// assert!(repo.contains(&item_ref));
+    /// ```
+    ///
+    /// **Safety:**
+    /// - An entry indexed by `index` exists in this collection in the Occupied or Reserved state.
+    #[inline(always)]
+    pub unsafe fn set_unchecked(&mut self, index: RefIndex, data: T) {
+        debug_assert!(
+            index < self.entries.len(),
+            "Internal error. Index out of bounds."
+        );
+
+        let entry = unsafe { self.entries.get_unchecked_mut(index) };
+
+        let revision = match entry {
+            RepositoryEntry::Reserved { revision } | RepositoryEntry::Occupied { revision, .. } => {
+                *revision
+            }
+
+            RepositoryEntry::Vacant(..) => {
+                #[cfg(debug_assertions)]
+                {
+                    unreachable!("Internal error. An attempt to write into vacant entry.");
+                }
+
+                #[allow(unreachable_code)]
+                unsafe {
+                    unreachable_unchecked()
+                }
+            }
+        };
+
+        *entry = RepositoryEntry::Occupied { data, revision };
+    }
+
+    /// Removes a collection's Occupied or Reserved entry by internal index.
+    ///
+    /// This is a low-level API.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_index = repo.insert_index(10);
+    ///
+    /// // This is safe because `insert_index` returns a valid index.
+    /// let item_ref = unsafe { repo.make_ref(item_index) };
+    ///
+    /// // This is safe because `insert_index` returns a valid index.
+    /// unsafe { repo.remove_unchecked(item_index); }
+    ///
+    /// // From now on the referred Item no longer "exists" in this collection.
+    /// assert!(!repo.contains(&item_ref));
+    /// ```
+    ///
+    /// An API user can utilize this function to remove a Reserved entry without initialization.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_index = repo.reserve();
+    ///
+    /// // This is safe because `reserve` returns a valid index.
+    /// let item_ref = unsafe { repo.make_ref(item_index) };
+    ///
+    /// // This is safe because `reserve` returns a valid index, and the Item's Entry exists in
+    /// // this collection in the Reserved state.
+    /// unsafe { repo.remove_unchecked(item_index); }
+    ///
+    /// // From now on the referred Entry no longer "exists" in this collection.
+    /// // An API user cannot initialize this item by `item_index`.
+    /// ```
+    ///
+    /// **Safety:**
+    /// - An entry indexed by `index` exists in this collection in the Occupied or Reserved state.
+    #[inline(always)]
+    pub unsafe fn remove_unchecked(&mut self, index: RefIndex) {
+        debug_assert!(
+            index < self.entries.len(),
+            "Internal error. Index out of bounds."
+        );
+
+        let entry = unsafe { self.entries.get_unchecked_mut(index) };
+
+        let occupied = replace(entry, RepositoryEntry::Vacant(self.next));
+
+        self.modified = true;
+
+        match occupied {
+            RepositoryEntry::Occupied { .. } | RepositoryEntry::Reserved { .. } => (),
+
+            RepositoryEntry::Vacant { .. } => {
+                #[cfg(debug_assertions)]
+                {
+                    unreachable!("Internal error. An attempt to remove vacant entry.");
+                }
+
+                #[allow(unreachable_code)]
+                unsafe {
+                    unreachable_unchecked()
+                }
+            }
+        };
+
+        self.next = index;
+    }
+
+    /// Upgrades the version of a collection's Occupied or Reserved entry without changing its
+    /// content.
+    ///
+    /// This is a low-level API that allows bulk "re-insertion" of several existing items in a
+    /// more efficient way than a series of independent removes and inserts.
+    ///
+    /// If an API user wants to preserve some entries' content, but needs to obsolete their weak
+    /// references, a trivial way to do so is just to remove and then re-insert them:
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_a_ref = repo.insert(10);
+    /// let item_b_ref = repo.insert(20);
+    ///
+    /// assert!(repo.contains(&item_a_ref));
+    /// assert!(repo.contains(&item_b_ref));
+    ///
+    /// // We do not change the content of the referred items, we just re-insert them.
+    /// let item_a_content = repo.remove(&item_a_ref).unwrap();
+    /// let item_b_content = repo.remove(&item_b_ref).unwrap();
+    /// let item_a_ref_2 = repo.insert(item_a_content);
+    /// let item_b_ref_2 = repo.insert(item_b_content);
+    ///
+    /// // The old weak references are no longer valid.
+    /// assert!(!repo.contains(&item_a_ref));
+    /// assert!(!repo.contains(&item_b_ref));
+    /// ```
+    ///
+    /// This is a safe approach, however it involves certain performance overhead that
+    /// could be critical when performing bulk operations. In addition, this approach does
+    /// not preserve entry indices (which is also important in certain situations).
+    ///
+    /// If an API user is confident about the indices' integrity, an alternative way would be to
+    /// use the [Commit](crate::arena::Repository::commit) function and a series of Upgrade
+    /// function calls instead.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Repository;
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let item_a_index = repo.insert_index(10);
+    /// let item_b_index = repo.insert_index(20);
+    ///
+    /// // This is safe because `insert_index` returns a valid index.
+    /// let item_a_ref = unsafe { repo.make_ref(item_a_index) };
+    /// let item_b_ref = unsafe { repo.make_ref(item_b_index) };
+    ///
+    /// assert!(repo.contains(&item_a_ref));
+    /// assert!(repo.contains(&item_b_ref));
+    ///
+    /// // Forcefully raises the Repository version.
+    /// repo.commit();
+    ///
+    /// // This is safe because the items referred to by index still exist in this repository.
+    /// unsafe {
+    ///     repo.upgrade(item_a_index);
+    ///     repo.upgrade(item_b_index);
+    /// }
+    ///
+    /// // The previously created weak references are no longer valid.
+    /// assert!(!repo.contains(&item_a_ref));
+    /// assert!(!repo.contains(&item_b_ref));
+    ///
+    /// // We can still create new weak references using these indices.
+    /// let item_a_ref_2 = unsafe { repo.make_ref(item_a_index) };
+    /// let item_b_ref_2 = unsafe { repo.make_ref(item_b_index) };
+    ///
+    /// assert!(repo.contains(&item_a_ref_2));
+    /// assert!(repo.contains(&item_b_ref_2));
+    /// ```
+    ///
+    /// Note, if an API user fails to call the Commit function, it will not lead to undefined
+    /// behavior, but in this case the Upgrade function does not guarantee a version upgrade.
+    ///
+    /// **Safety:**
+    /// - An entry indexed by `index` exists in this collection in the Occupied or Reserved state.
+    #[inline(always)]
+    pub unsafe fn upgrade(&mut self, index: RefIndex) {
+        debug_assert!(
+            index < self.entries.len(),
+            "Internal error. Index out of bounds."
+        );
+
+        let entry = unsafe { self.entries.get_unchecked_mut(index) };
+
+        match entry {
+            RepositoryEntry::Occupied { revision, .. } | RepositoryEntry::Reserved { revision } => {
+                *revision = self.revision;
+            }
+
+            RepositoryEntry::Vacant { .. } => {
+                #[cfg(debug_assertions)]
+                {
+                    unreachable!("Internal error. An attempt to update revision of vacant entry.");
+                }
+
+                #[allow(unreachable_code)]
+                unsafe {
+                    unreachable_unchecked()
+                }
+            }
+        };
+    }
+}
+
+#[doc(hidden)]
+pub enum RepositoryEntry<T> {
+    Vacant(RefIndex),
+    Occupied { data: T, revision: RefVersion },
+    Reserved { revision: RefVersion },
+}
diff --git a/work/crates/main/src/arena/sequence.rs b/work/crates/main/src/arena/sequence.rs
new file mode 100644
index 0000000..05ccf54
--- /dev/null
+++ b/work/crates/main/src/arena/sequence.rs
@@ -0,0 +1,290 @@
+use crate::{
+    arena::{Ref, RefIndex},
+    std::*,
+};
+
+/// A convenient wrapper over a FIFO vector.
+///
+/// This interface wraps a vector of items that is supposed to grow as a FIFO stack on
+/// initialization, but will later be used in a mostly immutable way during its lifetime.
+///
+/// The Sequence interface is compatible with the [Ref](crate::arena::Ref) weak references
+/// framework.
+///
+/// In contrast to [Repository](crate::arena::Repository), Sequence does not have a version
+/// management mechanism, as the collection is supposed to be immutable during its lifetime. For
+/// the sake of simplicity, there are no strict rules to enforce a distinction between the
+/// initialization and the usage stages, so an API user should utilize this collection with care.
+///
+/// Since the Sequence collection uses a Rust [Vector](Vec) under the hood, sequential iteration
+/// over this collection's items does not suffer from the cache misses issue.
+///
+/// ```rust
+/// use lady_deirdre::arena::Sequence;
+///
+/// let mut sequence = Sequence::<usize>::default();
+///
+/// sequence.push(10);
+/// sequence.push(20);
+///
+/// let first_item_ref = Sequence::<usize>::make_ref(0);
+///
+/// assert_eq!(sequence.get(&first_item_ref), Some(&10));
+///
+/// // The inner function returns a slice of the inner vector data.
+/// assert_eq!(&sequence.inner()[1], &20);
+/// ```
+///
+/// Alternatively, an API user can set up a Vector instance and then turn it into a Sequence:
+///
+/// ```rust
+/// use lady_deirdre::arena::Sequence;
+///
+/// let mut sequence = Sequence::<usize>::from(vec![10, 20]);
+///
+/// let first_item_ref = Sequence::<usize>::make_ref(0);
+///
+/// assert_eq!(sequence.get(&first_item_ref), Some(&10));
+///
+/// // Receiving the original inner vector from this collection.
+/// let original_vector = sequence.into_vec();
+///
+/// assert_eq!(&original_vector[1], &20);
+/// ```
+#[repr(transparent)]
+pub struct Sequence<T> {
+    entries: Vec<T>,
+}
+
+impl<T> Default for Sequence<T> {
+    #[inline(always)]
+    fn default() -> Self {
+        Self {
+            entries: Vec::new(),
+        }
+    }
+}
+
+impl<T> Debug for Sequence<T> {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        formatter.write_str("Sequence")
+    }
+}
+
+impl<T> From<Vec<T>> for Sequence<T> {
+    #[inline(always)]
+    fn from(entries: Vec<T>) -> Self {
+        Self { entries }
+    }
+}
+
+impl<T> Sequence<T> {
+    /// Creates a new collection instance with pre-allocated memory for at least `capacity` items
+    /// to be stored in.
+    #[inline(always)]
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            entries: Vec::with_capacity(capacity),
+        }
+    }
+
+    /// Pushes an item onto the top of the Sequence's inner FIFO vector.
+    ///
+    /// This function is supposed to be used on the instance initialization stage only.
+    ///
+    /// Returns a valid reference index to refer to the added item. This index can be used to
+    /// create a valid [Ref](crate::arena::Ref) instance using the
+    /// [make_ref](crate::arena::Sequence::make_ref) function.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Sequence;
+    ///
+    /// let mut sequence = Sequence::<usize>::default();
+    ///
+    /// let item_index = sequence.push(10);
+    /// let item_ref = Sequence::<usize>::make_ref(item_index);
+    ///
+    /// assert_eq!(sequence.get(&item_ref), Some(&10));
+    /// ```
+    #[inline(always)]
+    pub fn push(&mut self, data: T) -> RefIndex {
+        let index = self.entries.len();
+
+        self.entries.push(data);
+
+        index
+    }
+
+    /// Removes an item from the top of the Sequence's inner FIFO vector.
+    ///
+    /// This function is supposed to be used on the instance initialization stage only.
+    ///
+    /// Returns the removed item if the Sequence is not empty. Otherwise returns [None].
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Sequence;
+    ///
+    /// let mut sequence = Sequence::<usize>::default();
+    ///
+    /// let _ = sequence.push(10);
+    /// let _ = sequence.push(20);
+    ///
+    /// assert_eq!(sequence.pop(), Some(20));
+    /// assert_eq!(sequence.pop(), Some(10));
+    /// assert_eq!(sequence.pop(), None);
+    /// ```
+    #[inline(always)]
+    pub fn pop(&mut self) -> Option<T> {
+        self.entries.pop()
+    }
+
+    /// Reserves capacity for at least `additional` items to be inserted on top of this
+    /// collection.
+    #[inline(always)]
+    pub fn reserve(&mut self, additional: usize) {
+        self.entries.reserve(additional)
+    }
+
+    /// Returns `true` if the referred item exists in this collection.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::{Repository, Sequence};
+    ///
+    /// let mut repo = Repository::<usize>::default();
+    ///
+    /// let repo_item_ref = repo.insert(10);
+    ///
+    /// let mut seq = Sequence::<usize>::default();
+    ///
+    /// let seq_item_index = seq.push(20);
+    /// let seq_item_ref = Sequence::<usize>::make_ref(seq_item_index);
+    ///
+    /// // A Repository item reference is invalid for the Sequence collection.
+    /// assert!(!seq.contains(&repo_item_ref));
+    ///
+    /// // An inserted Sequence item reference is a valid reference for this Sequence collection.
+    /// assert!(seq.contains(&seq_item_ref));
+    /// ```
+    #[inline]
+    pub fn contains(&self, reference: &Ref) -> bool {
+        match reference {
+            Ref::Sequence { index } if self.entries.len() > *index => true,
+
+            _ => false,
+        }
+    }
+
+    /// Tries to dereference the referred item.
+    ///
+    /// Returns [None] if the referred item does not exist in this collection.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Sequence;
+    ///
+    /// let mut seq = Sequence::<usize>::default();
+    ///
+    /// let item_index = seq.push(10);
+    /// let item_ref = Sequence::<usize>::make_ref(item_index);
+    ///
+    /// assert_eq!(seq.get(&item_ref), Some(&10));
+    ///
+    /// let _ = seq.pop();
+    ///
+    /// // The referred item no longer exists in this collection.
+    /// assert_eq!(seq.get(&item_ref), None);
+    /// ```
+    #[inline]
+    pub fn get(&self, reference: &Ref) -> Option<&T> {
+        match reference {
+            Ref::Sequence { index } => self.entries.get(*index),
+
+            _ => None,
+        }
+    }
+
+    /// Tries to mutably dereference the referred item.
+    ///
+    /// Returns [None] if the referred item does not exist in this collection.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Sequence;
+    ///
+    /// let mut seq = Sequence::<usize>::default();
+    ///
+    /// let item_index = seq.push(10);
+    /// let item_ref = Sequence::<usize>::make_ref(item_index);
+    ///
+    /// *(seq.get_mut(&item_ref).unwrap()) = 20;
+    ///
+    /// assert_eq!(seq.get(&item_ref), Some(&20));
+    /// ```
+    #[inline]
+    pub fn get_mut(&mut self, reference: &Ref) -> Option<&mut T> {
+        match reference {
+            Ref::Sequence { index } => self.entries.get_mut(*index),
+
+            _ => None,
+        }
+    }
+
+    /// Removes all items from this collection preserving allocated memory.
+    ///
+    /// All references belonging to this collection implicitly turn invalid. However, if an API
+    /// user inserts new items later on, previously created references become valid again, because
+    /// the Sequence collection does not manage versions.
+    #[inline(always)]
+    pub fn clear(&mut self) {
+        self.entries.clear();
+    }
+
+    /// Moves the inner vector of items out of this collection.
+    #[inline(always)]
+    pub fn into_vec(self) -> Vec<T> {
+        self.entries
+    }
+
+    /// Returns an item weak reference by the item's internal index.
+    ///
+    /// This index could be received, for example, from the [push](Sequence::push) function.
+    ///
+    /// ```rust
+    /// use lady_deirdre::arena::Sequence;
+    ///
+    /// let mut seq = Sequence::<usize>::default();
+    ///
+    /// let item_index = seq.push(10);
+    /// let item_ref = Sequence::<usize>::make_ref(item_index);
+    ///
+    /// assert_eq!(seq.get(&item_ref), Some(&10));
+    ///
+    /// let _ = seq.pop();
+    ///
+    /// // The referred item no longer exists in this collection.
+    /// assert_eq!(seq.get(&item_ref), None);
+    ///
+    /// // Note, however, that since the Sequence collection does not manage versions, inserting
+    /// // a new item into this collection turns the previously created weak reference into
+    /// // a valid reference again, and that old reference will refer to the new item instance.
+    ///
+    /// let _ = seq.push(20);
+    /// assert_eq!(seq.get(&item_ref), Some(&20));
+    /// ```
+    #[inline(always)]
+    pub fn make_ref(index: RefIndex) -> Ref {
+        Ref::Sequence { index }
+    }
+
+    /// Returns an immutable slice of all items inside this collection.
+    ///
+    /// The returned data slice is indexable by the indices received from the
+    /// [push](Sequence::push) function.
+    #[inline(always)]
+    pub fn inner(&self) -> &[T] {
+        &self.entries[..]
+    }
+
+    /// Returns a mutable slice of all items inside this collection.
+    ///
+    /// The returned data slice is indexable by the indices received from the
+    /// [push](Sequence::push) function.
+    #[inline(always)]
+    pub fn inner_mut(&mut self) -> &mut [T] {
+        &mut self.entries[..]
+    }
+}
diff --git a/work/crates/main/src/incremental/cursor.rs b/work/crates/main/src/incremental/cursor.rs
new file mode 100644
index 0000000..0a8a15b
--- /dev/null
+++ b/work/crates/main/src/incremental/cursor.rs
@@ -0,0 +1,267 @@
+//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions.
// +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + arena::{Id, Identifiable}, + incremental::{storage::ChildRefIndex, Document}, + lexis::{Length, Site, SiteRef, SiteSpan, TokenCount, TokenCursor, TokenRef}, + std::*, + syntax::Node, +}; + +pub struct DocumentCursor<'document, N: Node> { + document: &'document Document, + next_chunk_ref: ChildRefIndex, + end_chunk_ref: ChildRefIndex, + peek_chunk_ref: ChildRefIndex, + peek_distance: TokenCount, +} + +impl<'document, N: Node> Identifiable for DocumentCursor<'document, N> { + #[inline(always)] + fn id(&self) -> &Id { + self.document.id() + } +} + +impl<'document, N: Node> TokenCursor<'document> for DocumentCursor<'document, N> { + type Token = N::Token; + + #[inline(always)] + fn advance(&mut self) -> bool { + if unsafe { self.next_chunk_ref.same_chunk_as(&self.end_chunk_ref) } { + return false; + } + + unsafe { self.next_chunk_ref.next() }; + + match self.peek_distance == 0 { + true => { + self.peek_chunk_ref = self.next_chunk_ref; + } + + false => { + self.peek_distance -= 1; + } + } + + true + } + + #[inline(always)] + fn token(&mut self, distance: TokenCount) -> Option<&'document Self::Token> { + if unsafe { self.next_chunk_ref.same_chunk_as(&self.end_chunk_ref) } { + return None; + } + + if unsafe { self.jump(distance) } { + return None; + } + + Some(unsafe { self.peek_chunk_ref.token() }) + } + + #[inline(always)] + fn site(&mut self, distance: TokenCount) -> Option { + if unsafe { self.next_chunk_ref.same_chunk_as(&self.end_chunk_ref) } { + return None; + } + + if unsafe { self.jump(distance) } { + return None; + } + + Some(unsafe { self.document.tree().site_of(&self.peek_chunk_ref) }) + } + + #[inline(always)] + fn length(&mut self, distance: TokenCount) -> Option { + if unsafe { self.next_chunk_ref.same_chunk_as(&self.end_chunk_ref) } { + return None; + } + + if unsafe { self.jump(distance) } { + return None; + } + + Some(*unsafe { self.peek_chunk_ref.span() }) + } + + #[inline(always)] + fn string(&mut self, distance: TokenCount) -> Option<&'document str> { + if unsafe { self.next_chunk_ref.same_chunk_as(&self.end_chunk_ref) } { + return None; + } + + if unsafe { self.jump(distance) } { + return None; + } + + Some(unsafe { self.peek_chunk_ref.string() }) + } + + #[inline(always)] + fn token_ref(&mut self, distance: TokenCount) -> TokenRef { + if unsafe { self.next_chunk_ref.same_chunk_as(&self.end_chunk_ref) } { + return TokenRef::nil(); + } + + if unsafe { self.jump(distance) } { + return TokenRef::nil(); + } + + let ref_index = 
unsafe { self.peek_chunk_ref.chunk_ref_index() }; + + let chunk_ref = unsafe { self.document.references.chunks().make_ref(ref_index) }; + + TokenRef { + id: *self.document.id(), + chunk_ref, + } + } + + #[inline(always)] + fn site_ref(&mut self, distance: TokenCount) -> SiteRef { + if unsafe { self.next_chunk_ref.same_chunk_as(&self.end_chunk_ref) } { + return self.end_site_ref(); + } + + if unsafe { self.jump(distance) } { + return self.end_site_ref(); + } + + let ref_index = unsafe { self.peek_chunk_ref.chunk_ref_index() }; + + let chunk_ref = unsafe { self.document.references.chunks().make_ref(ref_index) }; + + TokenRef { + id: *self.document.id(), + chunk_ref, + } + .site_ref() + } + + #[inline(always)] + fn end_site_ref(&mut self) -> SiteRef { + if self.end_chunk_ref.is_dangling() { + return SiteRef::new_code_end(*self.document.id()); + } + + let ref_index = unsafe { self.end_chunk_ref.chunk_ref_index() }; + + let chunk_ref = unsafe { self.document.references.chunks().make_ref(ref_index) }; + + TokenRef { + id: *self.document.id(), + chunk_ref, + } + .site_ref() + } +} + +impl<'document, N: Node> DocumentCursor<'document, N> { + pub(super) fn new(document: &'document Document, mut span: SiteSpan) -> Self { + let mut next_chunk_ref = document.tree().lookup(&mut span.start); + let mut end_chunk_ref = document.tree().lookup(&mut span.end); + + if next_chunk_ref.is_dangling() { + next_chunk_ref = document.tree().last(); + } else if span.start == 0 && unsafe { !next_chunk_ref.is_first() } { + unsafe { next_chunk_ref.back() }; + } + + if !end_chunk_ref.is_dangling() { + unsafe { end_chunk_ref.next() }; + } + + Self { + document, + next_chunk_ref, + end_chunk_ref, + peek_chunk_ref: next_chunk_ref, + peek_distance: 0, + } + } + + // Returns `true` if jump has failed. + // Safety: `self.next_chunk_ref` behind the `self.end_chunk_ref`. + #[inline] + unsafe fn jump(&mut self, target: TokenCount) -> bool { + while self.peek_distance < target { + self.peek_distance += 1; + + unsafe { self.peek_chunk_ref.next() }; + + if unsafe { self.peek_chunk_ref.same_chunk_as(&self.end_chunk_ref) } { + self.peek_distance = 0; + self.peek_chunk_ref = self.next_chunk_ref; + return true; + } + } + + if self.peek_distance > target * 2 { + self.peek_distance = 0; + self.peek_chunk_ref = self.next_chunk_ref; + + while self.peek_distance < target { + self.peek_distance += 1; + + unsafe { self.peek_chunk_ref.next() }; + + debug_assert!( + !self.peek_chunk_ref.is_dangling(), + "Internal error. Dangling peek ref.", + ); + } + + return false; + } + + while self.peek_distance > target { + unsafe { self.peek_chunk_ref.back() } + + debug_assert!( + !self.peek_chunk_ref.is_dangling(), + "Internal error. Dangling peek ref.", + ); + + self.peek_distance -= 1; + } + + false + } +} diff --git a/work/crates/main/src/incremental/document.rs b/work/crates/main/src/incremental/document.rs new file mode 100644 index 0000000..a047a37 --- /dev/null +++ b/work/crates/main/src/incremental/document.rs @@ -0,0 +1,1173 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. 
// +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Id, Identifiable, Ref},
+    incremental::{
+        cursor::DocumentCursor,
+        errors::DocumentErrorIterator,
+        lexis::IncrementalLexisSession,
+        storage::{ChildRefIndex, ClusterCache, References, Tree},
+        syntax::IncrementalSyntaxSession,
+    },
+    lexis::{
+        utils::{split_left, split_right},
+        Length,
+        Site,
+        SiteRefInner,
+        SiteSpan,
+        SourceCode,
+        ToSpan,
+        TokenBuffer,
+        TokenCount,
+        CHUNK_SIZE,
+    },
+    std::*,
+    syntax::{Cluster, NoSyntax, Node, NodeRef, SyntaxTree, NON_ROOT_RULE, ROOT_RULE},
+};
+
+/// An incrementally managed compilation unit.
+///
+/// Document is a storage of a compilation unit (the source code of a file) with incremental
+/// update operations. The Document object stores the source code, the lexical structure of the
+/// code, and the syntax structure of the code. This is the main entry point of the crate API.
+///
+/// Document is responsible for loading the source code, parsing the source code grammar,
+/// constructing the lexical and syntax structure of the code, and performing update operations
+/// incrementally, keeping the code, lexis and syntax structures in sync with the changes.
+///
+/// Depending on the needs of the end compilation system, there could be several instances of
+/// this object, one per compilation unit (per each file of the compiled project's file
+/// structure).
+///
+/// ## Instantiation.
+///
+/// An API user specifies the Document grammar using the generic type parameter `N` of the
+/// [Node](crate::syntax::Node) type.
+///
+/// To opt out of the syntax analysis stage (e.g. if the syntax grammar is unknown or not needed
+/// in a particular case), an API user uses a special implementation of the Node called
+/// [`NoSyntax`](crate::syntax::NoSyntax) that forces the Document to skip syntax analysis
+/// and Syntax Tree construction, persisting the lexical structure only.
+///
+/// There are three ways to initially load the source code text into the Document:
+/// 1. By loading from a relatively small string snippet.
+/// ```rust
+/// use lady_deirdre::{Document, syntax::SimpleNode};
+///
+/// let _ = Document::<SimpleNode>::from("test string");
+/// ```
+/// 2. By initializing an empty Document and using the [write](Document::write) operation on
+/// the instance.
+/// ```rust
+/// use lady_deirdre::{Document, syntax::SimpleNode};
+///
+/// let mut doc = Document::<SimpleNode>::default();
+/// doc.write(.., "test string");
+/// ```
+/// 3. By using a dedicated [TokenBuffer](crate::lexis::TokenBuffer) instance to preload a large
+/// file.
+/// ```rust
+/// use lady_deirdre::{Document, syntax::SimpleNode, lexis::TokenBuffer};
+///
+/// let mut buffer = TokenBuffer::default();
+/// buffer.append("First line.\n");
+/// buffer.append("Second line.\nThird line.\n");
+///
+/// let _doc = buffer.into_document::<SimpleNode>();
+/// ```
+///
+/// As the TokenBuffer provides functionality for fast line-by-line lexis pre-parsing, the last
+/// option is the most preferable (but the most verbose) way for production use.
+///
+/// ## Updating.
+///
+/// An API user performs write operations into the Document using the [write](Document::write)
+/// function, specifying a [Span](crate::lexis::ToSpan) of the code to rewrite (possibly an empty
+/// span) and a string to insert in place of this spanned text. The Document performs update
+/// operations in time relative to the size of the changes, so it is totally fine to call this
+/// function on every end-user input action, even on large documents.
+///
+/// ```rust
+/// use lady_deirdre::{Document, syntax::SimpleNode, lexis::CodeContent};
+///
+/// let mut doc = Document::<SimpleNode>::from("test string");
+///
+/// // Writing another string at the beginning of the Document.
+/// doc.write(0..0, "Foo ");
+/// assert_eq!(doc.substring(..), "Foo test string");
+///
+/// // Removing the "test " substring.
+/// doc.write(4..9, "");
+/// assert_eq!(doc.substring(..), "Foo string");
+///
+/// // Surrounding the substring "str" with parentheses.
+/// doc.write(4..7, "(str)");
+/// assert_eq!(doc.substring(..), "Foo (str)ing");
+/// ```
+///
+/// There are several ways to specify this Span. In particular, an API user can utilize simple
+/// ranges of absolute character indices ([Sites](crate::lexis::Site), as in the example above),
+/// ranges of column-row [Positions](crate::lexis::Position), or ranges of
+/// [token weak references](crate::lexis::TokenRef).
+///
+/// ## Inspecting Lexis Structure.
+///
+/// Document implements the [SourceCode](crate::lexis::SourceCode) trait and the
+/// [CodeContent](crate::lexis::CodeContent) extension trait that provide lexical structure
+/// inspection features.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     Document,
+///     lexis::{SourceCode, CodeContent, SimpleToken},
+///     syntax::SimpleNode,
+/// };
+///
+/// let doc = Document::<SimpleNode>::from("foo bar baz");
+///
+/// // The number of characters in the Document.
+/// assert_eq!(doc.length(), 11);
+///
+/// // The number of tokens in the Document (including whitespace tokens).
+/// assert_eq!(doc.token_count(), 5);
+///
+/// // A substring of the Document source code.
+/// assert_eq!(doc.substring(1..6), "oo ba");
+///
+/// // A set of lengths of the tokens that "touch" the specified span.
+/// assert_eq!(doc.chunks(5..7).map(|chunk| chunk.length).collect::<Vec<_>>(), vec![3, 1]);
+///
+/// // A set of strings of the tokens that "touch" the specified span.
+/// assert_eq!(doc.chunks(5..7).map(|chunk| chunk.string).collect::<Vec<_>>(), vec!["bar", " "]);
+/// ```
+///
+/// An API user utilizes the lower-level [TokenCursor](crate::lexis::TokenCursor) API to traverse
+/// and to inspect individual token metadata.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     Document,
+///     lexis::{SourceCode, CodeContent, TokenCursor, SimpleToken},
+///     syntax::SimpleNode
+/// };
+///
+/// let mut doc = Document::<SimpleNode>::from("foo bar baz");
+///
+/// // A generic "iterator" over the tokens at the specified Site (token "bar").
+/// let mut cursor = doc.cursor(5..5);
+///
+/// // A reference to the first token "bar" of this cursor.
+/// let token_ref = cursor.token_ref(0);
+///
+/// // "bar" is of the "Identifier" type.
+/// assert_eq!(token_ref.deref(&doc), Some(&SimpleToken::Identifier));
+/// assert_eq!(token_ref.string(&doc), Some("bar"));
+///
+/// // Write something at the beginning of the Document.
+/// doc.write(0..0, "123");
+/// assert_eq!(doc.substring(..), "123foo bar baz");
+///
+/// // The TokenRef is still dereferenceable after the Document changes, because the token was
+/// // not affected by these changes.
+/// assert_eq!(token_ref.string(&doc), Some("bar"));
+///
+/// // And we can write something at the token start Site too.
+/// let token_start_site_ref = token_ref.site_ref();
+/// doc.write(token_start_site_ref..token_start_site_ref, "X");
+/// assert_eq!(doc.substring(..), "123foo Xbar baz");
+///
+/// // However, the TokenRef is no longer valid because the token has been rewritten by
+/// // the previous write action.
+/// assert_eq!(token_ref.string(&doc), None);
+/// ```
+///
+/// ## Inspecting Syntax Structure.
+///
+/// Document implements the [SyntaxTree](crate::syntax::SyntaxTree) trait that provides
+/// Syntax Tree and Syntax Errors access features.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     Document,
+///     syntax::{SimpleNode, SyntaxTree, NodeRef},
+///     lexis::{CodeContent, ToSpan},
+/// };
+///
+/// let mut doc = Document::<SimpleNode>::from("foo ([bar] {baz})");
+///
+/// // Returns a weak reference to the root of the SyntaxTree.
+/// // It is OK to copy this reference and reuse the copy many times.
+/// let root_ref = *doc.root();
+///
+/// // A simple parens structure formatter that traverses the Syntax Tree.
+/// fn fmt(doc: &Document<SimpleNode>, node_ref: &NodeRef) -> String {
+///     let node = match node_ref.deref(doc) {
+///         Some(node) => node,
+///         // If the NodeRef is invalid, it means that the syntax parser failed
+///         // to parse a particular part of the source code due to syntax errors.
+///         None => return format!("?"),
+///     };
+///
+///     let children = match node {
+///         SimpleNode::Root { inner } => inner,
+///         SimpleNode::Braces { inner } => inner,
+///         SimpleNode::Brackets { inner } => inner,
+///         SimpleNode::Parenthesis { inner } => inner,
+///     };
+///
+///     let children_fmt = children
+///         .iter()
+///         .map(|node_ref| fmt(doc, node_ref))
+///         .collect::<Vec<_>>().join(", ");
+///
+///     match node {
+///         SimpleNode::Root { .. } => children_fmt,
+///         SimpleNode::Braces { .. } => format!("{{{}}}", children_fmt),
+///         SimpleNode::Brackets { .. } => format!("[{}]", children_fmt),
+///         SimpleNode::Parenthesis { .. } => format!("({})", children_fmt),
+///     }
+/// }
+///
+/// assert_eq!(fmt(&doc, &root_ref).as_str(), "([], {})");
+///
+/// // Writing another bracket snippet at the beginning of the Document.
+/// doc.write(0..0, "[{x} [y] (z)]");
+/// assert_eq!(doc.substring(..), "[{x} [y] (z)]foo ([bar] {baz})");
+/// assert_eq!(fmt(&doc, &root_ref).as_str(), "[{}, [], ()], ([], {})");
+///
+/// // The Document is resistant to syntax errors, preserving the original Tree structure.
+/// // Removing the second char "{".
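+/// // (After this write, the damaged braces pair around "x" drops out of the Tree, while the
+/// // unaffected sibling nodes survive; the assertions below show the recovered structure.)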
+/// doc.write(1..2, ""); +/// assert_eq!(doc.substring(..), "[x} [y] (z)]foo ([bar] {baz})"); +/// assert_eq!(fmt(&doc, &root_ref).as_str(), "[[], ()], ([], {})"); +/// +/// // Collecting syntax errors. +/// let errors = doc.errors() +/// .map(|error| format!("{}: {}", error.span().format(&doc), error)) +/// .collect::>() +/// .join("\n"); +/// assert_eq!( +/// errors.as_str(), +/// "[1:3]: Brackets format mismatch. Expected Braces, Brackets, Parenthesis, or $BracketClose.", +/// ); +/// +/// // Syntax Tree is a mutable structure. +/// // Adding artificial empty braces Node to the Root. +/// { +/// let new_node = SimpleNode::Braces { inner: vec![] }; +/// let new_node_ref = root_ref.cluster().link_node(&mut doc, new_node); +/// +/// match root_ref.deref_mut(&mut doc).unwrap() { +/// SimpleNode::Root { inner } => { inner.push(new_node_ref) }, +/// _ => unreachable!() +/// } +/// } +/// +/// assert_eq!(doc.substring(..), "[x} [y] (z)]foo ([bar] {baz})"); +/// assert_eq!(fmt(&doc, &root_ref).as_str(), "[[], ()], ([], {}), {}"); +/// ``` +pub struct Document { + id: Id, + root_cluster: Cluster, + root_node_ref: NodeRef, + tree: Tree, + token_count: TokenCount, + pub(super) references: References, +} + +impl Drop for Document { + fn drop(&mut self) { + let _ = unsafe { self.tree.free(&mut self.references) }; + } +} + +impl Debug for Document { + #[inline] + fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult { + formatter + .debug_struct("Document") + .field("id", &self.id) + .field("length", &self.tree.length()) + .finish_non_exhaustive() + } +} + +impl Identifiable for Document { + #[inline(always)] + fn id(&self) -> &Id { + &self.id + } +} + +impl SourceCode for Document { + type Token = N::Token; + + type Cursor<'code> = DocumentCursor<'code, N>; + + #[inline(always)] + fn contains(&self, chunk_ref: &Ref) -> bool { + self.references.chunks().contains(chunk_ref) + } + + #[inline(always)] + fn get_token(&self, chunk_ref: &Ref) -> Option<&Self::Token> { + let chunk_ref = self.references.chunks().get(chunk_ref)?; + + debug_assert!( + !chunk_ref.is_dangling(), + "Internal error. Dangling chunk ref in the References repository." + ); + + Some(unsafe { chunk_ref.token() }) + } + + #[inline(always)] + fn get_token_mut(&mut self, chunk_ref: &Ref) -> Option<&mut Self::Token> { + let chunk_ref = self.references.chunks().get(chunk_ref)?; + + debug_assert!( + !chunk_ref.is_dangling(), + "Internal error. Dangling chunk ref in the References repository." + ); + + Some(unsafe { chunk_ref.token_mut() }) + } + + #[inline(always)] + fn get_site(&self, chunk_ref: &Ref) -> Option { + let chunk_ref = self.references.chunks().get(chunk_ref)?; + + Some(unsafe { self.tree.site_of(chunk_ref) }) + } + + #[inline(always)] + fn get_string(&self, chunk_ref: &Ref) -> Option<&str> { + let chunk_ref = self.references.chunks().get(chunk_ref)?; + + debug_assert!( + !chunk_ref.is_dangling(), + "Internal error. Dangling chunk ref in the References repository." + ); + + Some(unsafe { chunk_ref.string() }) + } + + #[inline(always)] + fn get_length(&self, chunk_ref: &Ref) -> Option { + let chunk_ref = self.references.chunks().get(chunk_ref)?; + + debug_assert!( + !chunk_ref.is_dangling(), + "Internal error. Dangling chunk ref in the References repository." 
+ ); + + Some(*unsafe { chunk_ref.span() }) + } + + #[inline(always)] + fn cursor(&self, span: impl ToSpan) -> Self::Cursor<'_> { + let span = match span.to_span(self) { + None => panic!("Specified span is invalid."), + + Some(span) => span, + }; + + Self::Cursor::new(self, span) + } + + #[inline(always)] + fn length(&self) -> Length { + self.tree.length() + } + + #[inline(always)] + fn token_count(&self) -> TokenCount { + self.token_count + } +} + +impl SyntaxTree for Document { + type Node = N; + + type ErrorIterator<'document> = DocumentErrorIterator<'document, Self::Node>; + + #[inline(always)] + fn root(&self) -> &NodeRef { + &self.root_node_ref + } + + #[inline(always)] + fn errors(&self) -> Self::ErrorIterator<'_> { + let cursor = self.tree.first(); + let current = (&self.root_cluster.errors).into_iter(); + + Self::ErrorIterator { + id: &self.id, + cursor, + current, + } + } + + #[inline(always)] + fn contains(&self, cluster_ref: &Ref) -> bool { + match cluster_ref { + Ref::Primary => true, + + Ref::Repository { .. } => self.references.clusters().contains(cluster_ref), + + _ => false, + } + } + + #[inline(always)] + fn get_cluster(&self, cluster_ref: &Ref) -> Option<&Cluster> { + match cluster_ref { + Ref::Primary => Some(&self.root_cluster), + + Ref::Repository { .. } => { + let chunk_ref = self.references.clusters().get(cluster_ref)?; + + let cluster_cache = unsafe { chunk_ref.cache()? }; + + Some(&cluster_cache.cluster) + } + + _ => None, + } + } + + #[inline(always)] + fn get_cluster_mut(&mut self, cluster_ref: &Ref) -> Option<&mut Cluster> { + match cluster_ref { + Ref::Primary => Some(&mut self.root_cluster), + + Ref::Repository { .. } => { + let chunk_ref = self.references.clusters().get(cluster_ref)?; + + let cluster_cache = unsafe { chunk_ref.cache_mut()? }; + + Some(&mut cluster_cache.cluster) + } + + _ => None, + } + } +} + +impl Default for Document { + #[inline(always)] + fn default() -> Self { + let id = Id::new(); + let mut tree = Tree::default(); + let mut references = References::default(); + + let root_cluster = Self::initial_parse(&id, &mut tree, &mut references); + + let root_node_ref = NodeRef { + id, + cluster_ref: Ref::Primary, + node_ref: Ref::Primary, + }; + + Self { + id, + root_cluster, + root_node_ref, + tree, + token_count: 0, + references, + } + } +} + +impl From for Document +where + N: Node, + S: Borrow, +{ + #[inline(always)] + fn from(string: S) -> Self { + let mut buffer = TokenBuffer::::default(); + + buffer.append(string.borrow()); + + Self::from_buffer(buffer) + } +} + +impl Document { + #[inline] + pub(crate) fn from_buffer(buffer: TokenBuffer) -> Self { + let id = Id::new(); + + let token_count = buffer.token_count(); + let spans = buffer.spans.into_vec().into_iter(); + let strings = buffer.strings.into_vec().into_iter(); + let tokens = buffer.tokens.into_vec().into_iter(); + + let mut references = References::with_capacity(token_count); + + let mut tree = + unsafe { Tree::from_chunks(&mut references, token_count, spans, strings, tokens) }; + + let root_cluster = Self::initial_parse(&id, &mut tree, &mut references); + + let root_node_ref = NodeRef { + id, + cluster_ref: Ref::Primary, + node_ref: Ref::Primary, + }; + + Self { + id, + root_cluster, + root_node_ref, + tree, + token_count, + references, + } + } + + /// Replaces a spanned substring of the source code with provided `text` string, and re-parses + /// Document's lexical and syntax structure relatively to these changes. 
+    ///
+    /// Operation performance complexity is relative to the `span` and the `text` sizes. As such,
+    /// it is fine to call this function frequently for relatively small changes, even for
+    /// Documents that hold large source code. For example, it is fine to call this function on
+    /// every end-user keyboard typing action.
+    ///
+    /// The amount of the Document's original lexis and syntax structure to be re-created after
+    /// this operation completes is not specified. The implementation tends to re-use as much
+    /// data from the original structures as possible. However, some weak references into the
+    /// Document [tokens](crate::lexis::TokenRef), [sites](crate::lexis::SiteRef),
+    /// [nodes](crate::syntax::NodeRef), [clusters](crate::syntax::Cluster) and
+    /// [errors](crate::syntax::ErrorRef) may become obsolete.
+    ///
+    /// There are many ways to specify the `span` of the source code. The most trivial way is
+    /// to use a [Range](std::ops::Range) of absolute character indices (`120..128`). Another way
+    /// is to specify a range of column-row [positions](crate::lexis::Position):
+    /// `Position::new(10, 20)..Position::new(10, 28)`. For details, see the
+    /// [ToSpan](crate::lexis::ToSpan) documentation.
+    ///
+    /// Note that the Span could be an empty range. In this case the `span` object specifies just
+    /// a cursor inside the code, and the Write operation becomes an Insertion operation of the
+    /// specified `text`. If `text` is an empty string, the Write operation becomes a Deletion
+    /// operation.
+    ///
+    /// ```rust
+    /// use lady_deirdre::{Document, lexis::CodeContent, syntax::SimpleNode};
+    ///
+    /// let mut doc = Document::<SimpleNode>::from("foo bar baz");
+    ///
+    /// doc.write(4..7, "BaR");
+    ///
+    /// assert_eq!(doc.substring(..), "foo BaR baz");
+    /// ```
+    ///
+    /// The Write operation will panic if the `span` cannot be turned into a
+    /// [SiteSpan](crate::lexis::SiteSpan); in other words, if the Span is not a valid span for
+    /// this Document instance. This is practically impossible when an API user specifies
+    /// arbitrary numeric values such as ranges of absolute character indices or ranges of
+    /// Positions, but it could happen, for example, if the user provides a range of
+    /// [SiteRef](crate::lexis::SiteRef)s, because Site weak references can become obsolete.
+    /// In this case an API user can check the span's validity beforehand using the
+    /// [is_valid_span](crate::lexis::ToSpan::is_valid_span) function.
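+    ///
+    /// A minimal sketch of a Position-based span (assuming the one-based line/column convention
+    /// that the error formatting example above also uses):
+    ///
+    /// ```rust
+    /// use lady_deirdre::{Document, lexis::{CodeContent, Position}, syntax::SimpleNode};
+    ///
+    /// let mut doc = Document::<SimpleNode>::from("foo bar baz");
+    ///
+    /// // Rewrites the "bar" substring addressed by line-column coordinates.
+    /// doc.write(Position::new(1, 5)..Position::new(1, 8), "BAR");
+    ///
+    /// assert_eq!(doc.substring(..), "foo BAR baz");
+    /// ```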
+ /// + #[inline(never)] + pub fn write(&mut self, span: impl ToSpan, text: impl AsRef) { + let span = match span.to_span(self) { + None => panic!("Specified span is invalid."), + + Some(span) => span, + }; + + let text = text.as_ref(); + + if span.is_empty() && text.is_empty() { + return; + } + + let cursor = self.update_lexis(span, text); + + if TypeId::of::() == TypeId::of::::Token>>() { + return; + } + + self.update_syntax(cursor); + } + + #[inline(always)] + pub(super) fn tree(&self) -> &Tree { + &self.tree + } + + fn update_lexis(&mut self, mut span: SiteSpan, text: &str) -> Cover { + let mut head; + let mut head_offset; + let mut tail; + let mut tail_offset; + + match span.start == span.end { + false => { + head_offset = span.start; + head = self.tree.lookup(&mut head_offset); + tail_offset = span.end; + tail = self.tree.lookup(&mut tail_offset); + } + + true => { + head_offset = span.start; + head = self.tree.lookup(&mut head_offset); + tail_offset = head_offset; + tail = head; + } + } + + let mut input = Vec::with_capacity(3); + + if head_offset > 0 { + debug_assert!( + !head.is_dangling(), + "Internal error. Dangling reference with non-zero offset.", + ); + + input.push(split_left(unsafe { head.string() }, head_offset)); + + span.start -= head_offset; + } else { + let moved = match head.is_dangling() { + false => match unsafe { !head.is_first() } { + true => { + unsafe { head.back() } + true + } + + false => false, + }, + + true => { + head = self.tree.last(); + + !head.is_dangling() + } + }; + + if moved { + let head_string = unsafe { head.string() }; + let head_span = unsafe { *head.span() }; + + input.push(head_string); + + span.start -= head_span; + } + } + + if !text.is_empty() { + input.push(text); + } + + if tail_offset > 0 { + debug_assert!( + !tail.is_dangling(), + "Internal error. 
Dangling reference with non-zero offset.", + ); + + let length = unsafe { *tail.span() }; + + input.push(split_right(unsafe { tail.string() }, tail_offset)); + + span.end += length - tail_offset; + + unsafe { tail.next() } + } + + let mut product = + unsafe { IncrementalLexisSession::run(text.len() / CHUNK_SIZE + 2, &input, tail) }; + + span.end += product.tail_length; + + let mut skip = 0; + + loop { + if head.is_dangling() { + break; + } + + if unsafe { head.same_chunk_as(&product.tail_ref) } { + break; + } + + let product_string = match product.strings.get(skip) { + Some(string) => string.as_str(), + None => break, + }; + + let head_string = unsafe { head.string() }; + + if product_string == head_string { + let head_span = unsafe { *head.span() }; + + span.start += head_span; + product.length -= head_span; + skip += 1; + + unsafe { head.next() }; + + continue; + } + + break; + } + + loop { + if product.count() == skip { + break; + } + + if unsafe { head.same_chunk_as(&product.tail_ref) } { + break; + } + + let last = match product.tail_ref.is_dangling() { + false => { + let mut previous = product.tail_ref; + + unsafe { previous.back() }; + + previous + } + + true => self.tree.last(), + }; + + if last.is_dangling() { + break; + } + + let product_string = match product.strings.last() { + Some(string) => string.as_str(), + None => break, + }; + + let last_string = unsafe { last.string() }; + + if product_string == last_string { + let last_span = unsafe { *last.span() }; + + span.end -= last_span; + + let _ = product.spans.pop(); + let _ = product.strings.pop(); + let _ = product.tokens.pop(); + + product.length -= last_span; + product.tail_ref = last; + + continue; + } + + break; + } + + if head.is_dangling() { + debug_assert!( + product.tail_ref.is_dangling(), + "Internal error. Dangling head and non-dangling tail.", + ); + + let token_count = product.count() - skip; + + let tail_tree = unsafe { + Tree::from_chunks( + &mut self.references, + token_count, + product.spans.into_iter().skip(skip), + product.strings.into_iter().skip(skip), + product.tokens.into_iter().skip(skip), + ) + }; + + let insert_span = tail_tree.length(); + + unsafe { self.tree.join(&mut self.references, tail_tree) }; + + self.token_count += token_count; + + let chunk_ref = { + let mut point = span.start; + + let chunk_ref = self.tree.lookup(&mut point); + + debug_assert_eq!(point, 0, "Internal error. Bad span alignment."); + + chunk_ref + }; + + return Cover { + chunk_ref, + span: span.start..(span.start + insert_span), + lookahead: 0, + }; + } + + let insert_count = product.count() - skip; + + if let Some(remove_count) = unsafe { head.continuous_to(&product.tail_ref) } { + if unsafe { self.tree.is_writeable(&head, remove_count, insert_count) } { + let (chunk_ref, insert_span) = unsafe { + self.tree.write( + &mut self.references, + head, + remove_count, + insert_count, + product.spans.into_iter().skip(skip), + product.strings.into_iter().skip(skip), + product.tokens.into_iter().skip(skip), + ) + }; + + self.token_count += insert_count; + self.token_count -= remove_count; + + return Cover { + chunk_ref, + span: span.start..(span.start + insert_span), + lookahead: 0, + }; + } + } + + let mut middle = unsafe { self.tree.split(&mut self.references, head) }; + + let middle_split_point = { + let mut point = span.end - span.start; + + let chunk_ref = middle.lookup(&mut point); + + debug_assert_eq!(point, 0, "Internal error. 
Bad span alignment."); + + chunk_ref + }; + + let right = unsafe { middle.split(&mut self.references, middle_split_point) }; + + let remove_count; + let insert_span; + + { + let replacement = unsafe { + Tree::from_chunks( + &mut self.references, + insert_count, + product.spans.into_iter().skip(skip), + product.strings.into_iter().skip(skip), + product.tokens.into_iter().skip(skip), + ) + }; + + insert_span = replacement.length(); + + remove_count = unsafe { replace(&mut middle, replacement).free(&mut self.references) }; + }; + + unsafe { self.tree.join(&mut self.references, middle) }; + unsafe { self.tree.join(&mut self.references, right) }; + + self.token_count += insert_count; + self.token_count -= remove_count; + + head = { + let mut point = span.start; + + let chunk_ref = self.tree.lookup(&mut point); + + debug_assert_eq!(point, 0, "Internal error. Bad span alignment."); + + chunk_ref + }; + + Cover { + chunk_ref: head, + span: span.start..(span.start + insert_span), + lookahead: 0, + } + } + + fn update_syntax(&mut self, mut cover: Cover) { + loop { + let mut shift; + let mut rule; + + match cover.chunk_ref.is_dangling() { + false => match unsafe { cover.chunk_ref.is_first() } { + true => { + shift = 0; + rule = ROOT_RULE; + } + + false => { + unsafe { cover.chunk_ref.back() }; + + shift = unsafe { *cover.chunk_ref.span() }; + + rule = NON_ROOT_RULE; + } + }, + + true => match self.tree.length() == 0 { + true => { + shift = 0; + rule = ROOT_RULE; + } + + false => { + cover.chunk_ref = self.tree.last(); + + shift = unsafe { *cover.chunk_ref.span() }; + + rule = NON_ROOT_RULE; + } + }, + } + + if rule != ROOT_RULE { + loop { + { + match unsafe { cover.chunk_ref.cache() } { + None => { + unsafe { cover.chunk_ref.back() }; + + match cover.chunk_ref.is_dangling() { + false => { + shift += unsafe { *cover.chunk_ref.span() }; + continue; + } + + true => { + rule = ROOT_RULE; + break; + } + } + } + + Some(cache_cluster) => { + let parse_end_site = unsafe { cache_cluster.end_site(self) }; + + if let Some(parse_end_site) = parse_end_site { + if parse_end_site + cache_cluster.lookahead < cover.span.start { + unsafe { cover.chunk_ref.back() }; + + match cover.chunk_ref.is_dangling() { + false => { + shift += unsafe { *cover.chunk_ref.span() }; + continue; + } + + true => { + rule = ROOT_RULE; + break; + } + } + } + + if parse_end_site >= cover.span.end { + cover.span.start -= shift; + cover.span.end = parse_end_site; + cover.lookahead = cache_cluster.lookahead; + rule = cache_cluster.rule; + break; + } + } + } + } + } + + let ref_index = unsafe { cover.chunk_ref.remove_cache() }; + + unsafe { self.references.clusters_mut().remove_unchecked(ref_index) }; + } + } + + match rule == ROOT_RULE { + false => { + let cluster_ref = unsafe { + let cluster_ref_index = cover.chunk_ref.cache_index(); + + self.references.clusters_mut().make_ref(cluster_ref_index) + }; + + let (cluster_cache, parsed_end_site, _lookahead) = unsafe { + IncrementalSyntaxSession::run( + &self.id, + &mut self.tree, + &mut self.references, + rule, + cover.span.start, + cover.chunk_ref, + cluster_ref, + ) + }; + + unsafe { cover.chunk_ref.update_cache(cluster_cache) }; + + //todo check lookahead too + if cover.span.end == parsed_end_site { + break; + } + + cover.span.end = cover.span.end.max(parsed_end_site); + } + + true => { + let head = self.tree.first(); + + let (cluster_cache, mut parsed_end_site, _lookahead) = unsafe { + IncrementalSyntaxSession::run( + &self.id, + &mut self.tree, + &mut self.references, + ROOT_RULE, + 0, + 
head, + Ref::Primary, + ) + }; + + self.root_cluster = cluster_cache.cluster; + + let mut tail = self.tree.lookup(&mut parsed_end_site); + + debug_assert_eq!( + parsed_end_site, 0, + "Internal error. Incorrect span alignment." + ); + + while !tail.is_dangling() { + let has_cache = unsafe { tail.cache().is_some() }; + + if has_cache { + let ref_index = unsafe { tail.remove_cache() }; + + unsafe { self.references.clusters_mut().remove_unchecked(ref_index) }; + } + + unsafe { tail.next() } + } + + break; + } + } + } + } + + // Safety: + // 1. All references of the `tree` belong to `references` instance. + #[inline(always)] + fn initial_parse<'document>( + id: &'document Id, + tree: &'document mut Tree, + references: &'document mut References, + ) -> Cluster { + let head = tree.first(); + + let (cluster_cache, _parsed_end_site, _lookahead) = unsafe { + IncrementalSyntaxSession::run(id, tree, references, ROOT_RULE, 0, head, Ref::Primary) + }; + + cluster_cache.cluster + } +} + +struct Cover { + chunk_ref: ChildRefIndex, + span: SiteSpan, + lookahead: Length, +} + +impl ClusterCache { + // Safety: + // 1. ClusterCache belongs to specified `document` instance. + #[inline(always)] + pub(super) unsafe fn jump_to_end( + &self, + tree: &Tree, + references: &References, + ) -> (Site, ChildRefIndex) { + match self.parsed_end.inner() { + SiteRefInner::ChunkStart(token_ref) => { + let chunk_ref_index = match &token_ref.chunk_ref { + Ref::Repository { index, .. } => *index, + + _ => { + #[cfg(debug_assertions)] + { + unreachable!( + "Internal error. Incorrect cluster cache end site Ref type.", + ); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + let chunk_ref = unsafe { references.chunks().get_unchecked(chunk_ref_index) }; + + let site = unsafe { tree.site_of(chunk_ref) }; + + (site, *chunk_ref) + } + + SiteRefInner::CodeEnd(_) => (tree.length(), ChildRefIndex::dangling()), + } + } + + // Safety: + // 1. ClusterCache belongs to specified `document` instance. + #[inline(always)] + unsafe fn end_site(&self, document: &Document) -> Option { + match self.parsed_end.inner() { + SiteRefInner::ChunkStart(token_ref) => { + let chunk_ref = document.references.chunks().get(&token_ref.chunk_ref)?; + + Some(unsafe { document.tree.site_of(chunk_ref) }) + } + + SiteRefInner::CodeEnd(_) => Some(document.tree.length()), + } + } +} diff --git a/work/crates/main/src/incremental/errors.rs b/work/crates/main/src/incremental/errors.rs new file mode 100644 index 0000000..b631d69 --- /dev/null +++ b/work/crates/main/src/incremental/errors.rs @@ -0,0 +1,91 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. 
// +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + arena::{Id, Identifiable, RepositoryIterator}, + incremental::storage::ChildRefIndex, + std::*, + syntax::Node, +}; + +pub struct DocumentErrorIterator<'document, N: Node> { + pub(super) id: &'document Id, + pub(super) cursor: ChildRefIndex, + pub(super) current: RepositoryIterator<'document, N::Error>, +} + +impl<'document, N: Node> Identifiable for DocumentErrorIterator<'document, N> { + #[inline(always)] + fn id(&self) -> &Id { + self.id + } +} + +impl<'document, N: Node> Iterator for DocumentErrorIterator<'document, N> { + type Item = &'document N::Error; + + #[inline] + fn next(&mut self) -> Option { + if let Some(error) = self.current.next() { + return Some(error); + } + + while !self.cursor.is_dangling() { + if let Some(cache) = unsafe { self.cursor.cache() } { + let mut iterator = (&cache.cluster.errors).into_iter(); + + match iterator.next() { + None => (), + + Some(result) => { + unsafe { self.cursor.next() }; + + self.current = iterator; + + return Some(result); + } + } + } + + unsafe { self.cursor.next() }; + } + + None + } +} + +impl<'document, N: Node> FusedIterator for DocumentErrorIterator<'document, N> {} diff --git a/work/crates/main/src/incremental/lexis.rs b/work/crates/main/src/incremental/lexis.rs new file mode 100644 index 0000000..a8ad069 --- /dev/null +++ b/work/crates/main/src/incremental/lexis.rs @@ -0,0 +1,469 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). 
// +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + incremental::storage::ChildRefIndex, + lexis::{ + utils::{get_lexis_character, NULL}, + ByteIndex, + Length, + LexisSession, + Site, + Token, + TokenCount, + }, + std::*, + syntax::Node, +}; + +pub(super) struct IncrementalLexisSession<'source, N: Node> { + input: Input<'source>, + product: Product, + next_cursor: Cursor, + begin_cursor: Cursor, + start_cursor: Cursor, + end_cursor: Cursor, + submission_site: Site, + submission_string: String, +} + +impl<'source, N: Node> LexisSession for IncrementalLexisSession<'source, N> { + #[inline(always)] + fn advance(&mut self) { + self.next_cursor.advance(self.input); + } + + #[inline(always)] + fn character(&self) -> char { + self.next_cursor.character + } + + #[inline(always)] + fn submit(&mut self) { + self.end_cursor = self.next_cursor; + } + + #[inline] + fn substring(&mut self) -> &str { + if self.end_cursor.site == self.submission_site { + return self.submission_string.as_str(); + } + + self.submission_site = self.end_cursor.site; + + self.submission_string.clear(); + + if self.start_cursor.site != self.end_cursor.site { + substring_to( + self.input, + &self.start_cursor, + &self.end_cursor, + &mut self.submission_string, + ); + } + + self.submission_string.as_str() + } +} + +impl<'source, N: Node> IncrementalLexisSession<'source, N> { + //Safety: + // 1. `tail` is a Page reference(possibly dangling). + // 2. `tail`'s Tree is immutable during `'source` lifetime. + // 3. `'source` does not outlive `tail`'s Tree. + // 4. `input` is not empty. + // 5. Each item in `input` is not empty. + #[inline] + pub(super) unsafe fn run( + product_capacity: TokenCount, + input: Input<'source>, + tail: ChildRefIndex, + ) -> Product { + let start_character = match input.first() { + Some(first) => { + debug_assert!( + !first.is_empty(), + "Internal error. Empty input first string.", + ); + + unsafe { get_lexis_character(first.chars()) } + } + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. 
Empty Lexer input."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + let cursor = Cursor { + site: 0, + input_index: 0, + input_byte: 0, + character: start_character, + tail_ref: tail, + tail_length: 0, + }; + + let mut session = Self { + input, + product: Product { + length: 0, + spans: Vec::with_capacity(product_capacity), + strings: Vec::with_capacity(product_capacity), + tokens: Vec::with_capacity(product_capacity), + tail_ref: tail, + tail_length: 0, + }, + next_cursor: cursor, + begin_cursor: cursor, + start_cursor: cursor, + end_cursor: cursor, + submission_site: 0, + submission_string: String::new(), + }; + + loop { + let token = ::new(&mut session); + + if session.start_cursor.site != session.end_cursor.site { + let submission = session.get_submission(); + + session.product.push( + token, + &session.start_cursor, + &session.end_cursor, + submission, + ); + + if session.try_finish() { + break; + } + + continue; + } + + if session.enter_mismatch_loop(token) { + break; + } + } + + session.product + } + + // Returns true if the parsing process supposed to stop + #[inline] + fn enter_mismatch_loop(&mut self, mismatch: N::Token) -> bool { + loop { + self.start_cursor.advance(self.input); + self.next_cursor = self.start_cursor; + + if self.start_cursor.character == NULL { + self.product.push( + mismatch, + &self.begin_cursor, + &self.start_cursor, + self.get_rejection(), + ); + + return true; + } + + let token = ::new(self); + + if self.start_cursor.site < self.end_cursor.site { + self.product.push( + mismatch, + &self.begin_cursor, + &self.start_cursor, + self.get_rejection(), + ); + + let submission = self.get_submission(); + + self.product + .push(token, &self.start_cursor, &self.end_cursor, submission); + + return self.try_finish(); + } + } + } + + #[inline] + fn get_submission(&mut self) -> String { + if self.end_cursor.site != self.submission_site { + self.submission_site = self.end_cursor.site; + self.submission_string.clear(); + + substring_to( + self.input, + &self.start_cursor, + &self.end_cursor, + &mut self.submission_string, + ); + } + + return self.submission_string.clone(); + } + + #[inline] + fn get_rejection(&self) -> String { + let mut rejection = String::new(); + + substring_to( + self.input, + &self.begin_cursor, + &self.start_cursor, + &mut rejection, + ); + + rejection + } + + // Returns true if the parsing process supposed to stop + #[inline(always)] + fn try_finish(&mut self) -> bool { + if self.end_cursor.character == NULL { + return true; + } + + if self.end_cursor.input_byte == 0 && self.end_cursor.input_index >= self.input.len() { + return true; + } + + self.reset(); + + return false; + } + + #[inline(always)] + fn reset(&mut self) { + self.begin_cursor = self.end_cursor; + self.start_cursor = self.end_cursor; + self.next_cursor = self.end_cursor; + self.submission_string.clear(); + } +} + +pub(super) type Input<'source> = &'source [&'source str]; + +#[inline] +fn substring_to(input: Input, from: &Cursor, to: &Cursor, target: &mut String) { + if from.input_index == to.input_index { + debug_assert!( + from.input_byte <= to.input_byte, + "Internal error. 
From cursor is ahead of To cursor.", + ); + + let string = match from.input_index < input.len() { + true => unsafe { *input.get_unchecked(from.input_index) }, + + false => match from.tail_ref.is_dangling() { + true => unsafe { from.tail_ref.string() }, + false => "", + }, + }; + + target.push_str(unsafe { string.get_unchecked(from.input_byte..to.input_byte) }); + + return; + } + + let mut chunk_ref = from.tail_ref; + + for index in from.input_index..=to.input_index { + let string = match index < input.len() { + true => unsafe { *input.get_unchecked(index) }, + + false => match chunk_ref.is_dangling() { + false => { + let string = unsafe { from.tail_ref.string() }; + + unsafe { chunk_ref.next() }; + + string + } + true => "", + }, + }; + + if index == from.input_index { + target.push_str(unsafe { string.get_unchecked(from.input_byte..) }); + continue; + } + + if index == to.input_index { + target.push_str(unsafe { string.get_unchecked(0..to.input_byte) }); + continue; + } + + target.push_str(string); + } +} + +pub(super) struct Product { + pub(super) length: Length, + pub(super) spans: Vec, + pub(super) strings: Vec, + pub(super) tokens: Vec, + pub(super) tail_ref: ChildRefIndex, + pub(super) tail_length: Length, +} + +impl Product { + #[inline(always)] + pub(super) fn count(&self) -> TokenCount { + self.spans.len() + } + + #[inline(always)] + fn push(&mut self, token: N::Token, from: &Cursor, to: &Cursor, string: String) { + let span = to.site - from.site; + + self.length += span; + + self.spans.push(span); + self.strings.push(string); + self.tokens.push(token); + self.tail_ref = to.tail_ref; + self.tail_length = to.tail_length; + } +} + +struct Cursor { + site: Site, + input_index: usize, + input_byte: ByteIndex, + character: char, + tail_ref: ChildRefIndex, + tail_length: Length, +} + +impl Clone for Cursor { + #[inline(always)] + fn clone(&self) -> Self { + *self + } +} + +impl Copy for Cursor {} + +impl Cursor { + #[inline] + fn advance(&mut self, input: Input) { + if self.character == NULL { + return; + } + + self.site += 1; + self.input_byte += self.character.len_utf8(); + + match self.input_index < input.len() { + true => { + let string = unsafe { *input.get_unchecked(self.input_index) }; + + if self.input_byte < string.len() { + self.character = unsafe { + get_lexis_character(string.get_unchecked(self.input_byte..).chars()) + }; + + return; + } + + self.input_index += 1; + self.input_byte = 0; + + if self.input_index < input.len() { + let string = unsafe { input.get_unchecked(self.input_index) }; + + debug_assert!(!string.is_empty(), "Internal error. Empty input string."); + + self.character = unsafe { get_lexis_character(string.chars()) }; + + return; + } + + if self.tail_ref.is_dangling() { + self.character = NULL; + return; + } + + let string = unsafe { self.tail_ref.string() }; + + debug_assert!(!string.is_empty(), "Internal error. Empty tail string."); + + self.character = unsafe { get_lexis_character(string.chars()) }; + } + + false => { + self.tail_length += 1; + + let string = unsafe { self.tail_ref.string() }; + + if self.input_byte < string.len() { + self.character = unsafe { + get_lexis_character(string.get_unchecked(self.input_byte..).chars()) + }; + + return; + } + + self.input_index += 1; + self.input_byte = 0; + + unsafe { self.tail_ref.next() } + + if self.tail_ref.is_dangling() { + self.character = NULL; + return; + } + + let string = unsafe { self.tail_ref.string() }; + + debug_assert!(!string.is_empty(), "Internal error. 
Empty tail string."); + + self.character = unsafe { get_lexis_character(string.chars()) }; + } + } + } +} diff --git a/work/crates/main/src/incremental/mod.rs b/work/crates/main/src/incremental/mod.rs new file mode 100644 index 0000000..9deced7 --- /dev/null +++ b/work/crates/main/src/incremental/mod.rs @@ -0,0 +1,45 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +mod cursor; +mod document; +mod errors; +mod lexis; +mod storage; +mod syntax; + +pub use crate::incremental::document::Document; diff --git a/work/crates/main/src/incremental/storage/branch.rs b/work/crates/main/src/incremental/storage/branch.rs new file mode 100644 index 0000000..9be9182 --- /dev/null +++ b/work/crates/main/src/incremental/storage/branch.rs @@ -0,0 +1,1354 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). 
// +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + incremental::storage::{ + child::{ChildCount, ChildIndex, ChildRefIndex}, + item::{Item, ItemRef, ItemRefVariant, Split}, + nesting::{BranchLayer, Height, Layer, LayerDescriptor, PageLayer}, + references::References, + utils::{array_copy_to, array_shift, capacity}, + }, + lexis::Length, + std::*, + syntax::Node, +}; + +const BRANCHING: ChildCount = 6; + +#[repr(transparent)] +pub(super) struct Branch { + pub(super) inner: BranchInner, + pub(super) child_layer: PhantomData, +} + +pub(super) struct BranchInner { + pub(super) parent: ChildRefIndex, + pub(super) occupied: ChildCount, + pub(super) spans: [Length; capacity(BRANCHING)], + pub(super) children: [ItemRefVariant; capacity(BRANCHING)], +} + +impl Item for Branch { + const BRANCHING: ChildCount = BRANCHING; + + type Node = N; + + #[inline(always)] + fn occupied(&self) -> ChildCount { + self.inner.occupied + } + + #[inline(always)] + unsafe fn copy_to( + &mut self, + to: &mut Self, + source: ChildCount, + destination: ChildCount, + count: ChildCount, + ) { + debug_assert!( + source + count <= self.inner.occupied, + "Internal error. An attempt to copy non occupied data in Branch.", + ); + + unsafe { + array_copy_to( + &mut self.inner.spans, + &mut to.inner.spans, + source, + destination, + count, + ) + }; + unsafe { + array_copy_to( + &mut self.inner.children, + &mut to.inner.children, + source, + destination, + count, + ) + }; + } + + #[inline(always)] + unsafe fn inflate(&mut self, from: ChildIndex, count: ChildCount) { + debug_assert!( + from <= self.inner.occupied, + "Internal error. An attempt to inflate from out of bounds child in Branch." + ); + debug_assert!( + count + self.inner.occupied <= capacity(Self::BRANCHING), + "Internal error. An attempt to inflate with overflow in Branch." + ); + debug_assert!( + count > 0, + "Internal error. An attempt to inflate of empty range in Page." + ); + + if from < self.inner.occupied { + unsafe { + array_shift( + &mut self.inner.spans, + from, + from + count, + self.inner.occupied - from, + ) + }; + unsafe { + array_shift( + &mut self.inner.children, + from, + from + count, + self.inner.occupied - from, + ) + }; + } + + self.inner.occupied += count; + } + + #[inline(always)] + unsafe fn deflate(&mut self, from: ChildIndex, count: ChildCount) -> bool { + debug_assert!( + from < self.inner.occupied, + "Internal error. An attempt to deflate from non occupied child in Branch." + ); + debug_assert!( + from + count <= self.inner.occupied, + "Internal error. An attempt to deflate with overflow in Branch." + ); + debug_assert!( + count > 0, + "Internal error. An attempt to deflate of empty range." 
+ ); + + if from + count < self.inner.occupied { + unsafe { + array_shift( + &mut self.inner.spans, + from + count, + from, + self.inner.occupied - from - count, + ) + }; + unsafe { + array_shift( + &mut self.inner.children, + from + count, + from, + self.inner.occupied - from - count, + ) + }; + } + + self.inner.occupied -= count; + + self.inner.occupied >= Self::BRANCHING + } +} + +impl Branch { + #[inline(always)] + pub(super) fn new(occupied: ChildCount) -> BranchRef { + debug_assert!( + occupied > 0, + "Internal error. An attempt to create Branch with zero occupied values." + ); + + debug_assert!( + occupied <= capacity(Self::BRANCHING), + "Internal error. An attempt to create Branch with occupied value exceeding capacity." + ); + + let branch = Self { + inner: BranchInner { + parent: ChildRefIndex::dangling(), + occupied, + spans: Default::default(), + children: Default::default(), + }, + child_layer: PhantomData::default(), + }; + + let pointer = unsafe { NonNull::new_unchecked(Box::leak(Box::new(branch))) }; + + BranchRef { pointer } + } + + // Safety: + // 1. All references belong to `references` instance. + // 2. `height >= 2`. + // 3. `height` fits to `ChildLayer`. + pub(crate) unsafe fn free( + mut self, + height: Height, + references: &mut References, + ) -> ChildCount { + let mut child_count = 0; + + for index in 0..self.inner.occupied { + let child = unsafe { self.inner.children.get_unchecked_mut(index) }; + + match height { + 0 | 1 => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Incorrect height."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + + 2 => { + debug_assert!( + matches!(ChildLayer::descriptor(), LayerDescriptor::Page), + "Internal error. Incorrect height.", + ); + + let page_ref = *unsafe { child.as_page_ref() }; + + let page = unsafe { page_ref.into_owned() }; + + child_count += unsafe { page.free(references) }; + } + + 3 => { + debug_assert!( + matches!(ChildLayer::descriptor(), LayerDescriptor::Branch), + "Internal error. Incorrect height.", + ); + + let branch_ref = *unsafe { child.as_branch_ref::() }; + + let branch = unsafe { branch_ref.into_owned() }; + + child_count += unsafe { branch.free(height - 1, references) } + } + + _ => { + debug_assert!( + matches!(ChildLayer::descriptor(), LayerDescriptor::Branch), + "Internal error. Incorrect height.", + ); + + let branch_ref = *unsafe { child.as_branch_ref::() }; + + let branch = unsafe { branch_ref.into_owned() }; + + child_count += unsafe { branch.free(height - 1, references) } + } + } + } + + child_count + } + + // Safety: + // 1. `ChildLayer` correctly describes children layer. + // 2. `count > 0` + // 3. `self` data within `from..(from + count)` range is occupied. + // 4. `self_variant` resolves to self pointer. 
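The `deflate` contract above is the heart of the node balance bookkeeping: after removing children, a node reports whether it still holds at least `BRANCHING` entries, and the caller escalates rebalancing when it does not. Below is a minimal, safe sketch of that invariant; `ToyNode` and the `CAPACITY = 2 * BRANCHING` value are illustrative assumptions, not the crate's actual types (the real capacity comes from the `capacity()` helper in storage/utils.rs). The `update_children` routine that follows is the companion pass that rewires child-to-parent back references after such mutations.

const BRANCHING: usize = 6;
const CAPACITY: usize = BRANCHING * 2; // assumed; stands in for capacity(BRANCHING)

struct ToyNode {
    occupied: usize,
}

impl ToyNode {
    // Mirrors Item::deflate's return value: after removing `count`
    // children, does the node still satisfy the minimum-occupancy rule?
    fn deflate(&mut self, count: usize) -> bool {
        assert!(count > 0 && count <= self.occupied);
        self.occupied -= count;
        self.occupied >= BRANCHING
    }
}

fn main() {
    let mut node = ToyNode { occupied: 8 };
    assert!(node.occupied <= CAPACITY);
    assert!(node.deflate(2)); // 6 children remain: still balanced
    assert!(!node.deflate(1)); // 5 children remain: caller must rebalance
}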
+ #[inline(always)] + unsafe fn update_children( + &mut self, + self_variant: ItemRefVariant, + from: ChildIndex, + count: ChildCount, + ) -> Length { + let mut length = 0; + + for index in from..(from + count) { + length += *unsafe { self.inner.spans.get_unchecked(index) }; + + let child = unsafe { self.inner.children.get_unchecked_mut(index) }; + + unsafe { + child.set_parent::(ChildRefIndex { + item: self_variant, + index, + }) + }; + } + + length + } +} + +#[repr(transparent)] +pub(super) struct BranchRef { + pointer: NonNull>, +} + +impl Clone for BranchRef { + #[inline(always)] + fn clone(&self) -> Self { + *self + } +} + +impl Copy for BranchRef {} + +impl ItemRef for BranchRef { + type SelfLayer = BranchLayer; + + type Item = Branch; + + #[inline(always)] + fn dangling() -> Self { + Self { + pointer: NonNull::dangling(), + } + } + + #[inline(always)] + unsafe fn as_ref(&self) -> &Self::Item { + unsafe { self.pointer.as_ref() } + } + + #[inline(always)] + unsafe fn as_mut(&mut self) -> &mut Self::Item { + unsafe { self.pointer.as_mut() } + } + + #[inline(always)] + unsafe fn into_variant(self) -> ItemRefVariant { + ItemRefVariant::from_branch(self) + } + + #[inline(always)] + unsafe fn into_owned(self) -> Box { + unsafe { Box::from_raw(self.pointer.as_ptr()) } + } + + #[inline(always)] + unsafe fn calculate_length(&self) -> Length { + let branch = unsafe { self.as_ref() }; + + let mut length = 0; + + for index in 0..branch.inner.occupied { + length += unsafe { branch.inner.spans.get_unchecked(index) }; + } + + length + } + + #[inline(always)] + unsafe fn parent(&self) -> &ChildRefIndex { + unsafe { &self.as_ref().inner.parent } + } + + #[inline(always)] + unsafe fn set_parent(&mut self, parent: ChildRefIndex) { + unsafe { self.as_mut().inner.parent = parent }; + } + + #[inline(always)] + unsafe fn parent_mut(&mut self) -> &mut BranchRef { + let parent_ref_index = unsafe { &mut self.as_mut().inner.parent }; + + debug_assert!( + !parent_ref_index.is_dangling(), + "Internal error. An attempt to get parent from root.", + ); + + unsafe { parent_ref_index.item.as_branch_mut() } + } + + #[inline(always)] + unsafe fn update_children( + &mut self, + _references: &mut References, + from: ChildIndex, + count: ChildCount, + ) -> Length { + let item = ItemRefVariant::from_branch(*self); + + let branch = unsafe { self.pointer.as_mut() }; + + unsafe { branch.update_children(item, from, count) } + } + + #[inline] + unsafe fn split( + &mut self, + references: &mut References, + mut children_split: Split, + length: Length, + from: ChildIndex, + ) -> Split { + let mut parent_split = Split::dangling(); + + let occupied = unsafe { self.as_ref().inner.occupied }; + + debug_assert!( + from < occupied, + "Internal error. 
Split at position out of bounds.", + ); + + match from == 0 { + false => { + let mut right_parent_ref = Branch::::new(occupied - from); + + match children_split.left_span == 0 { + false => match from + 1 == occupied { + false => { + let left_parent_variant = { + let left_parent_variant = unsafe { self.into_variant() }; + + unsafe { + children_split.left_item.set_parent::( + ChildRefIndex { + item: left_parent_variant, + index: from, + }, + ); + } + + let left_parent = unsafe { self.as_mut() }; + + unsafe { + left_parent.copy_to( + right_parent_ref.as_mut(), + from + 1, + 1, + occupied - from - 1, + ) + }; + + unsafe { + *left_parent.inner.spans.get_unchecked_mut(from) = + children_split.left_span + }; + unsafe { + *left_parent.inner.children.get_unchecked_mut(from) = + children_split.left_item + }; + + left_parent.inner.occupied = from + 1; + + left_parent_variant + }; + + let right_parent_variant = { + let right_parent_variant = + unsafe { right_parent_ref.into_variant() }; + + let right_parent = unsafe { right_parent_ref.as_mut() }; + + right_parent.inner.spans[0] = children_split.right_span; + right_parent.inner.children[0] = children_split.right_item; + + right_parent_variant + }; + + let right_parent_span = unsafe { + right_parent_ref.update_children(references, 0, occupied - from) + }; + + parent_split.left_span = length - right_parent_span; + parent_split.left_item = left_parent_variant; + parent_split.right_span = right_parent_span; + parent_split.right_item = right_parent_variant; + } + + true => { + let left_parent_variant = { + let left_parent_variant = unsafe { self.into_variant() }; + + unsafe { + children_split.left_item.set_parent::( + ChildRefIndex { + item: left_parent_variant, + index: from, + }, + ); + } + + let left_parent = unsafe { self.as_mut() }; + + unsafe { + *left_parent.inner.spans.get_unchecked_mut(from) = + children_split.left_span + }; + unsafe { + *left_parent.inner.children.get_unchecked_mut(from) = + children_split.left_item; + } + + left_parent_variant + }; + + let right_parent_variant = { + let right_parent_variant = + unsafe { right_parent_ref.into_variant() }; + + unsafe { + children_split.right_item.set_parent::( + ChildRefIndex { + item: right_parent_variant, + index: 0, + }, + ); + } + + let right_parent = unsafe { right_parent_ref.as_mut() }; + + right_parent.inner.spans[0] = children_split.right_span; + right_parent.inner.children[0] = children_split.right_item; + + right_parent_variant + }; + + parent_split.left_span = length - children_split.right_span; + parent_split.left_item = left_parent_variant; + parent_split.right_span = children_split.right_span; + parent_split.right_item = right_parent_variant; + } + }, + + true => { + let left_parent = unsafe { self.as_mut() }; + + unsafe { + left_parent.copy_to(right_parent_ref.as_mut(), from, 0, occupied - from) + }; + left_parent.inner.occupied = from; + + parent_split.right_span = unsafe { + right_parent_ref.update_children(references, 0, occupied - from) + }; + parent_split.right_item = unsafe { right_parent_ref.into_variant() }; + + parent_split.left_span = length - parent_split.right_span; + parent_split.left_item = self.into_variant(); + } + } + } + + true => match children_split.left_span == 0 { + false => { + let left_parent_variant = { + let mut left_parent_ref = Branch::::new(1); + let left_parent_variant = unsafe { left_parent_ref.into_variant() }; + + unsafe { + children_split + .left_item + .set_parent::(ChildRefIndex { + item: left_parent_variant, + index: 0, + }); + } + + let 
left_parent = unsafe { left_parent_ref.as_mut() }; + + left_parent.inner.spans[0] = children_split.left_span; + left_parent.inner.children[0] = children_split.left_item; + + left_parent_variant + }; + + let right_parent_variant = { + let right_parent_variant = unsafe { self.into_variant() }; + + unsafe { + children_split + .right_item + .set_parent::(ChildRefIndex { + item: right_parent_variant, + index: 0, + }); + } + + let right_parent = unsafe { self.as_mut() }; + + right_parent.inner.spans[0] = children_split.right_span; + right_parent.inner.children[0] = children_split.right_item; + + right_parent_variant + }; + + parent_split.left_span = children_split.left_span; + parent_split.left_item = left_parent_variant; + parent_split.right_span = length - children_split.left_span; + parent_split.right_item = right_parent_variant; + } + + true => { + parent_split.left_span = 0; + + parent_split.right_span = length; + parent_split.right_item = unsafe { self.into_variant() }; + } + }, + } + + parent_split + } +} + +impl BranchRef { + // Safety: + // 1. `self` is not dangling. + // 2. `ChildLayer` correctly describes children later of `self`. + // 3. `GrandchildLayer` correctly describes children later of the `ChildLayer`. + // 4. All references inside `self` subtree belong to `references` instance. + #[inline] + pub(super) unsafe fn fix_leftmost_balance( + &mut self, + references: &mut References, + ) -> (bool, ItemRefVariant) { + let parent_occupied = unsafe { self.as_ref().occupied() }; + + match parent_occupied { + 0 => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Empty item."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + + 1 => (true, unsafe { self.as_ref().inner.children[0] }), + + _ => { + let mut first_child_variant = unsafe { self.as_ref().inner.children[0] }; + + let first_child_occupied = match ChildLayer::descriptor() { + LayerDescriptor::Branch => unsafe { + first_child_variant + .as_branch_ref::() + .as_ref() + .occupied() + }, + + LayerDescriptor::Page => unsafe { + first_child_variant.as_page_ref().as_ref().occupied() + }, + }; + + if first_child_occupied >= ChildLayer::branching::() { + return (true, first_child_variant); + } + + let mut next_child_variant = unsafe { self.as_ref().inner.children[1] }; + + let next_child_occupied = match ChildLayer::descriptor() { + LayerDescriptor::Branch => unsafe { + next_child_variant + .as_branch_ref::() + .as_ref() + .occupied() + }, + + LayerDescriptor::Page => unsafe { + next_child_variant.as_page_ref().as_ref().occupied() + }, + }; + + if first_child_occupied + next_child_occupied + <= capacity(ChildLayer::branching::()) + { + let addition = match ChildLayer::descriptor() { + LayerDescriptor::Branch => { + let first_child_ref = + unsafe { first_child_variant.as_branch_mut::() }; + + let next_child_ref = + unsafe { next_child_variant.as_branch_mut::() }; + + unsafe { + ItemRef::merge_to_right(first_child_ref, next_child_ref, references) + } + } + + LayerDescriptor::Page => { + let first_child_ref = unsafe { first_child_variant.as_page_mut() }; + + let next_child_ref = unsafe { next_child_variant.as_page_mut() }; + + unsafe { + ItemRef::merge_to_right(first_child_ref, next_child_ref, references) + } + } + }; + + let parent_variant = unsafe { self.into_variant() }; + + let parent = unsafe { self.as_mut() }; + + parent.inner.spans[1] += addition; + + let balanced = unsafe { parent.deflate(0, 1) }; + + let _ = unsafe { parent.update_children(parent_variant, 0, parent.occupied()) }; + + 
return (balanced, next_child_variant); + } + + let transfer_length = match ChildLayer::descriptor() { + LayerDescriptor::Branch => { + let first_child_ref = + unsafe { first_child_variant.as_branch_mut::() }; + + let next_child_ref = + unsafe { next_child_variant.as_branch_mut::() }; + + unsafe { + ItemRef::balance_to_left(first_child_ref, next_child_ref, references) + } + } + + LayerDescriptor::Page => { + let first_child_ref = unsafe { first_child_variant.as_page_mut() }; + + let next_child_ref = unsafe { next_child_variant.as_page_mut() }; + + unsafe { + ItemRef::balance_to_left(first_child_ref, next_child_ref, references) + } + } + }; + + let parent = unsafe { self.as_mut() }; + + unsafe { parent.inner.spans[0] += transfer_length }; + unsafe { parent.inner.spans[1] -= transfer_length }; + + (true, first_child_variant) + } + } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `ChildLayer` correctly describes children later of `self`. + // 3. `GrandchildLayer` correctly describes children later of the `ChildLayer`. + // 4. All references inside `self` subtree belong to `references` instance. + #[inline] + pub(super) unsafe fn fix_rightmost_balance( + &mut self, + references: &mut References, + ) -> (bool, ItemRefVariant) { + let parent_occupied = unsafe { self.as_ref().occupied() }; + + match parent_occupied { + 0 => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Empty item."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + + 1 => (true, unsafe { self.as_ref().inner.children[0] }), + + _ => { + let mut last_child_variant = unsafe { + *self + .as_ref() + .inner + .children + .get_unchecked(parent_occupied - 1) + }; + + let last_child_occupied = match ChildLayer::descriptor() { + LayerDescriptor::Branch => unsafe { + last_child_variant + .as_branch_ref::() + .as_ref() + .occupied() + }, + + LayerDescriptor::Page => unsafe { + last_child_variant.as_page_ref().as_ref().occupied() + }, + }; + + if last_child_occupied >= ChildLayer::branching::() { + return (true, last_child_variant); + } + + let mut previous_child_variant = unsafe { + *self + .as_ref() + .inner + .children + .get_unchecked(parent_occupied - 2) + }; + + let previous_child_occupied = match ChildLayer::descriptor() { + LayerDescriptor::Branch => unsafe { + previous_child_variant + .as_branch_ref::() + .as_ref() + .occupied() + }, + + LayerDescriptor::Page => unsafe { + previous_child_variant.as_page_ref().as_ref().occupied() + }, + }; + + if previous_child_occupied + last_child_occupied + <= capacity(ChildLayer::branching::()) + { + let addition = match ChildLayer::descriptor() { + LayerDescriptor::Branch => { + let previous_child_ref = unsafe { + previous_child_variant.as_branch_mut::() + }; + + let last_child_ref = + unsafe { last_child_variant.as_branch_mut::() }; + + unsafe { + ItemRef::merge_to_left( + previous_child_ref, + last_child_ref, + references, + ) + } + } + + LayerDescriptor::Page => { + let previous_child_ref = + unsafe { previous_child_variant.as_page_mut() }; + + let last_child_ref = unsafe { last_child_variant.as_page_mut() }; + + unsafe { + ItemRef::merge_to_left( + previous_child_ref, + last_child_ref, + references, + ) + } + } + }; + + let parent = unsafe { self.as_mut() }; + + unsafe { + *parent.inner.spans.get_unchecked_mut(parent_occupied - 2) += addition + }; + + parent.inner.occupied -= 1; + + return ( + parent.inner.occupied >= Branch::::BRANCHING, + previous_child_variant, + ); + } + + let transfer_length = match ChildLayer::descriptor() { 
+ LayerDescriptor::Branch => { + let previous_child_ref = + unsafe { previous_child_variant.as_branch_mut::() }; + + let last_child_ref = + unsafe { last_child_variant.as_branch_mut::() }; + + unsafe { + ItemRef::balance_to_right( + previous_child_ref, + last_child_ref, + references, + ) + } + } + + LayerDescriptor::Page => { + let previous_child_ref = unsafe { previous_child_variant.as_page_mut() }; + + let last_child_ref = unsafe { last_child_variant.as_page_mut() }; + + unsafe { + ItemRef::balance_to_right( + previous_child_ref, + last_child_ref, + references, + ) + } + } + }; + + let parent = unsafe { self.as_mut() }; + + unsafe { + *parent.inner.spans.get_unchecked_mut(parent_occupied - 1) += transfer_length + }; + + unsafe { + *parent.inner.spans.get_unchecked_mut(parent_occupied - 2) -= transfer_length + }; + + (true, last_child_variant) + } + } + } + + // Safety: + // 1. `self` is not dangling. + #[inline] + pub(super) unsafe fn inc_span_left(&mut self, addition: Length) { + let mut branch = unsafe { self.as_mut() }; + + loop { + branch.inner.spans[0] += addition; + + match branch.inner.parent.is_dangling() { + true => break, + + false => { + branch = unsafe { branch.inner.parent.item.as_branch_mut().as_mut() }; + } + } + } + } + + // Safety: + // 1. `self` is not dangling. + #[inline] + pub(super) unsafe fn inc_span_right(&mut self, addition: Length) { + let mut branch = unsafe { self.as_mut() }; + + loop { + unsafe { + *branch + .inner + .spans + .get_unchecked_mut(branch.inner.occupied - 1) += addition + }; + + match branch.inner.parent.is_dangling() { + true => break, + + false => { + branch = unsafe { branch.inner.parent.item.as_branch_mut().as_mut() }; + } + } + } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `item_variant` is not dangling. + // 3. `ChildLayer` correctly describes `item_variant` type, and the `self` children layer. + // 4. `self` Branch is not a root branch. 
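`inc_span_left` and `inc_span_right` above propagate a length delta along the leftmost (respectively rightmost) spine of ancestors, so every subtree span on the path stays consistent with the edited chunk. The sketch below models the same walk in safe code, with the ancestor chain flattened into a `Vec` instead of the raw parent pointers the real tree follows; all names here are toy stand-ins. The `add_child_left` routine that follows relies on exactly this bookkeeping when it splices a new child at the left edge.

struct ToyBranch {
    spans: Vec<usize>, // one accumulated length per child slot
}

// Safe model of inc_span_left: the edited chunk is reachable through
// slot 0 of every branch on the chain, so each of those spans grows.
fn inc_span_left(chain: &mut [ToyBranch], addition: usize) {
    for branch in chain.iter_mut() {
        branch.spans[0] += addition;
    }
}

fn main() {
    let mut chain = vec![
        ToyBranch { spans: vec![10, 4] }, // leaf-most ancestor
        ToyBranch { spans: vec![14, 7] }, // root
    ];

    inc_span_left(&mut chain, 3);

    assert_eq!(chain[0].spans[0], 13);
    assert_eq!(chain[1].spans[0], 17);
}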
+ #[inline] + pub(super) unsafe fn add_child_left( + &mut self, + root_length: Length, + mut head_subtraction: Length, + mut item_length: Length, + mut item_variant: ItemRefVariant, + ) -> Option> { + let mut branch = unsafe { self.as_mut() }; + + loop { + branch.inner.spans[0] -= head_subtraction; + + match branch.inner.occupied < capacity(Branch::::BRANCHING) { + true => { + let branch_variant = ItemRefVariant::from_branch(BranchRef { + pointer: unsafe { NonNull::new_unchecked(branch) }, + }); + + let parent_ref_index = ChildRefIndex { + item: branch_variant, + index: 0, + }; + + match ChildLayer::descriptor() { + LayerDescriptor::Page => unsafe { + item_variant.as_page_mut().set_parent(parent_ref_index) + }, + + LayerDescriptor::Branch => unsafe { + item_variant + .as_branch_mut::<()>() + .set_parent(parent_ref_index) + }, + } + + unsafe { branch.inflate(0, 1) }; + + branch.inner.children[0] = item_variant; + branch.inner.spans[0] = item_length; + + unsafe { + let _ = + branch.update_children(branch_variant, 1, branch.inner.occupied - 1); + } + + if !branch.inner.parent.is_dangling() { + let parent = + unsafe { branch.inner.parent.item.as_branch_mut::() }; + + unsafe { parent.inc_span_left(item_length - head_subtraction) }; + } + + break; + } + + false => { + let mut new_sibling_ref = Branch::new(Branch::::BRANCHING); + let new_sibling_variant = unsafe { new_sibling_ref.into_variant() }; + let transfer_length; + + { + let new_sibling = unsafe { new_sibling_ref.as_mut() }; + + unsafe { + branch.copy_to( + new_sibling, + 0, + 1, + Branch::::BRANCHING - 1, + ) + } + + transfer_length = unsafe { + new_sibling.update_children( + new_sibling_variant, + 1, + Branch::::BRANCHING - 1, + ) + }; + + new_sibling.inner.spans[0] = item_length; + new_sibling.inner.children[0] = item_variant; + + unsafe { + item_variant.set_parent::(ChildRefIndex { + item: new_sibling_variant, + index: 0, + }); + } + } + + unsafe { + let _ = branch.deflate(0, Branch::::BRANCHING - 1); + } + + let branch_variant = unsafe { + BranchRef { + pointer: NonNull::new_unchecked(branch), + } + .into_variant() + }; + + let _ = unsafe { + branch.update_children( + branch_variant, + 0, + Branch::::BRANCHING, + ) + }; + + item_length += transfer_length; + item_variant = new_sibling_variant; + head_subtraction += transfer_length; + + match branch.inner.parent.is_dangling() { + false => match ChildLayer::descriptor() { + LayerDescriptor::Branch => { + branch = unsafe { + branch + .inner + .parent + .item + .as_branch_mut::() + .as_mut() + }; + continue; + } + + LayerDescriptor::Page => { + return unsafe { + branch + .inner + .parent + .item + .as_branch_mut::() + .add_child_left( + root_length, + head_subtraction, + item_length, + item_variant, + ) + }; + } + }, + + true => { + let mut new_root_ref = Branch::::new(2); + + let new_root_variant = unsafe { new_root_ref.into_variant() }; + + unsafe { + new_sibling_ref.set_parent(ChildRefIndex { + item: new_root_variant, + index: 0, + }); + } + + branch.inner.parent = ChildRefIndex { + item: new_root_variant, + index: 1, + }; + + { + let new_root = unsafe { new_root_ref.as_mut() }; + + new_root.inner.children[0] = new_sibling_variant; + new_root.inner.children[1] = + ItemRefVariant::from_branch(BranchRef { + pointer: NonNull::new_unchecked(branch), + }); + + new_root.inner.spans[0] = item_length; + new_root.inner.spans[1] = root_length - head_subtraction; + } + + return Some(new_root_variant); + } + } + } + } + } + + return None; + } + + // Safety: + // 1. `self` is not dangling. + // 2. 
`item_variant` is not dangling. + // 3. `ChildLayer` correctly describes `item_variant` type, and the `self` children layer. + // 4. `self` Branch is not a root branch. + #[inline] + pub(super) unsafe fn add_child_right( + &mut self, + root_length: Length, + mut tail_subtraction: Length, + mut item_length: Length, + mut item_variant: ItemRefVariant, + ) -> Option> { + let mut branch = unsafe { self.as_mut() }; + + loop { + unsafe { + *branch + .inner + .spans + .get_unchecked_mut(branch.inner.occupied - 1) -= tail_subtraction + }; + + match branch.inner.occupied < capacity(Branch::::BRANCHING) { + true => { + let parent_ref_index = ChildRefIndex { + item: ItemRefVariant::from_branch(BranchRef { + pointer: unsafe { NonNull::new_unchecked(branch) }, + }), + index: branch.inner.occupied, + }; + + match ChildLayer::descriptor() { + LayerDescriptor::Page => unsafe { + item_variant.as_page_mut().set_parent(parent_ref_index) + }, + + LayerDescriptor::Branch => unsafe { + item_variant + .as_branch_mut::<()>() + .set_parent(parent_ref_index) + }, + } + + unsafe { + *branch + .inner + .children + .get_unchecked_mut(branch.inner.occupied) = item_variant; + } + + unsafe { + *branch.inner.spans.get_unchecked_mut(branch.inner.occupied) = item_length; + } + + branch.inner.occupied += 1; + + if !branch.inner.parent.is_dangling() { + let parent = + unsafe { branch.inner.parent.item.as_branch_mut::() }; + + unsafe { parent.inc_span_right(item_length - tail_subtraction) }; + } + + break; + } + + false => { + let mut new_sibling_ref = Branch::new(Branch::::BRANCHING); + let new_sibling_variant = unsafe { new_sibling_ref.into_variant() }; + let transfer_length; + + { + let new_sibling = unsafe { new_sibling_ref.as_mut() }; + + unsafe { + branch.copy_to( + new_sibling, + Branch::::BRANCHING, + 0, + Branch::::BRANCHING - 1, + ) + } + + transfer_length = unsafe { + new_sibling.update_children( + new_sibling_variant, + 0, + Branch::::BRANCHING - 1, + ) + }; + + new_sibling.inner.spans[Branch::::BRANCHING - 1] = + item_length; + new_sibling.inner.children[Branch::::BRANCHING - 1] = + item_variant; + + unsafe { + item_variant.set_parent::(ChildRefIndex { + item: new_sibling_variant, + index: Branch::::BRANCHING - 1, + }); + } + } + + branch.inner.occupied = Branch::::BRANCHING; + + item_length += transfer_length; + item_variant = new_sibling_variant; + tail_subtraction += transfer_length; + + match branch.inner.parent.is_dangling() { + false => match ChildLayer::descriptor() { + LayerDescriptor::Branch => { + branch = unsafe { + branch + .inner + .parent + .item + .as_branch_mut::() + .as_mut() + }; + continue; + } + + LayerDescriptor::Page => { + return unsafe { + branch + .inner + .parent + .item + .as_branch_mut::() + .add_child_right( + root_length, + tail_subtraction, + item_length, + item_variant, + ) + }; + } + }, + + true => { + let mut new_root_ref = Branch::::new(2); + + let new_root_variant = unsafe { new_root_ref.into_variant() }; + + branch.inner.parent = ChildRefIndex { + item: new_root_variant, + index: 0, + }; + + unsafe { + new_sibling_ref.set_parent(ChildRefIndex { + item: new_root_variant, + index: 1, + }); + } + + { + let new_root = unsafe { new_root_ref.as_mut() }; + + new_root.inner.children[0] = + ItemRefVariant::from_branch(BranchRef { + pointer: NonNull::new_unchecked(branch), + }); + new_root.inner.children[1] = new_sibling_variant; + + new_root.inner.spans[0] = root_length - tail_subtraction; + new_root.inner.spans[1] = item_length; + } + + return Some(new_root_variant); + } + } + } + } + 
} + + return None; + } +} diff --git a/work/crates/main/src/incremental/storage/cache.rs b/work/crates/main/src/incremental/storage/cache.rs new file mode 100644 index 0000000..fb95f3b --- /dev/null +++ b/work/crates/main/src/incremental/storage/cache.rs @@ -0,0 +1,55 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + arena::RefIndex, + lexis::{Length, SiteRef}, + syntax::{Cluster, Node, SyntaxRule}, +}; + +pub(crate) struct ClusterCache { + pub(crate) cluster: Cluster, + pub(crate) rule: SyntaxRule, + pub(crate) parsed_end: SiteRef, + pub(crate) lookahead: Length, + pub(crate) successful: bool, +} + +pub(super) struct CacheEntry { + pub(super) cache: ClusterCache, + pub(super) ref_index: RefIndex, +} diff --git a/work/crates/main/src/incremental/storage/child.rs b/work/crates/main/src/incremental/storage/child.rs new file mode 100644 index 0000000..6452412 --- /dev/null +++ b/work/crates/main/src/incremental/storage/child.rs @@ -0,0 +1,646 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. 
// +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + arena::RefIndex, + incremental::storage::{ + cache::{CacheEntry, ClusterCache}, + item::{Item, ItemRef, ItemRefVariant}, + page::Page, + }, + lexis::{Length, TokenCount}, + std::*, + syntax::Node, +}; + +pub(super) type ChildIndex = usize; +pub(super) type ChildCount = usize; + +pub(crate) struct ChildRefIndex { + pub(super) item: ItemRefVariant, + pub(super) index: ChildIndex, +} + +impl Clone for ChildRefIndex { + #[inline(always)] + fn clone(&self) -> Self { + *self + } +} + +impl Copy for ChildRefIndex {} + +impl ChildRefIndex { + #[inline(always)] + pub(crate) const fn dangling() -> Self { + Self { + item: ItemRefVariant::dangling(), + index: ChildIndex::MAX, + } + } + + #[inline(always)] + pub(crate) const fn is_dangling(&self) -> bool { + self.index == ChildIndex::MAX + } + + // Safety: + // 1. `self.item` and `other.item` are possibly dangling Page references. + // 2. `self` and `other` belong to the same `Tree` instance. + #[inline(always)] + pub(crate) unsafe fn same_chunk_as(&self, other: &Self) -> bool { + if self.index != other.index { + return false; + } + + if self.index != ChildIndex::MAX + && unsafe { self.item.as_page_ref() != other.item.as_page_ref() } + { + return false; + } + + true + } + + // Safety: + // 1. `self.item` and `other.item` are Page references. + // 2. `self` and `other` belong to the same `Tree` instance. + // 3. `self` is not ahead of `other`. + // 4. `self` is not dangling. + #[inline] + pub(crate) unsafe fn continuous_to(&self, tail: &Self) -> Option { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let head_page_ref = unsafe { self.item.as_page_ref() }; + let head_page = unsafe { head_page_ref.as_ref() }; + + debug_assert!( + self.index < head_page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + match tail.is_dangling() { + false => { + let tail_page_ref = unsafe { tail.item.as_page_ref() }; + + match head_page_ref == tail_page_ref { + true => { + debug_assert!( + tail.index < head_page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + match self.index <= tail.index { + true => Some(tail.index - self.index), + + false => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. 
Head is ahead of tail."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + } + } + + false => { + if tail.index > 0 { + return None; + } + + match &head_page.next { + Some(next) if next == tail_page_ref => { + Some(head_page.occupied - self.index) + } + + _ => None, + } + } + } + } + + true => match head_page.next.is_some() { + false => Some(head_page.occupied - self.index), + + true => None, + }, + } + } + + #[inline(always)] + pub(super) fn make_dangle(&mut self) { + self.index = ChildIndex::MAX; + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + // 3. `'a` does not outlive corresponding Page instance. + // 4. There are no other mutable references to this span. + #[inline(always)] + pub(crate) unsafe fn span<'a>(&self) -> &'a Length { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_ref() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + let span = unsafe { page.spans.get_unchecked(self.index) }; + + debug_assert!(*span > 0, "Internal error. Zero span in Page."); + + span + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + // 3. `'a` does not outlive corresponding Page instance. + // 4. There are no other mutable references to this String. + #[inline(always)] + pub(crate) unsafe fn string<'a>(&self) -> &'a str { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_ref() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + let string = unsafe { page.strings.get_unchecked(self.index).assume_init_ref() }; + + debug_assert!(!string.is_empty(), "Internal error. Empty string in Page."); + + string + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + // 3. `'a` does not outlive corresponding Page instance. + // 4. There are no other mutable references to this Token. + #[inline(always)] + pub(crate) unsafe fn token<'a>(&self) -> &'a ::Token { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_ref() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + unsafe { page.tokens.get_unchecked(self.index).assume_init_ref() } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + // 3. `'a` does not outlive corresponding Page instance. + // 4. There are no other mutable references to this Token. + #[inline(always)] + pub(crate) unsafe fn token_mut<'a>(&self) -> &'a mut ::Token { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_mut() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + unsafe { page.tokens.get_unchecked_mut(self.index).assume_init_mut() } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + // 3. `'a` does not outlive corresponding Page instance. + // 4. There are no other mutable references to this ClusterCache. 
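The accessors above all reduce `ChildRefIndex` to a two-field cursor: a page reference plus a slot index, with the dangling state encoded as `ChildIndex::MAX` rather than an `Option`. The toy model below captures that encoding and the page-hopping step performed by the `next` method defined further below; `ToyPage` and `ToyCursor` are illustrative only, and the real pages are raw-pointer linked rather than indexed through a slice.

struct ToyPage {
    strings: Vec<&'static str>,
    next: Option<usize>, // position of the next page in `pages`
}

struct ToyCursor {
    page: usize,
    index: usize,
}

impl ToyCursor {
    const DANGLING: usize = usize::MAX;

    // Step to the next slot, hopping to the linked page when the
    // current page is exhausted, and dangle at the very end.
    fn next(&mut self, pages: &[ToyPage]) {
        if self.index + 1 < pages[self.page].strings.len() {
            self.index += 1;
            return;
        }

        match pages[self.page].next {
            Some(next) => {
                self.page = next;
                self.index = 0;
            }
            None => self.index = Self::DANGLING,
        }
    }
}

fn main() {
    let pages = vec![
        ToyPage { strings: vec!["foo", "bar"], next: Some(1) },
        ToyPage { strings: vec!["baz"], next: None },
    ];

    let mut cursor = ToyCursor { page: 0, index: 1 };
    cursor.next(&pages); // hops to the second page
    assert_eq!(pages[cursor.page].strings[cursor.index], "baz");

    cursor.next(&pages);
    assert_eq!(cursor.index, ToyCursor::DANGLING);
}

Encoding the dangling state in the index keeps the struct `Copy` and lets `is_dangling` stay a single integer comparison, which matters on these hot paths.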
+ #[inline(always)] + pub(crate) unsafe fn cache<'a>(&self) -> Option<&'a ClusterCache> { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_ref() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + match unsafe { page.clusters.get_unchecked(self.index).assume_init_ref() } { + Some(cache_entry) => Some(&cache_entry.cache), + + None => None, + } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + // 3. `'a` does not outlive corresponding Page instance. + // 4. There are no references to this ClusterCache. + #[inline(always)] + pub(crate) unsafe fn cache_mut<'a>(&self) -> Option<&'a mut ClusterCache> { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_mut() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + match unsafe { + page.clusters + .get_unchecked_mut(self.index) + .assume_init_mut() + } { + Some(cache_entry) => Some(&mut cache_entry.cache), + + None => None, + } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + // 3. `'a` does not outlive corresponding Page instance. + // 4. Referred item contains a cluster cache. + #[inline(always)] + pub(crate) unsafe fn cache_index(&self) -> RefIndex { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_ref() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + match unsafe { page.clusters.get_unchecked(self.index).assume_init_ref() } { + Some(cache_entry) => cache_entry.ref_index, + + None => { + #[cfg(debug_assertions)] + { + unreachable!( + "Internal error. An attempt to get RefIndex of undefined ClusterCache." + ); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + // 3. Referred item contains a cluster cache. + // 4. There are no other references to this ClusterCache. + #[inline(always)] + pub(crate) unsafe fn remove_cache(&self) -> RefIndex { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_mut() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + match unsafe { + take( + page.clusters + .get_unchecked_mut(self.index) + .assume_init_mut(), + ) + } { + Some(cache_entry) => cache_entry.ref_index, + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. An attempt to remove undefined ClusterCache."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + // 3. If referred item contains valid CLusterCache, there are no external reference to that instance. + #[inline(always)] + pub(crate) unsafe fn set_cache( + &self, + ref_index: RefIndex, + cache: ClusterCache, + ) -> Option { + debug_assert!( + !self.is_dangling(), + "Internal error. 
An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_mut() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + let previous = replace( + page.clusters + .get_unchecked_mut(self.index) + .assume_init_mut(), + Some(CacheEntry { cache, ref_index }), + ); + + match previous { + Some(cache_entry) => Some(cache_entry.ref_index), + + None => None, + } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + // 3. Referred item contains a cluster cache. + // 4. There are no references to this ClusterCache. + #[inline(always)] + pub(crate) unsafe fn update_cache(&self, cache: ClusterCache) { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_mut() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + match unsafe { + page.clusters + .get_unchecked_mut(self.index) + .assume_init_mut() + } { + Some(cache_entry) => { + cache_entry.cache = cache; + } + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. An attempt to remove undefined ClusterCache."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + #[inline(always)] + pub(crate) unsafe fn chunk_ref_index(&self) -> RefIndex { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_external_ref() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + unsafe { *page.chunks.get_unchecked(self.index) } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + #[inline(always)] + pub(crate) unsafe fn is_first(&self) -> bool { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_ref() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + self.index == 0 && page.previous.is_none() + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + #[inline(always)] + #[allow(unused)] + pub(crate) unsafe fn is_last(&self) -> bool { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_ref() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + self.index + 1 == page.occupied && page.next.is_none() + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + #[inline(always)] + pub(crate) unsafe fn next(&mut self) { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_ref() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. 
ChildRefIndex index out of bounds.", + ); + + if self.index + 1 < page.occupied { + self.index += 1; + return; + } + + match &page.next { + None => { + self.index = ChildIndex::MAX; + } + + Some(next_ref) => { + debug_assert!( + unsafe { next_ref.as_ref().occupied } >= Page::::BRANCHING, + "Internal error. Incorrect Page balance." + ); + + self.item = unsafe { next_ref.into_variant() }; + self.index = 0; + } + } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Page reference. + #[inline(always)] + pub(crate) unsafe fn back(&mut self) { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { self.item.as_page_ref().as_ref() }; + + debug_assert!( + self.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + if self.index > 0 { + self.index -= 1; + return; + } + + match &page.previous { + None => { + self.index = ChildIndex::MAX; + } + + Some(previous_ref) => { + let previous_occupied = unsafe { previous_ref.as_ref().occupied }; + + debug_assert!( + previous_occupied >= Page::::BRANCHING, + "Internal error. Incorrect Page balance." + ); + + self.item = unsafe { previous_ref.into_variant() }; + self.index = previous_occupied - 1; + } + } + } + + // Safety: + // 1. `self` is not dangling. + // 2. `self.item` is a Branch reference. + #[inline(always)] + pub(super) unsafe fn branch_span(&self) -> Length { + debug_assert!( + !self.is_dangling(), + "Internal error. An attempt to get span from dangling ChildRefIndex.", + ); + + let branch = unsafe { self.item.as_branch_ref::<()>().as_ref() }; + + debug_assert!( + self.index < branch.inner.occupied, + "Internal error. ChildRefIndex index is out of bounds.", + ); + + unsafe { *branch.inner.spans.get_unchecked(self.index) } + } +} diff --git a/work/crates/main/src/incremental/storage/item.rs b/work/crates/main/src/incremental/storage/item.rs new file mode 100644 index 0000000..8f03038 --- /dev/null +++ b/work/crates/main/src/incremental/storage/item.rs @@ -0,0 +1,620 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. 
// +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + incremental::storage::{ + branch::{Branch, BranchRef}, + child::{ChildCount, ChildIndex, ChildRefIndex}, + nesting::{BranchLayer, Layer, LayerDescriptor}, + page::PageRef, + references::References, + utils::capacity, + }, + lexis::Length, + std::*, + syntax::Node, +}; + +pub(super) trait Item: Sized { + const BRANCHING: ChildCount; + + type Node: Node; + + fn occupied(&self) -> ChildCount; + + // Safety: + // 1. `self` data within `source..(source + count)` range is occupied. + // 2. `destination..(destination + count)` range is withing the `to` data capacity. + unsafe fn copy_to( + &mut self, + to: &mut Self, + source: ChildCount, + destination: ChildCount, + count: ChildCount, + ); + + // Safety: + // 1. `from <= self.occupied`. + // 2. `self.occupied + count <= capacity`. + // 3. `count > 0` + unsafe fn inflate(&mut self, from: ChildIndex, count: ChildCount); + + // Safety: + // 1. `from < self.occupied`. + // 2. `from + count <= self.occupied. + // 3. `count > 0` + unsafe fn deflate(&mut self, from: ChildIndex, count: ChildCount) -> bool; +} + +pub(super) trait ItemRef: Copy { + type SelfLayer: Layer; + type Item: Item; + + fn dangling() -> Self; + + //Safety: + // 1. `self` is not dangling. + unsafe fn as_ref(&self) -> &Self::Item; + + //Safety: + // 1. `self` is not dangling. + unsafe fn as_mut(&mut self) -> &mut Self::Item; + + //Safety: + // 1. `self` is not dangling. + unsafe fn into_variant(self) -> ItemRefVariant; + + //Safety: + // 1. `self` is not dangling. + unsafe fn into_owned(self) -> Box; + + //Safety: + // 1. `self` is not dangling. + unsafe fn calculate_length(&self) -> Length; + + //Safety: + // 1. `self` is not dangling. + unsafe fn parent(&self) -> &ChildRefIndex; + + //Safety: + // 1. `self` is not dangling. + unsafe fn set_parent(&mut self, parent: ChildRefIndex); + + //Safety: + // 1. `self` is not dangling. + // 2. `self` is not a root Item. + unsafe fn parent_mut(&mut self) -> &mut BranchRef; + + // Safety: + // 1. `self` is not dangling. + // 2. All references belong to `references` instance. + // 3. `count > 0` + // 4. `self` data within `from..(from + count)` range is occupied. + // 5. `ChildLayer` is correctly describes children kind. + unsafe fn update_children( + &mut self, + references: &mut References, + from: ChildIndex, + count: ChildCount, + ) -> Length; + + //Safety: + // 1. `self` is not dangling. + // 2. All references belong to `references` instance. + // 3. `from` is lesser than the number of occupied children. + // 4. `children_split` correctly describes children layer splitting. + unsafe fn split( + &mut self, + references: &mut References, + children_split: Split, + length: Length, + from: ChildIndex, + ) -> Split; + + //Safety: + // 1. `left_ref` is not dangling. + // 2. `right_ref` is not dangling. + // 3. `left_ref` and `right_ref` both have children layers of the same kind. + // 4. `ChildLayer` is correctly describes children kind. + // 5. All references belong to `references` instance. + // 6. 
`left_ref` is not a root item. + // 7. `right_ref` is a root item. + #[inline] + unsafe fn join_to_left( + left_ref: &mut Self, + right_ref: &mut Self, + left_root_length: Length, + right_length: Length, + references: &mut References, + ) -> (bool, Option>) { + let left_occupied = unsafe { left_ref.as_ref().occupied() }; + let right_occupied = unsafe { right_ref.as_ref().occupied() }; + + if left_occupied + right_occupied <= capacity(::BRANCHING) { + let span_addition = unsafe { ItemRef::merge_to_left(left_ref, right_ref, references) }; + + unsafe { left_ref.parent_mut().inc_span_right(span_addition) }; + + return (true, None); + } + + let transfer_length = match right_occupied < ::BRANCHING { + false => 0, + + true => unsafe { ItemRef::balance_to_right(left_ref, right_ref, references) }, + }; + + let left_parent = unsafe { left_ref.parent_mut() }; + + (false, unsafe { + left_parent.add_child_right( + left_root_length, + transfer_length, + right_length + transfer_length, + right_ref.into_variant(), + ) + }) + } + + //Safety: + // 1. `left_ref` is not dangling. + // 2. `right_ref` is not dangling. + // 3. `left_ref` and `right_ref` both have children layers of the same kind. + // 4. `ChildLayer` is correctly describes children kind. + // 5. All references belong to `references` instance. + // 6. `left_ref` is a root item. + // 7. `right_ref` is not a root item. + #[inline] + unsafe fn join_to_right( + left_ref: &mut Self, + right_ref: &mut Self, + left_length: Length, + right_root_length: Length, + references: &mut References, + ) -> (bool, Option>) { + let left_occupied = unsafe { left_ref.as_ref().occupied() }; + let right_occupied = unsafe { right_ref.as_ref().occupied() }; + + if left_occupied + right_occupied <= capacity(::BRANCHING) { + let span_addition = unsafe { ItemRef::merge_to_right(left_ref, right_ref, references) }; + + unsafe { right_ref.parent_mut().inc_span_left(span_addition) }; + + return (true, None); + } + + let transfer_length = match left_occupied < ::BRANCHING { + false => 0, + + true => unsafe { ItemRef::balance_to_left(left_ref, right_ref, references) }, + }; + + let right_parent = unsafe { right_ref.parent_mut() }; + + (false, unsafe { + right_parent.add_child_left( + right_root_length, + transfer_length, + left_length + transfer_length, + left_ref.into_variant(), + ) + }) + } + + //Safety: + // 1. `left_ref` is not dangling. + // 2. `right_ref` is not dangling. + // 3. `left_ref` and `right_ref` both have children layers of the same kind. + // 4. `ChildLayer` is correctly describes children kind. + // 5. All references belong to `references` instance. 
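`join_to_left` and `join_to_right` above, together with `join_roots` below, share one decision rule: if two sibling nodes fit into a single node, merge them; otherwise, if one side is below the minimum occupancy, transfer just enough children to top it up (the `balance_to_left` helper defined below computes the transfer as `BRANCHING - left_occupied`). A compact sketch of that rule follows, under assumed `BRANCHING`/`CAPACITY` values and covering only the left-balancing direction.

const BRANCHING: usize = 6;
const CAPACITY: usize = 12; // assumed; stands in for capacity(BRANCHING)

enum Plan {
    Merge,
    BalanceToLeft { transfer: usize },
    AlreadyBalanced,
}

// Decide how two siblings with `left` and `right` occupied children
// should be reconciled, mirroring the join helpers' branch structure.
fn plan_join(left: usize, right: usize) -> Plan {
    if left + right <= CAPACITY {
        Plan::Merge
    } else if left < BRANCHING {
        // Top the underfull left node up to the minimum occupancy.
        Plan::BalanceToLeft { transfer: BRANCHING - left }
    } else {
        Plan::AlreadyBalanced
    }
}

fn main() {
    assert!(matches!(plan_join(4, 7), Plan::Merge));
    assert!(matches!(plan_join(4, 11), Plan::BalanceToLeft { transfer: 2 }));
    assert!(matches!(plan_join(7, 8), Plan::AlreadyBalanced));
}

The right-balancing case is symmetric, and the real helpers additionally rewire parent back references through `update_children` after moving children between nodes.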
+ #[inline] + unsafe fn join_roots( + left_ref: &mut Self, + right_ref: &mut Self, + mut left_length: Length, + mut right_length: Length, + references: &mut References, + ) -> Option> { + let left_occupied = unsafe { left_ref.as_ref().occupied() }; + let right_occupied = unsafe { right_ref.as_ref().occupied() }; + + if left_occupied + right_occupied <= capacity(::BRANCHING) { + let _ = unsafe { ItemRef::merge_to_left(left_ref, right_ref, references) }; + + return None; + } + + if left_occupied < ::BRANCHING { + let difference = unsafe { ItemRef::balance_to_left(left_ref, right_ref, references) }; + + left_length += difference; + right_length -= difference; + } else if right_occupied < ::BRANCHING { + let difference = unsafe { ItemRef::balance_to_right(left_ref, right_ref, references) }; + + left_length -= difference; + right_length += difference; + } + + let mut new_root_ref = Branch::::new(2); + + let parent_ref_variant = unsafe { new_root_ref.into_variant() }; + + unsafe { + left_ref.set_parent(ChildRefIndex { + item: parent_ref_variant, + index: 0, + }) + }; + + unsafe { + right_ref.set_parent(ChildRefIndex { + item: parent_ref_variant, + index: 1, + }) + }; + + { + let new_root = unsafe { new_root_ref.as_mut() }; + + new_root.inner.children[0] = unsafe { left_ref.into_variant() }; + new_root.inner.children[1] = unsafe { right_ref.into_variant() }; + new_root.inner.spans[0] = left_length; + new_root.inner.spans[1] = right_length; + } + + Some(parent_ref_variant) + } + + //Safety: + // 1. `left_ref` is not dangling. + // 2. `right_ref` is not dangling. + // 3. `left_ref` and `right_ref` both have children layers of the same kind. + // 4. `ChildLayer` is correctly describes children kind. + // 5. All references belong to `references` instance. + // 6. The total `left_ref` and `right_ref` occupied count is lesser or equal to capacity. + #[inline] + unsafe fn merge_to_left( + left_ref: &mut Self, + right_ref: &mut Self, + references: &mut References, + ) -> Length { + let left_occupied = unsafe { left_ref.as_ref().occupied() }; + let right_occupied = unsafe { right_ref.as_ref().occupied() }; + + debug_assert!( + left_occupied + right_occupied <= capacity(::BRANCHING), + "Internal error. Merge failure.", + ); + + unsafe { left_ref.as_mut().inflate(left_occupied, right_occupied) }; + + unsafe { + right_ref + .as_mut() + .copy_to(left_ref.as_mut(), 0, left_occupied, right_occupied) + }; + + forget(*unsafe { right_ref.into_owned() }); + + let difference = + unsafe { left_ref.update_children(references, left_occupied, right_occupied) }; + + difference + } + + //Safety: + // 1. `left_ref` is not dangling. + // 2. `right_ref` is not dangling. + // 3. `left_ref` and `right_ref` both have children layers of the same kind. + // 4. `ChildLayer` is correctly describes children kind. + // 5. All references belong to `references` instance. + // 6. The total `left_ref` and `right_ref` occupied count is lesser or equal to capacity. + #[inline] + unsafe fn merge_to_right( + left_ref: &mut Self, + right_ref: &mut Self, + references: &mut References, + ) -> Length { + let left_occupied = unsafe { left_ref.as_ref().occupied() }; + let right_occupied = unsafe { right_ref.as_ref().occupied() }; + + debug_assert!( + left_occupied + right_occupied <= capacity(::BRANCHING), + "Internal error. 
Merge failure.", + ); + + unsafe { right_ref.as_mut().inflate(0, left_occupied) }; + + unsafe { + left_ref + .as_mut() + .copy_to(right_ref.as_mut(), 0, 0, left_occupied) + }; + + forget(*unsafe { left_ref.into_owned() }); + + let difference = unsafe { right_ref.update_children(references, 0, left_occupied) }; + + let _ = unsafe { right_ref.update_children(references, left_occupied, right_occupied) }; + + difference + } + + //Safety: + // 1. `left_ref` is not dangling. + // 2. `right_ref` is not dangling. + // 3. `left_ref` and `right_ref` both have children layers of the same kind. + // 4. `ChildLayer` is correctly describes children kind. + // 5. All references belong to `references` instance. + // 6. The total `left_ref` and `right_ref` occupied count is greater than capacity. + // 7. `left_ref` occupied count is lesser than branching factor. + // 8. `right_ref` occupied count is greater or equal to branching factor. + #[inline] + unsafe fn balance_to_left( + left_ref: &mut Self, + right_ref: &mut Self, + references: &mut References, + ) -> Length { + let left_occupied = unsafe { left_ref.as_ref().occupied() }; + let right_occupied = unsafe { right_ref.as_ref().occupied() }; + + debug_assert!( + left_occupied + right_occupied > capacity(::BRANCHING), + "Internal error. Balance failure.", + ); + + debug_assert!( + left_occupied < ::BRANCHING, + "Internal error. Balance failure.", + ); + + debug_assert!( + right_occupied >= ::BRANCHING, + "Internal error. Balance failure.", + ); + + let transfer_count = ::BRANCHING - left_occupied; + + debug_assert!( + right_occupied - ::BRANCHING >= transfer_count, + "Internal error. Balance failure.", + ); + + unsafe { left_ref.as_mut().inflate(left_occupied, transfer_count) }; + + unsafe { + right_ref + .as_mut() + .copy_to(left_ref.as_mut(), 0, left_occupied, transfer_count) + }; + + let is_right_balanced = unsafe { right_ref.as_mut().deflate(0, transfer_count) }; + + debug_assert!( + is_right_balanced, + "Internal error. Balance-to-left failure.", + ); + + let difference = + unsafe { left_ref.update_children(references, left_occupied, transfer_count) }; + + let _ = + unsafe { right_ref.update_children(references, 0, right_occupied - transfer_count) }; + + difference + } + + //Safety: + // 1. `left_ref` is not dangling. + // 2. `right_ref` is not dangling. + // 3. `left_ref` and `right_ref` both have children layers of the same kind. + // 4. `ChildLayer` is correctly describes children kind. + // 5. All references belong to `references` instance. + // 6. The total `left_ref` and `right_ref` occupied count is greater than capacity. + // 7. `left_ref` occupied count is greater or equal to branching factor. + // 8. `right_ref` occupied count is lesser than branching factor. + #[inline] + unsafe fn balance_to_right( + left_ref: &mut Self, + right_ref: &mut Self, + references: &mut References, + ) -> Length { + let left_occupied = unsafe { left_ref.as_ref().occupied() }; + let right_occupied = unsafe { right_ref.as_ref().occupied() }; + + debug_assert!( + left_occupied + right_occupied > capacity(::BRANCHING), + "Internal error. Balance failure.", + ); + + debug_assert!( + left_occupied >= ::BRANCHING, + "Internal error. Balance failure.", + ); + + debug_assert!( + right_occupied < ::BRANCHING, + "Internal error. Balance failure.", + ); + + let transfer_count = ::BRANCHING - right_occupied; + + debug_assert!( + left_occupied >= ::BRANCHING, + "Internal error. 
Balance failure.", + ); + + debug_assert!( + left_occupied - ::BRANCHING >= transfer_count, + "Internal error. Balance failure.", + ); + + unsafe { right_ref.as_mut().inflate(0, transfer_count) }; + + unsafe { + left_ref.as_mut().copy_to( + right_ref.as_mut(), + left_occupied - transfer_count, + 0, + transfer_count, + ) + }; + + let is_left_balanced = unsafe { + left_ref + .as_mut() + .deflate(left_occupied - transfer_count, transfer_count) + }; + + debug_assert!( + is_left_balanced, + "Internal error. Balance-to-right failure.", + ); + + let difference = unsafe { right_ref.update_children(references, 0, transfer_count) }; + + let _ = unsafe { + right_ref.update_children( + references, + transfer_count, + ::BRANCHING - transfer_count, + ) + }; + + difference + } +} + +pub(super) struct Split { + pub(super) left_span: Length, + pub(super) left_item: ItemRefVariant, + pub(super) right_span: Length, + pub(super) right_item: ItemRefVariant, +} + +impl Split { + #[inline(always)] + pub(super) const fn dangling() -> Self { + Self { + left_span: 0, + left_item: ItemRefVariant::dangling(), + right_span: 0, + right_item: ItemRefVariant::dangling(), + } + } +} + +pub(super) union ItemRefVariant { + branch: BranchRef<(), N>, + page: PageRef, + dangling: (), +} + +impl Default for ItemRefVariant { + #[inline(always)] + fn default() -> Self { + Self::dangling() + } +} + +impl Clone for ItemRefVariant { + #[inline(always)] + fn clone(&self) -> Self { + *self + } +} + +impl Copy for ItemRefVariant {} + +impl ItemRefVariant { + #[inline(always)] + pub(super) const fn dangling() -> Self { + Self { dangling: () } + } + + #[inline(always)] + pub(super) fn from_branch(branch: BranchRef) -> Self { + Self { + branch: unsafe { transmute(branch) }, + } + } + + #[inline(always)] + pub(super) fn from_page(page: PageRef) -> Self { + Self { page } + } + + // Safety: + // 1. Variant is a Branch variant. + // 2. `ChildLayer` correctly describes child layer of the Branch instance. + #[inline(always)] + pub(super) unsafe fn as_branch_ref(&self) -> &BranchRef { + unsafe { transmute(&self.branch) } + } + + // Safety: + // 1. Variant is a Branch variant. + // 2. `ChildLayer` correctly describes child layer of the Branch instance. + #[inline(always)] + pub(super) unsafe fn as_branch_mut( + &mut self, + ) -> &mut BranchRef { + unsafe { transmute(&mut self.branch) } + } + + //Safety: Variant is a Page variant. + #[inline(always)] + pub(super) unsafe fn as_page_ref(&self) -> &PageRef { + unsafe { &self.page } + } + + //Safety: Variant is a Page variant. + #[inline(always)] + pub(super) unsafe fn as_page_mut(&mut self) -> &mut PageRef { + unsafe { &mut self.page } + } + + //Safety: `SelfLayer` correctly describes variant kind. + #[inline(always)] + pub(super) unsafe fn set_parent(&mut self, parent: ChildRefIndex) { + match SelfLayer::descriptor() { + LayerDescriptor::Branch => unsafe { self.as_branch_mut::<()>().set_parent(parent) }, + LayerDescriptor::Page => unsafe { self.as_page_mut().set_parent(parent) }, + } + } +} diff --git a/work/crates/main/src/incremental/storage/mod.rs b/work/crates/main/src/incremental/storage/mod.rs new file mode 100644 index 0000000..d2949b7 --- /dev/null +++ b/work/crates/main/src/incremental/storage/mod.rs @@ -0,0 +1,959 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. 
// +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +mod branch; +mod cache; +mod child; +mod item; +mod nesting; +mod page; +mod references; +mod tree; +mod utils; + +pub(crate) use crate::incremental::storage::{ + cache::ClusterCache, + child::ChildRefIndex, + references::References, + tree::Tree, +}; + +#[cfg(test)] +mod tests { + #[cfg(not(debug_assertions))] + use crate::{ + incremental::storage::{branch::Branch, item::Item, page::Page}, + lexis::{Length, Site}, + }; + use crate::{ + incremental::storage::{ + child::ChildIndex, + item::{ItemRef, ItemRefVariant}, + nesting::{BranchLayer, Height, PageLayer}, + references::References, + tree::Tree, + }, + lexis::{LexisSession, Token}, + std::*, + syntax::{Node, SyntaxError, SyntaxRule, SyntaxSession}, + }; + + #[test] + fn test_bulk_load_1() { + let mut references = References::::default(); + + let mut tree = Tree::default(); + + assert_eq!(TreeDisplay(&tree).to_string(), r#"height: 0, length: 0"#,); + + unsafe { + tree.free(&mut references); + } + } + + #[test] + fn test_bulk_load_2() { + let mut references = References::::default(); + + let mut tree = gen(&mut references, 1..=1); + + assert_eq!( + TreeDisplay(&tree).to_string(), + r#"height: 1, length: 1, [ + 1, +]"#, + ); + + unsafe { + tree.free(&mut references); + } + } + + #[test] + fn test_bulk_load_3() { + let mut references = References::::default(); + + let mut tree = gen(&mut references, 1..=20); + + assert_eq!( + TreeDisplay(&tree).to_string().as_str(), + r#"height: 2, length: 20, { + 6: [1, 2, 3, 4, 5, 6], + 7: [7, 8, 9, 10, 11, 12, 13], + 7: [14, 15, 16, 17, 18, 19, 20], +}"# + ); + + unsafe { + tree.free(&mut references); + } + } + + #[test] + fn test_bulk_load_4() { + let mut references = References::::default(); + + let mut tree = gen(&mut references, 1..=300); + + assert_eq!( + TreeDisplay(&tree).to_string().as_str(), + r#"height: 3, length: 300, { + 36: { + 6: [1, 2, 3, 4, 5, 6], + 6: [7, 8, 9, 10, 11, 12], + 6: [13, 14, 15, 16, 17, 18], + 6: [19, 20, 21, 22, 23, 24], + 6: [25, 26, 27, 28, 29, 30], + 6: [31, 32, 33, 34, 35, 
36], + }, + 36: { + 6: [37, 38, 39, 40, 41, 42], + 6: [43, 44, 45, 46, 47, 48], + 6: [49, 50, 51, 52, 53, 54], + 6: [55, 56, 57, 58, 59, 60], + 6: [61, 62, 63, 64, 65, 66], + 6: [67, 68, 69, 70, 71, 72], + }, + 36: { + 6: [73, 74, 75, 76, 77, 78], + 6: [79, 80, 81, 82, 83, 84], + 6: [85, 86, 87, 88, 89, 90], + 6: [91, 92, 93, 94, 95, 96], + 6: [97, 98, 99, 100, 101, 102], + 6: [103, 104, 105, 106, 107, 108], + }, + 36: { + 6: [109, 110, 111, 112, 113, 114], + 6: [115, 116, 117, 118, 119, 120], + 6: [121, 122, 123, 124, 125, 126], + 6: [127, 128, 129, 130, 131, 132], + 6: [133, 134, 135, 136, 137, 138], + 6: [139, 140, 141, 142, 143, 144], + }, + 36: { + 6: [145, 146, 147, 148, 149, 150], + 6: [151, 152, 153, 154, 155, 156], + 6: [157, 158, 159, 160, 161, 162], + 6: [163, 164, 165, 166, 167, 168], + 6: [169, 170, 171, 172, 173, 174], + 6: [175, 176, 177, 178, 179, 180], + }, + 36: { + 6: [181, 182, 183, 184, 185, 186], + 6: [187, 188, 189, 190, 191, 192], + 6: [193, 194, 195, 196, 197, 198], + 6: [199, 200, 201, 202, 203, 204], + 6: [205, 206, 207, 208, 209, 210], + 6: [211, 212, 213, 214, 215, 216], + }, + 42: { + 6: [217, 218, 219, 220, 221, 222], + 6: [223, 224, 225, 226, 227, 228], + 6: [229, 230, 231, 232, 233, 234], + 6: [235, 236, 237, 238, 239, 240], + 6: [241, 242, 243, 244, 245, 246], + 6: [247, 248, 249, 250, 251, 252], + 6: [253, 254, 255, 256, 257, 258], + }, + 42: { + 6: [259, 260, 261, 262, 263, 264], + 6: [265, 266, 267, 268, 269, 270], + 6: [271, 272, 273, 274, 275, 276], + 6: [277, 278, 279, 280, 281, 282], + 6: [283, 284, 285, 286, 287, 288], + 6: [289, 290, 291, 292, 293, 294], + 6: [295, 296, 297, 298, 299, 300], + }, +}"# + ); + + unsafe { + tree.free(&mut references); + } + } + + #[test] + fn test_join_roots_1() { + let mut references = References::::default(); + let mut left = gen(&mut references, 1..=300); + let right = gen(&mut references, 301..=600); + + unsafe { left.join(&mut references, right) }; + + assert_eq!( + TreeDisplay(&left).to_string().as_str(), + r#"height: 4, length: 600, { + 300: { + 36: { + 6: [1, 2, 3, 4, 5, 6], + 6: [7, 8, 9, 10, 11, 12], + 6: [13, 14, 15, 16, 17, 18], + 6: [19, 20, 21, 22, 23, 24], + 6: [25, 26, 27, 28, 29, 30], + 6: [31, 32, 33, 34, 35, 36], + }, + 36: { + 6: [37, 38, 39, 40, 41, 42], + 6: [43, 44, 45, 46, 47, 48], + 6: [49, 50, 51, 52, 53, 54], + 6: [55, 56, 57, 58, 59, 60], + 6: [61, 62, 63, 64, 65, 66], + 6: [67, 68, 69, 70, 71, 72], + }, + 36: { + 6: [73, 74, 75, 76, 77, 78], + 6: [79, 80, 81, 82, 83, 84], + 6: [85, 86, 87, 88, 89, 90], + 6: [91, 92, 93, 94, 95, 96], + 6: [97, 98, 99, 100, 101, 102], + 6: [103, 104, 105, 106, 107, 108], + }, + 36: { + 6: [109, 110, 111, 112, 113, 114], + 6: [115, 116, 117, 118, 119, 120], + 6: [121, 122, 123, 124, 125, 126], + 6: [127, 128, 129, 130, 131, 132], + 6: [133, 134, 135, 136, 137, 138], + 6: [139, 140, 141, 142, 143, 144], + }, + 36: { + 6: [145, 146, 147, 148, 149, 150], + 6: [151, 152, 153, 154, 155, 156], + 6: [157, 158, 159, 160, 161, 162], + 6: [163, 164, 165, 166, 167, 168], + 6: [169, 170, 171, 172, 173, 174], + 6: [175, 176, 177, 178, 179, 180], + }, + 36: { + 6: [181, 182, 183, 184, 185, 186], + 6: [187, 188, 189, 190, 191, 192], + 6: [193, 194, 195, 196, 197, 198], + 6: [199, 200, 201, 202, 203, 204], + 6: [205, 206, 207, 208, 209, 210], + 6: [211, 212, 213, 214, 215, 216], + }, + 42: { + 6: [217, 218, 219, 220, 221, 222], + 6: [223, 224, 225, 226, 227, 228], + 6: [229, 230, 231, 232, 233, 234], + 6: [235, 236, 237, 238, 239, 240], + 6: [241, 242, 243, 244, 245, 246], + 
6: [247, 248, 249, 250, 251, 252], + 6: [253, 254, 255, 256, 257, 258], + }, + 42: { + 6: [259, 260, 261, 262, 263, 264], + 6: [265, 266, 267, 268, 269, 270], + 6: [271, 272, 273, 274, 275, 276], + 6: [277, 278, 279, 280, 281, 282], + 6: [283, 284, 285, 286, 287, 288], + 6: [289, 290, 291, 292, 293, 294], + 6: [295, 296, 297, 298, 299, 300], + }, + }, + 300: { + 36: { + 6: [301, 302, 303, 304, 305, 306], + 6: [307, 308, 309, 310, 311, 312], + 6: [313, 314, 315, 316, 317, 318], + 6: [319, 320, 321, 322, 323, 324], + 6: [325, 326, 327, 328, 329, 330], + 6: [331, 332, 333, 334, 335, 336], + }, + 36: { + 6: [337, 338, 339, 340, 341, 342], + 6: [343, 344, 345, 346, 347, 348], + 6: [349, 350, 351, 352, 353, 354], + 6: [355, 356, 357, 358, 359, 360], + 6: [361, 362, 363, 364, 365, 366], + 6: [367, 368, 369, 370, 371, 372], + }, + 36: { + 6: [373, 374, 375, 376, 377, 378], + 6: [379, 380, 381, 382, 383, 384], + 6: [385, 386, 387, 388, 389, 390], + 6: [391, 392, 393, 394, 395, 396], + 6: [397, 398, 399, 400, 401, 402], + 6: [403, 404, 405, 406, 407, 408], + }, + 36: { + 6: [409, 410, 411, 412, 413, 414], + 6: [415, 416, 417, 418, 419, 420], + 6: [421, 422, 423, 424, 425, 426], + 6: [427, 428, 429, 430, 431, 432], + 6: [433, 434, 435, 436, 437, 438], + 6: [439, 440, 441, 442, 443, 444], + }, + 36: { + 6: [445, 446, 447, 448, 449, 450], + 6: [451, 452, 453, 454, 455, 456], + 6: [457, 458, 459, 460, 461, 462], + 6: [463, 464, 465, 466, 467, 468], + 6: [469, 470, 471, 472, 473, 474], + 6: [475, 476, 477, 478, 479, 480], + }, + 36: { + 6: [481, 482, 483, 484, 485, 486], + 6: [487, 488, 489, 490, 491, 492], + 6: [493, 494, 495, 496, 497, 498], + 6: [499, 500, 501, 502, 503, 504], + 6: [505, 506, 507, 508, 509, 510], + 6: [511, 512, 513, 514, 515, 516], + }, + 42: { + 6: [517, 518, 519, 520, 521, 522], + 6: [523, 524, 525, 526, 527, 528], + 6: [529, 530, 531, 532, 533, 534], + 6: [535, 536, 537, 538, 539, 540], + 6: [541, 542, 543, 544, 545, 546], + 6: [547, 548, 549, 550, 551, 552], + 6: [553, 554, 555, 556, 557, 558], + }, + 42: { + 6: [559, 560, 561, 562, 563, 564], + 6: [565, 566, 567, 568, 569, 570], + 6: [571, 572, 573, 574, 575, 576], + 6: [577, 578, 579, 580, 581, 582], + 6: [583, 584, 585, 586, 587, 588], + 6: [589, 590, 591, 592, 593, 594], + 6: [595, 596, 597, 598, 599, 600], + }, + }, +}"# + ); + + unsafe { + left.free(&mut references); + } + } + + #[test] + fn test_join_roots_2() { + let mut references = References::::default(); + let mut left = gen(&mut references, 1..=61); + let right = gen(&mut references, 101..=115); + + unsafe { left.join(&mut references, right) }; + + assert_eq!( + TreeDisplay(&left).to_string().as_str(), + r#"height: 3, length: 76, { + 36: { + 6: [1, 2, 3, 4, 5, 6], + 6: [7, 8, 9, 10, 11, 12], + 6: [13, 14, 15, 16, 17, 18], + 6: [19, 20, 21, 22, 23, 24], + 6: [25, 26, 27, 28, 29, 30], + 6: [31, 32, 33, 34, 35, 36], + }, + 40: { + 6: [37, 38, 39, 40, 41, 42], + 6: [43, 44, 45, 46, 47, 48], + 6: [49, 50, 51, 52, 53, 54], + 7: [55, 56, 57, 58, 59, 60, 61], + 7: [101, 102, 103, 104, 105, 106, 107], + 8: [108, 109, 110, 111, 112, 113, 114, 115], + }, +}"# + ); + + unsafe { + left.free(&mut references); + } + } + + #[test] + fn test_join_roots_3() { + let mut references = References::::default(); + let mut left = gen(&mut references, 1..=14); + let right = gen(&mut references, 101..=161); + + unsafe { left.join(&mut references, right) }; + + assert_eq!( + TreeDisplay(&left).to_string().as_str(), + r#"height: 3, length: 75, { + 38: { + 7: [1, 2, 3, 4, 5, 6, 7], + 7: [8, 9, 
10, 11, 12, 13, 14],
+    6: [101, 102, 103, 104, 105, 106],
+    6: [107, 108, 109, 110, 111, 112],
+    6: [113, 114, 115, 116, 117, 118],
+    6: [119, 120, 121, 122, 123, 124],
+  },
+  37: {
+    6: [125, 126, 127, 128, 129, 130],
+    6: [131, 132, 133, 134, 135, 136],
+    6: [137, 138, 139, 140, 141, 142],
+    6: [143, 144, 145, 146, 147, 148],
+    6: [149, 150, 151, 152, 153, 154],
+    7: [155, 156, 157, 158, 159, 160, 161],
+  },
+}"#
+        );
+
+        unsafe {
+            left.free(&mut references);
+        }
+    }
+
+    #[test]
+    fn test_join_right_1() {
+        let mut references = References::<TestNode>::default();
+        let mut left = gen(&mut references, 1..=70);
+        let right = gen(&mut references, 101..=103);
+
+        unsafe { left.join(&mut references, right) };
+
+        assert_eq!(
+            TreeDisplay(&left).to_string().as_str(),
+            r#"height: 2, length: 73, {
+  6: [1, 2, 3, 4, 5, 6],
+  6: [7, 8, 9, 10, 11, 12],
+  6: [13, 14, 15, 16, 17, 18],
+  6: [19, 20, 21, 22, 23, 24],
+  6: [25, 26, 27, 28, 29, 30],
+  6: [31, 32, 33, 34, 35, 36],
+  6: [37, 38, 39, 40, 41, 42],
+  7: [43, 44, 45, 46, 47, 48, 49],
+  7: [50, 51, 52, 53, 54, 55, 56],
+  7: [57, 58, 59, 60, 61, 62, 63],
+  10: [64, 65, 66, 67, 68, 69, 70, 101, 102, 103],
+}"#
+        );
+
+        unsafe {
+            left.free(&mut references);
+        }
+    }
+
+    #[test]
+    fn test_join_right_2() {
+        let mut references = References::<TestNode>::default();
+        let mut left = gen(&mut references, 1..=65);
+        let right = gen(&mut references, 101..=105);
+
+        unsafe { left.join(&mut references, right) };
+
+        assert_eq!(
+            TreeDisplay(&left).to_string().as_str(),
+            r#"height: 2, length: 70, {
+  6: [1, 2, 3, 4, 5, 6],
+  6: [7, 8, 9, 10, 11, 12],
+  6: [13, 14, 15, 16, 17, 18],
+  6: [19, 20, 21, 22, 23, 24],
+  6: [25, 26, 27, 28, 29, 30],
+  7: [31, 32, 33, 34, 35, 36, 37],
+  7: [38, 39, 40, 41, 42, 43, 44],
+  7: [45, 46, 47, 48, 49, 50, 51],
+  7: [52, 53, 54, 55, 56, 57, 58],
+  6: [59, 60, 61, 62, 63, 64],
+  6: [65, 101, 102, 103, 104, 105],
+}"#
+        );
+
+        unsafe {
+            left.free(&mut references);
+        }
+    }
+
+    #[test]
+    fn test_join_right_3() {
+        let mut references = References::<TestNode>::default();
+        let mut left = gen(&mut references, 1..=70);
+        let right = gen(&mut references, 101..=105);
+
+        unsafe { left.join(&mut references, right) };
+
+        assert_eq!(
+            TreeDisplay(&left).to_string().as_str(),
+            r#"height: 3, length: 75, {
+  36: {
+    6: [1, 2, 3, 4, 5, 6],
+    6: [7, 8, 9, 10, 11, 12],
+    6: [13, 14, 15, 16, 17, 18],
+    6: [19, 20, 21, 22, 23, 24],
+    6: [25, 26, 27, 28, 29, 30],
+    6: [31, 32, 33, 34, 35, 36],
+  },
+  39: {
+    6: [37, 38, 39, 40, 41, 42],
+    7: [43, 44, 45, 46, 47, 48, 49],
+    7: [50, 51, 52, 53, 54, 55, 56],
+    7: [57, 58, 59, 60, 61, 62, 63],
+    6: [64, 65, 66, 67, 68, 69],
+    6: [70, 101, 102, 103, 104, 105],
+  },
+}"#
+        );
+
+        unsafe {
+            left.free(&mut references);
+        }
+    }
+
+    #[test]
+    fn test_join_left_1() {
+        let mut references = References::<TestNode>::default();
+        let mut left = gen(&mut references, 101..=106);
+        let right = gen(&mut references, 1..=70);
+
+        unsafe { left.join(&mut references, right) };
+
+        assert_eq!(
+            TreeDisplay(&left).to_string().as_str(),
+            r#"height: 3, length: 76, {
+  36: {
+    6: [101, 102, 103, 104, 105, 106],
+    6: [1, 2, 3, 4, 5, 6],
+    6: [7, 8, 9, 10, 11, 12],
+    6: [13, 14, 15, 16, 17, 18],
+    6: [19, 20, 21, 22, 23, 24],
+    6: [25, 26, 27, 28, 29, 30],
+  },
+  40: {
+    6: [31, 32, 33, 34, 35, 36],
+    6: [37, 38, 39, 40, 41, 42],
+    7: [43, 44, 45, 46, 47, 48, 49],
+    7: [50, 51, 52, 53, 54, 55, 56],
+    7: [57, 58, 59, 60, 61, 62, 63],
+    7: [64, 65, 66, 67, 68, 69, 70],
+  },
+}"#
+        );
+
+        unsafe {
+            left.free(&mut references);
+        }
+    }
+
+    
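// A note on the release-mode stress test below: it splits the tree at every
+    // site index and joins the halves back together, checking the structural
+    // invariants (children occupancy and span sums) and the stored token data
+    // after each split/join round trip.
+
+    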
#[cfg(not(debug_assertions))] + #[test] + fn test_tree_release() { + let mut references = References::::default(); + + let empty = Tree::::default(); + + assert!(empty.lookup(&mut 0).is_dangling()); + assert!(empty.lookup(&mut 10).is_dangling()); + + for high in 0..4 { + for low in 1..20 { + let child_count = high * 1000 + low; + let mut tree = gen(&mut references, 1..=child_count); + + check_tree_structure(&tree); + check_tree_data(&tree, 1); + + let length = tree.length(); + + for site in 1..length { + let chunk_ref = { + let mut site = site; + + let chunk_ref = tree.lookup(&mut site); + + assert_eq!(site, 0); + + chunk_ref + }; + + let start = unsafe { chunk_ref.token().0 }; + + let right = unsafe { tree.split(&mut references, chunk_ref) }; + + assert_eq!(tree.length(), site); + assert_eq!(right.length(), length - site); + + check_tree_structure(&tree); + check_tree_data(&tree, 1); + + check_tree_structure(&right); + check_tree_data(&right, start); + + unsafe { tree.join(&mut references, right) }; + + check_tree_structure(&tree); + check_tree_data(&tree, 1); + + assert_eq!(tree.length(), length); + } + + unsafe { tree.free(&mut references) } + } + } + } + + fn gen( + references: &mut References, + range: RangeInclusive, + ) -> Tree { + let count = range.end() - range.start() + 1; + + let mut spans = Vec::with_capacity(count); + let mut strings = Vec::with_capacity(count); + let mut tokens = Vec::with_capacity(count); + + for index in range { + spans.push(1); + strings.push(index.to_string()); + tokens.push(TestToken(index)); + } + + unsafe { + Tree::from_chunks( + references, + count, + spans.into_iter(), + strings.into_iter(), + tokens.into_iter(), + ) + } + } + + #[cfg(not(debug_assertions))] + fn check_tree_structure(tree: &Tree) { + fn check_page(page_variant: &ItemRefVariant, outer_span: Length) { + let page = unsafe { page_variant.as_page_ref().as_ref() }; + + assert!(page.occupied >= Page::::BRANCHING); + + assert!(!page.parent.is_dangling()); + + let mut inner_span = 0; + + for index in 0..page.occupied { + let child_span = page.spans[index]; + + inner_span += child_span; + } + + assert_eq!(outer_span, inner_span); + } + + fn check_branch( + branch_variant: &ItemRefVariant, + depth: Height, + outer_span: Length, + ) { + let branch = unsafe { branch_variant.as_branch_ref::<()>().as_ref() }; + + assert!(branch.inner.occupied >= Branch::<(), TestNode>::BRANCHING); + + assert!(!branch.inner.parent.is_dangling()); + + let mut inner_span = 0; + + if depth > 2 { + for index in 0..branch.inner.occupied { + let child_span = branch.inner.spans[index]; + + check_branch(&branch.inner.children[index], depth - 1, child_span); + + inner_span += child_span; + } + + assert_eq!(outer_span, inner_span); + + return; + } + + for index in 0..branch.inner.occupied { + let child_span = branch.inner.spans[index]; + + check_page(&branch.inner.children[index], child_span); + + inner_span += child_span; + } + + assert_eq!(outer_span, inner_span); + } + + match tree.height { + 0 => (), + + 1 => { + let root = unsafe { tree.root.as_page_ref().as_ref() }; + + assert!(root.parent.is_dangling()); + + let mut inner_span = 0; + + for index in 0..root.occupied { + let child_span = root.spans[index]; + + inner_span += child_span; + } + + assert_eq!(tree.length, inner_span); + } + + 2 => { + let root = unsafe { tree.root.as_branch_ref::<()>().as_ref() }; + + assert!(root.inner.parent.is_dangling()); + + let mut inner_span = 0; + + for index in 0..root.inner.occupied { + let child_span = root.inner.spans[index]; + + 
check_page(&root.inner.children[index], child_span); + + inner_span += child_span; + } + + assert_eq!(tree.length, inner_span); + } + + _ => { + let root = unsafe { tree.root.as_branch_ref::<()>().as_ref() }; + + assert!(root.inner.parent.is_dangling()); + + let mut inner_span = 0; + + for index in 0..root.inner.occupied { + let child_span = root.inner.spans[index]; + + check_branch(&root.inner.children[index], tree.height - 1, child_span); + + inner_span += child_span; + } + + assert_eq!(tree.length, inner_span); + } + } + } + + #[cfg(not(debug_assertions))] + fn check_tree_data(tree: &Tree, start: Site) { + if tree.height > 0 { + let mut first = tree.first(); + let mut last = tree.last(); + + assert!(!first.is_dangling()); + assert!(unsafe { first.is_first() }); + + assert!(!last.is_dangling()); + assert!(unsafe { last.is_last() }); + + match tree.length > 1 { + true => { + assert!(!unsafe { first.is_last() }); + assert!(!unsafe { last.is_first() }); + } + + false => { + assert!(unsafe { first.is_last() }); + assert!(unsafe { last.is_first() }); + } + } + + for index in 0..tree.length { + assert_eq!(*unsafe { first.span() }, 1); + assert_eq!( + unsafe { first.string() }, + format!("{}", index + start).as_str(), + ); + assert_eq!(unsafe { first.token() }, &TestToken(index + start)); + + assert_eq!(*unsafe { last.span() }, 1); + assert_eq!( + unsafe { last.string() }, + format!("{}", (tree.length + start - index - 1)).as_str(), + ); + assert_eq!( + unsafe { last.token() }, + &TestToken(tree.length + start - index - 1), + ); + + unsafe { first.next() }; + unsafe { last.back() }; + } + + assert!(first.is_dangling()); + assert!(last.is_dangling()); + } + + for site in 0..tree.length { + let chunk_ref = { + let mut site = site; + + let chunk_ref = tree.lookup(&mut site); + + assert_eq!(site, 0); + + chunk_ref + }; + + assert_eq!( + unsafe { chunk_ref.string() }, + format!("{}", (site + start)).as_str(), + ); + } + } + + struct TestNode; + + impl Node for TestNode { + type Token = TestToken; + type Error = SyntaxError; + + fn new<'code>( + _rule: SyntaxRule, + _session: &mut impl SyntaxSession<'code, Node = Self>, + ) -> Self { + unimplemented!() + } + } + + #[derive(PartialEq, Eq, Debug)] + struct TestToken(usize); + + impl Token for TestToken { + fn new(_session: &mut impl LexisSession) -> Self { + unimplemented!() + } + } + + struct TreeDisplay<'a, N: Node>(&'a Tree); + + impl<'a, N: Node> Display for TreeDisplay<'a, N> { + #[inline] + fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult { + struct PageDisplay<'a, N: Node> { + page: &'a ItemRefVariant, + } + + impl<'a, N: Node> Debug for PageDisplay<'a, N> { + fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult { + let page = unsafe { self.page.as_page_ref().as_ref() }; + + let mut list = formatter.debug_list(); + + for index in 0..page.occupied { + let string = unsafe { page.strings[index].assume_init_ref() }; + + list.entry(&format_args!("{}", string)); + } + + list.finish() + } + } + + struct BranchDisplay<'a, N: Node> { + height: Height, + branch: &'a ItemRefVariant, + } + + impl<'a, N: Node> Debug for BranchDisplay<'a, N> { + fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult { + match self.height { + 0 => unreachable!("Incorrect height"), + + 1 => { + let branch = + unsafe { self.branch.as_branch_ref::().as_ref() }; + + let mut list = formatter.debug_map(); + + for index in 0..branch.inner.occupied { + let span = branch.inner.spans[index]; + let page = unsafe { branch.inner.children[index] }; + + list.entry( + &span, 
+ &format_args!("{:?}", PageDisplay { page: &page }), + ); + } + + list.finish() + } + + _ => { + let branch = + unsafe { self.branch.as_branch_ref::().as_ref() }; + + let mut list = formatter.debug_map(); + + for index in 0..branch.inner.occupied { + let span = branch.inner.spans[index]; + let branch = unsafe { branch.inner.children[index] }; + + list.entry( + &span, + &BranchDisplay { + height: self.height - 1, + branch: &branch, + }, + ); + } + + list.finish() + } + } + } + } + + formatter.write_str(&format!( + "height: {}, length: {}", + &self.0.height, &self.0.length + ))?; + + match self.0.height { + 0 => (), + + 1 => { + formatter.write_str(&format!(", {:#?}", PageDisplay { page: &self.0.root }))?; + } + + _ => { + formatter.write_str(&format!( + ", {:#?}", + BranchDisplay { + height: self.0.height - 1, + branch: &self.0.root + } + ))?; + } + } + + Ok(()) + } + } +} diff --git a/work/crates/main/src/incremental/storage/nesting.rs b/work/crates/main/src/incremental/storage/nesting.rs new file mode 100644 index 0000000..991e3ed --- /dev/null +++ b/work/crates/main/src/incremental/storage/nesting.rs @@ -0,0 +1,116 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + incremental::storage::{branch::Branch, child::ChildCount, item::Item, page::Page}, + std::*, + syntax::Node, +}; + +pub(super) type Height = usize; + +// Safety: `Layer` is implemented for zero-sized and 'static types only. +pub(super) unsafe trait Layer { + fn branching() -> ChildCount; + + fn descriptor() -> &'static LayerDescriptor; +} + +unsafe impl Layer for () { + #[inline(always)] + fn branching() -> ChildCount { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. 
+
+pub(super) struct BranchLayer;
+
+unsafe impl Layer for BranchLayer {
+    #[inline(always)]
+    fn branching<N: Node>() -> ChildCount {
+        Branch::<(), N>::BRANCHING
+    }
+
+    #[inline(always)]
+    fn descriptor() -> &'static LayerDescriptor {
+        static BRANCH: LayerDescriptor = LayerDescriptor::Branch;
+
+        &BRANCH
+    }
+}
+
+pub(super) struct PageLayer;
+
+unsafe impl Layer for PageLayer {
+    #[inline(always)]
+    fn branching<N: Node>() -> ChildCount {
+        Page::<N>::BRANCHING
+    }
+
+    #[inline(always)]
+    fn descriptor() -> &'static LayerDescriptor {
+        static PAGE: LayerDescriptor = LayerDescriptor::Page;
+
+        &PAGE
+    }
+}
+
+pub(super) enum LayerDescriptor {
+    Branch,
+    Page,
+}
diff --git a/work/crates/main/src/incremental/storage/page.rs b/work/crates/main/src/incremental/storage/page.rs
new file mode 100644
index 0000000..178c23f
--- /dev/null
+++ b/work/crates/main/src/incremental/storage/page.rs
@@ -0,0 +1,895 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work,                            //
+// a compiler front-end foundation technology.                                //
+//                                                                            //
+// This Work is a proprietary software with source available code.            //
+//                                                                            //
+// To copy, use, distribute, and contribute into this Work you must agree to  //
+// the terms of the End User License Agreement:                               //
+//                                                                            //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md.          //
+//                                                                            //
+// The Agreement let you use this Work in commercial and non-commercial       //
+// purposes. Commercial use of the Work is free of charge to start,           //
+// but the Agreement obligates you to pay me royalties                        //
+// under certain conditions.                                                  //
+//                                                                            //
+// If you want to contribute into the source code of this Work,               //
+// the Agreement obligates you to assign me all exclusive rights to           //
+// the Derivative Work or contribution made by you                            //
+// (this includes GitHub forks and pull requests to my repository).           //
+//                                                                            //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only,     //
+// and the third party software does not incorporate or distribute            //
+// this Work directly.                                                        //
+//                                                                            //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY  //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES           //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                   //
+//                                                                            //
+// If you do not or cannot agree to the terms of this Agreement,              //
+// do not use this Work.                                                      //
+//                                                                            //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                 //
+// All rights reserved.
// +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + arena::RefIndex, + incremental::storage::{ + branch::BranchRef, + cache::CacheEntry, + child::{ChildCount, ChildIndex, ChildRefIndex}, + item::{Item, ItemRef, ItemRefVariant, Split}, + nesting::PageLayer, + references::References, + utils::{array_copy_to, array_shift, capacity}, + }, + lexis::Length, + std::*, + syntax::Node, +}; + +const BRANCHING: ChildCount = 6; + +pub(super) struct Page { + pub(super) parent: ChildRefIndex, + pub(super) previous: Option>, + pub(super) next: Option>, + pub(super) occupied: ChildCount, + pub(super) spans: [Length; capacity(BRANCHING)], + pub(super) strings: [MaybeUninit; capacity(BRANCHING)], + pub(super) tokens: [MaybeUninit; capacity(BRANCHING)], + pub(super) chunks: [RefIndex; capacity(BRANCHING)], + pub(super) clusters: [MaybeUninit>>; capacity(BRANCHING)], +} + +impl Item for Page { + const BRANCHING: ChildCount = BRANCHING; + + type Node = N; + + #[inline(always)] + fn occupied(&self) -> ChildCount { + self.occupied + } + + #[inline(always)] + unsafe fn copy_to( + &mut self, + to: &mut Self, + source: ChildCount, + destination: ChildCount, + count: ChildCount, + ) { + debug_assert!( + source + count <= self.occupied, + "Internal error. An attempt to copy non occupied data in Page.", + ); + + unsafe { array_copy_to(&mut self.spans, &mut to.spans, source, destination, count) }; + + unsafe { + array_copy_to( + &mut self.strings, + &mut to.strings, + source, + destination, + count, + ) + }; + + unsafe { array_copy_to(&mut self.tokens, &mut to.tokens, source, destination, count) }; + + unsafe { array_copy_to(&mut self.chunks, &mut to.chunks, source, destination, count) }; + + unsafe { + array_copy_to( + &mut self.clusters, + &mut to.clusters, + source, + destination, + count, + ) + }; + } + + #[inline(always)] + unsafe fn inflate(&mut self, from: ChildIndex, count: ChildCount) { + debug_assert!( + from <= self.occupied, + "Internal error. An attempt to inflate from out of bounds child in Page." + ); + debug_assert!( + count + self.occupied <= capacity(Self::BRANCHING), + "Internal error. An attempt to inflate with overflow in Page." + ); + debug_assert!( + count > 0, + "Internal error. An attempt to inflate of empty range in Page." + ); + + if from < self.occupied { + unsafe { array_shift(&mut self.spans, from, from + count, self.occupied - from) }; + unsafe { array_shift(&mut self.strings, from, from + count, self.occupied - from) }; + unsafe { array_shift(&mut self.tokens, from, from + count, self.occupied - from) }; + unsafe { array_shift(&mut self.chunks, from, from + count, self.occupied - from) }; + unsafe { array_shift(&mut self.clusters, from, from + count, self.occupied - from) }; + } + + self.occupied += count; + } + + #[inline(always)] + unsafe fn deflate(&mut self, from: ChildIndex, count: ChildCount) -> bool { + debug_assert!( + from < self.occupied, + "Internal error. An attempt to deflate from non occupied child in Page." + ); + debug_assert!( + from + count <= self.occupied, + "Internal error. An attempt to deflate with overflow in Page." + ); + debug_assert!( + count > 0, + "Internal error. An attempt to deflate of empty range." 
+ ); + + if from + count < self.occupied { + unsafe { + array_shift( + &mut self.spans, + from + count, + from, + self.occupied - from - count, + ) + }; + unsafe { + array_shift( + &mut self.strings, + from + count, + from, + self.occupied - from - count, + ) + }; + unsafe { + array_shift( + &mut self.tokens, + from + count, + from, + self.occupied - from - count, + ) + }; + unsafe { + array_shift( + &mut self.chunks, + from + count, + from, + self.occupied - from - count, + ) + }; + unsafe { + array_shift( + &mut self.clusters, + from + count, + from, + self.occupied - from - count, + ) + }; + } + + self.occupied -= count; + + self.occupied >= Self::BRANCHING + } +} + +impl Page { + #[inline(always)] + pub(super) fn new(occupied: ChildCount) -> PageRef { + debug_assert!( + occupied > 0, + "Internal error. An attempt to create Page with zero occupied values." + ); + + debug_assert!( + occupied <= capacity(Self::BRANCHING), + "Internal error. An attempt to create Page with occupied value exceeding capacity." + ); + + let page = Self { + parent: ChildRefIndex::dangling(), + previous: None, + next: None, + occupied, + spans: Default::default(), + strings: unsafe { MaybeUninit::uninit().assume_init() }, + tokens: unsafe { MaybeUninit::uninit().assume_init() }, + chunks: Default::default(), + clusters: unsafe { MaybeUninit::uninit().assume_init() }, + }; + + let pointer = unsafe { NonNull::new_unchecked(Box::leak(Box::new(page))) }; + + PageRef { pointer } + } + + // Safety: + // 1. All references belong to `references` instance. + pub(super) unsafe fn free(mut self, references: &mut References) -> ChildCount { + for index in 0..self.occupied { + let string = unsafe { self.strings.get_unchecked_mut(index) }; + + unsafe { string.assume_init_drop() }; + + let token = unsafe { self.tokens.get_unchecked_mut(index) }; + + unsafe { token.assume_init_drop() }; + + let chunk_index = *unsafe { self.chunks.get_unchecked(index) }; + + unsafe { references.chunks.remove_unchecked(chunk_index) }; + + let cache_entry = + take(unsafe { self.clusters.get_unchecked_mut(index).assume_init_mut() }); + + if let Some(cache_entry) = cache_entry { + unsafe { references.clusters.remove_unchecked(cache_entry.ref_index) }; + } + } + + self.occupied + } +} + +#[repr(transparent)] +pub(super) struct PageRef { + pointer: NonNull>, +} + +impl Clone for PageRef { + #[inline(always)] + fn clone(&self) -> Self { + *self + } +} + +impl Copy for PageRef {} + +impl PartialEq for PageRef { + #[inline(always)] + fn eq(&self, other: &Self) -> bool { + self.pointer == other.pointer + } +} + +impl Eq for PageRef {} + +impl ItemRef<(), N> for PageRef { + type SelfLayer = PageLayer; + + type Item = Page; + + #[inline(always)] + fn dangling() -> Self { + Self { + pointer: NonNull::dangling(), + } + } + + #[inline(always)] + unsafe fn as_ref(&self) -> &Self::Item { + unsafe { self.pointer.as_ref() } + } + + #[inline(always)] + unsafe fn as_mut(&mut self) -> &mut Self::Item { + unsafe { self.pointer.as_mut() } + } + + #[inline(always)] + unsafe fn into_variant(self) -> ItemRefVariant { + ItemRefVariant::from_page(self) + } + + #[inline(always)] + unsafe fn into_owned(self) -> Box { + unsafe { Box::from_raw(self.pointer.as_ptr()) } + } + + #[inline(always)] + unsafe fn calculate_length(&self) -> Length { + let page = unsafe { self.as_ref() }; + + let mut length = 0; + + for index in 0..page.occupied { + length += unsafe { page.spans.get_unchecked(index) }; + } + + length + } + + #[inline(always)] + unsafe fn parent(&self) -> &ChildRefIndex { 
+ unsafe { &self.as_ref().parent } + } + + #[inline(always)] + unsafe fn set_parent(&mut self, parent: ChildRefIndex) { + unsafe { self.as_mut().parent = parent }; + } + + #[inline(always)] + unsafe fn parent_mut(&mut self) -> &mut BranchRef { + let parent_ref_index = unsafe { &mut self.as_mut().parent }; + + debug_assert!( + !parent_ref_index.is_dangling(), + "Internal error. An attempt to get parent from root.", + ); + + unsafe { parent_ref_index.item.as_branch_mut() } + } + + unsafe fn update_children( + &mut self, + references: &mut References, + from: ChildIndex, + count: ChildCount, + ) -> Length { + let self_variant = self.into_variant(); + + let page = unsafe { self.as_mut() }; + + debug_assert!( + from + count <= page.occupied, + "Internal error. An attempt to update references in non occupied data in Page.", + ); + + let mut length = 0; + + for index in from..(from + count) { + length += *unsafe { page.spans.get_unchecked(index) }; + + { + let chunk_index = *unsafe { page.chunks.get_unchecked(index) }; + let chunk_ref = unsafe { references.chunks.get_unchecked_mut(chunk_index) }; + + chunk_ref.item = self_variant; + chunk_ref.index = index; + } + + let cache_entry = unsafe { page.clusters.get_unchecked(index).assume_init_ref() }; + + if let Some(cache_entry) = cache_entry { + let cluster_ref = + unsafe { references.clusters.get_unchecked_mut(cache_entry.ref_index) }; + + cluster_ref.item = self_variant; + cluster_ref.index = index; + } + } + + length + } + + #[inline] + unsafe fn split( + &mut self, + references: &mut References, + _children_split: Split, + length: Length, + from: ChildIndex, + ) -> Split { + let mut parent_split = Split::dangling(); + + let occupied = unsafe { self.as_ref().occupied }; + + debug_assert!( + from < occupied, + "Internal error. Split at position out of bounds.", + ); + + match from == 0 { + true => { + parent_split.right_span = length; + parent_split.right_item = unsafe { self.into_variant() }; + + parent_split.left_span = 0; + } + + false => { + let left = unsafe { self.as_mut() }; + let mut right_ref = Page::new(occupied - from); + + match &mut left.next { + None => (), + + Some(next) => { + unsafe { PageRef::interconnect(&mut right_ref, next) }; + + left.next = None; + } + }; + + unsafe { left.copy_to(right_ref.as_mut(), from, 0, occupied - from) }; + left.occupied = from; + + parent_split.right_span = + unsafe { right_ref.update_children(references, 0, occupied - from) }; + parent_split.right_item = unsafe { right_ref.into_variant() }; + + parent_split.left_span = length - parent_split.right_span; + parent_split.left_item = unsafe { self.into_variant() }; + } + } + + parent_split + } +} + +impl PageRef { + // Safety: `left` and `right` are not dangling reference. + #[inline(always)] + pub(super) unsafe fn interconnect(left: &mut Self, right: &mut Self) { + unsafe { + left.as_mut().next = Some(*right); + } + + unsafe { + right.as_mut().previous = Some(*left); + } + } + + // Safety: `self` is not a dangling reference. + #[inline(always)] + pub(super) unsafe fn disconnect_left(&mut self) { + unsafe { + self.as_mut().previous = None; + } + } + + // Safety: `self` is not a dangling reference. + #[inline(always)] + pub(super) unsafe fn disconnect_right(&mut self) { + unsafe { + self.as_mut().next = None; + } + } + + // Safety: + // 1. `self` is not a dangling reference. + // 2. `'a` does not outlive Page instance. 
+ #[inline(always)] + pub(super) unsafe fn as_external_ref<'a>(&self) -> &'a Page { + unsafe { self.pointer.as_ref() } + } + + // Safety: + // 1. `self` is not a dangling reference. + // 2. `'a` does not outlive Page instance. + #[inline(always)] + pub(super) unsafe fn as_external_mut<'a>(&self) -> &'a mut Page { + let mut pointer = self.pointer; + + unsafe { pointer.as_mut() } + } + + // Safety: + // 1. `self` is not a dangling reference. + // 2. All references belong to `references` instance. + // 3. `from < self.occupied`. + // 4. `from + count <= self.occupied. + // 5. `count > 0` + // 6. `spans`, `strings` and `tokens` can produce at least `count` items. + #[inline] + pub(super) unsafe fn rewrite( + &mut self, + references: &mut References, + from: ChildIndex, + count: ChildCount, + spans: &mut impl Iterator, + strings: &mut impl Iterator, + tokens: &mut impl Iterator, + ) -> (Length, Length) { + let page = unsafe { self.as_mut() }; + + debug_assert!( + from < page.occupied, + "Internal error. An attempt to rewrite from non occupied child in Page." + ); + debug_assert!( + from + count <= page.occupied, + "Internal error. An attempt to rewrite with overflow in Page." + ); + debug_assert!( + count > 0, + "Internal error. An attempt to rewrite of empty range." + ); + + let mut dec = 0; + let mut inc = 0; + + references.chunks.commit(); + + for index in from..(from + count) { + debug_assert!( + index < capacity(Page::::BRANCHING), + "Internal error. Chunk index is out of bounds.", + ); + + let new_span = match spans.next() { + Some(span) => span, + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Spans iterator exceeded."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + debug_assert!(new_span > 0, "Internal error. Zero input span."); + + let new_string = match strings.next() { + Some(string) => string, + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Strings iterator exceeded."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + let new_token = match tokens.next() { + Some(token) => token, + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Tokens iterator exceeded."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + let span = unsafe { page.spans.get_unchecked_mut(index) }; + let string = unsafe { page.strings.get_unchecked_mut(index).assume_init_mut() }; + let token = unsafe { page.tokens.get_unchecked_mut(index).assume_init_mut() }; + let chunk_index = unsafe { *page.chunks.get_unchecked(index) }; + let cache_entry = + take(unsafe { page.clusters.get_unchecked_mut(index).assume_init_mut() }); + + dec += *span; + inc += new_span; + + *span = new_span; + let _ = replace(string, new_string); + let _ = replace(token, new_token); + + unsafe { references.chunks.upgrade(chunk_index) }; + + if let Some(cache_entry) = cache_entry { + unsafe { references.clusters.remove_unchecked(cache_entry.ref_index) } + } + } + + (dec, inc) + } + + // Safety: + // 1. `self` is not a dangling reference. + // 2. All references belong to `references` instance. + // 3. `from < self.occupied`. + // 4. `from + count <= self.occupied. + // 5. `count > 0` + #[inline] + pub(super) unsafe fn remove( + &mut self, + references: &mut References, + from: ChildIndex, + count: ChildCount, + ) -> Length { + let page = unsafe { self.as_mut() }; + + debug_assert!( + from < page.occupied, + "Internal error. 
An attempt to remove from non occupied child in Page." + ); + debug_assert!( + from + count <= page.occupied, + "Internal error. An attempt to remove with overflow in Page." + ); + debug_assert!( + count > 0, + "Internal error. An attempt to remove of empty range." + ); + + let mut length = 0; + + for index in from..(from + count) { + let span = unsafe { *page.spans.get_unchecked(index) }; + + unsafe { page.strings.get_unchecked_mut(index).assume_init_drop() }; + unsafe { page.tokens.get_unchecked_mut(index).assume_init_drop() }; + + let chunk_index = unsafe { *page.chunks.get_unchecked(index) }; + + unsafe { references.chunks.remove_unchecked(chunk_index) }; + + let cache_entry = + take(unsafe { page.clusters.get_unchecked_mut(index).assume_init_mut() }); + + if let Some(cache_entry) = cache_entry { + unsafe { references.clusters.remove_unchecked(cache_entry.ref_index) } + } + + length += span; + } + + if from + count < page.occupied { + unsafe { + array_shift( + &mut page.spans, + from + count, + from, + page.occupied - from - count, + ) + }; + unsafe { + array_shift( + &mut page.strings, + from + count, + from, + page.occupied - from - count, + ) + }; + unsafe { + array_shift( + &mut page.tokens, + from + count, + from, + page.occupied - from - count, + ) + }; + unsafe { + array_shift( + &mut page.chunks, + from + count, + from, + page.occupied - from - count, + ) + }; + unsafe { + array_shift( + &mut page.clusters, + from + count, + from, + page.occupied - from - count, + ) + }; + + for index in from..(page.occupied - count) { + { + let chunk_index = *unsafe { page.chunks.get_unchecked(index) }; + let chunk_ref = unsafe { references.chunks.get_unchecked_mut(chunk_index) }; + + chunk_ref.index = index; + } + + let cache_entry = unsafe { page.clusters.get_unchecked(index).assume_init_ref() }; + + if let Some(cache_entry) = cache_entry { + let cluster_ref = + unsafe { references.clusters.get_unchecked_mut(cache_entry.ref_index) }; + + cluster_ref.index = index; + } + } + } + + page.occupied -= count; + + length + } + + // Safety: + // 1. `self` is not a dangling reference. + // 2. All references belong to `references` instance. + // 3. `from <= self.occupied`. + // 4. `from + count <= self.occupied. + // 5. `count > 0` + // 6. `spans`, `strings` and `tokens` can produce at least `count` items. + #[inline] + pub(super) unsafe fn insert( + &mut self, + references: &mut References, + from: ChildIndex, + count: ChildCount, + spans: &mut impl Iterator, + strings: &mut impl Iterator, + tokens: &mut impl Iterator, + ) -> Length { + let self_ref_variant = unsafe { self.into_variant() }; + + let page = unsafe { self.as_mut() }; + + debug_assert!( + from <= page.occupied, + "Internal error. An attempt to insert from non occupied child in Page." + ); + debug_assert!( + from + count <= capacity(Page::::BRANCHING), + "Internal error. An attempt to insert with overflow in Page." + ); + debug_assert!( + count > 0, + "Internal error. An attempt to insert of empty range." + ); + + unsafe { + page.inflate(from, count); + } + + let mut length = 0; + + for index in from..(from + count) { + debug_assert!( + index < capacity(Page::::BRANCHING), + "Internal error. Chunk index is out of bounds.", + ); + + let new_span = match spans.next() { + Some(span) => span, + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Spans iterator exceeded."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + debug_assert!(new_span > 0, "Internal error. 
Zero input span."); + + let new_string = match strings.next() { + Some(string) => string, + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Strings iterator exceeded."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + let new_token = match tokens.next() { + Some(token) => token, + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Tokens iterator exceeded."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + length += new_span; + + unsafe { + *page.spans.get_unchecked_mut(index) = new_span; + } + + unsafe { + page.strings.get_unchecked_mut(index).write(new_string); + } + + unsafe { + page.tokens.get_unchecked_mut(index).write(new_token); + } + + unsafe { + *page.chunks.get_unchecked_mut(index) = + references.chunks.insert_index(ChildRefIndex { + item: self_ref_variant, + index, + }) + } + + unsafe { + page.clusters.get_unchecked_mut(index).write(None); + } + } + + for index in (from + count)..page.occupied { + { + let chunk_index = *unsafe { page.chunks.get_unchecked(index) }; + let chunk_ref = unsafe { references.chunks.get_unchecked_mut(chunk_index) }; + + chunk_ref.index = index; + } + + let cache_entry = unsafe { page.clusters.get_unchecked(index).assume_init_ref() }; + + if let Some(cache_entry) = cache_entry { + let cluster_ref = + unsafe { references.clusters.get_unchecked_mut(cache_entry.ref_index) }; + + cluster_ref.index = index; + } + } + + length + } +} + +pub(super) struct PageList { + pub(super) first: PageRef, + pub(super) last: PageRef, +} + +impl Clone for PageList { + #[inline(always)] + fn clone(&self) -> Self { + *self + } +} + +impl Copy for PageList {} + +impl PageList { + #[inline(always)] + pub(super) fn dangling() -> Self { + Self { + first: PageRef::dangling(), + last: PageRef::dangling(), + } + } +} diff --git a/work/crates/main/src/incremental/storage/references.rs b/work/crates/main/src/incremental/storage/references.rs new file mode 100644 index 0000000..5e46bd0 --- /dev/null +++ b/work/crates/main/src/incremental/storage/references.rs @@ -0,0 +1,78 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. 
//
+//                                                                            //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY  //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES           //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                   //
+//                                                                            //
+// If you do not or cannot agree to the terms of this Agreement,              //
+// do not use this Work.                                                      //
+//                                                                            //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                 //
+// All rights reserved.                                                       //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{arena::Repository, incremental::storage::child::ChildRefIndex, std::*, syntax::Node};
+
+pub(crate) struct References<N: Node> {
+    pub(super) chunks: Repository<ChildRefIndex<N>>,
+    pub(super) clusters: Repository<ChildRefIndex<N>>,
+}
+
+impl<N: Node> Default for References<N> {
+    #[inline(always)]
+    fn default() -> Self {
+        Self {
+            chunks: Default::default(),
+            clusters: Default::default(),
+        }
+    }
+}
+
+impl<N: Node> References<N> {
+    #[inline(always)]
+    pub(crate) fn with_capacity(capacity: usize) -> Self {
+        Self {
+            chunks: Repository::with_capacity(capacity),
+            clusters: Default::default(),
+        }
+    }
+
+    #[inline(always)]
+    pub(crate) fn chunks(&self) -> &Repository<ChildRefIndex<N>> {
+        &self.chunks
+    }
+
+    #[inline(always)]
+    pub(crate) fn clusters(&self) -> &Repository<ChildRefIndex<N>> {
+        &self.clusters
+    }
+
+    #[inline(always)]
+    pub(crate) fn clusters_mut(&mut self) -> &mut Repository<ChildRefIndex<N>> {
+        &mut self.clusters
+    }
+}
diff --git a/work/crates/main/src/incremental/storage/tree.rs b/work/crates/main/src/incremental/storage/tree.rs
new file mode 100644
index 0000000..4f11581
--- /dev/null
+++ b/work/crates/main/src/incremental/storage/tree.rs
@@ -0,0 +1,1444 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work,                            //
+// a compiler front-end foundation technology.                                //
+//                                                                            //
+// This Work is a proprietary software with source available code.            //
+//                                                                            //
+// To copy, use, distribute, and contribute into this Work you must agree to  //
+// the terms of the End User License Agreement:                               //
+//                                                                            //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md.          //
+//                                                                            //
+// The Agreement let you use this Work in commercial and non-commercial       //
+// purposes. Commercial use of the Work is free of charge to start,           //
+// but the Agreement obligates you to pay me royalties                        //
+// under certain conditions.                                                  //
+//                                                                            //
+// If you want to contribute into the source code of this Work,               //
+// the Agreement obligates you to assign me all exclusive rights to           //
+// the Derivative Work or contribution made by you                            //
+// (this includes GitHub forks and pull requests to my repository).           //
+//                                                                            //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only,     //
+// and the third party software does not incorporate or distribute            //
+// this Work directly.                                                        //
+//                                                                            //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY  //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES           //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                   //
+//                                                                            //
+// If you do not or cannot agree to the terms of this Agreement,              //
+// do not use this Work.                                                      //
+//                                                                            //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                 //
+// All rights reserved.
// +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + incremental::storage::{ + branch::Branch, + child::{ChildIndex, ChildRefIndex}, + item::{Item, ItemRef, ItemRefVariant, Split}, + nesting::{BranchLayer, Height, PageLayer}, + page::{Page, PageList, PageRef}, + references::References, + utils::{capacity, Spread}, + }, + lexis::{Length, Site, TokenCount}, + std::*, + syntax::Node, +}; + +pub(crate) struct Tree { + pub(super) length: Length, + pub(super) height: Height, + pub(super) root: ItemRefVariant, + pub(super) pages: PageList, +} + +impl Default for Tree { + #[inline(always)] + fn default() -> Self { + Self { + length: 0, + height: 0, + root: ItemRefVariant::dangling(), + pages: PageList::dangling(), + } + } +} + +impl Drop for Tree { + fn drop(&mut self) { + #[cfg(feature = "std")] + { + if self.height != 0 { + println!("Internal error. Document memory leak."); + } + } + + #[cfg(not(feature = "std"))] + { + assert_eq!(self.height, 0, "Internal error. Document memory leak."); + } + } +} + +impl Tree { + //Safety: + // 1. `spans`, `strings` and `tokens` produce the same number of items equal to `count`. + // 2. All `spans` values are positive integers. + pub(crate) unsafe fn from_chunks( + references: &mut References, + count: TokenCount, + mut spans: impl Iterator, + mut strings: impl Iterator, + mut tokens: impl Iterator, + ) -> Self { + if count == 0 { + return Self::default(); + } + + let mut height = 1; + let mut length = 0; + + let mut spread = Spread::new::>(count); + let mut first_page = None; + let mut last_page = None; + let mut layer_size = spread.layer_size(); + + debug_assert_eq!( + count, + spread.total_items(), + "Internal error. Partition failure.", + ); + + loop { + let index = spread.advance(); + + if index == ChildIndex::MAX { + break; + } + + let span = match spans.next() { + Some(span) => span, + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Spans iterator exceeded."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + debug_assert!(span > 0, "Internal error. Zero input span."); + + let string = match strings.next() { + Some(string) => string, + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Strings iterator exceeded."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + let token = match tokens.next() { + Some(token) => token, + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Tokens iterator exceeded."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + length += span; + + if index == 0 { + let mut new_page_ref = Page::new(spread.items); + + if let Some(mut previous_page) = replace(&mut last_page, Some(new_page_ref)) { + unsafe { PageRef::interconnect(&mut previous_page, &mut new_page_ref) }; + } + + if first_page.is_none() { + first_page = Some(new_page_ref); + } + } + + match &mut last_page { + Some(page_ref) => { + let reference = references.chunks.insert_index(ChildRefIndex { + item: unsafe { page_ref.into_variant() }, + index, + }); + + let page = unsafe { page_ref.as_mut() }; + + debug_assert!(index < page.occupied, "Internal error. 
Partition failure."); + + unsafe { *page.spans.get_unchecked_mut(index) = span }; + unsafe { page.strings.get_unchecked_mut(index).write(string) }; + unsafe { page.tokens.get_unchecked_mut(index).write(token) }; + unsafe { *page.chunks.get_unchecked_mut(index) = reference }; + unsafe { page.clusters.get_unchecked_mut(index).write(None) }; + } + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Missing last page."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + } + } + + let mut first_item = None; + let mut last_item = None; + + if layer_size > 1 { + height += 1; + + let mut next = first_page; + + spread = Spread::new::>(layer_size); + layer_size = spread.layer_size(); + + loop { + let index = spread.advance(); + + if index == ChildIndex::MAX { + break; + } + + if index == 0 { + let new_branch_ref = Branch::::new(spread.items); + + let new_branch_variant = unsafe { new_branch_ref.into_variant() }; + + if let Some(mut previous_branch) = + replace(&mut last_item, Some(new_branch_variant)) + { + unsafe { + previous_branch + .as_branch_mut::() + .as_mut() + .inner + .parent + .item = new_branch_variant; + } + } + + if first_item.is_none() { + first_item = Some(new_branch_variant); + } + } + + match &mut last_item { + Some(last) => { + let mut child_ref = match &next { + Some(page) => { + let current = *page; + + next = unsafe { current.as_ref().next }; + + current + } + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Missing last branch."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + unsafe { child_ref.as_mut().parent = ChildRefIndex { item: *last, index } }; + + let branch = unsafe { last.as_branch_mut::().as_mut() }; + + debug_assert!( + index < branch.inner.occupied, + "Internal error. Partition failure.", + ); + + let child_span = unsafe { child_ref.calculate_length() }; + + unsafe { *branch.inner.spans.get_unchecked_mut(index) = child_span }; + unsafe { + *branch.inner.children.get_unchecked_mut(index) = + child_ref.into_variant() + }; + } + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Missing last branch."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + } + } + } + + while layer_size > 1 { + height += 1; + + let mut next = match first_item { + Some(first_item) => first_item, + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. 
Missing layer first item."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + first_item = None; + last_item = None; + + spread = Spread::new::>(layer_size); + layer_size = spread.layer_size(); + + loop { + let index = spread.advance(); + + if index == ChildIndex::MAX { + break; + } + + if index == 0 { + let new_branch_ref = Branch::::new(spread.items); + + let new_branch_variant = unsafe { new_branch_ref.into_variant() }; + + if let Some(mut previous_branch) = + replace(&mut last_item, Some(new_branch_variant)) + { + unsafe { + previous_branch + .as_branch_mut::() + .as_mut() + .inner + .parent + .item = new_branch_variant; + } + } + + if first_item.is_none() { + first_item = Some(new_branch_variant); + } + } + + match &mut last_item { + Some(last) => { + let mut child_variant = next; + + let child_ref = { + let current_ref = unsafe { child_variant.as_branch_mut::<()>() }; + + next = unsafe { current_ref.as_ref().inner.parent.item }; + + current_ref + }; + + unsafe { + child_ref.as_mut().inner.parent = ChildRefIndex { item: *last, index } + }; + + let branch = unsafe { last.as_branch_mut::().as_mut() }; + + debug_assert!( + index < branch.inner.occupied, + "Internal error. Partition failure.", + ); + + let child_span = unsafe { child_ref.calculate_length() }; + + unsafe { *branch.inner.spans.get_unchecked_mut(index) = child_span }; + unsafe { *branch.inner.children.get_unchecked_mut(index) = child_variant }; + } + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Missing last branch."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + } + } + } + + let first_page = match first_page { + Some(first_page) => first_page, + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Missing first page."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + let last_page = match last_page { + Some(last_page) => last_page, + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Missing last page."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + }; + + Self { + length, + height, + root: match first_item { + Some(root) => root, + None => unsafe { first_page.into_variant() }, + }, + pages: PageList { + first: first_page, + last: last_page, + }, + } + } + + #[inline] + pub(crate) fn length(&self) -> Length { + self.length + } + + #[inline(always)] + pub(crate) fn first(&self) -> ChildRefIndex { + if self.height == 0 { + return ChildRefIndex::dangling(); + } + + ChildRefIndex { + item: unsafe { self.pages.first.into_variant() }, + index: 0, + } + } + + #[inline(always)] + pub(crate) fn last(&self) -> ChildRefIndex { + if self.height == 0 { + return ChildRefIndex::dangling(); + } + + let last_page = unsafe { self.pages.last.as_ref() }; + + debug_assert!(last_page.occupied > 0, "Internal error. Empty page."); + + ChildRefIndex { + item: unsafe { self.pages.last.into_variant() }, + index: last_page.occupied - 1, + } + } + + #[inline] + pub(crate) fn lookup(&self, site: &mut Site) -> ChildRefIndex { + if *site >= self.length { + *site = 0; + return ChildRefIndex::dangling(); + } + + debug_assert!( + self.height > 0, + "Internal error. 
An attempt to search in empty Tree.", + ); + + let mut item = self.root; + let mut depth = self.height; + + while depth > 1 { + depth -= 1; + + let branch = unsafe { item.as_branch_ref::<()>().as_ref() }; + let mut index = 0; + + loop { + debug_assert!( + index < branch.inner.occupied, + "Internal error. Branch span inconsistency.", + ); + + let span = unsafe { *branch.inner.spans.get_unchecked(index) }; + + if span <= *site { + *site -= span; + index += 1; + continue; + } + + item = unsafe { *branch.inner.children.get_unchecked(index) }; + break; + } + } + + let page = unsafe { item.as_page_ref().as_ref() }; + let mut index = 0; + + loop { + debug_assert!( + index < page.occupied, + "Internal error. Page span inconsistency.", + ); + + let span = unsafe { *page.spans.get_unchecked(index) }; + + if span <= *site { + *site -= span; + index += 1; + continue; + } + + break; + } + + ChildRefIndex { item, index } + } + + // Safety: + // 1. `chunk_ref`(possibly dangling) refers valid data inside this instance. + #[inline] + pub(crate) unsafe fn site_of(&self, chunk_ref: &ChildRefIndex) -> Site { + if chunk_ref.is_dangling() { + return self.length; + } + + debug_assert!(self.height > 0, "Internal error. Empty tree."); + + let page = unsafe { chunk_ref.item.as_page_ref().as_ref() }; + + let mut site = 0; + let mut index = chunk_ref.index; + + while index > 0 { + debug_assert!( + index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + index -= 1; + + site += unsafe { *page.spans.get_unchecked(index) }; + } + + let mut depth = self.height; + let mut branch_ref = &page.parent; + + while depth > 1 { + depth -= 1; + + debug_assert!( + !branch_ref.is_dangling(), + "Internal error. Dangling parent ref.", + ); + + let branch = unsafe { branch_ref.item.as_branch_ref::<()>().as_ref() }; + + index = branch_ref.index; + + while index > 0 { + debug_assert!( + index < branch.inner.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + index -= 1; + + site += unsafe { *branch.inner.spans.get_unchecked(index) }; + } + + branch_ref = &branch.inner.parent; + } + + site + } + + // Safety: + // 1. `chunk_ref` refers valid data inside this instance. + #[inline(always)] + pub(crate) unsafe fn is_writeable( + &self, + chunk_ref: &ChildRefIndex, + remove: TokenCount, + insert: TokenCount, + ) -> bool { + debug_assert!( + !chunk_ref.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page = unsafe { chunk_ref.item.as_page_ref().as_external_ref() }; + + debug_assert!( + chunk_ref.index < page.occupied, + "Internal error. ChildRefIndex index out of bounds.", + ); + + if page.occupied - chunk_ref.index < remove { + return false; + } + + match self.height { + 0 => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Incorrect height."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + + 1 => { + page.occupied + insert >= remove + && page.occupied + insert <= capacity(Page::::BRANCHING) + remove + } + + _ => { + page.occupied + insert >= Page::::BRANCHING + remove + && page.occupied + insert <= capacity(Page::::BRANCHING) + remove + } + } + } + + // Safety: + // 1. All references belong to `references` instance. + // 2. `chunk_ref` is not dangling and refers valid data inside this instance. + // 3. Referred Page has enough space to remove `remove` and to insert `insert` items. + // 4. `spans`, `strings` and `tokens` produce the same number of items equal to `insert`. + // 5. 
All `spans` values are positive integers. + pub(crate) unsafe fn write( + &mut self, + references: &mut References, + mut chunk_ref: ChildRefIndex, + remove: TokenCount, + insert: TokenCount, + mut spans: impl Iterator, + mut strings: impl Iterator, + mut tokens: impl Iterator, + ) -> (ChildRefIndex, Length) { + debug_assert!(self.height > 0, "Internal error. Empty tree."); + + debug_assert!( + !chunk_ref.is_dangling(), + "Internal error. An attempt to access dangling ChildRefIndex.", + ); + + let page_ref = unsafe { chunk_ref.item.as_page_mut() }; + let occupied = unsafe { page_ref.as_ref().occupied }; + + if self.height == 1 && insert == 0 && remove == occupied { + let mut tree = replace(self, Self::default()); + + let removed_count = unsafe { tree.free(references) }; + + debug_assert_eq!( + remove, removed_count, + "Internal error. Token count inconsistency.", + ); + + return (ChildRefIndex::dangling(), 0); + } + + let rewrite = remove.min(insert); + + let (mut span_dec, mut span_inc) = match rewrite > 0 { + true => unsafe { + page_ref.rewrite( + references, + chunk_ref.index, + rewrite, + &mut spans, + &mut strings, + &mut tokens, + ) + }, + + false => (0, 0), + }; + + if remove > rewrite { + unsafe { + span_dec += page_ref.remove(references, chunk_ref.index + rewrite, remove - rewrite) + }; + } + + if insert > rewrite { + unsafe { + span_inc += page_ref.insert( + references, + chunk_ref.index + rewrite, + insert - rewrite, + &mut spans, + &mut strings, + &mut tokens, + ) + }; + } + + let mut parent = unsafe { &mut page_ref.as_mut().parent }; + + while !parent.is_dangling() { + let branch = unsafe { parent.item.as_branch_mut::<()>().as_mut() }; + + let span = unsafe { branch.inner.spans.get_unchecked_mut(parent.index) }; + + debug_assert!( + *span + span_inc > span_dec, + "Internal error. Span inconsistency.", + ); + + *span += span_inc; + *span -= span_dec; + + parent = &mut branch.inner.parent; + } + + debug_assert!( + self.length + span_inc > span_dec, + "Internal error. Length inconsistency.", + ); + + self.length += span_inc; + self.length -= span_dec; + + if insert == 0 && chunk_ref.index + remove == occupied { + chunk_ref = match chunk_ref.item.as_page_ref().as_ref().next { + Some(next) => ChildRefIndex { + item: next.into_variant(), + index: 0, + }, + + None => ChildRefIndex::dangling(), + }; + } + + (chunk_ref, span_inc) + } + + //Safety: + // 1. All references belong to `references` instance. + // 2. `chunk_ref` refers valid data inside this instance. + pub(crate) unsafe fn split( + &mut self, + references: &mut References, + mut chunk_ref: ChildRefIndex, + ) -> Self { + if chunk_ref.is_dangling() { + return Self::default(); + } + + debug_assert!( + self.height > 0, + "Internal error. 
An attempt to split empty Tree.", + ); + + if self.height == 1 { + return match chunk_ref.index == 0 { + true => replace(self, Self::default()), + + false => { + let split = unsafe { + chunk_ref.item.as_page_mut().split( + references, + Split::dangling(), + self.length, + chunk_ref.index, + ) + }; + + self.root = split.left_item; + self.length = split.left_span; + + let right_page = *unsafe { split.right_item.as_page_ref() }; + + Self { + length: split.right_span, + height: 1, + root: split.right_item, + pages: PageList { + first: right_page, + last: right_page, + }, + } + } + }; + } + + let mut container = unsafe { *chunk_ref.item.as_page_ref().parent() }; + + let mut split = { + let length = unsafe { container.branch_span() }; + + unsafe { + chunk_ref.item.as_page_mut().split( + references, + Split::dangling(), + length, + chunk_ref.index, + ) + } + }; + + match self.height > 2 { + true => { + let parent = unsafe { *container.item.as_branch_ref::().parent() }; + + split = { + let length = unsafe { parent.branch_span() }; + let container_ref = unsafe { container.item.as_branch_mut::() }; + + unsafe { container_ref.split(references, split, length, container.index) } + }; + + container = parent; + + let mut depth = 3; + + while depth < self.height { + let parent = unsafe { *container.item.as_branch_ref::().parent() }; + + split = { + let length = unsafe { parent.branch_span() }; + let container_ref = + unsafe { container.item.as_branch_mut::() }; + + unsafe { container_ref.split(references, split, length, container.index) } + }; + + container = parent; + + depth += 1; + } + + let container_ref = unsafe { container.item.as_branch_mut::() }; + + split = + unsafe { container_ref.split(references, split, self.length, container.index) } + } + + false => { + let container_ref = unsafe { container.item.as_branch_mut::() }; + + split = + unsafe { container_ref.split(references, split, self.length, container.index) } + } + }; + + if split.left_span == 0 { + return replace(self, Self::default()); + } + + let mut right = Self { + length: split.right_span, + height: self.height, + root: split.right_item, + pages: PageList { + first: PageRef::dangling(), + last: self.pages.last, + }, + }; + + while !unsafe { right.fix_leftmost_balance(references) } {} + + if unsafe { right.pages.first.as_ref().next.is_none() } { + right.pages.last = right.pages.first; + } + + right.shrink_top(); + + self.length = split.left_span; + self.root = split.left_item; + + while !unsafe { self.fix_rightmost_balance(references) } {} + + if unsafe { self.pages.last.as_ref().previous.is_none() } { + self.pages.first = self.pages.last; + } + + self.shrink_top(); + + right + } + + //Safety: + // 1. All references belong to `references` instance. + #[inline] + pub(crate) unsafe fn join(&mut self, references: &mut References, other: Self) { + if other.height == 0 { + return; + } + + if self.height == 0 { + *self = other; + return; + } + + if self.height == other.height { + unsafe { self.join_roots(other, references) }; + return; + } + + if self.height > other.height { + unsafe { self.join_to_left(other, references) }; + return; + } + + unsafe { self.join_to_right(other, references) }; + } + + //Safety: + // 1. All references belong to `references` instance. 
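
The `lookup` method earlier in this file resolves an absolute site by descending the tree and subtracting child spans until the remainder falls inside a single child. A minimal flat-array sketch of the same arithmetic (toy code for illustration; `lookup_flat` is not part of this crate):

```rust
/// Returns the index of the span containing `site` plus the offset within
/// that span, mirroring how `lookup` rewrites its `&mut Site` argument in
/// place. `None` plays the role of the dangling `ChildRefIndex` for
/// out-of-bounds sites.
fn lookup_flat(spans: &[usize], mut site: usize) -> Option<(usize, usize)> {
    for (index, span) in spans.iter().copied().enumerate() {
        if site < span {
            return Some((index, site));
        }

        site -= span;
    }

    None
}

fn main() {
    // Three chunks of lengths 5, 3, and 4 cover sites 0..12.
    assert_eq!(lookup_flat(&[5, 3, 4], 6), Some((1, 1)));
    assert_eq!(lookup_flat(&[5, 3, 4], 12), None);
}
```

The tree applies this subtraction once per level, so a site query costs a number of steps proportional to the tree height rather than to the token count.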
+ pub(crate) unsafe fn free(&mut self, references: &mut References) -> TokenCount { + if self.height == 0 { + return 0; + } + + let root = &mut self.root; + + let token_count = match self.height { + 1 => unsafe { root.as_page_ref().into_owned().free(references) }, + + 2 => unsafe { + root.as_branch_ref::() + .into_owned() + .free(self.height, references) + }, + + _ => unsafe { + root.as_branch_ref::() + .into_owned() + .free(self.height, references) + }, + }; + + self.height = 0; + + token_count + } + + //Safety: + // 1. `self.height >= 2`. + // 2. All references belong to `references` instance. + unsafe fn fix_leftmost_balance(&mut self, references: &mut References) -> bool { + debug_assert!(self.height >= 2, "Internal error. Incorrect height."); + + let mut depth = 1; + let mut leftmost_variant = self.root; + let mut balanced = true; + + while depth < self.height - 2 { + depth += 1; + + let leftmost_ref = unsafe { leftmost_variant.as_branch_mut::() }; + let is_balanced; + + (is_balanced, leftmost_variant) = + unsafe { leftmost_ref.fix_leftmost_balance::(references) }; + + balanced = balanced && is_balanced; + } + + if depth < self.height - 1 { + depth += 1; + + let leftmost_ref = unsafe { leftmost_variant.as_branch_mut::() }; + let is_balanced; + + (is_balanced, leftmost_variant) = + unsafe { leftmost_ref.fix_leftmost_balance::(references) }; + + balanced = balanced && is_balanced; + } + + debug_assert_eq!(depth, self.height - 1, "Internal error. Depth mismatch."); + + self.pages.first = { + let leftmost_ref = unsafe { leftmost_variant.as_branch_mut::() }; + let is_balanced; + + (is_balanced, leftmost_variant) = + unsafe { leftmost_ref.fix_leftmost_balance::<()>(references) }; + + balanced = balanced && is_balanced; + + let mut first_page = unsafe { *leftmost_variant.as_page_ref() }; + + unsafe { first_page.disconnect_left() }; + + first_page + }; + + balanced + } + + //Safety: + // 1. `self.height >= 2`. + // 2. All references belong to `references` instance. + #[inline] + unsafe fn fix_rightmost_balance(&mut self, references: &mut References) -> bool { + debug_assert!(self.height >= 2, "Internal error. Incorrect height."); + + let mut depth = 1; + let mut rightmost_variant = self.root; + let mut balanced = true; + + while depth < self.height - 2 { + depth += 1; + + let rightmost_ref = unsafe { rightmost_variant.as_branch_mut::() }; + let is_balanced; + + (is_balanced, rightmost_variant) = + unsafe { rightmost_ref.fix_rightmost_balance::(references) }; + + balanced = balanced && is_balanced; + } + + if depth < self.height - 1 { + depth += 1; + + let rightmost_ref = unsafe { rightmost_variant.as_branch_mut::() }; + let is_balanced; + + (is_balanced, rightmost_variant) = + unsafe { rightmost_ref.fix_rightmost_balance::(references) }; + + balanced = balanced && is_balanced; + } + + debug_assert_eq!(depth, self.height - 1, "Internal error. 
Depth mismatch."); + + self.pages.last = { + let rightmost_ref = unsafe { rightmost_variant.as_branch_mut::() }; + let is_balanced; + + (is_balanced, rightmost_variant) = + unsafe { rightmost_ref.fix_rightmost_balance::<()>(references) }; + + balanced = balanced && is_balanced; + + let mut last_page = unsafe { *rightmost_variant.as_page_ref() }; + + unsafe { last_page.disconnect_right() }; + + last_page + }; + + balanced + } + + #[inline] + fn shrink_top(&mut self) { + while self.height > 1 { + let root_occupied = unsafe { self.root.as_branch_ref::<()>().as_ref().occupied() }; + + if root_occupied > 1 { + break; + } + + let child = unsafe { self.root.as_branch_ref::<()>().as_ref().inner.children[0] }; + + forget(unsafe { *self.root.as_branch_ref::<()>().into_owned() }); + + self.root = child; + + self.height -= 1; + } + + match self.height { + 0 => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Incorrect height."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + + 1 => unsafe { self.root.as_page_mut().as_mut().parent.make_dangle() }, + + _ => unsafe { + self.root + .as_branch_mut::<()>() + .as_mut() + .inner + .parent + .make_dangle() + }, + } + } + + //Safety: + // 1. `self` height is greater than `other` height. + // 2. `self.height` is positive value. + // 3. All references belong to `references` instance. + unsafe fn join_to_left(&mut self, mut other: Self, references: &mut References) { + let mut depth = self.height; + let mut left = self.root; + + while depth > other.height { + depth -= 1; + + let parent = unsafe { left.as_branch_ref::<()>().as_ref() }; + + left = *unsafe { + parent + .inner + .children + .get_unchecked(parent.inner.occupied - 1) + }; + } + + let right = &mut other.root; + + let new_root = match depth { + 0 => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Incorrect height."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + + 1 => { + let left_ref = unsafe { left.as_page_mut() }; + let right_ref = unsafe { right.as_page_mut() }; + + let (merged, new_root) = unsafe { + ItemRef::join_to_left( + left_ref, + right_ref, + self.length, + other.length, + references, + ) + }; + + if !merged { + unsafe { + PageRef::interconnect(&mut self.pages.last, &mut other.pages.first); + } + + self.pages.last = other.pages.last; + } + + new_root + } + + 2 => { + unsafe { + PageRef::interconnect(&mut self.pages.last, &mut other.pages.first); + } + + self.pages.last = other.pages.last; + + let left_ref = unsafe { left.as_branch_mut::() }; + let right_ref = unsafe { right.as_branch_mut::() }; + + unsafe { + ItemRef::join_to_left( + left_ref, + right_ref, + self.length, + other.length, + references, + ) + } + .1 + } + + _ => { + unsafe { + PageRef::interconnect(&mut self.pages.last, &mut other.pages.first); + } + + self.pages.last = other.pages.last; + + let left_ref = unsafe { left.as_branch_mut::() }; + let right_ref = unsafe { right.as_branch_mut::() }; + + unsafe { + ItemRef::join_to_left( + left_ref, + right_ref, + self.length, + other.length, + references, + ) + } + .1 + } + }; + + self.length += other.length; + + if let Some(new_root) = new_root { + self.height += 1; + self.root = new_root; + } + + other.height = 0; + } + + //Safety: + // 1. `self` height is greater than `other` height. + // 2. `self.height` is positive value. + // 3. All references belong to `references` instance. 
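+    // Note: this arm is reached when `other` is the taller tree (`join`
+    // dispatches the opposite case to `join_to_left`, whose safety list the
+    // one above mirrors). The leftmost edge of the right-hand tree is
+    // descended down to `self`'s height, the two edges are merged at that
+    // level, and the merged (possibly one level taller) right tree becomes
+    // the new `self`.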
+ unsafe fn join_to_right(&mut self, mut other: Self, references: &mut References) { + let mut depth = other.height; + let mut right = other.root; + + while depth > self.height { + depth -= 1; + + let parent = unsafe { right.as_branch_ref::<()>().as_ref() }; + + right = parent.inner.children[0]; + } + + let left = &mut self.root; + + let new_root = match depth { + 0 => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Incorrect height."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + + 1 => { + let left_ref = unsafe { left.as_page_mut() }; + let right_ref = unsafe { right.as_page_mut() }; + + let (merged, new_root) = unsafe { + ItemRef::join_to_right( + left_ref, + right_ref, + self.length, + other.length, + references, + ) + }; + + if !merged { + unsafe { + PageRef::interconnect(&mut self.pages.last, &mut other.pages.first); + } + + other.pages.first = self.pages.first; + } + + new_root + } + + 2 => { + unsafe { + PageRef::interconnect(&mut self.pages.last, &mut other.pages.first); + } + + other.pages.first = self.pages.first; + + let left_ref = unsafe { left.as_branch_mut::() }; + let right_ref = unsafe { right.as_branch_mut::() }; + + unsafe { + ItemRef::join_to_right( + left_ref, + right_ref, + self.length, + other.length, + references, + ) + } + .1 + } + + _ => { + unsafe { + PageRef::interconnect(&mut self.pages.last, &mut other.pages.first); + } + + other.pages.first = self.pages.first; + + let left_ref = unsafe { left.as_branch_mut::() }; + let right_ref = unsafe { right.as_branch_mut::() }; + + unsafe { + ItemRef::join_to_right( + left_ref, + right_ref, + self.length, + other.length, + references, + ) + } + .1 + } + }; + + other.length += self.length; + + if let Some(new_root) = new_root { + other.height += 1; + other.root = new_root; + } + + self.height = 0; + + let _ = replace(self, other); + } + + //Safety: + // 1. `self` height equals to `right` height. + // 2. Height is positive value. + // 3. All references belong to `references` instance. + unsafe fn join_roots(&mut self, mut other: Self, references: &mut References) { + let left = &mut self.root; + let right = &mut other.root; + + other.height = 0; + + let new_root = match self.height { + 0 => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. 
Incorrect height."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + + 1 => { + let left_ref = unsafe { left.as_page_mut() }; + let right_ref = unsafe { right.as_page_mut() }; + + let new_root = unsafe { + ItemRef::join_roots(left_ref, right_ref, self.length, other.length, references) + }; + + if new_root.is_some() { + unsafe { + PageRef::interconnect(&mut self.pages.last, &mut other.pages.first); + } + + self.pages.last = other.pages.last; + } + + new_root + } + + 2 => { + unsafe { + PageRef::interconnect(&mut self.pages.last, &mut other.pages.first); + } + + self.pages.last = other.pages.last; + + let left_ref = unsafe { left.as_branch_mut::() }; + let right_ref = unsafe { right.as_branch_mut::() }; + + unsafe { + ItemRef::join_roots(left_ref, right_ref, self.length, other.length, references) + } + } + + _ => { + unsafe { + PageRef::interconnect(&mut self.pages.last, &mut other.pages.first); + } + + self.pages.last = other.pages.last; + + let left_ref = unsafe { left.as_branch_mut::() }; + let right_ref = unsafe { right.as_branch_mut::() }; + + unsafe { + ItemRef::join_roots(left_ref, right_ref, self.length, other.length, references) + } + } + }; + + self.length += other.length; + + if let Some(new_root) = new_root { + self.height += 1; + self.root = new_root; + } + } +} diff --git a/work/crates/main/src/incremental/storage/utils.rs b/work/crates/main/src/incremental/storage/utils.rs new file mode 100644 index 0000000..1a6d187 --- /dev/null +++ b/work/crates/main/src/incremental/storage/utils.rs @@ -0,0 +1,181 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. 
//
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    incremental::storage::{
+        child::{ChildCount, ChildIndex},
+        item::Item,
+    },
+    std::*,
+};
+
+#[derive(Debug)]
+pub(super) struct Spread {
+    pub(super) head: ChildCount,
+    pub(super) tail: ChildCount,
+    pub(super) items: ChildCount,
+    next: ChildIndex,
+}
+
+impl Spread {
+    #[inline(always)]
+    pub(super) const fn new<I: Item>(total: ChildCount) -> Spread {
+        if total <= capacity(I::BRANCHING) {
+            return Spread {
+                head: 1,
+                tail: 0,
+                items: total,
+                next: 0,
+            };
+        }
+
+        let branch_count = total / I::BRANCHING;
+        let remainder = total - branch_count * I::BRANCHING;
+        let remainder_spread = remainder / branch_count;
+        let items = I::BRANCHING + remainder_spread;
+        let tail = remainder - remainder_spread * branch_count;
+
+        Spread {
+            head: branch_count - tail,
+            tail,
+            items,
+            next: 0,
+        }
+    }
+
+    #[inline(always)]
+    pub(super) const fn layer_size(&self) -> ChildCount {
+        self.head + self.tail
+    }
+
+    #[inline(always)]
+    pub(super) const fn total_items(&self) -> ChildCount {
+        self.head * self.items + self.tail * (self.items + 1)
+    }
+
+    #[inline(always)]
+    pub(super) fn advance(&mut self) -> ChildIndex {
+        if self.next < self.items {
+            self.next += 1;
+
+            return self.next - 1;
+        }
+
+        self.next = 1;
+
+        if self.head > 0 {
+            self.head -= 1;
+
+            if self.head == 0 {
+                if self.tail == 0 {
+                    return ChildIndex::MAX;
+                }
+
+                self.items += 1;
+            }
+        } else {
+            self.tail -= 1;
+
+            if self.tail == 0 {
+                return ChildIndex::MAX;
+            }
+        }
+
+        0
+    }
+}
+
+#[inline(always)]
+pub(super) const fn capacity(branching: ChildCount) -> ChildCount {
+    branching * 2 - 1
+}
+
+//Safety:
+// 1. `from` and `to` are two distinct arrays.
+// 2. The `from` data within `source..(source + count)` range is within N bounds.
+// 3. The `to` data within `destination..(destination + count)` range is within N bounds.
+#[inline(always)]
+pub(super) unsafe fn array_copy_to<T, const N: usize>(
+    from: &mut [T; N],
+    to: &mut [T; N],
+    source: ChildCount,
+    destination: ChildCount,
+    count: ChildCount,
+) {
+    debug_assert_ne!(
+        from.as_mut_ptr(),
+        to.as_mut_ptr(),
+        "Internal error. Array copy overlapping."
+    );
+    debug_assert!(
+        source + count <= N,
+        "Internal error. Source range exceeds capacity."
+    );
+    debug_assert!(
+        destination + count <= N,
+        "Internal error. Destination range exceeds capacity."
+    );
+
+    let from = unsafe { from.as_mut_ptr().offset(source as isize) };
+    let to = unsafe { to.as_mut_ptr().offset(destination as isize) };
+
+    unsafe { copy_nonoverlapping(from, to, count) };
+}
+
+//Safety:
+// 1. `from + count <= N`.
+// 2. `to + count <= N`.
+// 3. `count > 0`.
+#[inline(always)]
+pub(super) unsafe fn array_shift<T, const N: usize>(
+    array: &mut [T; N],
+    from: ChildCount,
+    to: ChildCount,
+    count: ChildCount,
+) {
+    debug_assert!(from + count <= N, "Internal error. Shift with overflow.");
+    debug_assert!(to + count <= N, "Internal error. Shift with overflow.");
+    debug_assert!(count > 0, "Internal error. Empty shift range.");
+
+    let source = unsafe { array.as_mut_ptr().offset(from as isize) };
+    let destination = unsafe { array.as_mut_ptr().offset(to as isize) };
+
+    match from + count <= to || to + count <= from {
+        false => unsafe { copy(source, destination, count) },
+        true => unsafe { copy_nonoverlapping(source, destination, count) },
+    }
+}
diff --git a/work/crates/main/src/incremental/syntax.rs b/work/crates/main/src/incremental/syntax.rs
new file mode 100644
index 0000000..6445b38
--- /dev/null
+++ b/work/crates/main/src/incremental/syntax.rs
@@ -0,0 +1,520 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work,                             //
+// a compiler front-end foundation technology.                                 //
+//                                                                             //
+// This Work is a proprietary software with source available code.             //
+//                                                                             //
+// To copy, use, distribute, and contribute into this Work you must agree to   //
+// the terms of the End User License Agreement:                                //
+//                                                                             //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md.           //
+//                                                                             //
+// The Agreement let you use this Work in commercial and non-commercial        //
+// purposes. Commercial use of the Work is free of charge to start,            //
+// but the Agreement obligates you to pay me royalties                         //
+// under certain conditions.                                                   //
+//                                                                             //
+// If you want to contribute into the source code of this Work,                //
+// the Agreement obligates you to assign me all exclusive rights to            //
+// the Derivative Work or contribution made by you                             //
+// (this includes GitHub forks and pull requests to my repository).            //
+//                                                                             //
+// The Agreement does not limit rights of the third party software developers  //
+// as long as the third party software uses public API of this Work only,      //
+// and the third party software does not incorporate or distribute             //
+// this Work directly.                                                         //
+//                                                                             //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY   //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES            //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                    //
+//                                                                             //
+// If you do not or cannot agree to the terms of this Agreement,               //
+// do not use this Work.                                                       //
+//                                                                             //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                  //
+// All rights reserved.
// +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + arena::{Id, Identifiable, Ref, Repository}, + incremental::storage::{ChildRefIndex, ClusterCache, References, Tree}, + lexis::{Length, Site, SiteRef, TokenCount, TokenCursor, TokenRef}, + std::*, + syntax::{Cluster, ErrorRef, NoSyntax, Node, NodeRef, SyntaxRule, SyntaxSession, ROOT_RULE}, +}; + +pub struct IncrementalSyntaxSession<'document, N: Node> { + id: &'document Id, + tree: &'document mut Tree, + references: &'document mut References, + pending: Pending, + next_chunk_ref: ChildRefIndex, + next_site: Site, + peek_chunk_ref: ChildRefIndex, + peek_distance: TokenCount, + peek_site: Site, +} + +impl<'document, N: Node> Identifiable for IncrementalSyntaxSession<'document, N> { + #[inline(always)] + fn id(&self) -> &Id { + self.id + } +} + +impl<'document, N: Node> TokenCursor<'document> for IncrementalSyntaxSession<'document, N> { + type Token = N::Token; + + #[inline(always)] + fn advance(&mut self) -> bool { + if self.next_chunk_ref.is_dangling() { + return false; + } + + self.next_site += unsafe { self.next_chunk_ref.span() }; + + unsafe { self.next_chunk_ref.next() }; + + match self.peek_distance == 0 { + true => { + self.peek_chunk_ref = self.next_chunk_ref; + self.peek_site = self.next_site; + } + + false => { + self.peek_distance -= 1; + } + } + + self.pending.leftmost = false; + + true + } + + #[inline(always)] + fn token(&mut self, distance: TokenCount) -> Option<&'document Self::Token> { + if unsafe { self.next_chunk_ref.is_dangling() } { + return None; + } + + if unsafe { self.jump(distance) } { + self.pending.lookahead_end_site = self.tree.length(); + return None; + } + + self.pending.lookahead_end_site = self + .pending + .lookahead_end_site + .max(self.peek_site + unsafe { *self.peek_chunk_ref.span() }); + + Some(unsafe { self.peek_chunk_ref.token() }) + } + + #[inline(always)] + fn site(&mut self, distance: TokenCount) -> Option { + if self.next_chunk_ref.is_dangling() { + return None; + } + + if unsafe { self.jump(distance) } { + self.pending.lookahead_end_site = self.tree.length(); + return None; + } + + self.pending.lookahead_end_site = self + .pending + .lookahead_end_site + .max(self.peek_site + unsafe { *self.peek_chunk_ref.span() }); + + Some(unsafe { self.tree.site_of(&self.peek_chunk_ref) }) + } + + #[inline(always)] + fn length(&mut self, distance: TokenCount) -> Option { + if self.next_chunk_ref.is_dangling() { + return None; + } + + if unsafe { self.jump(distance) } { + self.pending.lookahead_end_site = self.tree.length(); + return None; + } + + let span = unsafe { *self.peek_chunk_ref.span() }; + + self.pending.lookahead_end_site = + self.pending.lookahead_end_site.max(self.peek_site + span); + + Some(span) + } + + #[inline(always)] + fn string(&mut self, distance: TokenCount) -> Option<&'document str> { + if self.next_chunk_ref.is_dangling() { + return None; + } + + if unsafe { self.jump(distance) } { + self.pending.lookahead_end_site = self.tree.length(); + return None; + } + + self.pending.lookahead_end_site = self + .pending + .lookahead_end_site + .max(self.peek_site + unsafe { *self.peek_chunk_ref.span() }); + + Some(unsafe { self.peek_chunk_ref.string() }) + } + + #[inline(always)] + fn token_ref(&mut self, distance: TokenCount) -> TokenRef { + if self.next_chunk_ref.is_dangling() { + return TokenRef::nil(); + } + + if unsafe { self.jump(distance) } { + self.pending.lookahead_end_site = self.tree.length(); + return TokenRef::nil(); + } + + 
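+        // The peek succeeded: extend the recorded lookahead window to the
+        // end of the peeked token, so the reparser later knows how far this
+        // parse step actually looked ahead when validating cached clusters.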
self.pending.lookahead_end_site = self + .pending + .lookahead_end_site + .max(self.peek_site + unsafe { *self.peek_chunk_ref.span() }); + + let ref_index = unsafe { self.peek_chunk_ref.chunk_ref_index() }; + + let chunk_ref = unsafe { self.references.chunks().make_ref(ref_index) }; + + TokenRef { + id: *self.id, + chunk_ref, + } + } + + #[inline(always)] + fn site_ref(&mut self, distance: TokenCount) -> SiteRef { + if self.next_chunk_ref.is_dangling() { + return self.end_site_ref(); + } + + if unsafe { self.jump(distance) } { + self.pending.lookahead_end_site = self.tree.length(); + return self.end_site_ref(); + } + + self.pending.lookahead_end_site = self + .pending + .lookahead_end_site + .max(self.peek_site + unsafe { *self.peek_chunk_ref.span() }); + + let ref_index = unsafe { self.peek_chunk_ref.chunk_ref_index() }; + + let chunk_ref = unsafe { self.references.chunks().make_ref(ref_index) }; + + TokenRef { + id: *self.id, + chunk_ref, + } + .site_ref() + } + + #[inline(always)] + fn end_site_ref(&mut self) -> SiteRef { + SiteRef::new_code_end(*self.id) + } +} + +impl<'document, N: Node> SyntaxSession<'document> for IncrementalSyntaxSession<'document, N> { + type Node = N; + + fn descend(&mut self, rule: SyntaxRule) -> NodeRef { + if self.pending.leftmost { + let node = N::new(rule, self); + + let node_ref = self.pending.nodes.insert(node); + + return NodeRef { + id: *self.id, + cluster_ref: self.pending.cluster_ref, + node_ref, + }; + } + + if self.next_chunk_ref.is_dangling() { + return NodeRef::nil(); + } + + if let Some(cache) = unsafe { self.next_chunk_ref.cache() } { + if cache.successful && cache.rule == rule { + let cluster_ref_index = unsafe { self.next_chunk_ref.cache_index() }; + + let result = NodeRef { + id: *self.id, + cluster_ref: unsafe { self.references.clusters().make_ref(cluster_ref_index) }, + node_ref: Ref::Primary, + }; + + let (end_site, end_chunk_ref) = + unsafe { cache.jump_to_end(self.tree, self.references) }; + + self.pending.lookahead_end_site = self + .pending + .lookahead_end_site + .max(end_site + cache.lookahead); + self.pending.leftmost = false; + + self.next_chunk_ref = end_chunk_ref; + self.next_site = end_site; + self.peek_chunk_ref = end_chunk_ref; + self.peek_distance = 0; + self.peek_site = end_site; + + return result; + } + }; + + let child_chunk_ref = self.next_chunk_ref; + + let cluster_ref_index; + let cluster_ref; + + { + let clusters = self.references.clusters_mut(); + + cluster_ref_index = clusters.insert_index(child_chunk_ref); + cluster_ref = unsafe { clusters.make_ref(cluster_ref_index) }; + }; + + let parent = replace( + &mut self.pending, + Pending { + lookahead_end_site: self.next_site, + leftmost: true, + cluster_ref, + nodes: Repository::default(), + errors: Repository::default(), + successful: true, + }, + ); + + let primary = N::new(rule, self); + + let child = replace(&mut self.pending, parent); + + self.pending.lookahead_end_site = self + .pending + .lookahead_end_site + .max(child.lookahead_end_site); + + let lookahead = child.lookahead_end_site - self.next_site; + + let parsed_end = self.parsed_end(); + + let previous_ref_index = unsafe { + child_chunk_ref.set_cache( + cluster_ref_index, + ClusterCache { + cluster: Cluster { + primary, + nodes: child.nodes, + errors: child.errors, + }, + rule, + parsed_end, + lookahead, + successful: child.successful, + }, + ) + }; + + if let Some(previous_ref_index) = previous_ref_index { + unsafe { + self.references + .clusters_mut() + .remove_unchecked(previous_ref_index) + }; + } + + 
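+        // The freshly parsed cluster is now cached on its leftmost token
+        // chunk (any stale cache entry has been removed above), and is
+        // exposed to the caller through a reference to its primary node.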
NodeRef { + id: *self.id, + cluster_ref, + node_ref: Ref::Primary, + } + } + + #[inline(always)] + fn error(&mut self, error: ::Error) -> ErrorRef { + self.pending.successful = false; + + ErrorRef { + id: *self.id, + cluster_ref: self.pending.cluster_ref, + error_ref: self.pending.errors.insert(error), + } + } +} + +impl<'document, N: Node> IncrementalSyntaxSession<'document, N> { + // Safety: + // 1. `head` belongs to the `tree` instance. + // 2. All references of the `tree` belong to `references` instance. + pub(super) unsafe fn run( + id: &'document Id, + tree: &'document mut Tree, + references: &'document mut References, + rule: SyntaxRule, + start: Site, + head: ChildRefIndex, + cluster_ref: Ref, + ) -> (ClusterCache, Site, Length) { + if TypeId::of::() == TypeId::of::::Token>>() { + debug_assert_eq!( + rule, ROOT_RULE, + "Internal error. An attempt to reparse void syntax.", + ); + + return ( + ClusterCache { + cluster: Cluster { + primary: unsafe { MaybeUninit::zeroed().assume_init() }, + nodes: Default::default(), + errors: Default::default(), + }, + rule, + parsed_end: SiteRef::nil(), + lookahead: 0, + successful: true, + }, + 0, + 0, + ); + } + + let pending = Pending { + lookahead_end_site: start, + leftmost: true, + cluster_ref, + nodes: Repository::default(), + errors: Repository::default(), + successful: true, + }; + + let mut session = Self { + id, + tree, + references, + pending, + next_chunk_ref: head, + next_site: start, + peek_chunk_ref: head, + peek_distance: 0, + peek_site: start, + }; + + let primary = N::new(rule, &mut session); + let parsed_end_site = session.next_site; + let parsed_end = session.parsed_end(); + let lookahead = session.pending.lookahead_end_site - session.next_site; + let successful = session.pending.successful; + let nodes = session.pending.nodes; + let errors = session.pending.errors; + + let cluster = Cluster { + primary, + nodes, + errors, + }; + + let cluster_cache = ClusterCache { + cluster, + rule, + parsed_end, + lookahead, + successful, + }; + + (cluster_cache, parsed_end_site, lookahead) + } + + #[inline(always)] + fn parsed_end(&self) -> SiteRef { + match self.next_chunk_ref.is_dangling() { + false => { + let chunk_ref_index = unsafe { self.next_chunk_ref.chunk_ref_index() }; + let chunk_ref = unsafe { self.references.chunks().make_ref(chunk_ref_index) }; + + TokenRef { + id: *self.id, + chunk_ref, + } + .site_ref() + } + + true => SiteRef::new_code_end(*self.id), + } + } + + // Returns `true` if jump has failed. + // Safety: `self.next_chunk_ref` is not dangling. + #[inline] + unsafe fn jump(&mut self, target: TokenCount) -> bool { + while self.peek_distance < target { + self.peek_distance += 1; + self.peek_site += unsafe { *self.peek_chunk_ref.span() }; + + unsafe { self.peek_chunk_ref.next() }; + + if unsafe { self.peek_chunk_ref.is_dangling() } { + self.peek_distance = 0; + self.peek_site = self.next_site; + self.peek_chunk_ref = self.next_chunk_ref; + return true; + } + } + + if self.peek_distance > target * 2 { + self.peek_distance = 0; + self.peek_site = self.next_site; + self.peek_chunk_ref = self.next_chunk_ref; + + while self.peek_distance < target { + self.peek_distance += 1; + self.peek_site += unsafe { *self.peek_chunk_ref.span() }; + + unsafe { self.peek_chunk_ref.next() }; + + debug_assert!( + !self.peek_chunk_ref.is_dangling(), + "Internal error. 
Dangling peek ref.", + ); + } + + return false; + } + + while self.peek_distance > target { + unsafe { self.peek_chunk_ref.back() } + + debug_assert!( + !self.peek_chunk_ref.is_dangling(), + "Internal error. Dangling peek ref.", + ); + + self.peek_distance -= 1; + self.peek_site -= unsafe { *self.peek_chunk_ref.span() }; + } + + false + } +} + +struct Pending { + lookahead_end_site: Site, + leftmost: bool, + cluster_ref: Ref, + nodes: Repository, + errors: Repository, + successful: bool, +} diff --git a/work/crates/main/src/lexis/buffer.rs b/work/crates/main/src/lexis/buffer.rs new file mode 100644 index 0000000..b3bcf2b --- /dev/null +++ b/work/crates/main/src/lexis/buffer.rs @@ -0,0 +1,464 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + arena::{Id, Identifiable, Ref, Sequence}, + lexis::{ + cursor::TokenBufferCursor, + session::{Cursor, SequentialLexisSession}, + Chunk, + ChunkRef, + Length, + Site, + SourceCode, + ToSpan, + Token, + TokenCount, + TokenRef, + }, + std::*, + syntax::Node, + Document, +}; + +/// A growable buffer of the source code lexical data. +/// +/// This buffer is a default implementation of the [SourceCode](crate::lexis::SourceCode) trait that +/// holds the source code of a compilation unit, and the lexical structure of the code(tokens and +/// token metadata). +/// +/// In contrast to [Document](crate::Document), TokenBuffer provides an +/// [append](TokenBuffer::append) function to write strings to the end of underlying source code +/// only, but this operation works faster than the random-access +/// [`Document::write`](crate::Document::write) function. 
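
For instance, sequential preloading over a [BufRead](::std::io::BufRead) source might look like the following sketch (the `load` helper, its signature, and the newline handling are illustrative assumptions, not crate API):

```rust
use std::io::BufRead;

use lady_deirdre::lexis::{SimpleToken, TokenBuffer};

fn load(reader: impl BufRead) -> std::io::Result<TokenBuffer<SimpleToken>> {
    let mut buffer = TokenBuffer::default();

    for line in reader.lines() {
        // `lines()` strips the terminator; restore it so token sites stay
        // consistent with the original text (assuming "\n" line endings).
        let mut line = line?;
        line.push('\n');

        buffer.append(line);
    }

    Ok(buffer)
}
```

A `BufReader` wrapped around a `File` would be a typical `reader` argument here.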
+///
+/// An API user is encouraged to use TokenBuffer together with Rust's [BufRead](::std::io::BufRead)
+/// and similar objects to preload source code files by sequentially reading strings from the
+/// source and feeding them to the TokenBuffer with the [append](TokenBuffer::append) function.
+///
+/// Later on, a TokenBuffer can either be turned into a Document instance with the
+/// [into_document](TokenBuffer::into_document) function, or used by itself as non-incremental
+/// storage of the lexical data of a compilation unit.
+///
+/// For non-incremental usage, an API user can also obtain a non-incremental syntax structure of
+/// the unit using [SyntaxBuffer](crate::syntax::SyntaxBuffer).
+///
+/// ```rust
+/// use lady_deirdre::{
+///     Document,
+///     lexis::{TokenBuffer, SimpleToken, SourceCode, ChunkRef, Chunk},
+///     syntax::{SyntaxBuffer, SimpleNode, Node},
+/// };
+///
+/// // Alternatively, you can use
+/// // - `TokenBuffer::from("head string")` providing an initial String to parse;
+/// // - or a shortcut function `SimpleToken::parse("head string")`;
+/// // - or a `TokenBuffer::with_capacity(10)` function to specify buffer's token capacity.
+/// let mut token_buf = TokenBuffer::<SimpleToken>::default();
+///
+/// token_buf.append("First line\n");
+/// token_buf.append("Second line\n");
+///
+/// // Turning the TokenBuffer into an incremental Document.
+/// let _doc = token_buf.into_document::<SimpleNode>();
+///
+/// let mut token_buf = TokenBuffer::<SimpleToken>::default();
+///
+/// token_buf.append("First line\n");
+/// token_buf.append("Second line\n");
+///
+/// // Obtaining a non-incremental syntax structure of the entire compilation unit.
+/// let _syntax_tree: SyntaxBuffer<SimpleNode> = SimpleNode::parse(token_buf.cursor(..));
+///
+/// // TokenBuffer is a traversable structure of Chunk references.
+/// let token_strings = (&token_buf)
+///     .into_iter()
+///     .map(|chunk_ref: ChunkRef<SimpleToken>| chunk_ref.string)
+///     .collect::<Vec<_>>();
+///
+/// assert_eq!(token_strings, ["First", " ", "line", "\n", "Second", " ", "line", "\n"]);
+///
+/// // An API user can turn TokenBuffer into an owned iterator of Chunks.
+/// let chunks = token_buf.into_iter().collect::>>(); +/// +/// assert_eq!(chunks[4].string.as_str(), "Second"); +/// ``` +pub struct TokenBuffer { + id: Id, + length: Length, + pub(crate) tokens: Sequence, + pub(super) sites: Sequence, + pub(crate) spans: Sequence, + pub(crate) strings: Sequence, + pub(super) tail: String, +} + +impl Debug for TokenBuffer { + #[inline] + fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult { + formatter + .debug_struct("TokenBuffer") + .field("id", &self.id) + .field("length", &self.length) + .finish_non_exhaustive() + } +} + +impl Index for TokenBuffer { + type Output = T; + + #[inline(always)] + fn index(&self, index: TokenRef) -> &Self::Output { + &self + .get_token(&index.chunk_ref) + .expect("TokenRef is not a valid index into specified TokenBuffer.") + } +} + +impl IndexMut for TokenBuffer { + #[inline(always)] + fn index_mut(&mut self, index: TokenRef) -> &mut Self::Output { + self.get_token_mut(&index.chunk_ref) + .expect("TokenRef is not a valid index into specified TokenBuffer.") + } +} + +impl<'r, T: Token> Index<&'r TokenRef> for TokenBuffer { + type Output = T; + + #[inline(always)] + fn index(&self, index: &'r TokenRef) -> &Self::Output { + &self + .get_token(&index.chunk_ref) + .expect("TokenRef is not a valid index into specified TokenBuffer.") + } +} + +impl<'r, T: Token> IndexMut<&'r TokenRef> for TokenBuffer { + #[inline(always)] + fn index_mut(&mut self, index: &'r TokenRef) -> &mut Self::Output { + self.get_token_mut(&index.chunk_ref) + .expect("TokenRef is not a valid index into specified TokenBuffer.") + } +} + +impl> From for TokenBuffer { + #[inline(always)] + fn from(string: S) -> Self { + T::parse(string) + } +} + +impl Identifiable for TokenBuffer { + #[inline(always)] + fn id(&self) -> &Id { + &self.id + } +} + +impl SourceCode for TokenBuffer { + type Token = T; + + type Cursor<'code> = TokenBufferCursor<'code, Self::Token>; + + #[inline(always)] + fn contains(&self, chunk_ref: &Ref) -> bool { + self.tokens.contains(chunk_ref) + } + + #[inline(always)] + fn get_token(&self, chunk_ref: &Ref) -> Option<&Self::Token> { + self.tokens.get(chunk_ref) + } + + #[inline(always)] + fn get_token_mut(&mut self, chunk_ref: &Ref) -> Option<&mut Self::Token> { + self.tokens.get_mut(chunk_ref) + } + + #[inline(always)] + fn get_site(&self, chunk_ref: &Ref) -> Option { + self.sites.get(chunk_ref).copied() + } + + #[inline(always)] + fn get_string(&self, chunk_ref: &Ref) -> Option<&str> { + self.strings.get(chunk_ref).map(|string| string.as_str()) + } + + #[inline(always)] + fn get_length(&self, chunk_ref: &Ref) -> Option { + self.spans.get(chunk_ref).copied() + } + + #[inline(always)] + fn cursor(&self, span: impl ToSpan) -> TokenBufferCursor<'_, Self::Token> { + let span = match span.to_span(self) { + None => panic!("Specified span is invalid."), + Some(span) => span, + }; + + Self::Cursor::new(self, span) + } + + #[inline(always)] + fn length(&self) -> Length { + self.length + } + + #[inline(always)] + fn token_count(&self) -> TokenCount { + let inner = unsafe { self.tokens.inner() }; + + inner.len() + } +} + +impl Default for TokenBuffer { + #[inline] + fn default() -> Self { + Self { + id: Id::new(), + length: 0, + tokens: Default::default(), + sites: Default::default(), + spans: Default::default(), + strings: Default::default(), + tail: String::new(), + } + } +} + +impl IntoIterator for TokenBuffer { + type Item = ::Item; + type IntoIter = TokenBufferIntoIter; + + #[inline(always)] + fn into_iter(self) -> Self::IntoIter { + 
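+        // Decompose the buffer's parallel sequences into owned vector
+        // iterators; token sites are recomputed on the fly by accumulating
+        // successive span lengths, starting from site 0.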
Self::IntoIter { + site: 0, + tokens: self.tokens.into_vec().into_iter(), + spans: self.spans.into_vec().into_iter(), + strings: self.strings.into_vec().into_iter(), + } + } +} + +impl<'buffer, T: Token> IntoIterator for &'buffer TokenBuffer { + type Item = ::Item; + type IntoIter = TokenBufferIter<'buffer, T>; + + #[inline(always)] + fn into_iter(self) -> Self::IntoIter { + Self::IntoIter { + site: 0, + tokens: self.tokens.inner().iter(), + spans: self.spans.inner().iter(), + strings: self.strings.inner().iter(), + } + } +} + +impl TokenBuffer { + /// Creates a new TokenBuffer instance with pre-allocated memory for at least `capacity` token + /// chunks to be stored in. + #[inline(always)] + pub fn with_capacity(capacity: TokenCount) -> Self { + Self { + id: Id::new(), + length: 0, + tokens: Sequence::with_capacity(capacity), + sites: Sequence::with_capacity(capacity), + spans: Sequence::with_capacity(capacity), + strings: Sequence::with_capacity(capacity), + tail: String::new(), + } + } + + /// Writes `text` to the end of the buffer's source code, lexically parses source code tail + /// in accordance to these changes. + /// + /// Performance of this operation is relative to the `text` size. + /// + /// An intended use of this function is to feed strings(e.g. lines) of the source code file + /// from the Rust's [BufRead](::std::io::BufRead). + pub fn append(&mut self, text: impl AsRef) { + let text = text.as_ref(); + + if text.is_empty() { + return; + } + + let site = match self.strings.pop() { + None => 0, + + Some(last) => { + self.tail = last; + + let _ = self.spans.pop(); + let _ = self.tokens.pop(); + + match self.sites.pop() { + Some(site) => site, + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. TokenBuffer inconsistency."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + } + } + }; + + self.tail.push_str(text); + + SequentialLexisSession::run(self, site); + } + + /// Reserves capacity to store at least `additional` token chunks to be inserted on top of this + /// buffer. + #[inline(always)] + pub fn reserve(&mut self, additional: TokenCount) { + self.tokens.reserve(additional); + self.sites.reserve(additional); + self.spans.reserve(additional); + self.strings.reserve(additional); + } + + /// Turns this buffer into incremental [Document](crate::Document) instance. + /// + /// Generic parameter `N` of type [Node](crate::syntax::Node) specifies source code syntax + /// grammar. Node's [Token](crate::syntax::Node::Token) associative type must be compatible with + /// the TokenBuffer Token type. In other words, Document's syntax structure must be compatible + /// with the TokenBuffer's lexical structure. + /// + /// ```rust + /// use lady_deirdre::{Document, lexis::{TokenBuffer, SimpleToken}, syntax::SimpleNode}; + /// + /// let buf = TokenBuffer::::from("foo [bar]"); + /// + /// // SimpleNode syntax uses SimpleToken's lexis. 
+    /// let _doc = buf.into_document::<SimpleNode>();
+    /// ```
+    #[inline(always)]
+    pub fn into_document<N>(self) -> Document<N>
+    where
+        N: Node<Token = T>,
+    {
+        Document::from_buffer(self)
+    }
+
+    #[inline]
+    pub(super) fn push(&mut self, token: T, from: &Cursor, to: &Cursor) {
+        let length = to.site - from.site;
+
+        self.length += length;
+
+        let string = unsafe { self.tail.get_unchecked(from.byte_index..to.byte_index) }.to_string();
+
+        let _ = self.tokens.push(token);
+        let _ = self.sites.push(from.site);
+        let _ = self.spans.push(length);
+        let _ = self.strings.push(string);
+    }
+}
+
+pub struct TokenBufferIntoIter<T: Token> {
+    site: Site,
+    tokens: IntoIter<T>,
+    spans: IntoIter<Length>,
+    strings: IntoIter<String>,
+}
+
+impl<T: Token> Iterator for TokenBufferIntoIter<T> {
+    type Item = Chunk<T>;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<Self::Item> {
+        let token = self.tokens.next()?;
+        let site = self.site;
+        let length = self.spans.next()?;
+        let string = self.strings.next()?;
+
+        self.site += length;
+
+        Some(Self::Item {
+            token,
+            site,
+            length,
+            string,
+        })
+    }
+}
+
+impl<T: Token> FusedIterator for TokenBufferIntoIter<T> {}
+
+pub struct TokenBufferIter<'buffer, T: Token> {
+    site: Site,
+    tokens: Iter<'buffer, T>,
+    spans: Iter<'buffer, Length>,
+    strings: Iter<'buffer, String>,
+}
+
+impl<'sequence, T: Token> Iterator for TokenBufferIter<'sequence, T> {
+    type Item = ChunkRef<'sequence, T>;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<Self::Item> {
+        let token = self.tokens.next()?;
+        let site = self.site;
+        let length = *self.spans.next()?;
+        let string = self.strings.next()?;
+
+        self.site += length;
+
+        Some(Self::Item {
+            token,
+            site,
+            length,
+            string,
+        })
+    }
+}
+
+impl<'sequence, T: Token> FusedIterator for TokenBufferIter<'sequence, T> {}
diff --git a/work/crates/main/src/lexis/chunks.rs b/work/crates/main/src/lexis/chunks.rs
new file mode 100644
index 0000000..d830515
--- /dev/null
+++ b/work/crates/main/src/lexis/chunks.rs
@@ -0,0 +1,103 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work,                            //
+// a compiler front-end foundation technology.                                //
+//                                                                            //
+// This Work is a proprietary software with source available code.            //
+//                                                                            //
+// To copy, use, distribute, and contribute into this Work you must agree to  //
+// the terms of the End User License Agreement:                               //
+//                                                                            //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md.          //
+//                                                                            //
+// The Agreement let you use this Work in commercial and non-commercial       //
+// purposes. Commercial use of the Work is free of charge to start,           //
+// but the Agreement obligates you to pay me royalties                        //
+// under certain conditions.                                                  //
+//                                                                            //
+// If you want to contribute into the source code of this Work,               //
+// the Agreement obligates you to assign me all exclusive rights to           //
+// the Derivative Work or contribution made by you                            //
+// (this includes GitHub forks and pull requests to my repository).           //
+//                                                                            //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only,     //
+// and the third party software does not incorporate or distribute            //
+// this Work directly.                                                        //
+//                                                                            //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY  //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES           //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                   //
+//                                                                            //
+// If you do not or cannot agree to the terms of this Agreement,              //
+// do not use this Work.                                                      //
+//                                                                            //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                 //
+// All rights reserved.                                                       //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    lexis::{Length, Site, Token},
+    std::*,
+};
+
+/// A Token metadata ownership object.
+///
+/// This object holds the Token instance itself, and the metadata of the source code substring
+/// this token belongs to.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct Chunk<T: Token> {
+    /// Token instance.
+    ///
+    /// This instance is supposed to describe the lexical kind of the "token", and possibly
+    /// additional generic semantic metadata.
+    pub token: T,
+
+    /// Token's substring absolute UTF-8 character offset inside the source code text.
+    pub site: Site,
+
+    /// Token's substring UTF-8 character count.
+    pub length: Length,
+
+    /// Token's original substring inside the source code text.
+    pub string: String,
+}
+
+/// A Token metadata borrow object.
+///
+/// This object borrows a reference into the Token instance, and the metadata of the source code
+/// substring this token belongs to.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub struct ChunkRef<'source, T: Token> {
+    /// Token instance reference.
+    ///
+    /// This instance is supposed to describe the lexical kind of the "token", and possibly
+    /// additional generic semantic metadata.
+    pub token: &'source T,
+
+    /// Token's substring absolute UTF-8 character offset inside the source code text.
+    pub site: Site,
+
+    /// Token's substring UTF-8 character count.
+    pub length: Length,
+
+    /// Token's original substring reference inside the source code text.
+    pub string: &'source str,
+}
+
+impl<'source, T: Token> ChunkRef<'source, T> {
+    /// Turns this reference object into an owned [Chunk] instance.
+    ///
+    /// This operation clones both the Token instance and the Token's substring.
+    #[inline(always)]
+    pub fn to_owned(&self) -> Chunk<T>
+    where
+        T: Clone,
+    {
+        Chunk {
+            token: self.token.clone(),
+            site: self.site,
+            length: self.length,
+            string: self.string.to_string(),
+        }
+    }
+}
diff --git a/work/crates/main/src/lexis/code.rs b/work/crates/main/src/lexis/code.rs
new file mode 100644
index 0000000..4986541
--- /dev/null
+++ b/work/crates/main/src/lexis/code.rs
@@ -0,0 +1,239 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work,                            //
+// a compiler front-end foundation technology.                                //
+//                                                                            //
+// This Work is a proprietary software with source available code.            //
+//                                                                            //
+// To copy, use, distribute, and contribute into this Work you must agree to  //
+// the terms of the End User License Agreement:                               //
+//                                                                            //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md.          //
+//                                                                            //
+// The Agreement let you use this Work in commercial and non-commercial       //
+// purposes. Commercial use of the Work is free of charge to start,           //
+// but the Agreement obligates you to pay me royalties                        //
+// under certain conditions.                                                  //
+//                                                                            //
+// If you want to contribute into the source code of this Work,               //
+// the Agreement obligates you to assign me all exclusive rights to           //
+// the Derivative Work or contribution made by you                            //
+// (this includes GitHub forks and pull requests to my repository).           //
+//                                                                            //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only,     //
+// and the third party software does not incorporate or distribute            //
+// this Work directly.                                                        //
+//                                                                            //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY  //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES           //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                   //
+//                                                                            //
+// If you do not or cannot agree to the terms of this Agreement,              //
+// do not use this Work.                                                      //
+//                                                                            //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                 //
+// All rights reserved.                                                       //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Identifiable, Ref},
+    lexis::{Length, Site, SiteRef, ToSpan, Token, TokenCount, TokenCursor},
+    std::*,
+    syntax::{transduce, Node, Transducer},
+};
+
+/// A low-level interface to access and inspect lexical data of the compilation unit.
+///
+/// By convention, SourceCode should be implemented for compilation unit management objects such
+/// as the [Document](crate::Document) and [TokenBuffer](crate::lexis::TokenBuffer) objects that
+/// are supposed to manage the code's lexical grammar structure.
+///
+/// This trait:
+///  1. Specifies the lexical grammar through the [Token](crate::lexis::SourceCode::Token)
+///     associated type.
+///  2. Provides general source code meta information such as the text's
+///     [character count](crate::lexis::SourceCode::length),
+///     [token count](crate::lexis::SourceCode::token_count), etc.
+///  3. Provides a low-level interface to resolve higher-level weak references (such as
+///     [TokenRef](crate::lexis::TokenRef) or [SiteRef](crate::lexis::SiteRef)).
+///  4. Provides low-level access to the source code Tokens through the low-level
+///     iterator-alike [TokenCursor](crate::lexis::TokenCursor) interface.
+///  5. Provides an entry point to the [Transducers](crate::syntax::Transducer) interface.
+///
+/// In practice an API user interacts with a small subset of this functionality directly.
+///
+/// To traverse token chunks or to access substrings of arbitrary spans, the user can utilize the
+/// higher-level [CodeContent](crate::lexis::CodeContent) auto-implemented extension over the
+/// SourceCode.
+///
+/// To implement an extension library to this Crate with source code storages of alternative
+/// designs, you can implement this trait over these objects. In this case these new objects will
+/// be able to interact with existing [Token](crate::lexis::Token) implementations, and the weak
+/// references belonging to them will work transparently with other conventional weak references.
+pub trait SourceCode: Identifiable {
+    /// Specifies the programming language lexical grammar.
+    ///
+    /// See [Token](crate::lexis::Token) for details.
+    type Token: Token;
+
+    /// Specifies a low-level iterator-alike type that traverses through the source code tokens.
+    ///
+    /// See [TokenCursor](crate::lexis::TokenCursor) for details.
+    type Cursor<'code>: TokenCursor<'code, Token = Self::Token>
+    where
+        Self: 'code;
+
+    /// Returns `true` if the token referred to by the specified low-level `chunk_ref` weak
+    /// reference exists in this source code instance.
+    ///
+    /// This is a low-level API used by the higher-level [TokenRef](crate::lexis::TokenRef) and
+    /// [SiteRef](crate::lexis::SiteRef) weak references under the hood. An API user normally
+    /// doesn't need to call this function directly.
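+    ///
+    /// A brief editorial sketch (it assumes the public `chunk_ref` field of
+    /// [TokenRef](crate::lexis::TokenRef) and the [TokenCursor](crate::lexis::TokenCursor) API
+    /// described in this module):
+    ///
+    /// ```rust
+    /// use lady_deirdre::lexis::{SimpleToken, SourceCode, TokenBuffer, TokenCursor};
+    ///
+    /// let buf = TokenBuffer::<SimpleToken>::from("foo");
+    ///
+    /// // A weak reference to the first token of the buffer.
+    /// let token_ref = buf.cursor(..).token_ref(0);
+    ///
+    /// assert!(buf.contains(&token_ref.chunk_ref));
+    /// ```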
+    fn contains(&self, chunk_ref: &Ref) -> bool;
+
+    /// Immutably dereferences a [Token](crate::lexis::Token) instance by the specified low-level
+    /// `chunk_ref` weak reference.
+    ///
+    /// Returns [None] if the referred Token Chunk does not exist in this instance.
+    ///
+    /// This is a low-level API used by the higher-level [TokenRef](crate::lexis::TokenRef)
+    /// weak reference under the hood. An API user normally does not need to call this function
+    /// directly.
+    fn get_token(&self, chunk_ref: &Ref) -> Option<&Self::Token>;
+
+    /// Mutably dereferences a [Token](crate::lexis::Token) instance by the specified low-level
+    /// `chunk_ref` weak reference.
+    ///
+    /// Returns [None] if the referred Token Chunk does not exist in this instance.
+    ///
+    /// Even though the SourceCode provides a way to mutate Token instances inside the source code
+    /// lexical structure, it is recommended to avoid replacing these instances with token
+    /// instances of different lexical kinds (with different enum variants, in particular).
+    /// Such a replacement is not undefined behavior, but it could lead to logical errors on
+    /// further syntax parsing/re-parsing stages.
+    ///
+    /// This is a low-level API used by the higher-level [TokenRef](crate::lexis::TokenRef)
+    /// weak reference under the hood. An API user normally does not need to call this function
+    /// directly.
+    fn get_token_mut(&mut self, chunk_ref: &Ref) -> Option<&mut Self::Token>;
+
+    /// Returns the absolute character index of the [Token](crate::lexis::Token) substring inside
+    /// this source code text by the specified low-level `chunk_ref` weak reference.
+    ///
+    /// Returns [None] if the referred Token Chunk does not exist in this instance.
+    ///
+    /// This is a low-level API used by the higher-level [TokenRef](crate::lexis::TokenRef) and
+    /// [SiteRef](crate::lexis::SiteRef) weak references under the hood. An API user normally does
+    /// not need to call this function directly.
+    fn get_site(&self, chunk_ref: &Ref) -> Option<Site>;
+
+    /// Returns a substring of the [Token](crate::lexis::Token) inside this source code text by
+    /// the specified low-level `chunk_ref` weak reference.
+    ///
+    /// Returns [None] if the referred Token Chunk does not exist in this instance.
+    ///
+    /// This is a low-level API used by the higher-level [TokenRef](crate::lexis::TokenRef)
+    /// weak reference under the hood. An API user normally does not need to call this function
+    /// directly.
+    fn get_string(&self, chunk_ref: &Ref) -> Option<&str>;
+
+    /// Returns the character count of the [Token](crate::lexis::Token) substring inside this
+    /// source code text by the specified low-level `chunk_ref` weak reference.
+    ///
+    /// Returns [None] if the referred Token Chunk does not exist in this instance.
+    ///
+    /// This is a low-level API used by the higher-level [TokenRef](crate::lexis::TokenRef)
+    /// weak reference under the hood. An API user normally does not need to call this function
+    /// directly.
+    fn get_length(&self, chunk_ref: &Ref) -> Option<Length>;
+
+    /// Returns a [TokenCursor](crate::lexis::TokenCursor) instance to traverse tokens and
+    /// their metadata that "touch" the specified `span`.
+    ///
+    /// Span "touching" means tokens whose substring characters lie inside, intersect
+    /// with, or are adjacent to this [Span](crate::lexis::ToSpan).
+    ///
+    /// ```rust
+    /// use lady_deirdre::lexis::{TokenBuffer, SourceCode, SimpleToken, TokenCursor};
+    ///
+    /// let buf = TokenBuffer::<SimpleToken>::from("foo bar baz");
+    ///
+    /// // `..` span covers all tokens.
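+    /// // (`collect` is a small helper defined at the end of this example.)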
+    /// assert_eq!(collect(buf.cursor(..)), vec!["foo", " ", "bar", " ", "baz"]);
+    ///
+    /// // `0..0` span is adjacent to the first token only.
+    /// assert_eq!(collect(buf.cursor(0..0)), vec!["foo"]);
+    ///
+    /// // `3..5` span is adjacent to the first token, covers the second token, and intersects with
+    /// // the third token.
+    /// assert_eq!(collect(buf.cursor(3..5)), vec!["foo", " ", "bar"]);
+    ///
+    /// fn collect(mut cursor: <TokenBuffer<SimpleToken> as SourceCode>::Cursor<'_>) -> Vec<String>
+    /// {
+    ///     let mut result = Vec::new();
+    ///
+    ///     while let Some(string) = cursor.string(0) {
+    ///         result.push(string.to_string());
+    ///         let _ = cursor.advance();
+    ///     }
+    ///
+    ///     result
+    /// }
+    /// ```
+    ///
+    /// This is a low-level API function. To iterate through the spanned chunks, an API user is
+    /// encouraged to use the higher-level [CodeContent::chunks](crate::lexis::CodeContent::chunks)
+    /// function instead that returns a more convenient iterator over the
+    /// [ChunkRef](crate::lexis::ChunkRef) objects.
+    ///
+    /// ```rust
+    /// use lady_deirdre::lexis::{TokenBuffer, CodeContent, SimpleToken, TokenCursor, ChunkRef};
+    ///
+    /// let buf = TokenBuffer::<SimpleToken>::from("foo bar baz");
+    ///
+    /// assert_eq!(
+    ///     buf
+    ///         .chunks(3..5)
+    ///         .map(|chunk_ref: ChunkRef<'_, SimpleToken>| chunk_ref.string.to_string())
+    ///         .collect::<Vec<_>>(),
+    ///     vec!["foo", " ", "bar"],
+    /// );
+    /// ```
+    fn cursor(&self, span: impl ToSpan) -> Self::Cursor<'_>;
+
+    /// Returns a [SiteRef](crate::lexis::SiteRef) instance that is always valid and always
+    /// resolves to the source code [length](crate::lexis::SourceCode::length) value.
+    #[inline(always)]
+    fn end_site_ref(&self) -> SiteRef {
+        SiteRef::new_code_end(*self.id())
+    }
+
+    /// Returns the total number of UTF-8 characters inside the source code text.
+    fn length(&self) -> Length;
+
+    /// Returns the total number of tokens inside the source code lexical structure.
+    fn token_count(&self) -> TokenCount;
+
+    /// Returns `true` if the source code text is an empty string.
+    ///
+    /// If the source code is empty, there are no tokens held by this instance.
+    #[inline(always)]
+    fn is_empty(&self) -> bool {
+        self.length() == 0
+    }
+
+    /// Runs the provided `transducer` over this SourceCode content, returning the Transducer's
+    /// result value.
+    ///
+    /// The [Transducers Framework](crate::syntax::Transducer) in particular provides a way to
+    /// implement source code formatters.
+    #[inline(always)]
+    fn transduce<N, Tr, R>(&self, transducer: Tr) -> R
+    where
+        Self: Sized,
+        N: Node,
+        Tr: Transducer<N, Self, R>,
+    {
+        transduce(self, transducer)
+    }
+}
diff --git a/work/crates/main/src/lexis/content.rs b/work/crates/main/src/lexis/content.rs
new file mode 100644
index 0000000..cf6c680
--- /dev/null
+++ b/work/crates/main/src/lexis/content.rs
@@ -0,0 +1,278 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work,                            //
+// a compiler front-end foundation technology.                                //
+//                                                                            //
+// This Work is a proprietary software with source available code.            //
+//                                                                            //
+// To copy, use, distribute, and contribute into this Work you must agree to  //
+// the terms of the End User License Agreement:                               //
+//                                                                            //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md.          //
+//                                                                            //
+// The Agreement let you use this Work in commercial and non-commercial       //
+// purposes. Commercial use of the Work is free of charge to start,           //
+// but the Agreement obligates you to pay me royalties                        //
+// under certain conditions.                                                  //
+//                                                                            //
+// If you want to contribute into the source code of this Work,               //
+// the Agreement obligates you to assign me all exclusive rights to           //
+// the Derivative Work or contribution made by you                            //
+// (this includes GitHub forks and pull requests to my repository).           //
+//                                                                            //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only,     //
+// and the third party software does not incorporate or distribute            //
+// this Work directly.                                                        //
+//                                                                            //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY  //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES           //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                   //
+//                                                                            //
+// If you do not or cannot agree to the terms of this Agreement,              //
+// do not use this Work.                                                      //
+//                                                                            //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                 //
+// All rights reserved.                                                       //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Id, Identifiable},
+    lexis::{ByteIndex, ChunkRef, Site, SiteSpan, SourceCode, ToSpan, TokenCursor},
+    std::*,
+};
+
+/// A high-level extension interface to inspect lexical data of the source code.
+///
+/// This trait is auto-implemented for any [SourceCode](crate::lexis::SourceCode) object.
+/// An API user normally does not need to implement it manually.
+///
+/// The interface provides three high-level helper functions to access the source code lexical
+/// structure:
+///  - A [chunks](CodeContent::chunks) function that returns an iterator over the
+///    [ChunkRef](crate::lexis::ChunkRef) token metadata objects "touched" by the specified
+///    [Span](crate::lexis::ToSpan).
+///  - A [chars](CodeContent::chars) function that returns an iterator over the Unicode characters
+///    of the source code text in the specified Span.
+///  - A [substring](CodeContent::substring) function that returns a clone of the source code
+///    substring in the specified Span.
+pub trait CodeContent: SourceCode {
+    /// An iterator over the [ChunkRef](crate::lexis::ChunkRef) token metadata objects "touched"
+    /// by the specified [Span](crate::lexis::ToSpan).
+    type ChunkIterator<'code>: Iterator<Item = ChunkRef<'code, <Self as SourceCode>::Token>>
+        + FusedIterator
+        + Identifiable
+        + 'code
+    where
+        Self: 'code;
+
+    /// An iterator over the Unicode characters of the source code text in the specified
+    /// [Span](crate::lexis::ToSpan).
+    type CharIterator<'code>: Iterator<Item = char> + FusedIterator + Identifiable + 'code
+    where
+        Self: 'code;
+
+    /// Returns an iterator over the [ChunkRef](crate::lexis::ChunkRef) token metadata objects
+    /// "touched" by the specified `span`.
+    ///
+    /// Span "touching" means tokens whose substring characters lie inside, intersect
+    /// with, or are adjacent to this [Span](crate::lexis::ToSpan) object.
+    ///
+    /// ```rust
+    /// use lady_deirdre::lexis::{TokenBuffer, CodeContent, SimpleToken, ChunkRef};
+    ///
+    /// let buf = TokenBuffer::<SimpleToken>::from("foo bar baz");
+    ///
+    /// assert_eq!(
+    ///     buf
+    ///         // Second whitespace token " " is adjacent to site 4.
+    ///         // Third identifier token "bar" is covered by the `4..7` span.
+    ///         // Fourth whitespace token " " is adjacent to site 7.
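+    ///         // The leading "foo" and the trailing "baz" lie strictly outside
+    ///         // of the span, so they are skipped.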
+    ///         .chunks(4..7)
+    ///         .map(|chunk_ref: ChunkRef<'_, SimpleToken>| (chunk_ref.token, chunk_ref.string))
+    ///         .collect::<Vec<_>>(),
+    ///     vec![
+    ///         (&SimpleToken::Whitespace, " "),
+    ///         (&SimpleToken::Identifier, "bar"),
+    ///         (&SimpleToken::Whitespace, " "),
+    ///     ],
+    /// );
+    /// ```
+    fn chunks(&self, span: impl ToSpan) -> Self::ChunkIterator<'_>;
+
+    /// Returns an iterator over the Unicode characters of the source code text in the specified
+    /// [span](crate::lexis::ToSpan).
+    ///
+    /// ```rust
+    /// use lady_deirdre::lexis::{TokenBuffer, CodeContent, SimpleToken, ChunkRef};
+    ///
+    /// let buf = TokenBuffer::<SimpleToken>::from("foo bar baz");
+    ///
+    /// assert_eq!(
+    ///     buf.chars(4..7).map(|ch| ch.to_string().to_uppercase()).collect::<Vec<_>>().join("."),
+    ///     "B.A.R",
+    /// );
+    /// ```
+    fn chars(&self, span: impl ToSpan) -> Self::CharIterator<'_>;
+
+    /// Returns a substring of the source code text in the [span](crate::lexis::ToSpan).
+    ///
+    /// ```rust
+    /// use lady_deirdre::lexis::{TokenBuffer, CodeContent, SimpleToken, Position};
+    ///
+    /// let mut buf = TokenBuffer::<SimpleToken>::default();
+    ///
+    /// buf.append("First line\n");
+    /// buf.append("Second line\n");
+    /// buf.append("Third line\n");
+    ///
+    /// assert_eq!(
+    ///     buf.substring(Position::new(1, 7)..=Position::new(3, 5)),
+    ///     "line\nSecond line\nThird",
+    /// );
+    /// ```
+    #[inline(always)]
+    fn substring(&self, span: impl ToSpan) -> String {
+        self.chars(span).collect()
+    }
+}
+
+impl<C: SourceCode> CodeContent for C {
+    type ChunkIterator<'code> = ChunkIterator<'code, Self::Cursor<'code>>
+    where
+        Self: 'code;
+
+    type CharIterator<'code> = CharIterator<'code, Self::Cursor<'code>>
+    where
+        Self: 'code;
+
+    #[inline(always)]
+    fn chunks(&self, span: impl ToSpan) -> Self::ChunkIterator<'_> {
+        let span = match span.to_span(self) {
+            None => panic!("Specified span is invalid."),
+            Some(span) => span,
+        };
+
+        let cursor = self.cursor(span.clone());
+
+        Self::ChunkIterator {
+            cursor,
+            _code_lifetime: PhantomData::default(),
+        }
+    }
+
+    #[inline(always)]
+    fn chars(&self, span: impl ToSpan) -> Self::CharIterator<'_> {
+        let span = match span.to_span(self) {
+            None => panic!("Specified span is invalid."),
+            Some(span) => span,
+        };
+
+        let cursor = self.cursor(span.clone());
+
+        Self::CharIterator {
+            span,
+            cursor,
+            site: 0,
+            byte: 0,
+            _code_lifetime: PhantomData::default(),
+        }
+    }
+}
+
+#[repr(transparent)]
+pub struct ChunkIterator<'code, C: TokenCursor<'code>> {
+    cursor: C,
+    _code_lifetime: PhantomData<&'code ()>,
+}
+
+impl<'code, C: TokenCursor<'code>> Identifiable for ChunkIterator<'code, C> {
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        self.cursor.id()
+    }
+}
+
+impl<'code, C: TokenCursor<'code>> Iterator for ChunkIterator<'code, C> {
+    type Item = ChunkRef<'code, <C as TokenCursor<'code>>::Token>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let token = self.cursor.token(0)?;
+        let site = self.cursor.site(0)?;
+        let length = self.cursor.length(0)?;
+        let string = self.cursor.string(0)?;
+
+        if !self.cursor.advance() {
+            return None;
+        }
+
+        Some(Self::Item {
+            token,
+            site,
+            length,
+            string,
+        })
+    }
+}
+
+impl<'code, C: TokenCursor<'code>> FusedIterator for ChunkIterator<'code, C> {}
+
+pub struct CharIterator<'code, C: TokenCursor<'code>> {
+    span: SiteSpan,
+    cursor: C,
+    site: Site,
+    byte: ByteIndex,
+    _code_lifetime: PhantomData<&'code ()>,
+}
+
+impl<'code, C: TokenCursor<'code>> Identifiable for CharIterator<'code, C> {
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        self.cursor.id()
+    }
+}
+
+impl<'code, C: TokenCursor<'code>> Iterator for CharIterator<'code, C> {
+    type Item = char;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            let site = self.cursor.site(0)?;
+
+            if self.site + site >= self.span.end {
+                return None;
+            }
+
+            let length = self.cursor.length(0)?;
+
+            if site + length < self.span.start || self.site >= length {
+                let _ = self.cursor.advance();
+                self.site = 0;
+                self.byte = 0;
+                continue;
+            }
+
+            let string = self.cursor.string(0)?;
+
+            let character = unsafe {
+                string
+                    .get_unchecked(self.byte..)
+                    .chars()
+                    .next()
+                    .unwrap_unchecked()
+            };
+
+            self.site += 1;
+            self.byte += character.len_utf8();
+
+            if self.site + site <= self.span.start {
+                continue;
+            }
+
+            return Some(character);
+        }
+    }
+}
+
+impl<'code, C: TokenCursor<'code>> FusedIterator for CharIterator<'code, C> {}
diff --git a/work/crates/main/src/lexis/cursor.rs b/work/crates/main/src/lexis/cursor.rs
new file mode 100644
index 0000000..6c2f68e
--- /dev/null
+++ b/work/crates/main/src/lexis/cursor.rs
@@ -0,0 +1,455 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work,                            //
+// a compiler front-end foundation technology.                                //
+//                                                                            //
+// This Work is a proprietary software with source available code.            //
+//                                                                            //
+// To copy, use, distribute, and contribute into this Work you must agree to  //
+// the terms of the End User License Agreement:                               //
+//                                                                            //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md.          //
+//                                                                            //
+// The Agreement let you use this Work in commercial and non-commercial       //
+// purposes. Commercial use of the Work is free of charge to start,           //
+// but the Agreement obligates you to pay me royalties                        //
+// under certain conditions.                                                  //
+//                                                                            //
+// If you want to contribute into the source code of this Work,               //
+// the Agreement obligates you to assign me all exclusive rights to           //
+// the Derivative Work or contribution made by you                            //
+// (this includes GitHub forks and pull requests to my repository).           //
+//                                                                            //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only,     //
+// and the third party software does not incorporate or distribute            //
+// this Work directly.                                                        //
+//                                                                            //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY  //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES           //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                   //
+//                                                                            //
+// If you do not or cannot agree to the terms of this Agreement,              //
+// do not use this Work.                                                      //
+//                                                                            //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                 //
+// All rights reserved.                                                       //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Id, Identifiable, RefIndex, Sequence},
+    lexis::{
+        Length,
+        Site,
+        SiteRef,
+        SiteSpan,
+        SourceCode,
+        Token,
+        TokenBuffer,
+        TokenCount,
+        TokenRef,
+    },
+    std::*,
+};
+
+/// A lookahead iterator over the source code Tokens.
+///
+/// This is a low-level API that provides access to a subset of the source code token
+/// sequence. For higher-level iteration access you can use the
+/// [`CodeContent::chunks`](crate::lexis::CodeContent::chunks) function instead that returns a more
+/// convenient [Iterator](::std::iter::Iterator) over the [ChunkRef](crate::lexis::ChunkRef)
+/// objects.
+///
+/// TokenCursor is the main access gate to the [SourceCode](crate::lexis::SourceCode) underlying
+/// lexical data.
+/// An API user receives this object by calling the
+/// [SourceCode::cursor](crate::lexis::SourceCode::cursor) function. Also, TokenCursor is a base
+/// interface of the [SyntaxSession](crate::syntax::SyntaxSession) that iterates over a subset of
+/// tokens in the specified parsing context.
+///
+/// TokenCursor is an iterator-alike structure. This object provides functions to access
+/// particular tokens and their metadata with lookahead capabilities from the current
+/// inner [Site](crate::lexis::Site), and a function to [advance](TokenCursor::advance) the
+/// inner Site.
+///
+/// Note that even though the lookahead operations do not advance the inner Cursor position, its
+/// SyntaxSession extension may track the lookahead distance to calculate the final syntax parsing
+/// lookahead. This final value affects the incremental re-parsing algorithm, and should be
+/// minimized to gain the best performance results.
+///
+/// ```rust
+/// use lady_deirdre::lexis::{TokenBuffer, SimpleToken, SourceCode, TokenCursor, ToSite};
+///
+/// let buf = TokenBuffer::<SimpleToken>::from("(foo bar)");
+///
+/// // A cursor over the "foo bar" substring tokens "touched" by the `4..5` span:
+/// //  - Token "foo" is adjacent to the Site 4.
+/// //  - Token " " is covered by the 4..5 span.
+/// //  - Token "bar" is adjacent to the Site 5.
+/// //
+/// // In the beginning, the inner Cursor Site is set to the beginning of the "foo" token (Site 1).
+/// let mut cursor = buf.cursor(4..5);
+///
+/// // Looking ahead from the beginning.
+/// assert_eq!(cursor.token(0).unwrap(), &SimpleToken::Identifier);
+/// assert_eq!(cursor.site(0).unwrap(), 1); // Token "foo" starts from Site 1.
+/// assert_eq!(cursor.string(0).unwrap(), "foo");
+/// assert_eq!(cursor.string(1).unwrap(), " ");
+/// assert_eq!(cursor.string(2).unwrap(), "bar");
+/// assert!(cursor.string(3).is_none());
+///
+/// // Advances the cursor Site to the beginning of the next token " ".
+/// assert!(cursor.advance());
+///
+/// assert_eq!(cursor.site(0).unwrap(), 4); // Token " " starts from Site 4.
+/// assert_eq!(cursor.string(0).unwrap(), " ");
+/// assert_eq!(cursor.string(1).unwrap(), "bar");
+/// assert!(cursor.string(2).is_none());
+///
+/// // Advances the cursor Site to the beginning of the last token "bar".
+/// assert!(cursor.advance());
+///
+/// assert_eq!(cursor.site(0).unwrap(), 5); // Token "bar" starts from Site 5.
+/// assert_eq!(cursor.string(0).unwrap(), "bar");
+/// assert!(cursor.string(1).is_none());
+///
+/// // Advances the cursor Site to the end of the last token "bar".
+/// assert!(cursor.advance());
+/// assert!(cursor.site(0).is_none()); // There are no more tokens in front of the Cursor.
+/// assert!(cursor.string(0).is_none());
+///
+/// // Further advancement is not possible.
+/// assert!(!cursor.advance());
+///
+/// // Since there are no more tokens in front of the Cursor Site, the "site_ref" function returns
+/// // a reference to the beginning of the next token ")", which is the Site of the end of
+/// // the Cursor covered tokens.
+/// let site_ref = cursor.site_ref(0);
+///
+/// assert_eq!(site_ref.to_site(&buf).unwrap(), 8);
+/// ```
+pub trait TokenCursor<'code>: Identifiable {
+    /// A type of the [Token](crate::lexis::Token) of the [SourceCode](crate::lexis::SourceCode)
+    /// instance this Cursor belongs to.
+    type Token: Token;
+
+    /// Advances the TokenCursor inner [Site](crate::lexis::Site).
+    ///
+    /// If there is a token covered by this TokenCursor in front of the inner site, the inner site
+    /// advances by the token's substring [Length](crate::lexis::Length), and the function
+    /// returns `true`.
+    ///
+    /// Otherwise this function does nothing and returns `false`.
+    fn advance(&mut self) -> bool;
+
+    /// Looks ahead at the [Token](crate::lexis::Token) in front of the TokenCursor inner
+    /// [Site](crate::lexis::Site).
+    ///
+    /// If there are `distance` number of tokens covered by the TokenCursor in front of the
+    /// TokenCursor inner site, this function returns [Some] reference to that Token,
+    /// otherwise returns [None].
+    ///
+    /// `distance` is a zero-based argument. Number `0` refers to the first token in front of the
+    /// current inner site. `1` refers to the second token, and so on.
+    ///
+    /// This function does not advance the TokenCursor inner site, but it could track the overall
+    /// lookahead distance of the [parsing session](crate::syntax::SyntaxSession) that affects
+    /// incremental re-parsing capabilities. An API user should prefer to minimize the lookahead
+    /// distance to gain the best performance.
+    fn token(&mut self, distance: TokenCount) -> Option<&'code Self::Token>;
+
+    /// Looks ahead at the start [Site](crate::lexis::Site) of the [Token](crate::lexis::Token)
+    /// in front of the TokenCursor inner Site.
+    ///
+    /// If there are `distance` number of tokens covered by the TokenCursor in front of the
+    /// TokenCursor inner site, this function returns [Some] Site of that Token, otherwise
+    /// returns [None].
+    ///
+    /// `distance` is a zero-based argument. Number `0` refers to the first token in front of the
+    /// current inner site. `1` refers to the second token, and so on.
+    ///
+    /// In particular, `site(0)` would return the current TokenCursor inner site if there are
+    /// covered tokens left in front of the site.
+    ///
+    /// This function does not advance the TokenCursor inner site, but it could track the overall
+    /// lookahead distance of the [parsing session](crate::syntax::SyntaxSession) that affects
+    /// incremental re-parsing capabilities. An API user should prefer to minimize the lookahead
+    /// distance to gain the best performance.
+    fn site(&mut self, distance: TokenCount) -> Option<Site>;
+
+    /// Looks ahead at the string [Length](crate::lexis::Length) of the
+    /// [Token](crate::lexis::Token) in front of the TokenCursor inner
+    /// [Site](crate::lexis::Site).
+    ///
+    /// If there are `distance` number of tokens covered by the TokenCursor in front of the
+    /// TokenCursor inner site, this function returns [Some] Length of that Token's string,
+    /// otherwise returns [None].
+    ///
+    /// `distance` is a zero-based argument. Number `0` refers to the first token in front of the
+    /// current inner site. `1` refers to the second token, and so on.
+    ///
+    /// This function does not advance the TokenCursor inner site, but it could track the overall
+    /// lookahead distance of the [parsing session](crate::syntax::SyntaxSession) that affects
+    /// incremental re-parsing capabilities. An API user should prefer to minimize the lookahead
+    /// distance to gain the best performance.
+    fn length(&mut self, distance: TokenCount) -> Option<Length>;
+
+    /// Looks ahead at the string of the [Token](crate::lexis::Token) in front of the TokenCursor
+    /// inner [Site](crate::lexis::Site).
+    ///
+    /// If there are `distance` number of tokens covered by the TokenCursor in front of the
+    /// TokenCursor inner site, this function returns [Some] string slice of that Token,
+    /// otherwise returns [None].
+    ///
+    /// `distance` is a zero-based argument. Number `0` refers to the first token in front of the
+    /// current inner site. `1` refers to the second token, and so on.
+    ///
+    /// This function does not advance the TokenCursor inner site, but it could track the overall
+    /// lookahead distance of the [parsing session](crate::syntax::SyntaxSession) that affects
+    /// incremental re-parsing capabilities. An API user should prefer to minimize the lookahead
+    /// distance to gain the best performance.
+    fn string(&mut self, distance: TokenCount) -> Option<&'code str>;
+
+    /// Looks ahead at the [Token](crate::lexis::Token) in front of the TokenCursor inner
+    /// [Site](crate::lexis::Site), and returns a [weak reference](crate::lexis::TokenRef) to this
+    /// token.
+    ///
+    /// If there are `distance` number of tokens covered by the TokenCursor in front of the
+    /// TokenCursor inner site, this function returns a valid weak reference to that Token,
+    /// otherwise it returns a nil TokenRef.
+    ///
+    /// `distance` is a zero-based argument. Number `0` refers to the first token in front of the
+    /// current inner site. `1` refers to the second token, and so on.
+    ///
+    /// This function does not advance the TokenCursor inner site, but it could track the overall
+    /// lookahead distance of the [parsing session](crate::syntax::SyntaxSession) that affects
+    /// incremental re-parsing capabilities. An API user should prefer to minimize the lookahead
+    /// distance to gain the best performance.
+    fn token_ref(&mut self, distance: TokenCount) -> TokenRef;
+
+    /// Returns a [weak reference](crate::lexis::SiteRef) to the source code
+    /// [Site](crate::lexis::Site) in front of the TokenCursor inner site.
+    ///
+    /// If there are `distance` number of tokens covered by the TokenCursor in front of the
+    /// TokenCursor inner site, this function returns a weak reference to the start site of that
+    /// token; otherwise this function returns a SiteRef pointing to the end Site of the covered
+    /// token sequence.
+    ///
+    /// `distance` is a zero-based argument. Number `0` refers to the first token in front of the
+    /// current inner site. `1` refers to the second token, and so on. In particular, `site_ref(0)`
+    /// returns a weak reference to the current TokenCursor inner site.
+    ///
+    /// Note that, in contrast to the [TokenCursor::site](crate::lexis::TokenCursor::site)
+    /// function, this function always returns a meaningful valid SiteRef even if there are no
+    /// tokens in front of the inner site, and even if there are no tokens covered by this
+    /// TokenCursor.
+    ///
+    /// This function does not advance the TokenCursor inner site, but it could track the overall
+    /// lookahead distance of the [parsing session](crate::syntax::SyntaxSession) that affects
+    /// incremental re-parsing capabilities. An API user should prefer to minimize the lookahead
+    /// distance to gain the best performance.
+    fn site_ref(&mut self, distance: TokenCount) -> SiteRef;
+
+    /// Returns a [weak reference](crate::lexis::SiteRef) pointing to the source code
+    /// [Site](crate::lexis::Site) at the end of the TokenCursor covered token sequence.
+    ///
+    /// Note that this function always returns a meaningful valid SiteRef regardless of the
+    /// TokenCursor inner site, and even if there are no tokens covered by this TokenCursor
+    /// instance.
+    ///
+    /// This function does not advance the TokenCursor inner site, but it could track the overall
+    /// lookahead distance of the [parsing session](crate::syntax::SyntaxSession) that affects
+    /// incremental re-parsing capabilities.
+    /// In particular, this function sets the max lookahead
+    /// to the end of the TokenCursor covered tokens. An API user should prefer to minimize the
+    /// lookahead distance to gain the best performance.
+    fn end_site_ref(&mut self) -> SiteRef;
+}
+
+pub struct TokenBufferCursor<'code, T: Token> {
+    buffer: &'code TokenBuffer<T>,
+    next: RefIndex,
+    end_site: Site,
+    end_site_ref: SiteRef,
+}
+
+impl<'code, T: Token> Identifiable for TokenBufferCursor<'code, T> {
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        self.buffer.id()
+    }
+}
+
+impl<'code, T: Token> TokenCursor<'code> for TokenBufferCursor<'code, T> {
+    type Token = T;
+
+    #[inline]
+    fn advance(&mut self) -> bool {
+        if self.next >= self.buffer.token_count() {
+            return false;
+        }
+
+        let next_site = unsafe { *self.buffer.sites.inner().get_unchecked(self.next) };
+
+        if next_site > self.end_site {
+            return false;
+        }
+
+        self.next += 1;
+
+        true
+    }
+
+    #[inline]
+    fn token(&mut self, mut distance: TokenCount) -> Option<&'code Self::Token> {
+        distance += self.next;
+
+        if distance >= self.buffer.token_count() {
+            return None;
+        }
+
+        let peek_site = unsafe { *self.buffer.sites.inner().get_unchecked(distance) };
+
+        if peek_site > self.end_site {
+            return None;
+        }
+
+        Some(unsafe { self.buffer.tokens.inner().get_unchecked(distance) })
+    }
+
+    #[inline]
+    fn site(&mut self, mut distance: TokenCount) -> Option<Site> {
+        distance += self.next;
+
+        if distance >= self.buffer.token_count() {
+            return None;
+        }
+
+        let peek_site = unsafe { *self.buffer.sites.inner().get_unchecked(distance) };
+
+        if peek_site > self.end_site {
+            return None;
+        }
+
+        Some(peek_site)
+    }
+
+    #[inline]
+    fn length(&mut self, mut distance: TokenCount) -> Option<Length> {
+        distance += self.next;
+
+        if distance >= self.buffer.token_count() {
+            return None;
+        }
+
+        let peek_site = unsafe { *self.buffer.sites.inner().get_unchecked(distance) };
+
+        if peek_site > self.end_site {
+            return None;
+        }
+
+        Some(*unsafe { self.buffer.spans.inner().get_unchecked(distance) })
+    }
+
+    #[inline]
+    fn string(&mut self, mut distance: TokenCount) -> Option<&'code str> {
+        distance += self.next;
+
+        if distance >= self.buffer.token_count() {
+            return None;
+        }
+
+        let peek_site = unsafe { *self.buffer.sites.inner().get_unchecked(distance) };
+
+        if peek_site > self.end_site {
+            return None;
+        }
+
+        Some(unsafe { self.buffer.strings.inner().get_unchecked(distance).as_str() })
+    }
+
+    #[inline]
+    fn token_ref(&mut self, mut distance: TokenCount) -> TokenRef {
+        distance += self.next;
+
+        if distance >= self.buffer.token_count() {
+            return TokenRef::nil();
+        }
+
+        let peek_site = unsafe { *self.buffer.sites.inner().get_unchecked(distance) };
+
+        if peek_site > self.end_site {
+            return TokenRef::nil();
+        }
+
+        TokenRef {
+            id: *self.buffer.id(),
+            chunk_ref: Sequence::<T>::make_ref(distance),
+        }
+    }
+
+    #[inline(always)]
+    fn site_ref(&mut self, distance: TokenCount) -> SiteRef {
+        let token_ref = self.token_ref(distance);
+
+        if token_ref.is_nil() {
+            return self.end_site_ref();
+        }
+
+        token_ref.site_ref()
+    }
+
+    #[inline]
+    fn end_site_ref(&mut self) -> SiteRef {
+        if self.end_site_ref.is_nil() {
+            let mut index = self.next;
+
+            loop {
+                if index >= self.buffer.token_count() {
+                    self.end_site_ref = SiteRef::new_code_end(*self.buffer.id());
+                    break;
+                }
+
+                let peek_site = unsafe { *self.buffer.sites.inner().get_unchecked(index) };
+
+                if peek_site > self.end_site {
+                    self.end_site_ref = TokenRef {
+                        id: *self.buffer.id(),
+                        chunk_ref: Sequence::<T>::make_ref(index),
+                    }
+                    .site_ref();
+                    break;
+                }
+
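+                // The token at `index` is still covered by this cursor's span;
+                // keep scanning for the first token past the cursor's end site.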
+                index += 1;
+            }
+        }
+
+        self.end_site_ref
+    }
+}
+
+impl<'code, T: Token> TokenBufferCursor<'code, T> {
+    #[inline(always)]
+    pub(super) fn new(buffer: &'code TokenBuffer<T>, span: SiteSpan) -> Self {
+        let mut next = 0;
+
+        while next < buffer.token_count() {
+            let site = unsafe { *buffer.sites.inner().get_unchecked(next) };
+            let length = unsafe { *buffer.spans.inner().get_unchecked(next) };
+
+            if site + length < span.start {
+                next += 1;
+                continue;
+            }
+
+            break;
+        }
+
+        let end_site_ref = match span.end >= buffer.length() {
+            true => SiteRef::new_code_end(*buffer.id()),
+            false => SiteRef::nil(),
+        };
+
+        Self {
+            buffer,
+            end_site: span.end,
+            next,
+            end_site_ref,
+        }
+    }
+}
diff --git a/work/crates/main/src/lexis/mod.rs b/work/crates/main/src/lexis/mod.rs
new file mode 100644
index 0000000..6bc25ac
--- /dev/null
+++ b/work/crates/main/src/lexis/mod.rs
@@ -0,0 +1,68 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work,                            //
+// a compiler front-end foundation technology.                                //
+//                                                                            //
+// This Work is a proprietary software with source available code.            //
+//                                                                            //
+// To copy, use, distribute, and contribute into this Work you must agree to  //
+// the terms of the End User License Agreement:                               //
+//                                                                            //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md.          //
+//                                                                            //
+// The Agreement let you use this Work in commercial and non-commercial       //
+// purposes. Commercial use of the Work is free of charge to start,           //
+// but the Agreement obligates you to pay me royalties                        //
+// under certain conditions.                                                  //
+//                                                                            //
+// If you want to contribute into the source code of this Work,               //
+// the Agreement obligates you to assign me all exclusive rights to           //
+// the Derivative Work or contribution made by you                            //
+// (this includes GitHub forks and pull requests to my repository).           //
+//                                                                            //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only,     //
+// and the third party software does not incorporate or distribute            //
+// this Work directly.                                                        //
+//                                                                            //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY  //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES           //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                   //
+//                                                                            //
+// If you do not or cannot agree to the terms of this Agreement,              //
+// do not use this Work.                                                      //
+//                                                                            //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                 //
+// All rights reserved.                                                       //
+////////////////////////////////////////////////////////////////////////////////
+
+#![doc = include_str!("readme.md")]
+
+mod buffer;
+mod chunks;
+mod code;
+mod content;
+mod cursor;
+mod position;
+mod session;
+mod simple;
+mod site;
+mod span;
+mod token;
+pub(crate) mod utils;
+
+pub(crate) const CHUNK_SIZE: Length = 3;
+
+pub(crate) use crate::lexis::site::SiteRefInner;
+pub use crate::lexis::{
+    buffer::TokenBuffer,
+    chunks::{Chunk, ChunkRef},
+    code::SourceCode,
+    content::CodeContent,
+    cursor::TokenCursor,
+    position::{Column, Line, Position, ToPosition},
+    session::LexisSession,
+    simple::SimpleToken,
+    site::{ByteIndex, Length, Site, SiteRef, ToSite},
+    span::{PositionSpan, SiteRefSpan, SiteSpan, ToSpan},
+    token::{Token, TokenCount, TokenRef},
+};
diff --git a/work/crates/main/src/lexis/position.rs b/work/crates/main/src/lexis/position.rs
new file mode 100644
index 0000000..a4e9bae
--- /dev/null
+++ b/work/crates/main/src/lexis/position.rs
@@ -0,0 +1,310 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work,                            //
+// a compiler front-end foundation technology.                                //
+//                                                                            //
+// This Work is a proprietary software with source available code.            //
+//                                                                            //
+// To copy, use, distribute, and contribute into this Work you must agree to  //
+// the terms of the End User License Agreement:                               //
+//                                                                            //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md.          //
+//                                                                            //
+// The Agreement let you use this Work in commercial and non-commercial       //
+// purposes. Commercial use of the Work is free of charge to start,           //
+// but the Agreement obligates you to pay me royalties                        //
+// under certain conditions.                                                  //
+//                                                                            //
+// If you want to contribute into the source code of this Work,               //
+// the Agreement obligates you to assign me all exclusive rights to           //
+// the Derivative Work or contribution made by you                            //
+// (this includes GitHub forks and pull requests to my repository).           //
+//                                                                            //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only,     //
+// and the third party software does not incorporate or distribute            //
+// this Work directly.                                                        //
+//                                                                            //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY  //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES           //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM.                   //
+//                                                                            //
+// If you do not or cannot agree to the terms of this Agreement,              //
+// do not use this Work.                                                      //
+//                                                                            //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин).                 //
+// All rights reserved.                                                       //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    lexis::{CodeContent, Site, SourceCode, ToSite},
+    std::*,
+};
+
+/// A one-based line number inside the source code.
+///
+/// In contrast to [Sites](crate::lexis::Site), Line numbering starts with `1`. Number `1` means
+/// the first line in the source code. Number `2` means the second line in the source code,
+/// and so on. Number `0` is a valid value that means the first line too.
+pub type Line = usize;
+
+/// A one-based Unicode character number inside a source code line string.
+///
+/// In contrast to [Sites](crate::lexis::Site), Column numbering starts with `1`. Number `1` means
+/// the first UTF-8 character in the source code line. Number `2` means the second UTF-8 character
+/// in the source code line, and so on.
+/// Number `0` is a valid value that means the first character
+/// inside the source code line too.
+pub type Column = usize;
+
+/// A line-column index object into the source code text.
+///
+/// This object interprets the source code text as a table of UTF-8 characters, where the rows are
+/// text lines, and the columns are UTF-8 characters inside these lines.
+///
+/// Lines are separated either by `\n`, or `\r\n`, or `\n\r` character sequences.
+/// The interpretation of line-break/caret-return symbols is encoding-independent to some extent.
+///
+/// This object implements the [ToSite](crate::lexis::ToSite) trait. Any Position value is always
+/// [valid to resolve](crate::lexis::ToSite::is_valid_site), but the resolution complexity is
+/// linear to the entire source code text size. An API user should take this performance
+/// characteristic into account in the end compilation system design. The
+/// [AddAssign](::std::ops::AddAssign) (`+=`) operation that incrementally moves a Position
+/// forward through the specified string's symbols could help in resolving possible performance
+/// bottlenecks when the Position object is supposed to be used frequently.
+///
+/// Also, the companion auto-implemented trait [ToPosition](crate::lexis::ToPosition) allows
+/// turning any `ToSite` implementation back into a Position. In particular, a
+/// [Site](crate::lexis::Site) or a [SiteRef](crate::lexis::SiteRef) can be turned into a Position
+/// instance.
+///
+/// ```rust
+/// use lady_deirdre::lexis::{
+///     Position, ToSite, ToPosition, SimpleToken, TokenBuffer, CodeContent
+/// };
+///
+/// let mut code = TokenBuffer::<SimpleToken>::default();
+///
+/// code.append("First line\n");
+/// code.append("Second line\n");
+/// code.append("Third line\n");
+///
+/// assert_eq!(code.substring(Position::new(1, 1)..=Position::new(1, 100)), "First line\n");
+/// assert_eq!(code.substring(Position::new(2, 1)..=Position::new(2, 100)), "Second line\n");
+/// assert_eq!(code.substring(Position::new(3, 1)..=Position::new(3, 100)), "Third line\n");
+///
+/// assert!(Position::new(2, 8) < Position::new(3, 6));
+/// assert_eq!(code.substring(Position::new(2, 8)..Position::new(3, 6)), "line\nThird");
+///
+/// let site = Position::new(2, 8).to_site(&code).unwrap();
+/// let mut position = site.to_position(&code).unwrap();
+///
+/// assert_eq!(position, Position::new(2, 8));
+///
+/// position += "line\nThird".chars();
+///
+/// assert_eq!(position, Position::new(3, 6));
+/// ```
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Position {
+    /// A number of the line inside the source code.
+    ///
+    /// Numbering is one-based. Line `1` is the first line. Line `2` is the second line, and so
+    /// on. Number `0` is a valid value that means the first line too.
+    ///
+    /// If the `line` number is greater than the total number of lines inside the source code, this
+    /// number will be interpreted as the source code text end.
+    pub line: Line,
+
+    /// A number of the UTF-8 character inside the `line` of the source code.
+    ///
+    /// Numbering is one-based. Column `1` is the first character. Column `2` is the second
+    /// character, and so on. Number `0` is a valid value that means the first character too.
+    ///
+    /// If the `column` number is greater than the total number of characters inside this `line`,
+    /// this number will be interpreted as the line string end.
+    pub column: Column,
+}
+
+impl Ord for Position {
+    #[inline]
+    fn cmp(&self, other: &Self) -> Ordering {
+        if self.line < other.line {
+            return Ordering::Less;
+        }
+
+        if self.line > other.line {
+            return Ordering::Greater;
+        }
+
+        self.column.cmp(&other.column)
+    }
+}
+
+impl PartialOrd for Position {
+    #[inline(always)]
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Default for Position {
+    #[inline(always)]
+    fn default() -> Self {
+        Self { line: 1, column: 1 }
+    }
+}
+
+impl Display for Position {
+    #[inline(always)]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        formatter.write_fmt(format_args!("[{}:{}]", self.line, self.column))
+    }
+}
+
+impl<I: Iterator<Item = char>> AddAssign<I> for Position {
+    #[inline]
+    fn add_assign(&mut self, rhs: I) {
+        for character in rhs {
+            match character {
+                '\n' => {
+                    self.line += 1;
+                    self.column = 1;
+                }
+
+                _ => {
+                    self.column += 1;
+                }
+            }
+        }
+    }
+}
+
+unsafe impl ToSite for Position {
+    fn to_site(&self, code: &impl SourceCode) -> Option<Site> {
+        if self.line == 0 {
+            return Some(0);
+        }
+
+        let mut line = 1;
+        let mut column = 0;
+
+        for (site, character) in code.chars(..).enumerate() {
+            match character {
+                '\r' => {
+                    if line == self.line {
+                        return Some(site);
+                    }
+
+                    column = 0;
+                }
+
+                '\n' => {
+                    if line == self.line {
+                        return Some(site);
+                    }
+
+                    line += 1;
+                    column = 0;
+                }
+
+                _ => {
+                    column += 1;
+
+                    if self.line == line && self.column <= column {
+                        return Some(site);
+                    }
+                }
+            }
+        }
+
+        Some(code.length())
+    }
+
+    #[inline(always)]
+    fn is_valid_site(&self, _code: &impl SourceCode) -> bool {
+        true
+    }
+}
+
+impl Position {
+    /// A helper shortcut constructor of the Position object.
+    #[inline(always)]
+    pub fn new(line: Line, column: Column) -> Self {
+        Self { line, column }
+    }
+
+    pub(super) fn from_site(code: &impl SourceCode, site: Site) -> Self {
+        if site == 0 {
+            return Self::default();
+        }
+
+        let mut line = 1;
+        let mut column = 1;
+        let mut candidate = 0;
+        let mut slice = code.chars(..);
+
+        loop {
+            let next = match slice.next() {
+                None => break,
+                Some(character) => character,
+            };
+
+            candidate += 1;
+
+            match next {
+                '\n' => {
+                    line += 1;
+                    column = 1;
+                }
+
+                _ => {
+                    column += 1;
+                }
+            }
+
+            if candidate >= site {
+                break;
+            }
+        }
+
+        Self { line, column }
+    }
+}
+
+/// An auto-implemented trait that turns any [ToSite](crate::lexis::ToSite) implementation
+/// into a [Position](crate::lexis::Position).
+///
+/// ```rust
+/// use lady_deirdre::lexis::{ToPosition, TokenBuffer, SimpleToken, Position};
+///
+/// let mut code = TokenBuffer::<SimpleToken>::default();
+///
+/// code.append("First line\n");
+/// code.append("Second line\n");
+/// code.append("Third line\n");
+///
+/// // The third character "c" in the "Second line\n" substring.
+/// let site = 13;
+/// let position = site.to_position(&code).unwrap();
+///
+/// assert_eq!(position, Position::new(2, 3));
+/// ```
+pub trait ToPosition: ToSite {
+    /// Turns an object that implements the [ToSite](crate::lexis::ToSite) trait into a Position.
+    ///
+    /// Returns [Some] value if and only if `self.is_valid_site(code)`.
+    ///
+    /// This operation performs in time linear to the entire source code size.
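+    ///
+    /// A short editorial sketch (mirroring the trait-level example above):
+    ///
+    /// ```rust
+    /// use lady_deirdre::lexis::{ToPosition, TokenBuffer, SimpleToken, Position};
+    ///
+    /// let buf = TokenBuffer::<SimpleToken>::from("foo\nbar");
+    ///
+    /// // Site 5 is the second character "a" of the second line.
+    /// let site = 5;
+    ///
+    /// assert_eq!(site.to_position(&buf).unwrap(), Position::new(2, 2));
+    /// ```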
+    fn to_position(&self, code: &impl SourceCode) -> Option<Position>;
+}
+
+impl<S: ToSite> ToPosition for S {
+    #[inline(always)]
+    fn to_position(&self, code: &impl SourceCode) -> Option<Position> {
+        let site = match self.to_site(code) {
+            None => return None,
+
+            Some(site) => site,
+        };
+
+        Some(Position::from_site(code, site))
+    }
+}
diff --git a/work/crates/main/src/lexis/readme.md b/work/crates/main/src/lexis/readme.md
new file mode 100644
index 0000000..33e20ab
--- /dev/null
+++ b/work/crates/main/src/lexis/readme.md
@@ -0,0 +1,95 @@
+# Lexis analysis features.
+
+This module provides functionality to organize the source code lexis analysis
+system.
+
+The source code is a string of UTF-8 characters. This text builds up a sequence
+of substrings of usually small sizes called Tokens. For example, "foo bar" is a
+text consisting of three tokens: an Identifier "foo", a Whitespace " ", and an
+Identifier "bar". Splitting the source code text into token substrings and
+associating them with lexical metadata is the Lexical analysis stage of a
+Compilation System.
+
+An API user is encouraged to implement the [Token](crate::lexis::Token) trait
+on Rust enum types. The variants of such an enum represent token types,
+and the [`Token::new`](crate::lexis::Token::new) function defines
+a Programming Language lexical grammar parser, an algorithm that divides the
+source text into tokens. Under the hood this function performs lexical
+parsing of the UTF-8 text by interacting with the low-level
+[LexisSession](crate::lexis::LexisSession) interface.
+
+Normally an API user does not need to implement the Token interface manually.
+You can utilize the [Token](::lady_deirdre_derive::Token) derive macro instead
+to specify the lexical grammar of the PL directly on the enum variants through
+the macro attributes.
+
+Individual Token metadata called [Chunk](crate::lexis::Chunk) consists of four
+fields:
+ - An instance of the Token type that represents a group of tokens this
+   particular "token" belongs to. This field, in particular, is supposed to be
+   used on the further lexical and syntax analysis stages. The Token type could
+   also contain additional semantic metadata.
+ - An absolute UTF-8 character index of the first character of the token string
+   inside the original source code text. This "index" is called a
+   [Site](crate::lexis::Site).
+ - A [Length](crate::lexis::Length) of the token's string. This is the number
+   of the UTF-8 characters in the string.
+ - A UTF-8 token [String](std::string::String). This is a substring of the
+   original source text that was scanned by the lexical parser to recognize
+   the corresponding token.
+
+Objects that store a source code lexical structure implement a
+[SourceCode](crate::lexis::SourceCode) trait. This trait provides functions to
+access and to inspect the lexical structure such as individual Tokens and token
+Chunks, and to dereference weak references of the Tokens and the token Chunk
+fields. Unless you work on a Crate extension, you don't need to implement this
+trait manually.
+
+The default implementation of the SourceCode is the
+[TokenBuffer](crate::lexis::TokenBuffer) object. This object provides an
+efficient way to load and lexically parse a text loaded from a file, and is
+supposed to be either used directly for the non-incremental compilation mode,
+or to be further turned into a [Document](crate::Document) incremental storage.
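+
+For example (a brief editorial sketch using the built-in
+[SimpleToken](crate::lexis::SimpleToken) lexis; the exact token split depends
+on the grammar in use):
+
+```rust
+use lady_deirdre::lexis::{CodeContent, SimpleToken, TokenBuffer};
+
+// Loads and lexically parses a text in one pass.
+let mut buf = TokenBuffer::<SimpleToken>::from("foo bar");
+
+// The buffer can also be extended at the end, e.g. line by line.
+buf.append(" baz\n");
+
+assert_eq!(buf.substring(..), "foo bar baz\n");
+
+// Individual tokens and their substrings are accessible through chunk iteration.
+for chunk in buf.chunks(..) {
+    let _token: &SimpleToken = chunk.token;
+    let _substring: &str = chunk.string;
+}
+```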
+
+To traverse individual tokens of the source code, the SourceCode trait provides
+a [`SourceCode::cursor`](crate::lexis::SourceCode::cursor) function that returns
+a low-level iterator-like interface over the token metadata called
+[TokenCursor](crate::lexis::TokenCursor).
+
+To inspect particular features of the source code content such as arbitrary
+substrings, or to iterate token Chunks in a more convenient way, an API user is
+encouraged to use the higher-level [CodeContent](crate::lexis::CodeContent)
+extension interface. This interface is auto-implemented for all SourceCode
+implementations such as TokenBuffer or Document.
+
+To index arbitrary characters of the source code text, the
+module provides a low-level [ToSite](crate::lexis::ToSite) trait. This trait
+was designed to transform custom index objects into source code character
+Sites. ToSite is implemented for the [Site](crate::lexis::Site) type(a [usize]
+absolute UTF-8 character index) itself, for the
+[Position](crate::lexis::Position) object that holds a text index in terms of
+the source code lines and columns, and for
+the [SiteRef](crate::lexis::SiteRef) weak reference that is independent of the
+source code changes history.
+
+Depending on the end compilation system design needs, you can implement this
+trait manually for custom indexing objects.
+
+To specify arbitrary spans of the source code text, the module
+provides a low-level [ToSpan](crate::lexis::ToSpan) trait. This interface is
+auto-implemented for all of the Rust standard range types(such as
+[Range](::std::ops::Range) or [RangeTo](::std::ops::RangeTo)) over the ToSite
+objects. As such, an API user can specify, for example, a span in the form of the
+Site range `8..12`, or using Position objects:
+`Position::new(3, 5)..=Position::new(12, 1)`.
+
+The end incremental compilation system is supposed to resolve semantic
+information in a lazy, changes-independent fashion. For this purpose an API user
+is encouraged to utilize weak references into the source code chunks metadata.
+This module provides two high-level API interfaces for such references:
+the [TokenRef](crate::lexis::TokenRef) reference object to index particular
+tokens and their chunk fields, and the [SiteRef](crate::lexis::SiteRef)
+reference object to index particular Sites inside the source code text.
+
+See [Arena](crate::arena) module documentation to read more about the weak
+reference system.
diff --git a/work/crates/main/src/lexis/session.rs b/work/crates/main/src/lexis/session.rs
new file mode 100644
index 0000000..839cfd9
--- /dev/null
+++ b/work/crates/main/src/lexis/session.rs
@@ -0,0 +1,332 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    lexis::{
+        utils::{get_lexis_character, NULL},
+        ByteIndex,
+        Site,
+        Token,
+        TokenBuffer,
+    },
+    std::*,
+};
+
+/// An interface to the source code lexical parsing/re-parsing session.
+///
+/// This is a low-level API.
+///
+/// The lexical parsing architecture is decoupled into two independent components:
+///   - The Source Code Manager that organizes lexical data storage, and that provides access
+///     operations to the lexical structure. This component implements a
+///     [SourceCode](crate::lexis::SourceCode) trait.
+///   - The Lexical Scanner of a particular programming language. This component is unaware of the
+///     lexical structure memory management process, and of the source of scanning.
+///
+/// Both components of this architecture are unaware of each other, and they use a
+/// [LexisSession] trait as an input/output "thin" interaction interface.
+///
+/// The Source Code Manager passes a mutable reference to a LexisSession object to the
+/// [`Token::new`](crate::lexis::Token::new) function to initiate the lexical scanning procedure in
+/// a specified context. And, in turn, the `Token::new` function uses this object to read
+/// Unicode input characters, and to drive the scanning process.
+///
+/// You can implement this trait as well as the [SourceCode](crate::lexis::SourceCode) trait to
+/// create a custom lexis manager of the compilation unit that would be able to work with
+/// existing lexical grammar definitions seamlessly.
+///
+/// As long as the [Token](crate::lexis::Token) trait implementation follows the
+/// [`Algorithm Specification`](crate::lexis::Token::new), the
+/// intercommunication between the Lexical Scanner and the Source Code Manager works correctly.
+///
+/// ```rust
+/// use lady_deirdre::lexis::{LexisSession, SimpleToken, Token, ByteIndex};
+///
+/// // A lexis session object that simply obtains the first token from a string.
+/// struct First<'a> {
+///     // An input string.
+///     input: &'a str,
+///     // An internal cursor into the `input`.
+///     cursor: ByteIndex,
+///     // An input parse start cursor.
+///     start: ByteIndex,
+///     // A submitted parse end cursor.
+///     end: ByteIndex,
+/// }
+///
+/// impl<'a> LexisSession for First<'a> {
+///     fn advance(&mut self) {
+///         if self.cursor >= self.input.len() { return; }
+///         self.cursor += self.input[self.cursor..].chars().next().unwrap().len_utf8();
+///     }
+///
+///     fn character(&self) -> char {
+///         if self.cursor >= self.input.len() { return '\0'; }
+///
+///         let character = self.input[self.cursor..].chars().next().unwrap();
+///
+///         if character == '\0' { return char::REPLACEMENT_CHARACTER; }
+///
+///         character
+///     }
+///
+///     fn submit(&mut self) { self.end = self.cursor; }
+///
+///     fn substring(&mut self) -> &str { &self.input[self.start..self.end] }
+/// }
+///
+/// impl<'a> First<'a> {
+///     fn run<T: Token>(input: &'a str) -> (T, &'a str) {
+///         let mut session = First {
+///             input,
+///             cursor: 0,
+///             start: 0,
+///             end: 0,
+///         };
+///
+///         let token = T::new(&mut session);
+///
+///         // The token scanner didn't submit anything.
+///         // Then the `token` value is the "Mismatch" token type.
+///         // Entering the mismatch recovery loop.
+///         if session.end == 0 {
+///             while session.start < input.len() {
+///                 session.start += input[session.start..].chars().next().unwrap().len_utf8();
+///                 session.cursor = session.start;
+///
+///                 let _ = T::new(&mut session);
+///
+///                 if session.end > session.start { break; }
+///             }
+///
+///             return (token, &input[0..session.start]);
+///         }
+///
+///         (token, &input[0..session.end])
+///     }
+/// }
+///
+/// assert_eq!(First::run::<SimpleToken>(""), (SimpleToken::Mismatch, ""));
+/// assert_eq!(First::run::<SimpleToken>("лексема bar baz"), (SimpleToken::Mismatch, "лексема"));
+/// assert_eq!(First::run::<SimpleToken>("foo bar baz"), (SimpleToken::Identifier, "foo"));
+/// assert_eq!(First::run::<SimpleToken>("123 bar baz"), (SimpleToken::Number, "123"));
+/// ```
+pub trait LexisSession {
+    /// Tells the session to move to the next input character.
+    ///
+    /// This function does nothing if there are no more characters in the input sequence.
+    fn advance(&mut self);
+
+    /// Returns the current character of the input sequence.
+    ///
+    /// This function does not [advance](LexisSession::advance) the Session's internal cursor.
+    ///
+    /// If the current character is a Null character(`'\0'`), the function returns the
+    /// [replacement character](::std::char::REPLACEMENT_CHARACTER) instead.
+    ///
+    /// If there are no more characters in the input sequence(the Session has reached the end of
+    /// the input), this function returns a Null character.
+    fn character(&self) -> char;
+
+    /// Tells the session that the sequence of characters scanned prior to the current
+    /// character(excluding the current character) builds up a complete token.
+    ///
+    /// The Algorithm can call this function multiple times. In this case the Session will ignore
+    /// all previous "submissions" in favor of the last one.
+    ///
+    /// If the Algorithm never invokes this function, or the Algorithm never invokes the
+    /// [advance](LexisSession::advance) function during the scanning session, the input sequence
+    /// is considered to be lexically incorrect.
+    ///
+    /// This function does not advance the Session's internal cursor.
+    fn submit(&mut self);
+
+    /// Returns a substring of the input text from the beginning of the scanning session till the
+    /// latest [submitted](LexisSession::submit) character(excluding that submitted character).
+    ///
+    /// This function does not [advance](LexisSession::advance) the Session's internal cursor.
+    fn substring(&mut self) -> &str;
+}
+
+pub(super) struct SequentialLexisSession<'code, T: Token> {
+    pub(super) buffer: &'code mut TokenBuffer<T>,
+    pub(super) next_cursor: Cursor,
+    pub(super) begin_cursor: Cursor,
+    pub(super) start_cursor: Cursor,
+    pub(super) end_cursor: Cursor,
+}
+
+impl<'code, T: Token> LexisSession for SequentialLexisSession<'code, T> {
+    #[inline(always)]
+    fn advance(&mut self) {
+        self.next_cursor.advance(self.buffer);
+    }
+
+    #[inline(always)]
+    fn character(&self) -> char {
+        self.next_cursor.character
+    }
+
+    #[inline(always)]
+    fn submit(&mut self) {
+        self.end_cursor = self.next_cursor;
+    }
+
+    #[inline(always)]
+    fn substring(&mut self) -> &str {
+        unsafe {
+            self.buffer
+                .tail
+                .get_unchecked(self.start_cursor.byte_index..self.end_cursor.byte_index)
+        }
+    }
+}
+
+impl<'code, T: Token> SequentialLexisSession<'code, T> {
+    #[inline]
+    pub(super) fn run(buffer: &'code mut TokenBuffer<T>, site: Site)
+    where
+        T: Token,
+    {
+        let cursor = Cursor {
+            site,
+            byte_index: 0,
+            character: unsafe { get_lexis_character(buffer.tail.get_unchecked(0..).chars()) },
+        };
+
+        let mut session = Self {
+            buffer,
+            next_cursor: cursor,
+            begin_cursor: cursor,
+            start_cursor: cursor,
+            end_cursor: cursor,
+        };
+
+        loop {
+            let token = T::new(&mut session);
+
+            if session.start_cursor.site != session.end_cursor.site {
+                session
+                    .buffer
+                    .push(token, &session.start_cursor, &session.end_cursor);
+
+                if session.end_cursor.character == NULL {
+                    break;
+                }
+
+                session.reset();
+
+                continue;
+            }
+
+            if session.enter_mismatch_loop(token) {
+                break;
+            }
+        }
+    }
+
+    // Returns true if the parsing process is supposed to stop.
+    #[inline]
+    fn enter_mismatch_loop(&mut self, mismatch: T) -> bool
+    where
+        T: Token,
+    {
+        loop {
+            self.start_cursor.advance(&self.buffer);
+            self.next_cursor = self.start_cursor;
+
+            if self.start_cursor.character == NULL {
+                self.buffer
+                    .push(mismatch, &self.begin_cursor, &self.start_cursor);
+
+                return true;
+            }
+
+            let token = T::new(self);
+
+            if self.start_cursor.site < self.end_cursor.site {
+                self.buffer
+                    .push(mismatch, &self.begin_cursor, &self.start_cursor);
+
+                self.buffer
+                    .push(token, &self.start_cursor, &self.end_cursor);
+
+                if self.end_cursor.character == NULL {
+                    return true;
+                }
+
+                self.reset();
+
+                return false;
+            }
+        }
+    }
+
+    #[inline(always)]
+    fn reset(&mut self) {
+        self.begin_cursor = self.end_cursor;
+        self.start_cursor = self.end_cursor;
+        self.next_cursor = self.end_cursor;
+    }
+}
+
+#[derive(Clone, Copy)]
+pub(super) struct Cursor {
+    pub(super) site: Site,
+    pub(super) byte_index: ByteIndex,
+    pub(super) character: char,
+}
+
+impl Cursor {
+    #[inline]
+    fn advance<T: Token>(&mut self, buffer: &TokenBuffer<T>) {
+        if self.character == NULL {
+            return;
+        }
+
+        self.site += 1;
+        self.byte_index += self.character.len_utf8();
+
+        if self.byte_index == buffer.tail.len() {
+            self.character = NULL;
+            return;
+        }
+
+        self.character =
+            unsafe { get_lexis_character(buffer.tail.get_unchecked(self.byte_index..).chars()) };
+    }
+}
diff --git a/work/crates/main/src/lexis/simple.rs b/work/crates/main/src/lexis/simple.rs
new file mode 100644
index 0000000..0fe5fb8
--- /dev/null
+++ b/work/crates/main/src/lexis/simple.rs
@@ -0,0 +1,118 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{lexis::Token, std::*};
+
+/// A common generic lexis.
+///
+/// You can use this Token type when the particular source code grammar is unknown or does not
+/// matter(e.g. if the end user opens a custom .txt file in the code editor window), but the text
+/// needs to be split into tokens in some reasonable way.
+///
+/// Also, you can use a companion [SimpleNode](crate::syntax::SimpleNode) syntax implementation
+/// that parses parens nesting on top of this lexis.
+#[derive(Token, Clone, Copy, Debug, PartialEq, Eq)]
+#[define(ALPHABET = ['a'..'z', 'A'..'Z'])]
+#[define(NUM = ['0'..'9'])]
+#[define(ALPHANUM = ALPHABET | NUM)]
+#[define(SYMBOL = [
+    '!', '@', '#', '$', '%', '^', '&', '*', '-', '+', '=', '/', '|', ':', ';', '.',
+    ',', '<', '>', '?', '~', '`'
+])]
pub enum SimpleToken {
+    /// A numerical literal. Either an integer or a floating point number(e.g. `12345` or
+    /// `1234.56`).
+    #[rule(NUM+ & ('.' & NUM+)?)]
+    Number,
+
+    /// Any keyboard terminal character(e.g. `@` or `%`) except paren terminals.
+    #[rule(SYMBOL | '\\')]
+    Symbol,
+
+    /// An open parenthesis(`(`) terminal.
+    #[rule('(')]
+    ParenOpen,
+
+    /// A close parenthesis(`)`) terminal.
+    #[rule(')')]
+    ParenClose,
+
+    /// An open bracket(`[`) terminal.
+    #[rule('[')]
+    BracketOpen,
+
+    /// A close bracket(`]`) terminal.
+    #[rule(']')]
+    BracketClose,
+
+    /// An open brace(`{`) terminal.
+    #[rule('{')]
+    BraceOpen,
+
+    /// A close brace(`}`) terminal.
+    #[rule('}')]
+    BraceClose,
+
+    /// An English alphanumeric word that does not start with a digit(e.g. `hello_World123`).
+    #[rule(ALPHABET & (ALPHANUM | '_')*)]
+    Identifier,
+
+    /// A string literal surrounded by `"` characters that allows any characters inside including
+    /// the characters escaped by `\` prefix(e.g. `"hello \" \n world"`).
+    #[rule('"' & ('\\' & . | ^['\\', '\"'])* & '"')]
+    String,
+
+    /// A single character literal surrounded by `'` characters that allows any character inside
+    /// including the characters escaped by `\` prefix(e.g. `'A'`, or `'\A'`, or `'\''`).
+    #[rule('\'' & ('\\' & . | ^['\\', '\'']) & '\'')]
+    Char,
+
+    /// A sequence of whitespace characters as defined in
+    /// [`char::is_ascii_whitespace()`](char::is_ascii_whitespace).
+    #[rule([' ', '\t', '\n', '\x0c', '\r']+)]
+    Whitespace,
+
+    /// Any other token that does not fit this lexical grammar.
+    #[mismatch]
+    Mismatch,
+}
+
+impl Display for SimpleToken {
+    #[inline(always)]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        Debug::fmt(self, formatter)
+    }
+}
diff --git a/work/crates/main/src/lexis/site.rs b/work/crates/main/src/lexis/site.rs
new file mode 100644
index 0000000..fb99ad7
--- /dev/null
+++ b/work/crates/main/src/lexis/site.rs
@@ -0,0 +1,317 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Id, Identifiable},
+    lexis::{SourceCode, TokenCursor, TokenRef},
+    std::*,
+};
+
+/// A number of Unicode characters in the text.
+pub type Length = usize;
+
+/// A number of Unicode characters behind the specified character in the source code text.
+pub type Site = usize;
+
+/// A number of bytes behind the specified Unicode character in the source code text.
+pub type ByteIndex = usize;
+
+unsafe impl ToSite for Site {
+    #[inline(always)]
+    fn to_site(&self, code: &impl SourceCode) -> Option<Site> {
+        Some(code.length().min(*self))
+    }
+
+    #[inline(always)]
+    fn is_valid_site(&self, _code: &impl SourceCode) -> bool {
+        true
+    }
+}
+
+/// A weak reference of the [Site] inside the source code text.
+///
+/// This object "pins" a particular Site inside the source code text. This "pin" can survive write
+/// operations in the text happening aside of it(before or after the referred Site), resolving
+/// to the relevant pinned Site after the source code mutations.
+///
+/// An API user is encouraged to use SiteRef to fix particular bounds of the source code snippets
+/// for later use. For example, one can use a [Range](::std::ops::Range) of the
+/// SiteRefs(a [SiteRefSpan](crate::lexis::SiteRefSpan) object) to refer to the bounds of a
+/// Syntax or Semantic error inside the code.
+///
+/// In practice, a SiteRef can refer to Tokens' start and end bounds only.
+///
+/// An API user constructs this object either from the
+/// [`TokenRef::site_ref`](crate::lexis::TokenRef::site_ref) to refer to a Token's start Site, or
+/// from the [`SourceCode::end_site_ref`](crate::lexis::SourceCode::end_site_ref) to refer to the
+/// source code's end Site.
+///
+/// SiteRef implements the [ToSite] trait, and as such can be used as an index into the source
+/// code.
+///
+/// SiteRef is cheap to [Copy] and cheap to dereference.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     Document,
+///     lexis::{SimpleToken, SourceCode, TokenCursor, ToSite, CodeContent},
+///     syntax::NoSyntax,
+/// };
+///
+/// let mut doc = Document::<NoSyntax<SimpleToken>>::from("foo bar baz");
+///
+/// // Obtaining the beginning Site weak reference to the third token("bar").
+/// let site_ref = doc.cursor(..).site_ref(2);
+///
+/// // "bar" starts on the fifth character.
+/// assert_eq!(site_ref.to_site(&doc).unwrap(), 4);
+///
+/// // Write something in the beginning of the text.
+/// doc.write(0..0, "123");
+/// assert_eq!(doc.substring(..), "123foo bar baz");
+///
+/// // From now on "bar" starts on the 8th character.
+/// assert_eq!(site_ref.to_site(&doc).unwrap(), 7);
+///
+/// // But if we erase the entire source code, "site_ref" turns into an invalid reference.
+/// doc.write(.., "123456");
+/// assert_eq!(doc.substring(..), "123456");
+///
+/// assert!(!site_ref.is_valid_site(&doc));
+/// assert_eq!(site_ref.to_site(&doc), None);
+/// ```
+///
+/// For details on the Weak references framework design see [Arena](crate::arena) module
+/// documentation.
+#[derive(Clone, Copy, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct SiteRef(SiteRefInner);
+
+impl Debug for SiteRef {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        match &self.0 {
+            SiteRefInner::CodeEnd(id) => formatter.write_fmt(format_args!("SiteRef({:?})", id)),
+            SiteRefInner::ChunkStart(reference) => match reference.is_nil() {
+                false => formatter.write_fmt(format_args!("SiteRef({:?})", reference.id())),
+                true => formatter.write_str("SiteRef(Nil)"),
+            },
+        }
+    }
+}
+
+impl Identifiable for SiteRef {
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        match &self.0 {
+            SiteRefInner::ChunkStart(reference) => reference.id(),
+            SiteRefInner::CodeEnd(code_id) => code_id,
+        }
+    }
+}
+
+unsafe impl ToSite for SiteRef {
+    #[inline(always)]
+    fn to_site(&self, code: &impl SourceCode) -> Option<Site> {
+        match &self.0 {
+            SiteRefInner::ChunkStart(token_ref) => code.get_site(&token_ref.chunk_ref),
+
+            SiteRefInner::CodeEnd(id) => match id == code.id() {
+                false => None,
+                true => Some(code.length()),
+            },
+        }
+    }
+
+    #[inline(always)]
+    fn is_valid_site(&self, code: &impl SourceCode) -> bool {
+        match &self.0 {
+            SiteRefInner::ChunkStart(reference) => reference.is_valid_ref(code),
+            SiteRefInner::CodeEnd(id) => id == code.id(),
+        }
+    }
+}
+
+impl SiteRef {
+    /// Returns an invalid instance of the SiteRef.
+    ///
+    /// This instance never resolves to a valid [Site](crate::lexis::Site).
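+    ///
+    /// A minimal sketch (reusing the `Document` and `SimpleToken` setup from the example above)
+    /// showing that the Nil reference never resolves:
+    ///
+    /// ```rust
+    /// use lady_deirdre::{Document, lexis::{SiteRef, SimpleToken, ToSite}, syntax::NoSyntax};
+    ///
+    /// let doc = Document::<NoSyntax<SimpleToken>>::from("foo bar baz");
+    ///
+    /// // The Nil SiteRef is not a valid reference for any compilation unit.
+    /// assert!(!SiteRef::nil().is_valid_site(&doc));
+    /// assert_eq!(SiteRef::nil().to_site(&doc), None);
+    /// ```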
+    #[inline(always)]
+    pub const fn nil() -> Self {
+        Self(SiteRefInner::ChunkStart(TokenRef::nil()))
+    }
+
+    #[inline(always)]
+    pub(crate) const fn new_code_end(code_id: Id) -> Self {
+        Self(SiteRefInner::CodeEnd(code_id))
+    }
+
+    #[inline(always)]
+    pub(super) const fn new_chunk_start(reference: TokenRef) -> Self {
+        Self(SiteRefInner::ChunkStart(reference))
+    }
+
+    /// Returns `true` if this instance will never resolve to a valid [Site](crate::lexis::Site).
+    ///
+    /// It is guaranteed that `SiteRef::nil().is_nil()` is always `true`, but in general if
+    /// this function returns `false` it is not guaranteed that the provided instance is a valid
+    /// reference.
+    ///
+    /// To determine reference validity per specified [SourceCode](crate::lexis::SourceCode)
+    /// instance use the [is_valid_site](crate::lexis::ToSite::is_valid_site) function instead.
+    #[inline(always)]
+    pub const fn is_nil(&self) -> bool {
+        match &self.0 {
+            SiteRefInner::ChunkStart(reference) => reference.is_nil(),
+            SiteRefInner::CodeEnd(_) => false,
+        }
+    }
+
+    #[inline(always)]
+    pub(crate) const fn inner(&self) -> &SiteRefInner {
+        &self.0
+    }
+}
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub(crate) enum SiteRefInner {
+    ChunkStart(TokenRef),
+    CodeEnd(Id),
+}
+
+/// An interface of the source code character index.
+///
+/// The underlying object may be a valid or invalid index for a particular
+/// [SourceCode](crate::lexis::SourceCode) instance. If the object is considered invalid, the
+/// [is_valid_site](ToSite::is_valid_site) function returns `false`, and the
+/// [to_site](ToSite::to_site), [to_byte_index](ToSite::to_byte_index) functions return [None].
+/// Otherwise the "is_valid_site" function returns `true`, and the other two functions return
+/// meaningful [Some] values.
+///
+/// It is up to the implementation to decide whether a particular instance is valid or not.
+///
+/// The most trivial implementation of this trait is the [Site] type(an absolute UTF-8 character
+/// offset) itself. Sites are always valid indices. For the sake of simplicity, the ToSite
+/// implementation of the Site type always clamps the Site's value to the source code character
+/// [length](crate::lexis::SourceCode::length).
+///
+/// Two other implementations provided for an API user out of the box are
+/// [Position](crate::lexis::Position) and [SiteRef](crate::lexis::SiteRef). Positions are always
+/// valid values, and SiteRefs could be invalid if the referred site does not belong to the
+/// specified `code` instance, or if the SiteRef is obsolete.
+///
+/// **Safety:**
+///   - If the [to_site](ToSite::to_site) function returns a [Some] value, this value is always
+///     within the `0..=SourceCode::length(code)` range.
+///   - If the [to_byte_index](ToSite::to_byte_index) function returns a Some value, this value is
+///     a Unicode-valid character byte offset within the `code` underlying text.
+pub unsafe trait ToSite {
+    /// Resolves the index object into a valid [Site](crate::lexis::Site) index for the specified
+    /// `code` instance.
+    ///
+    /// This function returns a [Some] value if and only if the
+    /// [is_valid_site](crate::lexis::ToSite::is_valid_site) function returns `true`.
+    ///
+    /// The returned Some value is always within the `0..=SourceCode::length(code)` range.
+    fn to_site(&self, code: &impl SourceCode) -> Option<Site>;
+
+    /// Resolves the index object into a valid Unicode [byte index](crate::lexis::ByteIndex) for
+    /// the specified `code` instance text.
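+    ///
+    /// For example (a minimal sketch using the built-in [SimpleToken](crate::lexis::SimpleToken)
+    /// lexis; note how a multi-byte character makes the byte index differ from the Site):
+    ///
+    /// ```rust
+    /// use lady_deirdre::lexis::{ToSite, TokenBuffer, SimpleToken};
+    ///
+    /// // "л" is a two-byte character in UTF-8.
+    /// let code = TokenBuffer::<SimpleToken>::from("л foo");
+    ///
+    /// // The character Site 1 (the whitespace) starts at byte offset 2.
+    /// assert_eq!(1usize.to_byte_index(&code), Some(2));
+    /// ```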
+    ///
+    /// This function returns a [Some] value if and only if the
+    /// [is_valid_site](crate::lexis::ToSite::is_valid_site) function returns `true`.
+    ///
+    /// The returned Some value is always a Unicode-valid byte offset into the `code` underlying
+    /// text.
+    ///
+    /// **Safety:**
+    ///   - The default implementation of this function is safe as long as the
+    ///     [to_site](ToSite::to_site) function follows the trait's general safety requirements.
+    fn to_byte_index(&self, code: &impl SourceCode) -> Option<ByteIndex> {
+        let mut site = match self.to_site(code) {
+            None => return None,
+            Some(site) => site,
+        };
+
+        if site == 0 {
+            return Some(0);
+        }
+
+        let mut cursor = code.cursor(..);
+        let mut byte_index = 0;
+        let mut token_index = 0;
+
+        loop {
+            let length = match cursor.length(token_index) {
+                None => break,
+                Some(length) => length,
+            };
+
+            let string = match cursor.string(token_index) {
+                None => break,
+                Some(string) => string,
+            };
+
+            if site > length {
+                site -= length;
+                byte_index += string.len();
+                token_index += 1;
+                continue;
+            }
+
+            if site == 0 {
+                break;
+            }
+
+            for character in string.chars() {
+                site -= 1;
+                byte_index += character.len_utf8();
+
+                if site == 0 {
+                    break;
+                }
+            }
+
+            break;
+        }
+
+        Some(byte_index)
+    }
+
+    /// Returns `true` if this index object can be resolved to valid
+    /// [Site](crate::lexis::Site) and [ByteIndex](crate::lexis::ByteIndex) values for the
+    /// specified `code` instance.
+    fn is_valid_site(&self, code: &impl SourceCode) -> bool;
+}
diff --git a/work/crates/main/src/lexis/span.rs b/work/crates/main/src/lexis/span.rs
new file mode 100644
index 0000000..b9e6908
--- /dev/null
+++ b/work/crates/main/src/lexis/span.rs
@@ -0,0 +1,356 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    lexis::{Position, Site, SiteRef, SourceCode, ToPosition, ToSite},
+    std::*,
+};
+
+/// A range of [Sites](crate::lexis::Site).
+///
+/// The [ToSpan](crate::lexis::ToSpan) trait is auto-implemented for this object, and as such it
+/// can be used to specify Spans.
+///
+/// Also, SiteSpan is the base Span representation: any valid ToSpan object casts to a SiteSpan.
+///
+/// SiteSpan bounds are zero-based indices. Bound value `0` means the first character, bound
+/// value `1` is the second character, and so on up to the source code text
+/// [length](crate::lexis::SourceCode::length).
+///
+/// SiteSpan may be an empty span(the start bound equals the end bound). In this case the Span
+/// represents a single [Site](crate::lexis::Site) inside the source code text. For example, if
+/// an API user [writes](crate::Document::write) a text into the Document specifying an empty
+/// span, the Write operation becomes an Insertion operation at the specified Site.
+///
+/// SiteSpan is a valid span for any [SourceCode](crate::lexis::SourceCode) as long as its start
+/// bound is less than or equal to the end bound. If any of the Range bounds is greater than the
+/// source code text length, this bound will be clamped to the text character length value. This
+/// behavior stems from the [ToSite](crate::lexis::ToSite) trait specification.
+///
+/// You can think about SiteSpan as a range of the text selected inside the code editor.
+///
+/// ```rust
+/// use lady_deirdre::{Document, syntax::SimpleNode, lexis::CodeContent};
+///
+/// let doc = Document::<SimpleNode>::from("foo bar baz");
+///
+/// // A substring of all characters starting from character number 4 inclusive till character
+/// // 7 exclusive.
+/// assert_eq!(doc.substring(4..7), "bar");
+/// ```
+pub type SiteSpan = Range<Site>;
+
+/// A range of [SiteRefs](crate::lexis::SiteRef).
+///
+/// The [ToSpan](crate::lexis::ToSpan) trait is auto-implemented for this object, and as such it
+/// can be used to specify Spans.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     Document,
+///     syntax::SimpleNode,
+///     lexis::{CodeContent, SourceCode, TokenCursor},
+/// };
+///
+/// let doc = Document::<SimpleNode>::from("foo bar baz");
+///
+/// let start = doc.cursor(..).site_ref(2);
+/// let end = doc.cursor(..).site_ref(3);
+///
+/// assert_eq!(doc.substring(start..end), "bar");
+/// ```
+pub type SiteRefSpan = Range<SiteRef>;
+
+/// A range of [Positions](crate::lexis::Position).
+///
+/// The [ToSpan](crate::lexis::ToSpan) trait is auto-implemented for this object, and as such it
+/// can be used to specify Spans.
+///
+/// ```rust
+/// use lady_deirdre::{Document, syntax::SimpleNode, lexis::{CodeContent, Position}};
+///
+/// let doc = Document::<SimpleNode>::from("foo bar baz");
+///
+/// assert_eq!(doc.substring(Position::new(1, 5)..Position::new(1, 8)), "bar");
+/// ```
+pub type PositionSpan = Range<Position>;
+
+/// An interface of the source code character index range("Span").
+///
+/// The underlying object may be a valid or invalid Span representation for a particular
+/// [SourceCode](crate::lexis::SourceCode) instance. If the object is considered invalid, the
+/// [is_valid_span](ToSpan::is_valid_span) function returns `false`, and the
+/// [to_span](ToSpan::to_span) function returns [None]. Otherwise "is_valid_span" returns `true`,
+/// and "to_span" returns a meaningful [SiteSpan](crate::lexis::SiteSpan) value.
+///
+/// It is up to the implementation to decide whether a particular instance is valid or not.
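+///
+/// Note that an empty range(where the start bound equals the end bound) is a valid span that
+/// covers no characters. A minimal sketch (assuming the `SimpleNode` syntax used in the examples
+/// above):
+///
+/// ```rust
+/// use lady_deirdre::{Document, syntax::SimpleNode, lexis::{CodeContent, ToSpan}};
+///
+/// let doc = Document::<SimpleNode>::from("foo bar baz");
+///
+/// // An empty span is valid, and its substring is the empty string.
+/// assert!((4..4).is_valid_span(&doc));
+/// assert_eq!(doc.substring(4..4), "");
+/// ```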
+///
+/// This trait is implemented for the [RangeFull](::std::ops::RangeFull) object(a `..` shortcut)
+/// that is always valid and always resolves to the SiteSpan covering the entire source code text.
+///
+/// For any type that implements the [ToSite](crate::lexis::ToSite) trait, the ToSpan trait is
+/// auto-implemented for all variants of Rust's standard ranges(Range, RangeFrom, etc.) over
+/// this type. As such, if an API user implements the ToSite trait, the user receives ToSpan range
+/// implementations over this type out of the box. Such ToSpan auto-implementations are considered
+/// valid Spans as long as the original range bounds are
+/// [`site-valid`](crate::lexis::ToSite::is_valid_site) values, and the start Site bound
+/// does not exceed the end Site bound.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     Document,
+///     syntax::SimpleNode,
+///     lexis::{CodeContent, ToSpan, ToSite, SourceCode, SiteSpan, Site},
+/// };
+///
+/// let doc = Document::<SimpleNode>::from("foo bar baz");
+///
+/// // A substring of all characters starting from character number 4 inclusive till character
+/// // 7 exclusive.
+/// assert_eq!(doc.substring(4..7), "bar");
+///
+/// // A custom Span implementation that resolves to the first half of the source code text.
+/// struct Half;
+///
+/// // This is safe because "to_span" always returns a SiteSpan within the source code text
+/// // character bounds.
+/// unsafe impl ToSpan for Half {
+///     fn to_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
+///         Some(0..code.length() / 2)
+///     }
+///
+///     fn is_valid_span(&self, _code: &impl SourceCode) -> bool { true }
+/// }
+///
+/// assert_eq!(doc.substring(Half), "foo b");
+///
+/// // A custom one-based Site index.
+/// struct OneBaseSite(usize);
+///
+/// // This is safe because the "to_site" implementation checks the underlying value.
+/// unsafe impl ToSite for OneBaseSite {
+///     fn to_site(&self, code: &impl SourceCode) -> Option<Site> {
+///         if self.0 == 0 || self.0 > code.length() { return None }
+///
+///         Some(self.0 - 1)
+///     }
+///
+///     fn is_valid_site(&self, code: &impl SourceCode) -> bool {
+///         self.0 > 0 && self.0 <= code.length()
+///     }
+/// }
+///
+/// // Since ToSite is implemented for the OneBaseSite object, all types of ranges over this
+/// // object are ToSpan as well.
+/// assert_eq!(doc.substring(OneBaseSite(5)..OneBaseSite(8)), "bar");
+/// assert_eq!(doc.substring(OneBaseSite(5)..=OneBaseSite(7)), "bar");
+/// ```
+///
+/// **Safety:**
+///   - If the [to_span](ToSpan::to_span) function returns a [Some] range, the range start bound
+///     value does not exceed the range end bound value, and the range's end bound value does not
+///     exceed the [`SourceCode::length`](crate::lexis::SourceCode::length) value.
+pub unsafe trait ToSpan {
+    /// Returns a valid [SiteSpan](crate::lexis::SiteSpan) representation of this Span object
+    /// if the Span object is a valid Span for the specified `code` parameter. Otherwise returns
+    /// [None].
+    ///
+    /// The validity of the Span object is implementation specific.
+    ///
+    /// The returned SiteSpan [Range](::std::ops::Range) start bound value does not exceed the end
+    /// bound value, and the range's end bound does not exceed the `SourceCode::length(code)`
+    /// value.
+    fn to_span(&self, code: &impl SourceCode) -> Option<SiteSpan>;
+
+    /// Returns `true` if and only if the [to_span](ToSpan::to_span) function would return a
+    /// [Some] value for the specified `code` parameter.
+    fn is_valid_span(&self, code: &impl SourceCode) -> bool;
+
+    /// A helper function to format the specified Span.
+    ///
+    /// This function tries to resolve the spanned object and to format its bounds in the form of
+    /// a [PositionSpan](crate::lexis::PositionSpan). If the resolution is not possible, the
+    /// function returns the `"?"` string.
+    ///
+    /// ```rust
+    /// use lady_deirdre::{Document, syntax::SimpleNode, lexis::{ToSpan, CodeContent}};
+    ///
+    /// let doc = Document::<SimpleNode>::from("foo\nbar baz");
+    ///
+    /// assert_eq!(doc.substring(2..7), "o\nbar");
+    /// assert_eq!((2..7).format(&doc), "[1:3] - [2:3]");
+    /// ```
+    #[inline]
+    fn format(&self, code: &impl SourceCode) -> Cow<'static, str> {
+        let Range { start, end } = match self.to_span(code) {
+            None => return Cow::from("?"),
+            Some(span) => span,
+        };
+
+        let mut result = unsafe { start.to_position(code).unwrap_unchecked() }.to_string();
+
+        if start + 1 < end {
+            result.push_str(" - ");
+            result.push_str(&unsafe { (end - 1).to_position(code).unwrap_unchecked() }.to_string());
+        }
+
+        Cow::from(result)
+    }
+}
+
+unsafe impl ToSpan for RangeFull {
+    #[inline(always)]
+    fn to_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
+        Some(0..code.length())
+    }
+
+    #[inline(always)]
+    fn is_valid_span(&self, _code: &impl SourceCode) -> bool {
+        true
+    }
+}
+
+unsafe impl<S: ToSite> ToSpan for Range<S> {
+    #[inline]
+    fn to_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
+        let start = self.start.to_site(code);
+        let end = self.end.to_site(code);
+
+        match (start, end) {
+            (Some(start), Some(end)) if start <= end => Some(start..end),
+            _ => None,
+        }
+    }
+
+    #[inline]
+    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
+        let start = self.start.to_site(code);
+        let end = self.end.to_site(code);
+
+        match (start, end) {
+            (Some(start), Some(end)) if start <= end => true,
+            _ => false,
+        }
+    }
+}
+
+unsafe impl<S: ToSite> ToSpan for RangeInclusive<S> {
+    #[inline]
+    fn to_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
+        let start = self.start().to_site(code);
+        let end = self.end().to_site(code);
+
+        match (start, end) {
+            (Some(start), Some(mut end)) if start <= end => {
+                if end < code.length() && end < usize::MAX {
+                    end += 1;
+                }
+
+                Some(start..end)
+            }
+            _ => None,
+        }
+    }
+
+    #[inline]
+    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
+        let start = self.start().to_site(code);
+        let end = self.end().to_site(code);
+
+        match (start, end) {
+            (Some(start), Some(end)) if start <= end => true,
+            _ => false,
+        }
+    }
+}
+
+unsafe impl<S: ToSite> ToSpan for RangeFrom<S> {
+    #[inline]
+    fn to_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
+        let start = match self.start.to_site(code) {
+            None => return None,
+            Some(site) => site,
+        };
+
+        let end = code.length();
+
+        Some(start..end)
+    }
+
+    #[inline(always)]
+    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
+        self.start.is_valid_site(code)
+    }
+}
+
+unsafe impl<S: ToSite> ToSpan for RangeTo<S> {
+    #[inline]
+    fn to_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
+        let end = match self.end.to_site(code) {
+            None => return None,
+            Some(site) => site,
+        };
+
+        Some(0..end)
+    }
+
+    #[inline(always)]
+    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
+        self.end.is_valid_site(code)
+    }
+}
+
+unsafe impl<S: ToSite> ToSpan for RangeToInclusive<S> {
+    #[inline]
+    fn to_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
+        let end = match self.end.to_site(code) {
+            None => return None,
+            Some(site) => {
+                if site < code.length() && site < usize::MAX {
+                    site + 1
+                } else {
+                    site
+                }
+            }
+        };
+
+        Some(0..end)
+    }
+
+    #[inline(always)]
+    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
+        self.end.is_valid_site(code)
+    }
+}
diff --git a/work/crates/main/src/lexis/token.rs b/work/crates/main/src/lexis/token.rs
new file mode 100644
index 0000000..42bc482
--- /dev/null
+++ b/work/crates/main/src/lexis/token.rs
@@ -0,0 +1,510 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+extern crate lady_deirdre_derive;
+
+pub use lady_deirdre_derive::Token;
+
+use crate::{
+    arena::{Id, Identifiable, Ref},
+    lexis::{ChunkRef, Length, LexisSession, Site, SiteRef, SourceCode, TokenBuffer},
+    std::*,
+};
+
+/// A number of Tokens.
+pub type TokenCount = usize;
+
+/// A trait that specifies the Token's kind and provides a lexical grammar parser.
+///
+/// An API user implements this trait to specify the Programming Language lexical grammar and the
+/// lexis unit type(a "Token").
+///
+/// This trait is supposed to be implemented on a Rust enum type with variants representing
+/// token kinds, but this is not a strict requirement. In the functional sense, the main purpose
+/// of the Token implementation is to provide a lexical parser that re-parses sequences of
+/// Unicode characters by interacting with an arbitrary [LexisSession](crate::lexis::LexisSession)
+/// interface that, in turn, manages the parsing process.
+///
+/// An API user is encouraged to implement this trait using the helper
+/// [Token](::lady_deirdre_derive::Token) macro-derive on enum types by specifying the lexical
+/// grammar directly on the enum variants through the macro attributes.
+///
+/// ```rust
+/// use lady_deirdre::lexis::{Token, TokenBuffer, CodeContent, ChunkRef};
+///
+/// #[derive(Token, PartialEq, Debug)]
+/// enum MyToken {
+///     // The exact string "FOO".
+///     #[rule("FOO")]
+///     Foo,
+///
+///     // The exact string "bar".
+///     #[rule("bar")]
+///     Bar,
+///
+///     // An unlimited non-empty sequence of '_' characters.
+///     #[rule('_'+)]
+///     LowDashes,
+///
+///     // Character sequences that don't fit this grammar.
+///     #[mismatch]
+///     Mismatch,
+/// }
+///
+/// let buf = TokenBuffer::<MyToken>::from("FOO___bar_mismatch__FOO");
+///
+/// assert_eq!(
+///     buf.chunks(..).map(|chunk: ChunkRef<'_, MyToken>| chunk.token).collect::<Vec<_>>(),
+///     vec![
+///         &MyToken::Foo,
+///         &MyToken::LowDashes,
+///         &MyToken::Bar,
+///         &MyToken::LowDashes,
+///         &MyToken::Mismatch,
+///         &MyToken::LowDashes,
+///         &MyToken::Foo,
+///     ],
+/// );
+/// ```
+///
+/// The Token enum object may keep additional semantic metadata inside the variants' fields, but
+/// optimization-wise you will gain the best performance if the Token requires as little
+/// allocated memory as possible(ideally one byte).
+///
+/// An API user can implement the Token trait manually too, for example, using 3rd party lexical
+/// scanner libraries. See the [`Token::new`](crate::lexis::Token::new) function specification for
+/// details.
+pub trait Token: Sized + 'static {
+    /// Parses a single token from the source code text, and returns a Token instance that
+    /// represents this token's kind.
+    ///
+    /// This is a low-level API function.
+    ///
+    /// An API user is encouraged to use the [Token](::lady_deirdre_derive::Token) macro-derive to
+    /// implement this trait automatically based on a set of Regular Expressions,
+    /// but you can implement it manually too.
+    ///
+    /// You need to call this function manually only if you want to implement an extension API to
+    /// this crate. In this case you should also prepare a custom implementation of the
+    /// LexisSession trait. See the [LexisSession](crate::lexis::LexisSession) specification for
+    /// details.
+    ///
+    /// The function implements a
+    /// [Finite-State Machine](https://en.wikipedia.org/wiki/Finite-state_machine) that reads
+    /// as many [characters](char) from the input [String](::std::string::String) sequence as
+    /// needed to decide the token kind of the read substring. Each time the function reads the
+    /// next character, it advances the `session`'s internal cursor.
+    ///
+    /// As the function implements a FSM, it should not look more than a single character ahead
+    /// to make a decision on each Algorithm step. Failure to do so could lead to logical
+    /// errors during incremental re-parsing.
+    ///
+    /// **Algorithm Specification:**
+    ///   - The Algorithm invokes the [`session.character()`](crate::lexis::LexisSession::character)
+    ///     function to fetch the character that the Session's internal cursor is currently looking
+    ///     at. This function does not advance the internal cursor. In the beginning the cursor
+    ///     points to the first character of the input String.
+    ///
+    ///     If this function returns a Null-character(`'\0'`), that means that the Session cursor
+    ///     has reached the end of the input. This character is not a part of the input sequence;
+    ///     the Algorithm should ignore this character, but it should make a final decision and
+    ///     return a Token instance. Note that if the original input sequence(a source code text)
+    ///     contains a Null-character, `session.character()` yields a
+    ///     [replacement character](::std::char::REPLACEMENT_CHARACTER) instead.
+    ///   - The Algorithm invokes the [`session.advance()`](crate::lexis::LexisSession::advance)
+    ///     function to advance the Session's internal cursor to the next character in the input
+    ///     character sequence.
+    ///   - If the Algorithm decides that the input substring prior to the current character
+    ///     contains a complete token, it should invoke the
+    ///     [`session.submit()`](crate::lexis::LexisSession::submit) function. In this case the
+    ///     Algorithm could either return a Token instance, or continue the scanning process. The
+    ///     LexisSession ignores all calls of the Submit function happening before the last one.
+    ///
+    ///     Note that the "submitted" character is not going to be a part of the parsed token
+    ///     substring. By calling the Submit function the Algorithm submits the character sequence
+    ///     prior to the current character.
+    ///   - It is assumed that the Token type defines a special kind of Token that specifies
+    ///     lexically-unrecognizable input sequences. If the Algorithm cannot recognize a
+    ///     lexically valid token, it should never call the `session.submit()` function, and in
+    ///     the end it should return an instance of such a "mismatched" Token.
+    ///   - The Algorithm can optionally obtain a slice of the input string from the beginning of
+    ///     the scanning session till the submitted character(exclusive) by calling the
+    ///     [`session.substring()`](crate::lexis::LexisSession::substring) function. The Algorithm
+    ///     can use this substring to analyze and store additional metadata inside the returned
+    ///     Token instance.
+    ///
+    /// ```rust
+    /// use lady_deirdre::lexis::{Token, TokenBuffer, CodeContent, LexisSession, ChunkRef};
+    ///
+    /// // Represents integer numbers or lower case alphabetic words.
+    /// #[derive(PartialEq, Debug)]
+    /// enum NumOrWord {
+    ///     Num(usize),
+    ///     Word(String),
+    ///     Mismatch,
+    /// }
+    ///
+    /// impl Token for NumOrWord {
+    ///     fn new(session: &mut impl LexisSession) -> Self {
+    ///         if session.character() >= 'a' && session.character() <= 'z' {
+    ///             loop {
+    ///                 session.advance();
+    ///                 if session.character() < 'a' || session.character() > 'z' { break; }
+    ///             }
+    ///
+    ///             session.submit();
+    ///
+    ///             return Self::Word(session.substring().to_string());
+    ///         }
+    ///
+    ///         if session.character() == '0' {
+    ///             session.advance();
+    ///             session.submit();
+    ///             return Self::Num(0);
+    ///         }
+    ///
+    ///         if session.character() >= '1' && session.character() <= '9' {
+    ///             loop {
+    ///                 session.advance();
+    ///                 if session.character() < '0' || session.character() > '9' { break; }
+    ///             }
+    ///
+    ///             session.submit();
+    ///
+    ///             return Self::Num(session.substring().parse::<usize>().unwrap());
+    ///         }
+    ///
+    ///         Self::Mismatch
+    ///     }
+    /// }
+    ///
+    /// let buf = TokenBuffer::<NumOrWord>::from("foo123_bar");
+    ///
+    /// assert_eq!(
+    ///     buf
+    ///         .chunks(..)
+    ///         .map(|chunk_ref: ChunkRef<'_, NumOrWord>| chunk_ref.token)
+    ///         .collect::<Vec<_>>(),
+    ///     vec![
+    ///         &NumOrWord::Word(String::from("foo")),
+    ///         &NumOrWord::Num(123),
+    ///         &NumOrWord::Mismatch,
+    ///         &NumOrWord::Word(String::from("bar")),
+    ///     ],
+    /// );
+    /// ```
+    fn new(session: &mut impl LexisSession) -> Self;
+
+    /// A helper function to lexically scan the provided `string`.
+    ///
+    /// This function is a shortcut to the `TokenBuffer::from(string)` call.
+    #[inline(always)]
+    fn parse(string: impl Borrow<str>) -> TokenBuffer<Self> {
+        let mut buffer = TokenBuffer::default();
+
+        buffer.append(string.borrow());
+
+        buffer
+    }
+}
+
+/// A weak reference of the [Token] and its [Chunk](crate::lexis::Chunk) metadata inside the source
+/// code.
+///
+/// This object represents a long-lived, lifetime independent and type independent, cheap to
+/// [Copy](::std::marker::Copy), safe weak reference into the source code lexical structure.
+///
+/// TokenRef is capable of surviving source code incremental changes happening aside of the
+/// referred Token.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     Document,
+///     lexis::{TokenRef, SimpleToken, SourceCode, TokenCursor, CodeContent},
+///     syntax::NoSyntax,
+/// };
+///
+/// let mut doc = Document::<NoSyntax<SimpleToken>>::from("foo bar baz");
+///
+/// // Reference to the "bar" token.
+/// let bar_token: TokenRef = doc.cursor(..).token_ref(2);
+///
+/// assert!(bar_token.is_valid_ref(&doc));
+/// assert_eq!(bar_token.deref(&doc).unwrap(), &SimpleToken::Identifier);
+/// assert_eq!(bar_token.string(&doc).unwrap(), "bar");
+///
+/// // Prepend the source code text.
+/// doc.write(0..0, "123");
+/// assert_eq!(doc.substring(..), "123foo bar baz");
+///
+/// // The "bar" token is still dereferenceable since the changes have happened aside of this
+/// // token.
+/// assert_eq!(bar_token.string(&doc).unwrap(), "bar");
+///
+/// // Writing inside of the "bar" token will obsolete the prior TokenRef.
+/// doc.write(7..8, "B");
+/// assert_eq!(doc.substring(..), "123foo Bar baz");
+///
+/// assert!(!bar_token.is_valid_ref(&doc));
+/// ```
+///
+/// An API user normally does not need to inspect TokenRef's inner fields manually or to construct
+/// a TokenRef manually, unless working on a Crate API Extension.
+///
+/// For details on the Weak references framework design see [Arena](crate::arena) module
+/// documentation.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct TokenRef {
+    /// An [identifier](crate::arena::Id) of the [SourceCode](crate::lexis::SourceCode) instance
+    /// this weakly referred Token belongs to.
+    pub id: Id,
+
+    /// An internal weak reference of the token's Chunk into
+    /// the [SourceCode](crate::lexis::SourceCode) instance.
+    ///
+    /// This low-level [Ref](crate::arena::Ref) object is used by the TokenRef under the hood to
+    /// fetch particular values from the SourceCode dereferencing functions(e.g.
+    /// [`SourceCode::get_token`](crate::lexis::SourceCode::get_token)).
+    pub chunk_ref: Ref,
+}
+
+impl Debug for TokenRef {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        match self.is_nil() {
+            false => formatter.write_fmt(format_args!("TokenRef({:?})", self.id())),
+            true => formatter.write_str("TokenRef(Nil)"),
+        }
+    }
+}
+
+impl Identifiable for TokenRef {
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        &self.id
+    }
+}
+
+impl TokenRef {
+    /// Returns an invalid instance of the TokenRef.
+    ///
+    /// This instance never resolves to a valid [Token] or [token metadata](crate::lexis::Chunk).
+    #[inline(always)]
+    pub const fn nil() -> Self {
+        Self {
+            id: *Id::nil(),
+            chunk_ref: Ref::Nil,
+        }
+    }
+
+    /// Returns `true` if this instance will never resolve to a valid [Token] or
+    /// [token metadata](crate::lexis::Chunk).
+    ///
+    /// It is guaranteed that `TokenRef::nil().is_nil()` is always `true`, but in general if
+    /// this function returns `false` it is not guaranteed that the provided instance is a valid
+    /// reference.
+    ///
+    /// To determine reference validity per specified [SourceCode](crate::lexis::SourceCode)
+    /// instance use the [is_valid_ref](crate::lexis::TokenRef::is_valid_ref) function instead.
+    #[inline(always)]
+    pub const fn is_nil(&self) -> bool {
+        self.id.is_nil() || self.chunk_ref.is_nil()
+    }
+
+    /// Immutably dereferences the weakly referred [Token](crate::lexis::Token) of the specified
+    /// [SourceCode](crate::lexis::SourceCode).
+    ///
+    /// Returns [None] if this TokenRef is not a valid reference for the specified `code` instance.
+    ///
+    /// Use [is_valid_ref](crate::lexis::TokenRef::is_valid_ref) to check TokenRef validity.
+    ///
+    /// This function uses the [`SourceCode::get_token`](crate::lexis::SourceCode::get_token)
+    /// function under the hood.
+    #[inline(always)]
+    pub fn deref<'code, T: Token>(
+        &self,
+        code: &'code impl SourceCode<Token = T>,
+    ) -> Option<&'code T> {
+        if &self.id != code.id() {
+            return None;
+        }
+
+        code.get_token(&self.chunk_ref)
+    }
+
+    /// Mutably dereferences the weakly referred [Token](crate::lexis::Token) of the specified
+    /// [SourceCode](crate::lexis::SourceCode).
+    ///
+    /// Returns [None] if this TokenRef is not a valid reference for the specified `code` instance.
+    ///
+    /// Use [is_valid_ref](crate::lexis::TokenRef::is_valid_ref) to check TokenRef validity.
+    ///
+    /// Even though this function provides a way to mutate a Token instance, it is not recommended
+    /// to mutate the token in a way that would change its lexical group. In other words, for enum
+    /// Token implementations it is fine to change a variant's inner fields, but it is not
+    /// recommended to replace one variant with another.
+    ///
+    /// Such mutations do not lead to undefined behavior, but they could corrupt the syntax
+    /// parser's correctness.
+    ///
+    /// This function uses the
+    /// [`SourceCode::get_token_mut`](crate::lexis::SourceCode::get_token_mut) function under the
+    /// hood.
+    #[inline(always)]
+    pub fn deref_mut<'code, T: Token>(
+        &self,
+        code: &'code mut impl SourceCode<Token = T>,
+    ) -> Option<&'code mut T> {
+        if &self.id != code.id() {
+            return None;
+        }
+
+        code.get_token_mut(&self.chunk_ref)
+    }
+
+    /// Returns a [ChunkRef](crate::lexis::ChunkRef) object with the overall token metadata of the
+    /// weakly referred token of the specified [SourceCode](crate::lexis::SourceCode).
+    ///
+    /// Returns [None] if this TokenRef is not a valid reference for the specified `code` instance.
+    ///
+    /// Use [is_valid_ref](crate::lexis::TokenRef::is_valid_ref) to check TokenRef validity.
+    ///
+    /// If an API user needs just a small subset of fields from the returned object, it is
+    /// recommended to use the more specialized TokenRef functions instead.
+    #[inline]
+    pub fn chunk<'code, T: Token>(
+        &self,
+        code: &'code impl SourceCode<Token = T>,
+    ) -> Option<ChunkRef<'code, T>> {
+        if &self.id != code.id() {
+            return None;
+        }
+
+        let token = code.get_token(&self.chunk_ref)?;
+        let site = code.get_site(&self.chunk_ref)?;
+        let length = code.get_length(&self.chunk_ref)?;
+        let string = code.get_string(&self.chunk_ref)?;
+
+        Some(ChunkRef {
+            token,
+            site,
+            length,
+            string,
+        })
+    }
+
+    /// Returns an absolute Unicode character index of the first character of the weakly referred
+    /// token's string inside the specified [source code text](crate::lexis::SourceCode).
+    ///
+    /// Returns [None] if this TokenRef is not a valid reference for the specified `code` instance.
+    ///
+    /// Use [is_valid_ref](crate::lexis::TokenRef::is_valid_ref) to check TokenRef validity.
+    ///
+    /// This function uses the [`SourceCode::get_site`](crate::lexis::SourceCode::get_site)
+    /// function under the hood.
+    #[inline(always)]
+    pub fn site(&self, code: &impl SourceCode) -> Option<Site> {
+        if &self.id != code.id() {
+            return None;
+        }
+
+        code.get_site(&self.chunk_ref)
+    }
+
+    /// Returns a token string of the weakly referred token from the specified
+    /// [source code text](crate::lexis::SourceCode).
+    ///
+    /// Returns [None] if this TokenRef is not a valid reference for the specified `code` instance.
+    ///
+    /// Use [is_valid_ref](crate::lexis::TokenRef::is_valid_ref) to check TokenRef validity.
+    ///
+    /// This function uses the [`SourceCode::get_string`](crate::lexis::SourceCode::get_string)
+    /// function under the hood.
+    #[inline(always)]
+    pub fn string<'code, T: Token>(
+        &self,
+        code: &'code impl SourceCode<Token = T>,
+    ) -> Option<&'code str> {
+        if &self.id != code.id() {
+            return None;
+        }
+
+        code.get_string(&self.chunk_ref)
+    }
+
+    /// Returns the number of Unicode characters of the string of the weakly referred token from
+    /// the specified [source code text](crate::lexis::SourceCode).
+    ///
+    /// Returns [None] if this TokenRef is not a valid reference for the specified `code` instance.
+    ///
+    /// Use [is_valid_ref](crate::lexis::TokenRef::is_valid_ref) to check TokenRef validity.
+    ///
+    /// This function uses the [`SourceCode::get_length`](crate::lexis::SourceCode::get_length)
+    /// function under the hood.
+    #[inline(always)]
+    pub fn length(&self, code: &impl SourceCode) -> Option<Length> {
+        if &self.id != code.id() {
+            return None;
+        }
+
+        code.get_length(&self.chunk_ref)
+    }
+
+    /// Returns `true` if and only if the referred weak Token reference belongs to the specified
+    /// [SourceCode](crate::lexis::SourceCode), and the referred Token exists in this SourceCode
+    /// instance.
+    ///
+    /// If this function returns `true`, all dereference functions would return meaningful [Some]
+    /// values, otherwise these functions return [None].
+    ///
+    /// This function uses the [`SourceCode::contains`](crate::lexis::SourceCode::contains)
+    /// function under the hood.
+    #[inline(always)]
+    pub fn is_valid_ref(&self, code: &impl SourceCode) -> bool {
+        &self.id == code.id() && code.contains(&self.chunk_ref)
+    }
+
+    /// Turns this weak reference into a weak [SiteRef](crate::lexis::SiteRef) reference of the
+    /// [site index](crate::lexis::ToSite) of the Token string's first character.
+    ///
+    /// The returned [SiteRef](crate::lexis::SiteRef) weak reference is a valid reference if and
+    /// only if this TokenRef is a valid weak reference too.
+    ///
+    /// This function never fails; it is fine to call it on invalid references (and on
+    /// [Nil](crate::lexis::TokenRef::nil) references in particular).
+    #[inline(always)]
+    pub fn site_ref(self) -> SiteRef {
+        SiteRef::new_chunk_start(self)
+    }
+}
diff --git a/work/crates/main/src/lexis/utils.rs b/work/crates/main/src/lexis/utils.rs
new file mode 100644
index 0000000..9e5c4ea
--- /dev/null
+++ b/work/crates/main/src/lexis/utils.rs
@@ -0,0 +1,100 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository).
// +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{lexis::Site, std::*}; + +pub(crate) const NULL: char = '\0'; + +// Safety: `characters` is not empty. +#[inline(always)] +pub(crate) unsafe fn get_lexis_character(mut characters: Chars<'_>) -> char { + match characters.next() { + Some(character) => { + if character == NULL { + return char::REPLACEMENT_CHARACTER; + } + + character + } + + None => { + #[cfg(debug_assertions)] + { + unreachable!("Internal error. Empty characters iterator."); + } + + #[allow(unreachable_code)] + unsafe { + unreachable_unchecked() + } + } + } +} + +#[inline] +pub(crate) fn split_left(string: &str, mut site: Site) -> &str { + if site == 0 { + return ""; + } + + for (index, _) in string.char_indices() { + if site == 0 { + return unsafe { string.get_unchecked(0..index) }; + } + + site -= 1; + } + + string +} + +#[inline] +pub(crate) fn split_right(string: &str, mut site: Site) -> &str { + if site == 0 { + return string; + } + + for (index, _) in string.char_indices() { + if site == 0 { + return unsafe { string.get_unchecked(index..string.len()) }; + } + + site -= 1; + } + + "" +} diff --git a/work/crates/main/src/lib.rs b/work/crates/main/src/lib.rs new file mode 100644 index 0000000..661b40b --- /dev/null +++ b/work/crates/main/src/lib.rs @@ -0,0 +1,56 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. 
// +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +#![doc = include_str!("../readme.md")] +//TODO check warnings regularly +#![allow(warnings)] +#![allow(unused_unsafe)] +#![deny(missing_docs)] +#![no_implicit_prelude] +#![cfg_attr(not(feature = "std"), no_std)] + +pub mod arena; +mod std; +//todo remove +// mod incremental; +mod incremental; +pub mod lexis; +pub mod syntax; + +pub use crate::incremental::Document; + +extern crate self as lady_deirdre; diff --git a/work/crates/main/src/std.rs b/work/crates/main/src/std.rs new file mode 100644 index 0000000..ce93124 --- /dev/null +++ b/work/crates/main/src/std.rs @@ -0,0 +1,198 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +//TODO cleanup unused reexports. 
+ +#[cfg(not(feature = "std"))] +extern crate alloc; +#[cfg(not(feature = "std"))] +extern crate core; +#[cfg(not(feature = "std"))] +pub use alloc::{ + borrow::{Borrow, Cow}, + boxed::Box, + collections::{ + btree_map::Iter as BTreeMapIter, + btree_map::Range as BTreeMapRange, + btree_map::RangeMut as BTreeMapRangeMut, + BTreeMap, + LinkedList, + VecDeque, + }, + format, + rc::{Rc, Weak as SyncWeak}, + string::{String, ToString}, + sync::Arc, + vec::{IntoIter, Vec}, +}; +#[cfg(not(feature = "std"))] +pub use core::{ + any::{Any, TypeId}, + assert_eq, + borrow::{Borrow, BorrowMut, Cow, ToOwned}, + cell::UnsafeCell, + clone::Clone, + cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd}, + convert::{AsRef, From, Into}, + debug_assert, + debug_assert_eq, + debug_assert_ne, + default::Default, + fmt::{Debug, Display, Formatter, Result as FmtResult}, + hint::unreachable_unchecked, + iter::{ + DoubleEndedIterator, + Enumerate, + ExactSizeIterator, + FilterMap, + FromIterator, + FusedIterator, + IntoIterator, + Iterator, + Map, + }, + marker::{PhantomData, Sized, Sync}, + matches, + mem::{forget, replace, take, transmute, ManuallyDrop, MaybeUninit}, + num::NonZeroUsize, + ops::{ + AddAssign, + Deref, + Drop, + Fn, + FnMut, + Index, + IndexMut, + Range, + RangeFrom, + RangeFull, + RangeInclusive, + RangeTo, + RangeToInclusive, + }, + option::Option, + option::Option::*, + panic, + ptr::{copy, copy_nonoverlapping, swap, NonNull}, + result::Result, + result::Result::{Err, Ok}, + slice::Iter, + str::Chars, + sync::atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering}, + todo, + unimplemented, + unreachable, +}; +#[cfg(feature = "std")] +extern crate std; +#[cfg(feature = "std")] +pub use std::{ + any::{Any, TypeId}, + assert_eq, + borrow::{Borrow, BorrowMut, Cow, ToOwned}, + boxed::Box, + cell::UnsafeCell, + clone::Clone, + cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd}, + collections::{ + btree_map::Iter as BTreeMapIter, + btree_map::Range as BTreeMapRange, + btree_map::RangeMut as BTreeMapRangeMut, + BTreeMap, + LinkedList, + VecDeque, + }, + convert::{AsRef, From, Into}, + debug_assert, + debug_assert_eq, + debug_assert_ne, + default::Default, + fmt::{Debug, Display, Error as FmtError, Formatter, Result as FmtResult}, + format, + hint::unreachable_unchecked, + iter::{ + DoubleEndedIterator, + Enumerate, + ExactSizeIterator, + FilterMap, + FromIterator, + FusedIterator, + IntoIterator, + Iterator, + Map, + }, + marker::{Copy, PhantomData, Sized, Sync}, + matches, + mem::{forget, replace, take, transmute, ManuallyDrop, MaybeUninit}, + num::NonZeroUsize, + ops::{ + AddAssign, + Deref, + Drop, + Fn, + FnMut, + Index, + IndexMut, + Range, + RangeFrom, + RangeFull, + RangeInclusive, + RangeTo, + RangeToInclusive, + }, + option::Option, + option::Option::*, + panic, + println, + ptr::{copy, copy_nonoverlapping, swap, NonNull}, + rc::{Rc, Weak as SyncWeak}, + result::Result, + result::Result::{Err, Ok}, + slice::Iter, + str::Chars, + string::{String, ToString}, + sync::{ + atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering}, + Arc, + Weak as AsyncWeak, + }, + todo, + unimplemented, + unreachable, + vec::IntoIter, + vec::Vec, +}; diff --git a/work/crates/main/src/syntax/buffer.rs b/work/crates/main/src/syntax/buffer.rs new file mode 100644 index 0000000..2429da5 --- /dev/null +++ b/work/crates/main/src/syntax/buffer.rs @@ -0,0 +1,222 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler 
front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Id, Identifiable, Ref, Repository, RepositoryIterator},
+    lexis::TokenCursor,
+    std::*,
+    syntax::{
+        session::SequentialSyntaxSession,
+        Cluster,
+        Node,
+        NodeRef,
+        SyntaxSession,
+        SyntaxTree,
+        ROOT_RULE,
+    },
+};
+
+/// A non-incrementally managed syntax structure of a compilation unit.
+///
+/// SyntaxBuffer is a simple implementation of the [SyntaxTree](crate::syntax::SyntaxTree)
+/// interface that runs a syntax grammar parser over the sequence of tokens just once to produce
+/// and to store the syntax structure of a compilation unit. In contrast to
+/// [Document](crate::Document), SyntaxBuffer does not provide source code mutation operations
+/// (incremental re-parsing operations). However, the syntax structure stored by this object is
+/// still mutable by itself; an API user can mutate its nodes manually using
+/// [Cluster](crate::syntax::Cluster) and similar mutation operations.
+///
+/// The syntax grammar of the programming language and the syntax structure type are specified by
+/// the SyntaxBuffer's generic parameter of the [Node](crate::syntax::Node) type.
+///
+/// To create a SyntaxBuffer use the [`Node::parse`](crate::syntax::Node::parse) function.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     lexis::{TokenBuffer, SimpleToken, SourceCode, Token},
+///     syntax::{SyntaxBuffer, SimpleNode, SyntaxTree, NodeRef, Node},
+/// };
+///
+/// let token_buffer = SimpleToken::parse("foo({bar}[baz])");
+/// let syntax_buffer = SimpleNode::parse(token_buffer.cursor(..));
+///
+/// fn format(tree: &impl SyntaxTree<Node = SimpleNode>, node: &NodeRef) -> String {
+///     let node = node.deref(tree).unwrap();
+///
+///     let inner = node
+///         .inner()
+///         .iter()
+///         .map(|inner_node_ref: &NodeRef| format(tree, inner_node_ref))
+///         .collect::<Vec<_>>()
+///         .join("");
+///
+///     match node {
+///         SimpleNode::Root { .. } => inner,
+///         SimpleNode::Parenthesis { .. } => format!("({})", inner),
+///         SimpleNode::Brackets { .. } => format!("[{}]", inner),
+///         SimpleNode::Braces { .. } => format!("{{{}}}", inner),
+///     }
+/// }
+///
+/// assert_eq!("({}[])", format(&syntax_buffer, syntax_buffer.root()));
+/// ```
+pub struct SyntaxBuffer<N: Node> {
+    id: Id,
+    root: NodeRef,
+    cluster: Cluster<N>,
+}
+
+impl<N: Node> PartialEq for SyntaxBuffer<N> {
+    #[inline(always)]
+    fn eq(&self, other: &Self) -> bool {
+        self.id.eq(&other.id)
+    }
+}
+
+impl<N: Node> Eq for SyntaxBuffer<N> {}
+
+impl<N: Node> Debug for SyntaxBuffer<N> {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        formatter
+            .debug_struct("SyntaxBuffer")
+            .field("id", &self.id())
+            .finish_non_exhaustive()
+    }
+}
+
+impl<N: Node> Identifiable for SyntaxBuffer<N> {
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        &self.id
+    }
+}
+
+impl<N: Node> SyntaxTree for SyntaxBuffer<N> {
+    type Node = N;
+
+    type ErrorIterator<'tree> = BufferErrorIterator<'tree, Self::Node>;
+
+    #[inline(always)]
+    fn root(&self) -> &NodeRef {
+        &self.root
+    }
+
+    #[inline(always)]
+    fn errors(&self) -> Self::ErrorIterator<'_> {
+        BufferErrorIterator {
+            id: &self.id,
+            inner: (&self.cluster.errors).into_iter(),
+        }
+    }
+
+    #[inline(always)]
+    fn contains(&self, cluster_ref: &Ref) -> bool {
+        match cluster_ref {
+            Ref::Primary => true,
+            _ => false,
+        }
+    }
+
+    #[inline(always)]
+    fn get_cluster(&self, cluster_ref: &Ref) -> Option<&Cluster<Self::Node>> {
+        match cluster_ref {
+            Ref::Primary => Some(&self.cluster),
+
+            _ => None,
+        }
+    }
+
+    #[inline(always)]
+    fn get_cluster_mut(&mut self, cluster_ref: &Ref) -> Option<&mut Cluster<Self::Node>> {
+        match cluster_ref {
+            Ref::Primary => Some(&mut self.cluster),
+
+            _ => None,
+        }
+    }
+}
+
+impl<N: Node> SyntaxBuffer<N> {
+    pub(super) fn new<'code>(
+        token_cursor: impl TokenCursor<'code, Token = <N as Node>::Token>,
+    ) -> Self {
+        let id = Id::new();
+
+        let mut session = SequentialSyntaxSession {
+            id,
+            primary: None,
+            nodes: Repository::with_capacity(1),
+            errors: Repository::default(),
+            token_cursor,
+            _code_lifetime: Default::default(),
+        };
+
+        let root = session.descend(ROOT_RULE);
+
+        let cluster = Cluster {
+            primary: unsafe { session.primary.unwrap_unchecked() },
+            nodes: session.nodes,
+            errors: session.errors,
+        };
+
+        Self { id, root, cluster }
+    }
+}
+
+pub struct BufferErrorIterator<'tree, N: Node> {
+    pub(super) id: &'tree Id,
+    pub(super) inner: RepositoryIterator<'tree, N::Error>,
+}
+
+impl<'tree, N: Node> Identifiable for BufferErrorIterator<'tree, N> {
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        self.id
+    }
+}
+
+impl<'tree, N: Node> Iterator for BufferErrorIterator<'tree, N> {
+    type Item = &'tree N::Error;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+impl<'tree, N: Node> FusedIterator for BufferErrorIterator<'tree, N> {}
diff --git a/work/crates/main/src/syntax/cluster.rs b/work/crates/main/src/syntax/cluster.rs
new file mode 100644
index 0000000..4959b8c
--- /dev/null
+++ b/work/crates/main/src/syntax/cluster.rs
@@ -0,0 +1,332 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Id, Identifiable, Ref, Repository},
+    std::*,
+    syntax::{ErrorRef, Node, NodeRef, SyntaxTree},
+};
+
+/// An ownership object of a part of the syntax structure data.
+///
+/// This is a lower-level API that organizes syntax structure persistence. An API user usually
+/// does not need to interact with the Cluster directly or to inspect its fields. For higher-level
+/// access use [NodeRef](crate::syntax::NodeRef), [ErrorRef](crate::syntax::ErrorRef), or
+/// [ClusterRef](crate::syntax::ClusterRef).
+///
+/// A syntax structure consists of a set of instances of [Node](crate::syntax::Node) objects and
+/// the syntax/semantic error objects belonging to these nodes. These objects could be split into
+/// groups called Clusters. It is up to the syntax structure manager's design to decide how to
+/// spread these instances between clusters, and how many clusters to use per single compilation
+/// unit. In general, a Cluster serves as a unit of caching of the syntax structure of the
+/// compilation unit. It is assumed that if an incremental reparser obsoletes a single Node of the
+/// syntax tree, it obsoletes the entire Cluster this Node belongs to altogether.
+///
+/// For example, since the [SyntaxBuffer](crate::syntax::SyntaxBuffer) does not provide any
+/// incremental reparsing capabilities, it uses only a single Cluster to store all of the Nodes
+/// and the syntax/semantic error objects of the syntax tree. Whereas the
+/// [Document](crate::Document) object, being an incrementally managed compiler with reparsing
+/// operations, splits nodes and syntax/semantic errors between many clusters more granularly.
+///
+/// If you are developing an incrementally compiled system, in general you should not rely on the
+/// particular granularity of the system of clusters. Your system should expect that any node or
+/// error object could become obsolete at any time. The syntax structure manager does not have to
+/// provide particular splitting guarantees.
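+///
+/// A minimal sketch of this caveat (a sketch only: it assumes `NodeRef` exposes the same
+/// `is_valid_ref` validity check that the other weak reference types of this crate provide):
+///
+/// ```rust
+/// use lady_deirdre::{Document, syntax::{SimpleNode, SyntaxTree}};
+///
+/// let mut doc = Document::<SimpleNode>::from("[]{}()");
+///
+/// // A weak reference to the braces node (`{}`).
+/// let braces_node_ref = doc.root().deref(&doc).unwrap().inner()[1];
+///
+/// // An edit inside the braces pair may or may not obsolete the cluster holding this node.
+/// doc.write(3..3, "()");
+///
+/// // Robust code tolerates both outcomes instead of assuming a particular granularity.
+/// let _maybe_valid = braces_node_ref.is_valid_ref(&doc);
+/// ```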
+///
+/// Note that regardless of the incremental capabilities of a compilation unit manager, the
+/// Cluster object is a mutable object, and so are all of the mutable operations of the related
+/// weak references (such as [ClusterRef](crate::syntax::ClusterRef),
+/// [NodeRef](crate::syntax::NodeRef), and [ErrorRef](crate::syntax::ErrorRef)).
+///
+/// The object consists of two [Repositories](crate::arena::Repository) that store nodes and
+/// errors in arbitrary order that are considered to be "secondary" objects of this Cluster, and
+/// one single "primary" Node instance.
+pub struct Cluster<N: Node> {
+    /// A single "selected" node of the cluster that is considered to be the primary descriptive
+    /// node of this cluster's data.
+    ///
+    /// All other nodes of the Cluster are considered to be helper nodes that together with the
+    /// primary one build up a part of the syntax tree.
+    ///
+    /// There are no particular rules on how to select this node, but it is assumed that the
+    /// [`secondary nodes`](Cluster::nodes) are logically closely related to the primary one.
+    ///
+    /// By convention this node is referred to within the
+    /// [`Ref::Primary`](crate::arena::Ref::Primary) low-level reference.
+    pub primary: N,
+
+    /// A set of the "secondary" nodes that together with the [primary](Cluster::primary) node
+    /// build up a part of the syntax tree.
+    ///
+    /// By convention these nodes are referred to within the
+    /// [`Ref::Repository`](crate::arena::Ref::Repository) low-level reference.
+    pub nodes: Repository<N>,
+
+    /// A set of syntax and semantic errors logically related to the nodes of this cluster.
+    ///
+    /// By convention these errors are referred to within the
+    /// [`Ref::Repository`](crate::arena::Ref::Repository) low-level reference.
+    pub errors: Repository<N::Error>,
+}
+
+impl<N: Node + Debug> Debug for Cluster<N>
+where
+    N::Error: Debug,
+{
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        formatter
+            .debug_struct("Cluster")
+            .field("primary", &self.primary)
+            .field("nodes", &self.nodes)
+            .field("errors", &self.errors)
+            .finish_non_exhaustive()
+    }
+}
+
+/// A weak reference of the [Cluster] inside the syntax tree.
+///
+/// This object represents a long-lived, lifetime-independent and type-independent, cheap to
+/// [Copy](::std::marker::Copy), safe weak reference into the source code syntax structure.
+///
+/// ```rust
+/// use lady_deirdre::{Document, syntax::{SimpleNode, SyntaxTree, NodeRef, Cluster}};
+///
+/// let doc = Document::<SimpleNode>::from("[]{}()");
+///
+/// let braces_node_ref = &doc.root().deref(&doc).unwrap().inner()[1];
+/// let braces_cluster_ref = braces_node_ref.cluster();
+/// let braces_cluster = braces_cluster_ref.deref(&doc).unwrap();
+///
+/// assert_eq!(&braces_cluster.primary, braces_node_ref.deref(&doc).unwrap());
+/// ```
+///
+/// An API user normally does not need to inspect ClusterRef's inner fields manually or to
+/// construct a ClusterRef manually unless they are working on a Crate API Extension.
+///
+/// For details on the Weak references framework design see the [Arena](crate::arena) module
+/// documentation.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct ClusterRef {
+    /// An [identifier](crate::arena::Id) of the [SyntaxTree](crate::syntax::SyntaxTree) instance
+    /// this weakly referred Cluster belongs to.
+    pub id: Id,
+
+    /// An internal weak reference of the cluster into the [SyntaxTree](crate::syntax::SyntaxTree)
+    /// instance.
+    ///
+    /// This low-level [Ref](crate::arena::Ref) object is used by the ClusterRef under the hood to
+    /// fetch particular values from the SyntaxTree dereferencing functions (e.g.
+    /// [`SyntaxTree::get_cluster`](crate::syntax::SyntaxTree::get_cluster)).
+    pub cluster_ref: Ref,
+}
+
+impl Debug for ClusterRef {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        match self.is_nil() {
+            false => formatter.write_fmt(format_args!("ClusterRef({:?})", self.id())),
+            true => formatter.write_str("ClusterRef(Nil)"),
+        }
+    }
+}
+
+impl Identifiable for ClusterRef {
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        &self.id
+    }
+}
+
+impl ClusterRef {
+    /// Returns an invalid instance of the ClusterRef.
+    ///
+    /// This instance never resolves to a valid [Cluster].
+    #[inline(always)]
+    pub const fn nil() -> Self {
+        Self {
+            id: *Id::nil(),
+            cluster_ref: Ref::Nil,
+        }
+    }
+
+    /// Returns `true` if this instance will never resolve to a valid [Cluster].
+    ///
+    /// It is guaranteed that `ClusterRef::nil().is_nil()` is always `true`, but in general
+    /// if this function returns `false` it is not guaranteed that the provided instance is a
+    /// valid reference.
+    ///
+    /// To determine reference validity per specified [SyntaxTree](crate::syntax::SyntaxTree)
+    /// instance use the [is_valid_ref](crate::syntax::ClusterRef::is_valid_ref) function instead.
+    #[inline(always)]
+    pub const fn is_nil(&self) -> bool {
+        self.id.is_nil() || self.cluster_ref.is_nil()
+    }
+
+    /// Immutably dereferences the weakly referred [Cluster] of the specified
+    /// [SyntaxTree](crate::syntax::SyntaxTree).
+    ///
+    /// Returns [None] if this ClusterRef is not a valid reference for the specified `tree`
+    /// instance.
+    ///
+    /// Use [is_valid_ref](crate::syntax::ClusterRef::is_valid_ref) to check ClusterRef validity.
+    ///
+    /// This function uses the [`SyntaxTree::get_cluster`](crate::syntax::SyntaxTree::get_cluster)
+    /// function under the hood.
+    #[inline(always)]
+    pub fn deref<'tree, N: Node>(
+        &self,
+        tree: &'tree impl SyntaxTree<Node = N>,
+    ) -> Option<&'tree Cluster<N>> {
+        if &self.id != tree.id() {
+            return None;
+        }
+
+        tree.get_cluster(&self.cluster_ref)
+    }
+
+    /// Mutably dereferences the weakly referred [Cluster] of the specified
+    /// [SyntaxTree](crate::syntax::SyntaxTree).
+    ///
+    /// Returns [None] if this ClusterRef is not a valid reference for the specified `tree`
+    /// instance.
+    ///
+    /// Use [is_valid_ref](crate::syntax::ClusterRef::is_valid_ref) to check ClusterRef validity.
+    ///
+    /// This function uses the
+    /// [`SyntaxTree::get_cluster_mut`](crate::syntax::SyntaxTree::get_cluster_mut) function under
+    /// the hood.
+    #[inline(always)]
+    pub fn deref_mut<'tree, N: Node>(
+        &self,
+        tree: &'tree mut impl SyntaxTree<Node = N>,
+    ) -> Option<&'tree mut Cluster<N>> {
+        if &self.id != tree.id() {
+            return None;
+        }
+
+        tree.get_cluster_mut(&self.cluster_ref)
+    }
+
+    /// Adds a new `node` into the weakly referred [Cluster] of the specified `tree` instance.
+    ///
+    /// This function consumes the `node` value, and adds it to the
+    /// [`Cluster::nodes`](crate::syntax::Cluster::nodes) secondary nodes repository.
+    ///
+    /// Returns a valid [NodeRef](crate::syntax::NodeRef) if this ClusterRef weak reference is a
+    /// valid reference into the specified `tree` instance. Otherwise returns an invalid NodeRef.
+    /// Use [is_valid_ref](crate::syntax::ClusterRef::is_valid_ref) to check ClusterRef validity
+    /// beforehand.
+    ///
+    /// This function uses the
+    /// [`SyntaxTree::get_cluster_mut`](crate::syntax::SyntaxTree::get_cluster_mut) function under
+    /// the hood.
+    ///
+    /// Note that the added node (or any other secondary node of the cluster) can later be removed
+    /// from the cluster using the [`NodeRef::unlink`](crate::syntax::NodeRef::unlink) function.
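+    ///
+    /// A sketch of linking a hand-made secondary node (an illustration only: it assumes
+    /// `SimpleNode`'s variants expose an `inner: Vec<NodeRef>` field, as the `inner()` accessor
+    /// used throughout this module's examples suggests):
+    ///
+    /// ```rust
+    /// use lady_deirdre::{Document, syntax::{SimpleNode, SyntaxTree}};
+    ///
+    /// let mut doc = Document::<SimpleNode>::from("()");
+    ///
+    /// let cluster_ref = doc.root().cluster();
+    ///
+    /// // The field name `inner` is an assumption here.
+    /// let new_node_ref = cluster_ref.link_node(&mut doc, SimpleNode::Root { inner: vec![] });
+    ///
+    /// // The returned NodeRef dereferences to the node we just linked.
+    /// assert!(new_node_ref.deref(&doc).is_some());
+    /// ```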
+    #[inline]
+    pub fn link_node<N: Node>(&self, tree: &mut impl SyntaxTree<Node = N>, node: N) -> NodeRef {
+        if &self.id != tree.id() {
+            return NodeRef::nil();
+        }
+
+        let cluster = match tree.get_cluster_mut(&self.cluster_ref) {
+            Some(cluster) => cluster,
+
+            None => return NodeRef::nil(),
+        };
+
+        let node_ref = cluster.nodes.insert(node);
+
+        NodeRef {
+            id: self.id,
+            cluster_ref: self.cluster_ref,
+            node_ref,
+        }
+    }
+
+    /// Adds a new `error` into the weakly referred [Cluster] of the specified `tree` instance.
+    ///
+    /// This function consumes the `error` value, and adds it to the
+    /// [`Cluster::errors`](crate::syntax::Cluster::errors) syntax/semantic errors repository.
+    ///
+    /// Returns a valid [ErrorRef](crate::syntax::ErrorRef) if this ClusterRef weak reference is a
+    /// valid reference into the specified `tree` instance. Otherwise returns an invalid ErrorRef.
+    /// Use [is_valid_ref](crate::syntax::ClusterRef::is_valid_ref) to check ClusterRef validity
+    /// beforehand.
+    ///
+    /// This function uses the
+    /// [`SyntaxTree::get_cluster_mut`](crate::syntax::SyntaxTree::get_cluster_mut) function under
+    /// the hood.
+    ///
+    /// Note that the added error can later be removed from the cluster using the
+    /// [`ErrorRef::unlink`](crate::syntax::ErrorRef::unlink) function.
+    #[inline]
+    pub fn link_error<N: Node>(
+        &self,
+        tree: &mut impl SyntaxTree<Node = N>,
+        error: N::Error,
+    ) -> ErrorRef {
+        if &self.id != tree.id() {
+            return ErrorRef::nil();
+        }
+
+        let cluster = match tree.get_cluster_mut(&self.cluster_ref) {
+            Some(cluster) => cluster,
+
+            None => return ErrorRef::nil(),
+        };
+
+        let error_ref = cluster.errors.insert(error);
+
+        ErrorRef {
+            id: self.id,
+            cluster_ref: self.cluster_ref,
+            error_ref,
+        }
+    }
+
+    /// Returns `true` if and only if the referred weak Cluster reference belongs to the specified
+    /// [SyntaxTree](crate::syntax::SyntaxTree), and the referred Cluster exists in this
+    /// SyntaxTree instance.
+    ///
+    /// This function uses the [`SyntaxTree::contains`](crate::syntax::SyntaxTree::contains)
+    /// function under the hood.
+    #[inline(always)]
+    pub fn is_valid_ref(&self, tree: &impl SyntaxTree) -> bool {
+        if &self.id != tree.id() {
+            return false;
+        }
+
+        tree.contains(&self.cluster_ref)
+    }
+}
diff --git a/work/crates/main/src/syntax/error.rs b/work/crates/main/src/syntax/error.rs
new file mode 100644
index 0000000..cf9e64c
--- /dev/null
+++ b/work/crates/main/src/syntax/error.rs
@@ -0,0 +1,489 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Id, Identifiable, Ref},
+    lexis::SiteRefSpan,
+    std::*,
+    syntax::{ClusterRef, Node, SyntaxTree},
+};
+
+/// A base syntax parse error object.
+///
+/// All custom syntax/semantic error types must be convertible [From](::std::convert::From) this
+/// object.
+///
+/// SyntaxError implements the [Display](::std::fmt::Display) trait to provide a default syntax
+/// error formatter, but an API user is encouraged to implement a custom formatter to better
+/// represent the semantics of a particular programming language.
+///
+/// ```rust
+/// use lady_deirdre::syntax::SyntaxError;
+///
+/// enum CustomError {
+///     SyntaxError(SyntaxError),
+///     SemanticError(&'static str),
+/// }
+///
+/// impl From<SyntaxError> for CustomError {
+///     fn from(err: SyntaxError) -> Self {
+///         Self::SyntaxError(err)
+///     }
+/// }
+/// ```
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum SyntaxError {
+    /// A parse rule `context` did not expect continuation of the token input sequence.
+    ///
+    /// Usually this parse error indicates that the parser (semi-)successfully parsed the input
+    /// sequence, but in the end it matched tail tokens that do not fit any top-level parse rules.
+    ///
+    /// **Example:**
+    ///
+    /// ```text
+    /// fn main() { foo(); }
+    ///
+    /// fn foo() {}
+    ///
+    /// bar
+    /// ^^^ Unexpected end of input.
+    /// ```
+    UnexpectedEndOfInput {
+        /// A [site](crate::lexis::Site) reference span of where the rule has failed.
+        ///
+        /// Usually this span is the input's tail site.
+        span: SiteRefSpan,
+
+        /// A name of the rule that has failed.
+        context: &'static str,
+    },
+
+    /// A parse rule `context` expected a `token` in the specified `span`.
+    ///
+    /// Usually this parse error indicates that a specific parse rule expected a particular token
+    /// in a particular place, and decided to recover from this error using the "insert" recovery
+    /// strategy (by virtually skipping this unambiguous sub-rule and switching to the next
+    /// sub-rule).
+    ///
+    /// **Example:**
+    ///
+    /// ```text
+    /// fn main() { foo(10 20); }
+    ///                   ^^^ Missing token ",".
+    ///
+    /// fn foo(x: usize, y: usize) {}
+    /// ```
+    MissingToken {
+        /// A [site](crate::lexis::Site) reference span of where the rule has failed.
+        ///
+        /// Usually this span is just a single Site.
+        span: SiteRefSpan,
+
+        /// A name of the rule that has failed.
+        context: &'static str,
+
+        /// A name of the expected (mismatched) token.
+        token: &'static str,
+    },
+
+    /// A parse rule `context` expected a `rule` in the specified `span`.
+    ///
+    /// Usually this parse error indicates that a specific parse rule expected a particular named
+    /// rule in a particular place to descend to, and decided to recover from this error using the
+    /// "insert" recovery strategy (by virtually skipping this unambiguous sub-rule and switching
+    /// to the next sub-rule).
+    ///
+    /// **Example:**
+    ///
+    /// ```text
+    /// fn main() { foo(10, ); }
+    ///                     ^^^ Missing rule "Rust expression".
+    ///
+    /// fn foo(x: usize, y: usize) {}
+    /// ```
+    MissingRule {
+        /// A [site](crate::lexis::Site) reference span of where the rule has failed.
+        ///
+        /// Usually this span is just a single Site.
+        span: SiteRefSpan,
+
+        /// A name of the rule that has failed.
+        context: &'static str,
+
+        /// A name of the expected (mismatched) rule.
+        rule: &'static str,
+    },
+
+    /// A parse rule `context` expected a set of tokens and/or a set of parse rules in the
+    /// specified `span`.
+    ///
+    /// Usually this parse error indicates that a specific parse rule failed to match a specific
+    /// set of possible tokens and/or named rules to descend to, due to ambiguity between possible
+    /// rules at the specified parse position. The rule decided to recover from this error using
+    /// the "panic" recovery strategy (by virtually skipping a number of tokens ahead until an
+    /// expected token is found, or just by skipping a number of tokens in some parse context and
+    /// then skipping the specified sub-rule).
+    ///
+    /// **Example:**
+    ///
+    /// ```text
+    /// fn main() { foo(10, 20; }
+    ///                       ^ Mismatch. ")" or any other expression operator expected,
+    ///                         but ";" found.
+    ///
+    /// fn foo(x: usize, y: usize) {}
+    /// ```
+    Mismatch {
+        /// A [site](crate::lexis::Site) reference span of where the rule has failed.
+        span: SiteRefSpan,
+
+        /// A name of the rule that has failed.
+        context: &'static str,
+
+        /// A set of tokens that the parser expected.
+        ///
+        /// A possibly empty set.
+        expected_tokens: Vec<&'static str>,
+
+        /// A set of named rules that the parser expected to descend to.
+        ///
+        /// A possibly empty set.
+        expected_rules: Vec<&'static str>,
+    },
+}
+
+impl Display for SyntaxError {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        match self {
+            Self::UnexpectedEndOfInput { context, .. } => {
+                formatter.write_str(&format!("{} unexpected end of input.", context))
+            }
+
+            Self::MissingToken { context, token, .. } => {
+                formatter.write_str(&format!("Missing ${} in {}.", token, context))
+            }
+
+            Self::MissingRule { context, rule, .. } => {
+                formatter.write_str(&format!("Missing {} in {}.", rule, context))
+            }
+
+            Self::Mismatch {
+                context,
+                expected_tokens,
+                expected_rules,
+                ..
+            } => {
+                let mut expected_tokens = expected_tokens
+                    .iter()
+                    .map(|token| format!("${}", token))
+                    .collect::<Vec<_>>();
+                expected_tokens.sort();
+
+                let mut expected_rules = expected_rules
+                    .iter()
+                    .map(|rule| rule.to_string())
+                    .collect::<Vec<_>>();
+                expected_rules.sort();
+
+                let expected_len = expected_tokens.len() + expected_rules.len();
+
+                let expected = expected_rules
+                    .into_iter()
+                    .chain(expected_tokens.into_iter());
+
+                formatter.write_str(context)?;
+                formatter.write_str(" format mismatch.")?;
+
+                if expected_len > 0 {
+                    formatter.write_str(" Expected ")?;
+
+                    let last = expected_len - 1;
+
+                    let is_multi = last > 1;
+
+                    for (index, expected) in expected.enumerate() {
+                        let is_first = index == 0;
+                        let is_last = index == last;
+
+                        match (is_first, is_last, is_multi) {
+                            (true, _, _) => (),
+                            (false, false, _) => formatter.write_str(", ")?,
+                            (false, true, true) => formatter.write_str(", or ")?,
+                            (false, true, false) => formatter.write_str(" or ")?,
+                        }
+
+                        formatter.write_str(&expected)?;
+                    }
+
+                    formatter.write_str(".")?;
+                }
+
+                Ok(())
+            }
+        }
+    }
+}
+
+impl SyntaxError {
+    /// A [site](crate::lexis::Site) reference span of where the rule has failed.
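+    ///
+    /// A short sketch (the span is constructed with `SiteRef::nil()..SiteRef::nil()` as in the
+    /// [ErrorRef] example below; the `Range`-style `.start` access and `SiteRef::is_nil` are
+    /// assumptions about the [SiteRefSpan](crate::lexis::SiteRefSpan) shape):
+    ///
+    /// ```rust
+    /// use lady_deirdre::{lexis::SiteRef, syntax::SyntaxError};
+    ///
+    /// let error = SyntaxError::UnexpectedEndOfInput {
+    ///     span: SiteRef::nil()..SiteRef::nil(),
+    ///     context: "EXAMPLE",
+    /// };
+    ///
+    /// assert_eq!(error.context(), "EXAMPLE");
+    ///
+    /// // Assumption: SiteRefSpan is a Range of SiteRef endpoints.
+    /// assert!(error.span().start.is_nil());
+    /// ```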
+    #[inline(always)]
+    pub fn span(&self) -> &SiteRefSpan {
+        match self {
+            Self::UnexpectedEndOfInput { span, .. } => span,
+            Self::MissingToken { span, .. } => span,
+            Self::MissingRule { span, .. } => span,
+            Self::Mismatch { span, .. } => span,
+        }
+    }
+
+    /// A name of the rule that has failed.
+    #[inline(always)]
+    pub fn context(&self) -> &'static str {
+        match self {
+            Self::UnexpectedEndOfInput { context, .. } => context,
+            Self::MissingToken { context, .. } => context,
+            Self::MissingRule { context, .. } => context,
+            Self::Mismatch { context, .. } => context,
+        }
+    }
+}
+
+/// A weak reference of the syntax/semantic error object inside the syntax tree.
+///
+/// This object represents a long-lived, lifetime-independent and type-independent, cheap to
+/// [Copy](::std::marker::Copy), safe weak reference into the syntax structure of the source code.
+///
+/// ErrorRef is capable of surviving source code incremental changes that happen outside of the
+/// part of the syntax tree this error belongs to.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     Document,
+///     syntax::{SimpleNode, SyntaxTree, SyntaxError},
+///     lexis::SiteRef,
+/// };
+///
+/// let mut doc = Document::<SimpleNode>::from("foo bar");
+///
+/// let new_custom_error_ref = doc.root().cluster().link_error(
+///     &mut doc,
+///     SyntaxError::UnexpectedEndOfInput {
+///         span: SiteRef::nil()..SiteRef::nil(),
+///         context: "BAZ",
+///     },
+/// );
+///
+/// assert_eq!(
+///     new_custom_error_ref.deref(&doc).unwrap().to_string(),
+///     "BAZ unexpected end of input.",
+/// );
+///
+/// // This change touches the "root" node of the syntax tree (the only node of the tree); as
+/// // such, the referred error will not survive.
+/// doc.write(0..0, "123");
+///
+/// assert!(!new_custom_error_ref.is_valid_ref(&doc));
+/// ```
+///
+/// An API user normally does not need to inspect ErrorRef's inner fields manually or to construct
+/// an ErrorRef manually unless they are working on a Crate API Extension.
+///
+/// For details on the Weak references framework design see the [Arena](crate::arena) module
+/// documentation.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct ErrorRef {
+    /// An [identifier](crate::arena::Id) of the [SyntaxTree](crate::syntax::SyntaxTree) instance
+    /// this weakly referred error object belongs to.
+    pub id: Id,
+
+    /// An internal weak reference of the error object's [Cluster](crate::syntax::Cluster) of the
+    /// [SyntaxTree](crate::syntax::SyntaxTree) instance.
+    pub cluster_ref: Ref,
+
+    /// An internal weak reference of the error object in the
+    /// [`Cluster::errors`](crate::syntax::Cluster::errors) repository.
+    pub error_ref: Ref,
+}
+
+impl Debug for ErrorRef {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        match self.is_nil() {
+            false => formatter.write_fmt(format_args!("ErrorRef({:?})", self.id())),
+            true => formatter.write_str("ErrorRef(Nil)"),
+        }
+    }
+}
+
+impl Identifiable for ErrorRef {
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        &self.id
+    }
+}
+
+impl ErrorRef {
+    /// Returns an invalid instance of the ErrorRef.
+    ///
+    /// This instance never resolves to a valid error object.
+    #[inline(always)]
+    pub const fn nil() -> Self {
+        Self {
+            id: *Id::nil(),
+            cluster_ref: Ref::Nil,
+            error_ref: Ref::Nil,
+        }
+    }
+
+    /// Returns `true` if this instance will never resolve to a valid error object.
+    ///
+    /// It is guaranteed that `ErrorRef::nil().is_nil()` is always `true`, but in general if
+    /// this function returns `false` it is not guaranteed that the provided instance is a valid
+    /// reference.
+    ///
+    /// To determine reference validity per specified [SyntaxTree](crate::syntax::SyntaxTree)
+    /// instance use the [is_valid_ref](crate::syntax::ErrorRef::is_valid_ref) function instead.
+    #[inline(always)]
+    pub const fn is_nil(&self) -> bool {
+        self.id.is_nil() || self.cluster_ref.is_nil() || self.error_ref.is_nil()
+    }
+
+    /// Immutably dereferences the weakly referred error object of the specified
+    /// [SyntaxTree](crate::syntax::SyntaxTree).
+    ///
+    /// Returns [None] if this ErrorRef is not a valid reference for the specified `tree`
+    /// instance.
+    ///
+    /// Use [is_valid_ref](crate::syntax::ErrorRef::is_valid_ref) to check ErrorRef validity.
+    ///
+    /// This function uses the [`SyntaxTree::get_cluster`](crate::syntax::SyntaxTree::get_cluster)
+    /// function under the hood.
+    #[inline(always)]
+    pub fn deref<'tree, N: Node>(
+        &self,
+        tree: &'tree impl SyntaxTree<Node = N>,
+    ) -> Option<&'tree <N as Node>::Error> {
+        if &self.id != tree.id() {
+            return None;
+        }
+
+        match tree.get_cluster(&self.cluster_ref) {
+            None => None,
+            Some(cluster) => cluster.errors.get(&self.error_ref),
+        }
+    }
+
+    /// Mutably dereferences the weakly referred error object of the specified
+    /// [SyntaxTree](crate::syntax::SyntaxTree).
+    ///
+    /// Returns [None] if this ErrorRef is not a valid reference for the specified `tree`
+    /// instance.
+    ///
+    /// Use [is_valid_ref](crate::syntax::ErrorRef::is_valid_ref) to check ErrorRef validity.
+    ///
+    /// This function uses the
+    /// [`SyntaxTree::get_cluster_mut`](crate::syntax::SyntaxTree::get_cluster_mut) function under
+    /// the hood.
+    #[inline(always)]
+    pub fn deref_mut<'tree, N: Node>(
+        &self,
+        tree: &'tree mut impl SyntaxTree<Node = N>,
+    ) -> Option<&'tree mut <N as Node>::Error> {
+        if &self.id != tree.id() {
+            return None;
+        }
+
+        match tree.get_cluster_mut(&self.cluster_ref) {
+            None => None,
+            Some(data) => data.errors.get_mut(&self.error_ref),
+        }
+    }
+
+    /// Creates a weak reference of the [Cluster](crate::syntax::Cluster) of the referred error
+    /// object.
+    #[inline(always)]
+    pub fn cluster(&self) -> ClusterRef {
+        ClusterRef {
+            id: self.id,
+            cluster_ref: self.cluster_ref,
+        }
+    }
+
+    /// Removes an instance of the error object from the [SyntaxTree](crate::syntax::SyntaxTree)
+    /// that is weakly referred to by this reference.
+    ///
+    /// Returns [Some] value of the error object if this weak reference is a valid reference of an
+    /// existing error object inside the `tree` instance. Otherwise returns [None].
+    ///
+    /// Use [is_valid_ref](crate::syntax::ErrorRef::is_valid_ref) to check ErrorRef validity.
+    ///
+    /// This function uses the
+    /// [`SyntaxTree::get_cluster_mut`](crate::syntax::SyntaxTree::get_cluster_mut) function under
+    /// the hood.
+    #[inline(always)]
+    pub fn unlink<N: Node>(
+        &self,
+        tree: &mut impl SyntaxTree<Node = N>,
+    ) -> Option<<N as Node>::Error> {
+        if &self.id != tree.id() {
+            return None;
+        }
+
+        match tree.get_cluster_mut(&self.cluster_ref) {
+            None => None,
+            Some(data) => data.errors.remove(&self.error_ref),
+        }
+    }
+
+    /// Returns `true` if and only if the weakly referred error object belongs to the specified
+    /// [SyntaxTree](crate::syntax::SyntaxTree), and the referred error object exists in this
+    /// SyntaxTree instance.
+    ///
+    /// If this function returns `true`, all dereference functions would return meaningful [Some]
+    /// values, otherwise these functions return [None].
+    ///
+    /// This function uses the [`SyntaxTree::get_cluster`](crate::syntax::SyntaxTree::get_cluster)
+    /// function under the hood.
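+    ///
+    /// A short sketch (reusing the `link_error` flow from the [ErrorRef] type documentation
+    /// above):
+    ///
+    /// ```rust
+    /// use lady_deirdre::{
+    ///     Document,
+    ///     lexis::SiteRef,
+    ///     syntax::{SimpleNode, SyntaxError, SyntaxTree},
+    /// };
+    ///
+    /// let mut doc = Document::<SimpleNode>::from("()");
+    ///
+    /// let error_ref = doc.root().cluster().link_error(
+    ///     &mut doc,
+    ///     SyntaxError::UnexpectedEndOfInput {
+    ///         span: SiteRef::nil()..SiteRef::nil(),
+    ///         context: "EXAMPLE",
+    ///     },
+    /// );
+    ///
+    /// assert!(error_ref.is_valid_ref(&doc));
+    ///
+    /// // Unlinking removes the error object, so the weak reference no longer resolves.
+    /// let _ = error_ref.unlink(&mut doc);
+    /// assert!(!error_ref.is_valid_ref(&doc));
+    /// ```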
+ #[inline(always)] + pub fn is_valid_ref(&self, tree: &impl SyntaxTree) -> bool { + if &self.id != tree.id() { + return false; + } + + match tree.get_cluster(&self.cluster_ref) { + None => false, + Some(cluster) => cluster.errors.contains(&self.error_ref), + } + } +} diff --git a/work/crates/main/src/syntax/mod.rs b/work/crates/main/src/syntax/mod.rs new file mode 100644 index 0000000..de46f31 --- /dev/null +++ b/work/crates/main/src/syntax/mod.rs @@ -0,0 +1,61 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. // +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +#![doc = include_str!("readme.md")] + +mod buffer; +mod cluster; +mod error; +mod no; +mod node; +mod session; +mod simple; +mod transducer; +mod tree; + +pub use crate::syntax::{ + buffer::SyntaxBuffer, + cluster::{Cluster, ClusterRef}, + error::{ErrorRef, SyntaxError}, + no::NoSyntax, + node::{Node, NodeRef}, + session::{SyntaxRule, SyntaxSession, ROOT_RULE}, + simple::SimpleNode, + transducer::{ParseContext, TransduceRef, Transducer}, + tree::SyntaxTree, +}; +pub(crate) use crate::syntax::{session::NON_ROOT_RULE, transducer::transduce}; diff --git a/work/crates/main/src/syntax/no.rs b/work/crates/main/src/syntax/no.rs new file mode 100644 index 0000000..997d37c --- /dev/null +++ b/work/crates/main/src/syntax/no.rs @@ -0,0 +1,100 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. 
// +// // +// The Agreement let you use this Work in commercial and non-commercial // +// purposes. Commercial use of the Work is free of charge to start, // +// but the Agreement obligates you to pay me royalties // +// under certain conditions. // +// // +// If you want to contribute into the source code of this Work, // +// the Agreement obligates you to assign me all exclusive rights to // +// the Derivative Work or contribution made by you // +// (this includes GitHub forks and pull requests to my repository). // +// // +// The Agreement does not limit rights of the third party software developers // +// as long as the third party software uses public API of this Work only, // +// and the third party software does not incorporate or distribute // +// this Work directly. // +// // +// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY // +// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES // +// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. // +// // +// If you do not or cannot agree to the terms of this Agreement, // +// do not use this Work. // +// // +// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). // +// All rights reserved. // +//////////////////////////////////////////////////////////////////////////////// + +use crate::{ + lexis::Token, + std::*, + syntax::{Node, SyntaxError, SyntaxRule, SyntaxSession}, +}; + +/// A special marker that forcefully skips syntax parsing stage. +/// +/// This object implements [Node](crate::syntax::Node) interface, but does not produce any syntax +/// data, and skips syntax parsing stage from the beginning. +/// +/// You can use this object when the syntax manager(e.g. [Document](crate::Document)) requires full +/// syntax specification, but you only need a lexical data to be managed. +/// +/// ```rust +/// use lady_deirdre::{ +/// syntax::{NoSyntax, SyntaxTree}, +/// lexis::SimpleToken, +/// Document, +/// }; +/// +/// use std::mem::size_of; +/// +/// let doc = Document::>::from("foo bar baz"); +/// +/// // Resolves to a single instance of NoSyntax of zero size. +/// assert!(doc.root().deref(&doc).is_some()); +/// assert_eq!(size_of::>(), 0) +/// ``` +#[derive(Clone, PartialEq, Eq)] +#[repr(transparent)] +pub struct NoSyntax { + _token: PhantomData, +} + +impl Debug for NoSyntax { + #[inline(always)] + fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult { + formatter.write_str("NoSyntax") + } +} + +impl Node for NoSyntax { + type Token = T; + type Error = SyntaxError; + + #[inline(always)] + fn new<'code>( + _rule: SyntaxRule, + _session: &mut impl SyntaxSession<'code, Node = Self>, + ) -> Self { + Self::nil() + } +} + +impl NoSyntax { + #[inline(always)] + pub(crate) fn nil() -> Self { + Self { + _token: PhantomData::default(), + } + } +} diff --git a/work/crates/main/src/syntax/node.rs b/work/crates/main/src/syntax/node.rs new file mode 100644 index 0000000..d14054b --- /dev/null +++ b/work/crates/main/src/syntax/node.rs @@ -0,0 +1,501 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is a part of the "Lady Deirdre" Work, // +// a compiler front-end foundation technology. // +// // +// This Work is a proprietary software with source available code. // +// // +// To copy, use, distribute, and contribute into this Work you must agree to // +// the terms of the End User License Agreement: // +// // +// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. 
//
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    lexis::Token,
+    std::*,
+    syntax::{Node, SyntaxError, SyntaxRule, SyntaxSession},
+};
+
+/// A special marker that forcefully skips the syntax parsing stage.
+///
+/// This object implements the [Node](crate::syntax::Node) interface, but does not produce any
+/// syntax data, and skips the syntax parsing stage from the beginning.
+///
+/// You can use this object when the syntax manager (e.g. [Document](crate::Document)) requires a
+/// full syntax specification, but you only need the lexical data to be managed.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     syntax::{NoSyntax, SyntaxTree},
+///     lexis::SimpleToken,
+///     Document,
+/// };
+///
+/// use std::mem::size_of;
+///
+/// let doc = Document::<NoSyntax<SimpleToken>>::from("foo bar baz");
+///
+/// // Resolves to a single instance of NoSyntax of zero size.
+/// assert!(doc.root().deref(&doc).is_some());
+/// assert_eq!(size_of::<NoSyntax<SimpleToken>>(), 0)
+/// ```
+#[derive(Clone, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct NoSyntax<T: Token> {
+    _token: PhantomData<T>,
+}
+
+impl<T: Token> Debug for NoSyntax<T> {
+    #[inline(always)]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        formatter.write_str("NoSyntax")
+    }
+}
+
+impl<T: Token> Node for NoSyntax<T> {
+    type Token = T;
+    type Error = SyntaxError;
+
+    #[inline(always)]
+    fn new<'code>(
+        _rule: SyntaxRule,
+        _session: &mut impl SyntaxSession<'code, Node = Self>,
+    ) -> Self {
+        Self::nil()
+    }
+}
+
+impl<T: Token> NoSyntax<T> {
+    #[inline(always)]
+    pub(crate) fn nil() -> Self {
+        Self {
+            _token: PhantomData::default(),
+        }
+    }
+}
diff --git a/work/crates/main/src/syntax/node.rs b/work/crates/main/src/syntax/node.rs
new file mode 100644
index 0000000..d14054b
--- /dev/null
+++ b/work/crates/main/src/syntax/node.rs
@@ -0,0 +1,501 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+extern crate lady_deirdre_derive;
+
+pub use lady_deirdre_derive::Node;
+
+use crate::{
+    arena::{Id, Identifiable, Ref},
+    lexis::{Token, TokenCursor},
+    std::*,
+    syntax::{ClusterRef, SyntaxBuffer, SyntaxError, SyntaxRule, SyntaxSession, SyntaxTree},
+};
+
+/// A trait that specifies the syntax tree node kind and provides a syntax grammar parser.
+///
+/// An API user implements this trait to specify the Programming Language's syntax grammar and the
+/// type of the syntax tree node.
+///
+/// This trait is supposed to be implemented on a Rust enum type with variants representing
+/// tree node kinds, but this is not a strict requirement. In the functional sense, the main
+/// purpose of the Node implementation is to provide a syntax parser that will re-parse sequences
+/// of [Tokens](crate::lexis::Token) by interacting with an arbitrary
+/// [SyntaxSession](crate::syntax::SyntaxSession) interface that, in turn, manages the parsing
+/// process.
+///
+/// An API user is encouraged to implement this trait using the helper
+/// [Node](::lady_deirdre_derive::Node) macro-derive on enum types by specifying the syntax
+/// grammar directly on enum variants through the macro's attributes.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     syntax::{Node, SyntaxError, SyntaxTree},
+///     lexis::{SimpleToken, TokenRef},
+///     Document,
+/// };
+///
+/// #[derive(Node, PartialEq, Debug)]
+/// #[token(SimpleToken)]
+/// #[error(SyntaxError)]
+/// #[skip($Whitespace)]
+/// enum NumbersInParens {
+///     #[root]
+///     #[rule($ParenOpen & (numbers: $Number)*{$Symbol} & $ParenClose)]
+///     Root {
+///         numbers: Vec<TokenRef>,
+///     },
+/// }
+///
+/// let doc = Document::<NumbersInParens>::from("(3, 4, 5)");
+///
+/// let root = doc.root().deref(&doc).unwrap();
+///
+/// match root {
+///     NumbersInParens::Root { numbers } => {
+///         assert_eq!(
+///             numbers.iter().map(|num| num.string(&doc).unwrap()).collect::<Vec<_>>(),
+///             vec!["3", "4", "5"],
+///         );
+///     },
+/// }
+/// ```
+///
+/// An API user can also implement the Node trait manually, for example by using 3rd party parser
+/// libraries. See the [`Node::new`](crate::syntax::Node::new) function specification for details.
+pub trait Node: Sized + 'static {
+    /// Describes the programming language's lexical grammar.
+    type Token: Token;
+
+    /// Describes the syntax/semantic error type of this programming language grammar.
+    type Error: From<SyntaxError> + Sized + 'static;
+
+    /// Parses a branch of the syntax tree from a sequence of [Tokens](crate::lexis::Token) using
+    /// the specified parse `rule`, and returns an instance of the top node of the branch.
+    ///
+    /// This is a low-level API function.
+    ///
+    /// An API user is encouraged to use the [Node](::lady_deirdre_derive::Node) macro-derive to
+    /// implement this trait automatically based on a set of LL(1) grammar rules,
+    /// but you can implement it manually too.
+    ///
+    /// You need to call this function manually only if you want to implement an extension API to
+    /// this crate. In this case you should also prepare a custom implementation of the
+    /// SyntaxSession trait. See the [SyntaxSession](crate::syntax::SyntaxSession) documentation
+    /// for details.
+    ///
+    /// **Algorithm Specification:**
+    ///   - The algorithm behind this implementation is a
+    ///     [Top-down Parser](https://en.wikipedia.org/wiki/Top-down_parsing) that parses
+    ///     a context-free language of the [LL grammar class](https://en.wikipedia.org/wiki/LL_grammar)
+    ///     with potentially unlimited lookahead. Note that due to the unlimited lookahead
+    ///     characteristic it covers a wide class of recursive-descending grammars, including
+    ///     [PEG grammars](https://en.wikipedia.org/wiki/Parsing_expression_grammar).
+    ///   - The Algorithm reads as many tokens from the input sequence as needed using the
+    ///     `session`'s [TokenCursor](crate::lexis::TokenCursor) lookahead operations to recognize
+    ///     the appropriate parse `rule`.
+    ///   - The Algorithm [advances](crate::lexis::TokenCursor::advance) the TokenCursor to as
+    ///     many tokens as needed to exactly match the parsed `rule`.
+    ///   - To descend into a parsing subrule the Algorithm calls the `session`'s
+    ///     [`descend`](crate::syntax::SyntaxSession::descend) function that consumes the
+    ///     subrule's [kind](crate::syntax::SyntaxRule) and returns a
+    ///     [`weak reference`](NodeRef) into the rule's parsed Node.
+    ///   - The Algorithm never calls the [`descend`](crate::syntax::SyntaxSession::descend)
+    ///     function with [ROOT_RULE](crate::syntax::ROOT_RULE). The Root Rule is not a recursive
+    ///     rule by design.
+    ///   - The Specification does not limit the way the Algorithm maps `rule` values to
+    ///     specific parsing functions under the hood. This mapping is fully encapsulated by the
+    ///     Algorithm internals. In other words, the "external" caller of the function `new` does
+    ///     not have to be aware of the mapping between the `rule` values and the types of
+    ///     produced nodes. The only exception to this is the [ROOT_RULE](crate::syntax::ROOT_RULE)
+    ///     value. If the "external" caller invokes the `new` function with the ROOT_RULE
+    ///     parameter, the Algorithm is guaranteed to enter the entire syntax tree parsing
+    ///     procedure.
+    ///   - When the function `new` is invoked, the Algorithm is guaranteed to complete the
+    ///     parsing procedure regardless of the input sequence, and to return a valid instance of
+    ///     [Node]. If the input sequence contains syntax errors, the Algorithm recovers from
+    ///     these errors in a way that is not specified. In this case the Algorithm could call the
+    ///     `session`'s [error](crate::syntax::SyntaxSession::error) function to register a syntax
+    ///     error.
+    ///
+    /// ```rust
+    /// use lady_deirdre::{
+    ///     syntax::{Node, NodeRef, SyntaxSession, SyntaxRule, SyntaxError, SyntaxTree, ROOT_RULE},
+    ///     lexis::{SimpleToken, TokenCursor},
+    ///     Document,
+    /// };
+    ///
+    /// // A syntax of nested parentheses: `(foo (bar) baz)`.
+    /// enum Parens {
+    ///     Root { inner: Vec<NodeRef> },
+    ///     Parens { inner: Vec<NodeRef> },
+    ///     Other,
+    /// }
+    ///
+    /// const PARENS_RULE: SyntaxRule = &1;
+    /// const OTHER_RULE: SyntaxRule = &2;
+    ///
+    /// impl Node for Parens {
+    ///     type Token = SimpleToken;
+    ///     type Error = SyntaxError;
+    ///
+    ///     fn new<'code>(
+    ///         rule: SyntaxRule,
+    ///         session: &mut impl SyntaxSession<'code, Node = Self>,
+    ///     ) -> Self {
+    ///         // A rule dispatcher that delegates the parsing control flow to specialized parse
+    ///         // functions.
+    ///
+    ///         if rule == ROOT_RULE {
+    ///             return Self::parse_root(session);
+    ///         }
+    ///
+    ///         if rule == PARENS_RULE {
+    ///             return Self::parse_parens(session);
+    ///         }
+    ///
+    ///         // Otherwise the `rule` is an `OTHER_RULE`.
+    ///
+    ///         Self::parse_other(session)
+    ///     }
+    /// }
+    ///
+    /// impl Parens {
+    ///     fn parse_root<'code>(session: &mut impl SyntaxSession<'code, Node = Self>) -> Self {
+    ///         let mut inner = vec![];
+    ///
+    ///         loop {
+    ///             // Analyzing the next incoming token.
+    ///             match session.token(0) {
+    ///                 Some(&SimpleToken::ParenOpen) => {
+    ///                     inner.push(session.descend(PARENS_RULE));
+    ///                 }
+    ///
+    ///                 Some(_) => {
+    ///                     inner.push(session.descend(OTHER_RULE));
+    ///                 }
+    ///
+    ///                 None => break,
+    ///             }
+    ///         }
+    ///
+    ///         Self::Root { inner }
+    ///     }
+    ///
+    ///     // Parses a pair of parentheses (`(...)`).
+    ///     fn parse_parens<'code>(session: &mut impl SyntaxSession<'code, Node = Self>) -> Self {
+    ///         let mut inner = vec![];
+    ///
+    ///         // The first token is an open parenthesis ("("). Consuming it.
+    ///         session.advance();
+    ///
+    ///         loop {
+    ///             // Analyzing the next incoming token.
+    ///             match session.token(0) {
+    ///                 Some(&SimpleToken::ParenOpen) => {
+    ///                     inner.push(session.descend(PARENS_RULE));
+    ///                 }
+    ///
+    ///                 // A close parenthesis (")") was found. The parsing process finished
+    ///                 // successfully.
+    ///                 Some(&SimpleToken::ParenClose) => {
+    ///                     // Consuming this token.
+    ///                     session.advance();
+    ///
+    ///                     return Self::Parens { inner };
+    ///                 }
+    ///
+    ///                 Some(_) => {
+    ///                     inner.push(session.descend(OTHER_RULE));
+    ///                 }
+    ///
+    ///                 None => break,
+    ///             }
+    ///         }
+    ///
+    ///         // The parse process has failed: we didn't find a closing parenthesis.
+    ///
+    ///         // Registering a syntax error.
+    ///         let span = session.site_ref(0)..session.site_ref(0);
+    ///         session.error(SyntaxError::UnexpectedEndOfInput {
+    ///             span,
+    ///             context: "Parse Parens",
+    ///         });
+    ///
+    ///         // Returning what we have parsed so far.
+    ///         Self::Parens { inner }
+    ///     }
+    ///
+    ///     // Parses any sequence of tokens except parentheses (`foo bar`).
+    ///     fn parse_other<'code>(session: &mut impl SyntaxSession<'code, Node = Self>) -> Self {
+    ///         // The first token is not a parenthesis token. Consuming it.
+    ///         session.advance();
+    ///
+    ///         loop {
+    ///             // Analyzing the next incoming token.
+    ///             match session.token(0) {
+    ///                 Some(&SimpleToken::ParenOpen) | Some(&SimpleToken::ParenClose) | None => {
+    ///                     break;
+    ///                 }
+    ///
+    ///                 Some(_) => {
+    ///                     // The next token is not a parenthesis token. Consuming it.
+    ///                     session.advance();
+    ///                 },
+    ///             }
+    ///         }
+    ///
+    ///         Self::Other
+    ///     }
+    /// }
+    ///
+    /// let doc = Document::<Parens>::from("foo (bar (baz) (aaa) ) bbb");
+    ///
+    /// // The input text has been parsed without errors.
+    /// assert_eq!(doc.errors().count(), 0);
+    /// ```
+    fn new<'code>(rule: SyntaxRule, session: &mut impl SyntaxSession<'code, Node = Self>) -> Self;
+
+    /// A helper function to immediately parse a subsequence of tokens in a non-incremental way.
+    ///
+    /// ```rust
+    /// use lady_deirdre::{
+    ///     lexis::{SimpleToken, Token, SourceCode},
+    ///     syntax::{SimpleNode, Node, SyntaxTree},
+    /// };
+    ///
+    /// let tokens = SimpleToken::parse("(foo bar)");
+    ///
+    /// let sub_sequence = tokens.cursor(0..5); // A cursor into the "(foo bar" substring.
+    ///
+    /// let syntax = SimpleNode::parse(sub_sequence);
+    ///
+    /// // The close parenthesis is missing in this subsequence, so the syntax tree of the
+    /// // subsequence has syntax errors.
+    /// assert!(syntax.errors().count() > 0);
+    /// ```
+    #[inline(always)]
+    fn parse<'code>(cursor: impl TokenCursor<'code, Token = Self::Token>) -> SyntaxBuffer<Self> {
+        SyntaxBuffer::new(cursor)
+    }
+}
+
+/// A weak reference of the [Node] and its metadata inside the syntax structure of the compilation
+/// unit.
+///
+/// This object represents a long-lived, lifetime-independent and type-independent safe weak
+/// reference into the syntax tree that is cheap to [Copy](::std::marker::Copy).
+///
+/// A NodeRef is capable of surviving incremental changes of the source code happening aside of
+/// the referred Node.
+///
+/// An API user normally does not need to inspect NodeRef inner fields manually or to construct
+/// a NodeRef manually unless they are working on a Crate API Extension.
+///
+/// For details on the Weak references framework design see the [Arena](crate::arena) module
+/// documentation.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct NodeRef {
+    /// An [identifier](crate::arena::Id) of the [SyntaxTree](crate::syntax::SyntaxTree) instance
+    /// this weakly referred [Node] belongs to.
+    pub id: Id,
+
+    /// An internal weak reference of the node's [Cluster](crate::syntax::Cluster) of the
+    /// [SyntaxTree](crate::syntax::SyntaxTree) instance.
+    pub cluster_ref: Ref,
+
+    /// An internal weak reference of the Node object in the
+    /// [Cluster](crate::syntax::Cluster).
+    ///
+    /// If `node_ref` is a [`Ref::Primary`](crate::arena::Ref::Primary) variant, the NodeRef
+    /// object refers to the [`Cluster::primary`](crate::syntax::Cluster::primary) object.
+    /// Otherwise `node_ref` is a [`Ref::Repository`] variant that refers to an object from the
+    /// [`Cluster::nodes`](crate::syntax::Cluster::nodes) repository.
+    pub node_ref: Ref,
+}
+
+impl Debug for NodeRef {
+    #[inline]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        match self.is_nil() {
+            false => formatter.write_fmt(format_args!("NodeRef({:?})", self.id())),
+            true => formatter.write_str("NodeRef(Nil)"),
+        }
+    }
+}
+
+impl Identifiable for NodeRef {
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        &self.id
+    }
+}
+
+impl NodeRef {
+    /// Returns an invalid instance of the NodeRef.
+    ///
+    /// This instance never resolves to a valid [Node].
+    #[inline(always)]
+    pub const fn nil() -> Self {
+        Self {
+            id: *Id::nil(),
+            cluster_ref: Ref::Nil,
+            node_ref: Ref::Nil,
+        }
+    }
+
+    /// Returns `true` if this instance will never resolve to a valid [Node].
+    ///
+    /// It is guaranteed that `NodeRef::nil().is_nil()` is always `true`, but in general if
+    /// this function returns `false` it is not guaranteed that the provided instance is a valid
+    /// reference.
+    ///
+    /// To determine reference validity per a specified [SyntaxTree](crate::syntax::SyntaxTree)
+    /// instance use the [is_valid_ref](NodeRef::is_valid_ref) function instead.
+    #[inline(always)]
+    pub const fn is_nil(&self) -> bool {
+        self.id.is_nil() || self.cluster_ref.is_nil() || self.node_ref.is_nil()
+    }
+
+    /// Immutably dereferences the weakly referred [Node] of the specified
+    /// [SyntaxTree](crate::syntax::SyntaxTree).
+    ///
+    /// Returns [None] if this NodeRef is not a valid reference for the specified `tree` instance.
+    ///
+    /// Use [is_valid_ref](NodeRef::is_valid_ref) to check NodeRef validity.
+    ///
+    /// This function uses the [`SyntaxTree::get_cluster`](crate::syntax::SyntaxTree::get_cluster)
+    /// function under the hood.
+    #[inline(always)]
+    pub fn deref<'tree, N: Node>(
+        &self,
+        tree: &'tree impl SyntaxTree<Node = N>,
+    ) -> Option<&'tree N> {
+        if &self.id != tree.id() {
+            return None;
+        }
+
+        match tree.get_cluster(&self.cluster_ref) {
+            Some(cluster) => match &self.node_ref {
+                Ref::Primary => Some(&cluster.primary),
+
+                _ => cluster.nodes.get(&self.node_ref),
+            },
+
+            _ => None,
+        }
+    }
+
+    /// Mutably dereferences the weakly referred [Node] of the specified
+    /// [SyntaxTree](crate::syntax::SyntaxTree).
+    ///
+    /// Returns [None] if this NodeRef is not a valid reference for the specified `tree` instance.
+    ///
+    /// Use [is_valid_ref](NodeRef::is_valid_ref) to check NodeRef validity.
+    ///
+    /// This function uses the
+    /// [`SyntaxTree::get_cluster_mut`](crate::syntax::SyntaxTree::get_cluster_mut) function under
+    /// the hood.
+    #[inline(always)]
+    pub fn deref_mut<'tree, N: Node>(
+        &self,
+        tree: &'tree mut impl SyntaxTree<Node = N>,
+    ) -> Option<&'tree mut N> {
+        if &self.id != tree.id() {
+            return None;
+        }
+
+        match tree.get_cluster_mut(&self.cluster_ref) {
+            None => None,
+            Some(data) => match &self.node_ref {
+                Ref::Primary => Some(&mut data.primary),
+
+                _ => data.nodes.get_mut(&self.node_ref),
+            },
+        }
+    }
+
+    /// Creates a weak reference of the [Cluster](crate::syntax::Cluster) of the referred [Node].
+    #[inline(always)]
+    pub fn cluster(&self) -> ClusterRef {
+        ClusterRef {
+            id: self.id,
+            cluster_ref: self.cluster_ref,
+        }
+    }
+
+    /// Removes an instance of the [Node] from the [SyntaxTree](crate::syntax::SyntaxTree)
+    /// that is weakly referred to by this reference.
+    ///
+    /// Returns [Some] value of the Node if this weak reference is a valid reference of an
+    /// existing node inside the `tree` instance. Otherwise returns [None].
+    ///
+    /// Use [is_valid_ref](NodeRef::is_valid_ref) to check NodeRef validity.
+    ///
+    /// This function uses the
+    /// [`SyntaxTree::get_cluster_mut`](crate::syntax::SyntaxTree::get_cluster_mut) function under
+    /// the hood.
+    #[inline(always)]
+    pub fn unlink<N: Node>(&self, tree: &mut impl SyntaxTree<Node = N>) -> Option<N> {
+        if &self.id != tree.id() {
+            return None;
+        }
+
+        match tree.get_cluster_mut(&self.cluster_ref) {
+            None => None,
+            Some(data) => data.nodes.remove(&self.node_ref),
+        }
+    }
+
+    /// Returns `true` if and only if the weakly referred Node belongs to the specified
+    /// [SyntaxTree](crate::syntax::SyntaxTree), and the referred Node exists in this SyntaxTree
+    /// instance.
+    ///
+    /// If this function returns `true`, all dereference functions return meaningful [Some]
+    /// values; otherwise they return [None].
+    ///
+    /// This function uses the [`SyntaxTree::get_cluster`](crate::syntax::SyntaxTree::get_cluster)
+    /// function under the hood.
+    #[inline(always)]
+    pub fn is_valid_ref<N: Node>(&self, tree: &impl SyntaxTree<Node = N>) -> bool {
+        if &self.id != tree.id() {
+            return false;
+        }
+
+        match tree.get_cluster(&self.cluster_ref) {
+            None => false,
+            Some(cluster) => cluster.nodes.contains(&self.node_ref),
+        }
+    }
+}
diff --git a/work/crates/main/src/syntax/readme.md b/work/crates/main/src/syntax/readme.md
new file mode 100644
index 0000000..599faa3
--- /dev/null
+++ b/work/crates/main/src/syntax/readme.md
@@ -0,0 +1,81 @@
+# Syntax analysis features.
+
+This module contains a set of features to construct and analyze the syntax
+structure of the source code.
+
+The syntax structure of the source code is represented by a syntax tree. The
+syntax tree could serve as a Parse Tree, an Abstract Syntax Tree, or a Semantic
+resolution structure, and it could also contain syntax and semantic error
+information at the same time.
+
+The Syntax Tree is an abstract mutable structure that could be altered by an
+API user at any stage of the end compilation system.
+
+The Tree consists of a set of Nodes connected to each other through a system
+of weak references. It is assumed that the [Node](crate::syntax::Node) interface
+would be implemented on a Rust enum type with variants representing kinds of the
+parse/syntax/semantic tree nodes, and with variant fields that contain weak
+references to other nodes related to this node (to the child nodes in
+particular) and other semantic resolution metadata.
+
+The [`Node::new`](crate::syntax::Node::new) function defines a Programming
+language syntax grammar parser, an algorithm that constructs the syntax tree
+from a sequence of the source code tokens. Under the hood this function performs
+parsing of the source code tokens by interacting with the low-level
+[SyntaxSession](crate::syntax::SyntaxSession) interface. These two interfaces
+could express a parsing algorithm of the `LL(*)` class (unlimited lookahead)
+with syntax error recovery capabilities.
+
+An API user is encouraged to utilize the [Node](::lady_deirdre_derive::Node)
+derive macro on the enum type to define an `LL(1)` syntax parser. Using this
+macro an API user specifies parse rules through the macro attributes directly
+on the enum variants. This macro implements a parsing algorithm with error
+recovery capabilities using heuristic techniques automatically.
+
+An object that stores the syntax structure of a compilation unit should
+implement the [SyntaxTree](crate::syntax::SyntaxTree) trait. This interface
+provides an API user with access to the Syntax Tree root node, and with an
+iterator through all syntax errors of this unit. Unless you work on a Crate
+extension, you don't need to implement this trait manually.
+
+[SyntaxBuffer](crate::syntax::SyntaxBuffer) is the default implementation of the
+SyntaxTree trait. This object is supposed to be used for non-incremental parsing
+scenarios. For incremental parsing one can use [Document](crate::Document) which
+is also a SyntaxTree implementation.
+
+The Crate does not propose a unified way to traverse the syntax structure of a
+compilation unit. An API user receives a weak reference to the root node of the
+syntax tree using the [`SyntaxTree::root`](crate::syntax::SyntaxTree::root)
+function. Actual traversing approaches are up to the user-defined Node type
+structure, as the sketch below illustrates.
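+
+A minimal traversal sketch over the generic [SimpleNode](crate::syntax::SimpleNode)
+syntax, whose nodes expose the weak references of their children through the
+`inner` function:
+
+```rust
+use lady_deirdre::{
+    syntax::{SimpleNode, SyntaxTree},
+    Document,
+};
+
+let doc = Document::<SimpleNode>::from("foo (bar {baz})");
+
+// The root NodeRef is the entry point into the syntax structure.
+let root = doc.root().deref(&doc).unwrap();
+
+// Dereferencing the children; the traversal order and depth are fully driven
+// by the user-defined Node structure.
+for child in root.inner() {
+    assert!(child.deref(&doc).is_some());
+}
+```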
+
+The instances of the syntax tree nodes and the instances of the syntax/semantic
+errors related to these nodes reside in memory in so-called
+[Clusters](crate::syntax::Cluster) that own these instances in undetermined
+order. A set of such clusters builds up the entire syntax structure of the
+compilation unit. It is up to the [SyntaxTree](crate::syntax::SyntaxTree)
+implementation how to split the syntax structure between a set of clusters.
+For example, the SyntaxBuffer stores the entire syntax structure in a single
+cluster, whereas the Document splits the syntax structure between a set of
+clusters more granularly, using each cluster as a unit of incremental caching.
+The Document tends to group a set of nodes in a single cluster when all nodes
+of this cluster have been produced by a single syntax parser rule (e.g. all of
+these nodes lexically start from the same token), but this is not a strict
+rule an API user could rely on. In general, an API user should assume that
+the nodes of a cluster are logically "close" to each other, and that during
+incremental reparsing the nodes of a single cluster could become obsolete
+altogether.
+
+A Cluster is a mutable structure. An API user could add, remove or modify nodes
+and errors inside the cluster at any stage of the compilation system as long as
+the user has mutable access to the particular cluster.
+
+This module provides a system of high-level weak references to deal with the
+syntax structure instances. This includes [NodeRef](crate::syntax::NodeRef),
+[ClusterRef](crate::syntax::ClusterRef) and [ErrorRef](crate::syntax::ErrorRef).
+See the [Arena](crate::arena) module documentation to read more about the weak
+reference framework.
+
+Finally, this module provides a mechanism of
+[Transducers](crate::syntax::Transducer) to turn the source code of a
+compilation unit into a different form. In particular, you can use Transducers
+to implement source code formatters.
diff --git a/work/crates/main/src/syntax/session.rs b/work/crates/main/src/syntax/session.rs
new file mode 100644
index 0000000..7df0444
--- /dev/null
+++ b/work/crates/main/src/syntax/session.rs
@@ -0,0 +1,225 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Id, Identifiable, Ref, Repository},
+    lexis::{Length, Site, SiteRef, TokenCount, TokenCursor, TokenRef},
+    std::*,
+    syntax::{ErrorRef, Node, NodeRef},
+};
+
+/// An interface to the source code syntax parsing/re-parsing session.
+///
+/// This is a low-level API.
+///
+/// The syntax parsing architecture is decoupled into two independent components:
+///  - The Syntax Tree Manager that organizes the syntax structure storage, and that provides
+///    access operations to the syntax structure objects. This component implements the
+///    [SyntaxTree](crate::syntax::SyntaxTree) trait.
+///  - The Syntax Parser of a particular programming language. This component is unaware of the
+///    syntax structure memory management process, and of the source of parsing.
+///
+/// Both components of this architecture are unaware of each other, and they use the
+/// [SyntaxSession] trait as an input/output "thin" interaction interface.
+///
+/// The Syntax Tree Manager passes a mutable reference to a SyntaxSession object to the
+/// [`Node::new`](crate::syntax::Node::new) function to initiate the syntax parsing procedure in
+/// a specified context. And, in turn, the `Node::new` function uses this object to read
+/// [Tokens](crate::lexis::Token) from the input sequence, and to drive the parsing process.
+///
+/// You can implement this trait as well as the [SyntaxTree](crate::syntax::SyntaxTree) trait to
+/// create a custom syntax tree manager of the compilation unit that would be able to work with
+/// existing syntax grammar definitions seamlessly.
+///
+/// As long as the [Node](crate::syntax::Node) trait implementation follows the
+/// [`Algorithm Specification`](crate::syntax::Node::new), the
+/// intercommunication between the Syntax Parser and the Syntax Tree Manager works correctly too.
+///
+/// The SyntaxSession inherits the [TokenCursor](crate::lexis::TokenCursor) trait that provides
+/// input [Token](crate::lexis::Token) sequence read operations to be parsed by the Syntax Parser.
+pub trait SyntaxSession<'code>: TokenCursor<'code, Token = <Self::Node as Node>::Token> {
+    /// Specifies the programming language grammar.
+    type Node: Node;
+
+    /// Performs a descend operation into a syntax grammar subrule from the current
+    /// [TokenCursor](crate::lexis::TokenCursor) inner [Site](crate::lexis::Site).
+    ///
+    /// Depending on the implementation, this function may recursively invoke the
+    /// [`Node::new`](crate::syntax::Node::new) function under the hood to process the specified
+    /// `rule`, or get a previously parsed value from the Syntax Tree Manager's internal cache.
+    ///
+    /// The function returns a [`weak reference`](crate::syntax::NodeRef) into the parsed Node.
+    ///
+    /// The `Node::new` algorithm should prefer to call this function to recursively descend into
+    /// the syntax grammar rules instead of invoking `Node::new` directly and recursively.
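+    ///
+    /// For instance, a minimal sketch (reusing the `PARENS_RULE` constant and the `inner`
+    /// vector from the [`Node::new`](crate::syntax::Node::new) example):
+    ///
+    /// ```ignore
+    /// // Lets the Syntax Tree Manager decide whether to actually re-parse the subrule,
+    /// // or to reuse a previously parsed node from its internal cache.
+    /// let child: NodeRef = session.descend(PARENS_RULE);
+    ///
+    /// inner.push(child);
+    /// ```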
+    ///
+    /// By the [`Algorithm Specification`](crate::syntax::Node::new) the `Node::new` function
+    /// should avoid calling this function with the [ROOT_RULE](crate::syntax::ROOT_RULE) value.
+    fn descend(&mut self, rule: SyntaxRule) -> NodeRef;
+
+    /// Registers a syntax parse error.
+    ///
+    /// If the Syntax Parser encounters a grammatically incorrect input sequence, it should
+    /// recover from this error and register all syntax error objects of the currently parsed
+    /// [SyntaxRule](crate::syntax::SyntaxRule) using this function.
+    ///
+    /// The function returns a [`weak reference`](crate::syntax::ErrorRef) into the registered
+    /// error.
+    fn error(&mut self, error: <Self::Node as Node>::Error) -> ErrorRef;
+}
+
+/// A static identifier of an arbitrary syntax grammar rule.
+///
+/// The exact values of this type are uniquely specified by the particular
+/// [`syntax parsing algorithm`](crate::syntax::Node::new) except the [ROOT_RULE] that always
+/// specifies the grammar's entry rule.
+pub type SyntaxRule = &'static usize;
+
+/// A syntax grammar entry rule.
+///
+/// See the [`syntax parser algorithm specification`](crate::syntax::Node::new) for details.
+pub static ROOT_RULE: SyntaxRule = &0;
+
+pub(crate) static NON_ROOT_RULE: SyntaxRule = &1;
+
+pub(super) struct SequentialSyntaxSession<
+    'code,
+    N: Node,
+    C: TokenCursor<'code, Token = <N as Node>::Token>,
+> {
+    pub(super) id: Id,
+    pub(super) primary: Option<N>,
+    pub(super) nodes: Repository<N>,
+    pub(super) errors: Repository<N::Error>,
+    pub(super) token_cursor: C,
+    pub(super) _code_lifetime: PhantomData<&'code ()>,
+}
+
+impl<'code, N, C> Identifiable for SequentialSyntaxSession<'code, N, C>
+where
+    N: Node,
+    C: TokenCursor<'code, Token = <N as Node>::Token>,
+{
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        &self.id
+    }
+}
+
+impl<'code, N, C> TokenCursor<'code> for SequentialSyntaxSession<'code, N, C>
+where
+    N: Node,
+    C: TokenCursor<'code, Token = <N as Node>::Token>,
+{
+    type Token = <N as Node>::Token;
+
+    #[inline(always)]
+    fn advance(&mut self) -> bool {
+        self.token_cursor.advance()
+    }
+
+    #[inline(always)]
+    fn token(&mut self, distance: TokenCount) -> Option<&'code Self::Token> {
+        self.token_cursor.token(distance)
+    }
+
+    #[inline(always)]
+    fn site(&mut self, distance: TokenCount) -> Option<Site> {
+        self.token_cursor.site(distance)
+    }
+
+    #[inline(always)]
+    fn length(&mut self, distance: TokenCount) -> Option<Length> {
+        self.token_cursor.length(distance)
+    }
+
+    #[inline(always)]
+    fn string(&mut self, distance: TokenCount) -> Option<&'code str> {
+        self.token_cursor.string(distance)
+    }
+
+    #[inline(always)]
+    fn token_ref(&mut self, distance: TokenCount) -> TokenRef {
+        self.token_cursor.token_ref(distance)
+    }
+
+    #[inline(always)]
+    fn site_ref(&mut self, distance: TokenCount) -> SiteRef {
+        self.token_cursor.site_ref(distance)
+    }
+
+    #[inline(always)]
+    fn end_site_ref(&mut self) -> SiteRef {
+        self.token_cursor.end_site_ref()
+    }
+}
+
+impl<'code, N, C> SyntaxSession<'code> for SequentialSyntaxSession<'code, N, C>
+where
+    N: Node,
+    C: TokenCursor<'code, Token = <N as Node>::Token>,
+{
+    type Node = N;
+
+    fn descend(&mut self, rule: SyntaxRule) -> NodeRef {
+        let node = N::new(rule, self);
+
+        let node_ref = match rule == ROOT_RULE {
+            true => {
+                self.primary = Some(node);
+
+                Ref::Primary
+            }
+
+            false => self.nodes.insert(node),
+        };
+
+        NodeRef {
+            id: self.id,
+            cluster_ref: Ref::Primary,
+            node_ref,
+        }
+    }
+
+    #[inline(always)]
+    fn error(&mut self, error: <N as Node>::Error) -> ErrorRef {
+        ErrorRef {
+            id: self.id,
+            cluster_ref: Ref::Primary,
+            error_ref: self.errors.insert(error),
+        }
+    }
+}
diff --git a/work/crates/main/src/syntax/simple.rs b/work/crates/main/src/syntax/simple.rs
new file mode 100644
index 0000000..fe99565
--- /dev/null
+++ b/work/crates/main/src/syntax/simple.rs
@@ -0,0 +1,105 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    lexis::SimpleToken,
+    std::*,
+    syntax::{Node, NodeRef, SyntaxError},
+};
+
+/// A common generic syntax.
+///
+/// This is a companion object of the [SimpleToken](crate::lexis::SimpleToken) lexis that
+/// represents a set of nested parens: parentheses, braces and brackets.
+#[derive(Node, Clone, Debug, PartialEq, Eq)]
+#[token(SimpleToken)]
+#[error(SyntaxError)]
+#[skip($Number | $Symbol | $Identifier | $String | $Char | $Whitespace | $Mismatch)]
+#[define(ANY = Parenthesis | Brackets | Braces)]
+pub enum SimpleNode {
+    /// A root node that contains all top-level parens.
+    #[root]
+    #[rule(inner: ANY*)]
+    Root {
+        /// Top-level parens of the source code.
+        inner: Vec<NodeRef>,
+    },
+
+    /// A pair of parentheses (`( ... )`).
+    #[rule($ParenOpen & inner: ANY* & $ParenClose)]
+    #[synchronization]
+    Parenthesis {
+        /// Parens nested inside this Parenthesis pair.
+        inner: Vec<NodeRef>,
+    },
+
+    /// A pair of brackets (`[ ... ]`).
+    #[rule($BracketOpen & inner: ANY* & $BracketClose)]
+    #[synchronization]
+    Brackets {
+        /// Parens nested inside this Brackets pair.
+        inner: Vec<NodeRef>,
+    },
+
+    /// A pair of braces (`{ ... }`).
+    #[rule($BraceOpen & inner: ANY* & $BraceClose)]
+    #[synchronization]
+    Braces {
+        /// Parens nested inside this Braces pair.
+        inner: Vec<NodeRef>,
+    },
+}
+
+impl Display for SimpleNode {
+    #[inline(always)]
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
+        Debug::fmt(self, formatter)
+    }
+}
+
+impl SimpleNode {
+    /// Returns a complete slice of the inner parens nested inside this parens node.
+    #[inline(always)]
+    pub fn inner(&self) -> &[NodeRef] {
+        match self {
+            Self::Root { inner } => &inner,
+            Self::Parenthesis { inner } => &inner,
+            Self::Brackets { inner } => &inner,
+            Self::Braces { inner } => &inner,
+        }
+    }
+}
diff --git a/work/crates/main/src/syntax/transducer.rs b/work/crates/main/src/syntax/transducer.rs
new file mode 100644
index 0000000..2e67260
--- /dev/null
+++ b/work/crates/main/src/syntax/transducer.rs
@@ -0,0 +1,625 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Id, Identifiable, Ref, RefIndex, Repository},
+    lexis::{
+        Length,
+        Site,
+        SiteRef,
+        SiteSpan,
+        SourceCode,
+        ToSite,
+        ToSpan,
+        TokenCount,
+        TokenCursor,
+        TokenRef,
+    },
+    std::*,
+    syntax::{
+        buffer::BufferErrorIterator,
+        Cluster,
+        ErrorRef,
+        Node,
+        NodeRef,
+        SyntaxRule,
+        SyntaxSession,
+        SyntaxTree,
+        ROOT_RULE,
+    },
+};
+
+/// An interface to transform the source code text into a different representation.
+///
+/// Basically, a Transducer is a function (a [map](Transducer::map) function) that temporarily
+/// interrupts the Syntax Parser's parse process, being invoked on every parse rule application.
+/// On every invocation the function manages the parse results of the currently parsed rule and
+/// its branch down the tree using a mutable reference to the [ParseContext] object. This function
+/// computes and returns a new representation of this parse rule's results to be further utilized
+/// by this function on the next invocation steps. In this sense the `map` function traverses the
+/// virtual parse tree depth-first.
+///
+/// The final invocation result of the `map` function is the result of the Transducer application.
+///
+/// The Transducers framework provides an API user with a mechanism to implement such tools as
+/// Code Formatters and Parse Tree rewriters.
+/// In particular, an API user could utilize this framework to construct custom forms of the
+/// (Abstract) Syntax Tree that would be compatible with 3rd party code analysis libraries.
+///
+/// ```rust
+/// use lady_deirdre::{
+///     syntax::{SimpleNode, TransduceRef, ParseContext},
+///     lexis::SourceCode,
+///     Document,
+/// };
+///
+/// // This example shows how to print a system of nested parens.
+///
+/// let doc = Document::<SimpleNode>::from("foo [bar] ({baz} aaa (bbb))");
+///
+/// // `SourceCode::transduce` is an entry point of the Transduce process. In particular, an FnMut
+/// // function implements the Transducer interface too.
+///
+/// let result = doc.transduce(|context: &mut ParseContext<_, _, String>| {
+///     // The `ParseContext::node` function returns a reference to the currently parsed Node.
+///     let node = context.node();
+///
+///     let mut result = String::new();
+///
+///     match node {
+///         SimpleNode::Root { .. } => (),
+///         SimpleNode::Parenthesis { .. } => result.push('('),
+///         SimpleNode::Brackets { .. } => result.push('['),
+///         SimpleNode::Braces { .. } => result.push('{'),
+///     }
+///
+///     for inner_node in node.inner() {
+///         // The `TransduceRef::get` function returns the results of the `map` function
+///         // invocation previously called for any inner node of the currently parsed Parse
+///         // Tree branch.
+///         result.push_str(inner_node.get(context).unwrap().as_str());
+///     }
+///
+///     match node {
+///         SimpleNode::Root { .. } => (),
+///         SimpleNode::Parenthesis { .. } => result.push(')'),
+///         SimpleNode::Brackets { .. } => result.push(']'),
+///         SimpleNode::Braces { .. } => result.push('}'),
+///     }
+///
+///     result
+/// });
+///
+/// assert_eq!(result, "[]({}())");
+/// ```
+pub trait Transducer<N: Node, S: SourceCode<Token = N::Token>, R> {
+    /// A function that transforms a particular parse tree node into the target representation
+    /// type.
+    fn map(&mut self, context: &mut ParseContext<N, S, R>) -> R;
+}
+
+impl<N, S, R, F> Transducer<N, S, R> for F
+where
+    N: Node,
+    S: SourceCode<Token = N::Token>,
+    F: FnMut(&mut ParseContext<N, S, R>) -> R,
+{
+    #[inline(always)]
+    fn map(&mut self, context: &mut ParseContext<N, S, R>) -> R {
+        self(context)
+    }
+}
+
+/// A Transducer's parse context.
+///
+/// This object is passed to the [`Transducer::map`](Transducer::map) function.
+///
+/// ParseContext provides a generic interface to inspect the currently parsed node and its branch
+/// down the parse tree. The context includes the reference of the [node](ParseContext::node),
+/// the [span](ParseContext::node_span) of the node, and the [cursor](ParseContext::node_cursor)
+/// into all token chunks covered by this parse rule.
+///
+/// Additionally, ParseContext implements the [SourceCode](crate::lexis::SourceCode) and
+/// [SyntaxTree](crate::syntax::SyntaxTree) traits such that an API user can use ParseContext to
+/// dereference any weakly referred [NodeRef](crate::syntax::NodeRef),
+/// [TokenRef](crate::lexis::TokenRef) or any other weakly referred object already parsed by the
+/// Syntax Parser and the Lexis Scanner. In particular an API user can use this object to
+/// dereference weak references inside the [Nodes](crate::syntax::Node) of the currently parsed
+/// parse tree branch.
+///
+/// Note, however, that by design the SourceCode and SyntaxTree implementations of the
+/// ParseContext object provide immutable access capabilities of these interfaces only. As such,
+/// an API user cannot mutably dereference any of these weak references.
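+///
+/// For instance, a minimal sketch (assuming `node_ref` is a [NodeRef](crate::syntax::NodeRef)
+/// taken from the currently parsed branch, and `context` is the `&mut ParseContext` argument of
+/// the `map` function):
+///
+/// ```ignore
+/// // Immutable dereferencing through the context works as usual...
+/// assert!(node_ref.deref(context).is_some());
+///
+/// // ...but mutable dereferencing always fails by design, because the context's
+/// // `get_cluster_mut` implementation never yields a cluster.
+/// assert!(node_ref.deref_mut(context).is_none());
+/// ```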
+///
+/// Finally, an API user could use ParseContext to obtain the computation results of the
+/// [`Transducer::map`](Transducer::map) function applied to all nodes of the currently parsed
+/// tree branch down the parse tree. For this purpose an API user utilizes the [TransduceRef]
+/// interface that auto-extends normal [NodeRef](crate::syntax::NodeRef) weak references.
+pub struct ParseContext<'code, N: Node, S: SourceCode<Token = N::Token>, R> {
+    code: &'code S,
+    root: NodeRef,
+    cluster: Option<(SiteSpan, Cluster<N>)>,
+    data: Vec<(SiteSpan, R)>,
+}
+
+impl<'code, N, S, R> Identifiable for ParseContext<'code, N, S, R>
+where
+    N: Node,
+    S: SourceCode<Token = N::Token>,
+{
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        self.code.id()
+    }
+}
+
+impl<'code, N, S, R> SourceCode for ParseContext<'code, N, S, R>
+where
+    N: Node,
+    S: SourceCode<Token = N::Token>,
+{
+    type Token = N::Token;
+
+    type Cursor<'a>
+    where
+        Self: 'a,
+    = S::Cursor<'a>;
+
+    #[inline(always)]
+    fn contains(&self, chunk_ref: &Ref) -> bool {
+        self.code.contains(chunk_ref)
+    }
+
+    #[inline(always)]
+    fn get_token(&self, chunk_ref: &Ref) -> Option<&Self::Token> {
+        self.code.get_token(chunk_ref)
+    }
+
+    #[inline(always)]
+    fn get_token_mut(&mut self, _chunk_ref: &Ref) -> Option<&mut Self::Token> {
+        None
+    }
+
+    #[inline(always)]
+    fn get_site(&self, chunk_ref: &Ref) -> Option<Site> {
+        self.code.get_site(chunk_ref)
+    }
+
+    #[inline(always)]
+    fn get_string(&self, chunk_ref: &Ref) -> Option<&str> {
+        self.code.get_string(chunk_ref)
+    }
+
+    #[inline(always)]
+    fn get_length(&self, chunk_ref: &Ref) -> Option<Length> {
+        self.code.get_length(chunk_ref)
+    }
+
+    #[inline(always)]
+    fn cursor(&self, span: impl ToSpan) -> Self::Cursor<'_> {
+        self.code.cursor(span)
+    }
+
+    #[inline(always)]
+    fn length(&self) -> Length {
+        self.code.length()
+    }
+
+    #[inline(always)]
+    fn token_count(&self) -> TokenCount {
+        self.code.token_count()
+    }
+}
+
+impl<'code, N, S, R> SyntaxTree for ParseContext<'code, N, S, R>
+where
+    N: Node,
+    S: SourceCode<Token = N::Token>,
+{
+    type Node = N;
+
+    type ErrorIterator<'a>
+    where
+        Self: 'a,
+    = BufferErrorIterator<'a, Self::Node>;
+
+    #[inline(always)]
+    fn root(&self) -> &NodeRef {
+        &self.root
+    }
+
+    #[inline(always)]
+    fn errors(&self) -> Self::ErrorIterator<'_> {
+        let (_, cluster) = unsafe { self.cluster.as_ref().unwrap_unchecked() };
+
+        BufferErrorIterator {
+            id: self.code.id(),
+            inner: (&cluster.errors).into_iter(),
+        }
+    }
+
+    #[inline(always)]
+    fn contains(&self, cluster_ref: &Ref) -> bool {
+        match cluster_ref {
+            Ref::Primary => true,
+            _ => false,
+        }
+    }
+
+    #[inline(always)]
+    fn get_cluster(&self, cluster_ref: &Ref) -> Option<&Cluster<Self::Node>> {
+        match cluster_ref {
+            Ref::Primary => {
+                let (_, cluster) = unsafe { self.cluster.as_ref().unwrap_unchecked() };
+
+                Some(cluster)
+            }
+
+            _ => None,
+        }
+    }
+
+    #[inline(always)]
+    fn get_cluster_mut(&mut self, _cluster_ref: &Ref) -> Option<&mut Cluster<Self::Node>> {
+        None
+    }
+}
+
+impl<'code, N, S, R> ParseContext<'code, N, S, R>
+where
+    N: Node,
+    S: SourceCode<Token = N::Token>,
+{
+    /// Returns a reference of the [Node](crate::syntax::Node) belonging to the currently parsed
+    /// rule.
+    #[inline(always)]
+    pub fn node(&self) -> &N {
+        let (_, cluster) = unsafe { self.cluster.as_ref().unwrap_unchecked() };
+
+        &cluster.primary
+    }
+
+    /// Returns a [SiteSpan](crate::lexis::SiteSpan) covered by the currently parsed rule.
+    #[inline(always)]
+    pub fn node_span(&self) -> SiteSpan {
+        let (span, _) = unsafe { self.cluster.as_ref().unwrap_unchecked() };
+
+        span.clone()
+    }
+
+    /// Returns a [TokenCursor](crate::lexis::TokenCursor) through all the
+    /// [`token chunks`](crate::lexis::Chunk) covered by the currently parsed rule.
+    #[inline(always)]
+    pub fn node_cursor(&self) -> <S as SourceCode>::Cursor<'code> {
+        self.code.cursor(self.node_span())
+    }
+}
+
+/// An out-implemented extension of the [NodeRef](crate::syntax::NodeRef) interface for Transducer
+/// parse metadata access.
+///
+/// This interface provides an API user with access functions to the [Node](crate::syntax::Node)'s
+/// parse rule metadata constructed during the previous invocation steps of the
+/// [`Transducer::map`](Transducer::map) function. An API user utilizes the [ParseContext] object
+/// to dereference this metadata from the NodeRef weak references.
+///
+/// See the ParseContext [documentation](ParseContext) for details.
+pub trait TransduceRef {
+    /// Immutably dereferences the parse rule metadata received from the
+    /// [`Transducer::map`](Transducer::map) function.
+    ///
+    /// Returns [None] if this NodeRef object does not belong to the parse tree branch specified
+    /// by the [`context`](ParseContext) argument.
+    fn get<'context, N: Node, S: SourceCode<Token = N::Token>, R>(
+        &self,
+        context: &'context ParseContext<N, S, R>,
+    ) -> Option<&'context R>;
+
+    /// Mutably dereferences the parse rule metadata received from the
+    /// [`Transducer::map`](Transducer::map) function.
+    ///
+    /// Returns [None] if this NodeRef object does not belong to the parse tree branch specified
+    /// by the [`context`](ParseContext) argument.
+    fn get_mut<'context, N: Node, S: SourceCode<Token = N::Token>, R>(
+        &self,
+        context: &'context mut ParseContext<N, S, R>,
+    ) -> Option<&'context mut R>;
+
+    /// Returns a [SiteSpan](crate::lexis::SiteSpan) of the tokens covered by the parse rule this
+    /// NodeRef object belongs to.
+    ///
+    /// Returns [None] if this NodeRef object does not belong to the parse tree branch specified
+    /// by the [`context`](ParseContext) argument.
+    fn span<N: Node, S: SourceCode<Token = N::Token>, R>(
+        &self,
+        context: &ParseContext<N, S, R>,
+    ) -> Option<SiteSpan>;
+}
+
+impl TransduceRef for NodeRef {
+    #[inline]
+    fn get<'context, N: Node, S: SourceCode<Token = N::Token>, R>(
+        &self,
+        context: &'context ParseContext<N, S, R>,
+    ) -> Option<&'context R> {
+        if &self.id != context.id() {
+            return None;
+        }
+
+        match &self.node_ref {
+            Ref::Repository { index, .. } if *index < context.data.len() => unsafe {
+                Some(&context.data.get_unchecked(*index).1)
+            },
+
+            _ => None,
+        }
+    }
+
+    #[inline]
+    fn get_mut<'context, N: Node, S: SourceCode<Token = N::Token>, R>(
+        &self,
+        context: &'context mut ParseContext<N, S, R>,
+    ) -> Option<&'context mut R> {
+        if &self.id != context.id() {
+            return None;
+        }
+
+        match &self.node_ref {
+            Ref::Repository { index, .. } if *index < context.data.len() => unsafe {
+                Some(&mut context.data.get_unchecked_mut(*index).1)
+            },
+
+            _ => None,
+        }
+    }
+
+    #[inline]
+    fn span<N: Node, S: SourceCode<Token = N::Token>, R>(
+        &self,
+        context: &ParseContext<N, S, R>,
+    ) -> Option<SiteSpan> {
+        if &self.id != context.id() {
+            return None;
+        }
+
+        match &self.node_ref {
+            Ref::Repository { index, .. } if *index < context.data.len() => unsafe {
+                Some(context.data.get_unchecked(*index).0.clone())
+            },
+
+            _ => None,
+        }
+    }
+}
+
+struct TransduceSyntaxSession<
+    'context,
+    'code,
+    N: Node,
+    S: SourceCode<Token = N::Token>,
+    R,
+    Tr: Transducer<N, S, R>,
+> {
+    transducer: &'context mut Tr,
+    token_cursor: S::Cursor<'code>,
+    context: &'context mut ParseContext<'code, N, S, R>,
+    pending_node_index: RefIndex,
+    pending_errors: Option<Repository<N::Error>>,
+}
+
+impl<'context, 'code, N, S, R, Tr> Identifiable
+    for TransduceSyntaxSession<'context, 'code, N, S, R, Tr>
+where
+    N: Node,
+    S: SourceCode<Token = N::Token>,
+    Tr: Transducer<N, S, R>,
+{
+    #[inline(always)]
+    fn id(&self) -> &Id {
+        self.context.id()
+    }
+}
+
+impl<'context, 'code, N, S, R, Tr> TokenCursor<'code>
+    for TransduceSyntaxSession<'context, 'code, N, S, R, Tr>
+where
+    N: Node,
+    S: SourceCode<Token = N::Token>,
+    Tr: Transducer<N, S, R>,
+{
+    type Token = <N as Node>::Token;
+
+    #[inline(always)]
+    fn advance(&mut self) -> bool {
+        self.token_cursor.advance()
+    }
+
+    #[inline(always)]
+    fn token(&mut self, distance: TokenCount) -> Option<&'code Self::Token> {
+        self.token_cursor.token(distance)
+    }
+
+    #[inline(always)]
+    fn site(&mut self, distance: TokenCount) -> Option<Site> {
+        self.token_cursor.site(distance)
+    }
+
+    #[inline(always)]
+    fn length(&mut self, distance: TokenCount) -> Option<Length> {
+        self.token_cursor.length(distance)
+    }
+
+    #[inline(always)]
+    fn string(&mut self, distance: TokenCount) -> Option<&'code str> {
+        self.token_cursor.string(distance)
+    }
+
+    #[inline(always)]
+    fn token_ref(&mut self, distance: TokenCount) -> TokenRef {
+        self.token_cursor.token_ref(distance)
+    }
+
+    #[inline(always)]
+    fn site_ref(&mut self, distance: TokenCount) -> SiteRef {
+        self.token_cursor.site_ref(distance)
+    }
+
+    #[inline(always)]
+    fn end_site_ref(&mut self) -> SiteRef {
+        self.token_cursor.end_site_ref()
+    }
+}
+
+impl<'context, 'code, N, S, R, Tr> SyntaxSession<'code>
+    for TransduceSyntaxSession<'context, 'code, N, S, R, Tr>
+where
+    N: Node,
+    S: SourceCode<Token = N::Token>,
+    Tr: Transducer<N, S, R>,
+{
+    type Node = N;
+
+    fn descend(&mut self, rule: SyntaxRule) -> NodeRef {
+        let start = self
+            .site_ref(0)
+            .to_site(self.context.code)
+            .expect("Start SiteRef dereference failure.");
+
+        let node = N::new(rule, self);
+
+        let end = self
+            .site_ref(0)
+            .to_site(self.context.code)
+            .expect("End SiteRef dereference failure.");
+
+        {
+            let cluster = match take(&mut self.context.cluster) {
+                Some((_, mut cluster)) => {
+                    let pending = replace(&mut cluster.primary, node);
+
+                    unsafe {
+                        cluster
+                            .nodes
+                            .set_unchecked(self.pending_node_index, pending)
+                    };
+
+                    cluster
+                }
+
+                None => {
+                    let errors = unsafe { take(&mut self.pending_errors).unwrap_unchecked() };
+
+                    Cluster {
+                        primary: node,
+                        nodes: Repository::default(),
+                        errors,
+                    }
+                }
+            };
+
+            self.context.cluster = Some((start..end, cluster))
+        }
+
+        let data = self.transducer.map(self.context);
+
+        let (span, cluster) = unsafe { self.context.cluster.as_mut().unwrap_unchecked() };
+
+        self.pending_node_index = cluster.nodes.reserve();
+
+        assert_eq!(
+            self.pending_node_index,
+            self.context.data.len(),
+            "Internal error. Node repository index and data vector index inconsistency",
+        );
+
+        self.context.data.push((span.clone(), data));
+
+        let node_ref = unsafe { cluster.nodes.make_ref(self.pending_node_index) };
+
+        NodeRef {
+            id: *self.context.id(),
+            cluster_ref: Ref::Primary,
+            node_ref,
+        }
+    }
+
+    #[inline]
+    fn error(&mut self, error: <N as Node>::Error) -> ErrorRef {
+        match &mut self.pending_errors {
+            None => {
+                let id = *self.context.id();
+
+                let (_, cluster) = unsafe { self.context.cluster.as_mut().unwrap_unchecked() };
+
+                ErrorRef {
+                    id,
+                    cluster_ref: Ref::Primary,
+                    error_ref: cluster.errors.insert(error),
+                }
+            }
+
+            Some(errors) => ErrorRef {
+                id: *self.context.id(),
+                cluster_ref: Ref::Primary,
+                error_ref: errors.insert(error),
+            },
+        }
+    }
+}
+
+#[inline]
+pub(crate) fn transduce<N, S, R, Tr>(code: &S, mut transducer: Tr) -> R
+where
+    N: Node,
+    S: SourceCode<Token = N::Token>,
+    Tr: Transducer<N, S, R>,
+{
+    let mut context = ParseContext {
+        code,
+        root: NodeRef::nil(),
+        cluster: None,
+        data: Vec::with_capacity(1),
+    };
+
+    let mut session = TransduceSyntaxSession {
+        transducer: &mut transducer,
+        token_cursor: code.cursor(..),
+        context: &mut context,
+        pending_node_index: 0,
+        pending_errors: Some(Repository::default()),
+    };
+
+    let _ = session.descend(ROOT_RULE);
+
+    let (_, last) = unsafe { session.context.data.pop().unwrap_unchecked() };
+
+    last
+}
diff --git a/work/crates/main/src/syntax/tree.rs b/work/crates/main/src/syntax/tree.rs
new file mode 100644
index 0000000..1002287
--- /dev/null
+++ b/work/crates/main/src/syntax/tree.rs
@@ -0,0 +1,117 @@
+////////////////////////////////////////////////////////////////////////////////
+// This file is a part of the "Lady Deirdre" Work, //
+// a compiler front-end foundation technology. //
+// //
+// This Work is a proprietary software with source available code. //
+// //
+// To copy, use, distribute, and contribute into this Work you must agree to //
+// the terms of the End User License Agreement: //
+// //
+// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. //
+// //
+// The Agreement let you use this Work in commercial and non-commercial //
+// purposes. Commercial use of the Work is free of charge to start, //
+// but the Agreement obligates you to pay me royalties //
+// under certain conditions. //
+// //
+// If you want to contribute into the source code of this Work, //
+// the Agreement obligates you to assign me all exclusive rights to //
+// the Derivative Work or contribution made by you //
+// (this includes GitHub forks and pull requests to my repository). //
+// //
+// The Agreement does not limit rights of the third party software developers //
+// as long as the third party software uses public API of this Work only, //
+// and the third party software does not incorporate or distribute //
+// this Work directly. //
+// //
+// AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY //
+// OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES //
+// RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. //
+// //
+// If you do not or cannot agree to the terms of this Agreement, //
+// do not use this Work. //
+// //
+// Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). //
+// All rights reserved. //
+////////////////////////////////////////////////////////////////////////////////
+
+use crate::{
+    arena::{Identifiable, Ref},
+    std::*,
+    syntax::{Cluster, Node, NodeRef},
+};
+
+/// A low-level interface to access and inspect the syntax structure of the compilation unit.
+///
+/// By convention, SyntaxTree should be implemented for compilation unit management objects such
+/// as the [Document](crate::Document) and [SyntaxBuffer](crate::syntax::SyntaxBuffer) objects
+/// that are supposed to manage the code's syntax grammar structure.
+///
+/// This trait:
+///  1. Specifies the syntax grammar through the [Node](crate::syntax::SyntaxTree::Node)
+///     associated type.
+///  2. Provides a [root](crate::syntax::SyntaxTree::root) function to obtain a weak reference to
+///     the root node of the syntax tree. An API user utilizes this function to enter the syntax
+///     tree structure, and uses the received reference to further inspect and traverse this
+///     syntax structure.
+///  3. Provides an [errors](crate::syntax::SyntaxTree::errors) function to obtain an
+///     [iterator](crate::syntax::SyntaxTree::ErrorIterator) over all syntax and semantic errors
+///     associated with this compilation unit.
+///  4. Provides a low-level interface to resolve higher-level weak references (such as
+///     [ClusterRef](crate::syntax::ClusterRef), [NodeRef](crate::syntax::NodeRef), or
+///     [ErrorRef](crate::syntax::ErrorRef)).
+///
+/// In practice an API user interacts with a small subset of this functionality directly.
+///
+/// To implement an extension library to this Crate with source code management of alternative
+/// designs, you can implement this trait over these objects. In this case these new objects will
+/// be able to interact with existing [Node](crate::syntax::Node) implementations, and the weak
+/// references belonging to them will work transparently with other conventional weak references.
+pub trait SyntaxTree: Identifiable {
+    /// Specifies the programming language syntax grammar.
+    ///
+    /// See [Node](crate::syntax::Node) for details.
+    type Node: Node;
+
+    /// Specifies a finite iterator over the source code syntax and semantic errors belonging
+    /// to this unit of compilation.
+    type ErrorIterator<'tree>: Identifiable
+        + Iterator<Item = &'tree <Self::Node as Node>::Error>
+        + FusedIterator
+    where
+        Self: 'tree;
+
+    /// Returns a [`weak reference`](crate::syntax::NodeRef) to the root Node of the syntax tree.
+    fn root(&self) -> &NodeRef;
+
+    /// Returns an iterator over all syntax and semantic errors belonging to this unit of
+    /// compilation.
+    fn errors(&self) -> Self::ErrorIterator<'_>;
+
+    /// Returns `true` if the [`Node Cluster`](crate::syntax::ClusterRef) referred to by the
+    /// specified low-level `cluster_ref` weak reference exists in this syntax tree instance.
+    ///
+    /// This is a low-level API used by the higher-level [ClusterRef](crate::syntax::ClusterRef),
+    /// [NodeRef](crate::syntax::NodeRef) and [ErrorRef](crate::syntax::ErrorRef) weak references
+    /// under the hood. An API user normally doesn't need to call this function directly.
+    fn contains(&self, cluster_ref: &Ref) -> bool;
+
+    /// Immutably dereferences a [Cluster](crate::syntax::Cluster) instance by the specified
+    /// low-level `cluster_ref` weak reference.
+    ///
+    /// Returns [None] if the referred Cluster does not exist in this instance.
+    ///
+    /// This is a low-level API used by the higher-level [ClusterRef](crate::syntax::ClusterRef),
+    /// [NodeRef](crate::syntax::NodeRef) and [ErrorRef](crate::syntax::ErrorRef) weak references
+    /// under the hood. An API user normally doesn't need to call this function directly.
+    fn get_cluster(&self, cluster_ref: &Ref) -> Option<&Cluster<Self::Node>>;
+
+    /// Mutably dereferences a [Cluster](crate::syntax::Cluster) instance by the specified
+    /// low-level `cluster_ref` weak reference.
+    ///
+    /// Returns [None] if the referred Cluster does not exist in this instance.
+    ///
+    /// This is a low-level API used by the higher-level [ClusterRef](crate::syntax::ClusterRef),
+    /// [NodeRef](crate::syntax::NodeRef) and [ErrorRef](crate::syntax::ErrorRef) weak references
+    /// under the hood. An API user normally doesn't need to call this function directly.
+    fn get_cluster_mut(&mut self, cluster_ref: &Ref) -> Option<&mut Cluster<Self::Node>>;
+}
diff --git a/work/rustfmt.toml b/work/rustfmt.toml
new file mode 100644
index 0000000..eca28ae
--- /dev/null
+++ b/work/rustfmt.toml
@@ -0,0 +1,40 @@
+################################################################################
+# This file is a part of the "Lady Deirdre" Work, #
+# a compiler front-end foundation technology. #
+# #
+# This Work is a proprietary software with source available code. #
+# #
+# To copy, use, distribute, and contribute into this Work you must agree to #
+# the terms of the End User License Agreement: #
+# #
+# https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md. #
+# #
+# The Agreement let you use this Work in commercial and non-commercial #
+# purposes. Commercial use of the Work is free of charge to start, #
+# but the Agreement obligates you to pay me royalties #
+# under certain conditions. #
+# #
+# If you want to contribute into the source code of this Work, #
+# the Agreement obligates you to assign me all exclusive rights to #
+# the Derivative Work or contribution made by you #
+# (this includes GitHub forks and pull requests to my repository). #
+# #
+# The Agreement does not limit rights of the third party software developers #
+# as long as the third party software uses public API of this Work only, #
+# and the third party software does not incorporate or distribute #
+# this Work directly. #
+# #
+# AS FAR AS THE LAW ALLOWS, THIS SOFTWARE COMES AS IS, WITHOUT ANY WARRANTY #
+# OR CONDITION, AND I WILL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES #
+# RELATED TO THIS SOFTWARE, UNDER ANY KIND OF LEGAL CLAIM. #
+# #
+# If you do not or cannot agree to the terms of this Agreement, #
+# do not use this Work. #
+# #
+# Copyright (c) 2022 Ilya Lakhin (Илья Александрович Лахин). #
+# All rights reserved. #
+################################################################################
+
+imports_granularity = "Crate"
+group_imports = "StdExternalCrate"
+imports_layout = "HorizontalVertical"