diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml new file mode 100644 index 0000000..11a4285 --- /dev/null +++ b/.github/workflows/publish-to-pypi.yml @@ -0,0 +1,36 @@ +name: Publish Python distributions to PyPI + +on: push + +jobs: + build-n-publish: + name: Build and publish Python distributions to PyPI + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@master + - name: Set up Python 3.9 + uses: actions/setup-python@v1 + with: + python-version: 3.9 + + - name: Install pypa/build + run: >- + python -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: >- + python -m + build + --sdist + --wheel + --outdir dist/ + . + + - name: Publish distribution to PyPI + if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@master + with: + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.gitignore b/.gitignore index ca9d5f7..1e4255f 100644 --- a/.gitignore +++ b/.gitignore @@ -159,5 +159,70 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. .idea/ + +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/dictionaries +.idea/**/shelf + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# CMake +cmake-build-debug/ +cmake-build-release/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + # trained models models/ + +*.xlsx +.vscode +/test/SCEUA_*.csv +/test/SCEUA_* +/hydromodel/app/*.csv +/test/test_data_camels_cc.py +/example/* +/results/ +/.hydrodataset*/ diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..8b0c447 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,23 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-20.04 + tools: + python: "3.9" + # You can also specify other tool versions: + # nodejs: "16" + # rust: "1.55" + # golang: "1.17" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# If using Sphinx, optionally build your docs in additional formats such as PDF +# formats: +# - pdf \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed.
+ + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. 
+ + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. 
You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. 
+ + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. 
+ + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. 
+ + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box".
+ + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..57fdc18 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include XXX *.csv *.txt # the package must include the files with .csv and .txt suffixes; XXX is the package name \ No newline at end of file diff --git a/definitions.py b/definitions.py new file mode 100644 index 0000000..b45dc23 --- /dev/null +++ b/definitions.py @@ -0,0 +1,18 @@ +""" +Author: Wenyu Ouyang +Date: 2021-07-26 08:51:23 +LastEditTime: 2022-11-16 18:47:10 +LastEditors: Wenyu Ouyang +Description: some configs for hydro-model-xaj +FilePath: \hydro-model-xaj\definitions.py +Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved. +""" +import os +from pathlib import Path + +ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) # This is your Project Root +path = Path(ROOT_DIR) +DATASET_DIR = os.path.join(path.parent.parent.absolute(), "data") +print("Please Check your directory:") +print("ROOT_DIR of the repo: ", ROOT_DIR) +print("DATASET_DIR of the repo: ", DATASET_DIR) diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..061f32f --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..e0a1d29 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,53 @@ +# Configuration file for the Sphinx documentation builder.
+# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys + +sys.path.insert(0, os.path.abspath("../..")) + +# -- Project information ----------------------------------------------------- + +project = "hydro-model-xaj" +copyright = "2021, Ouyang,Wenyu" +author = "Ouyang,Wenyu" + +# The full version, including alpha/beta/rc tags +release = "0.0.1" + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +# Add napoleon to the extensions list +extensions = ["sphinx.ext.napoleon"] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "alabaster" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] diff --git a/docs/source/hydromodel.app.rst b/docs/source/hydromodel.app.rst new file mode 100644 index 0000000..c38edd1 --- /dev/null +++ b/docs/source/hydromodel.app.rst @@ -0,0 +1,21 @@ +hydromodel.app package +====================== + +Submodules +---------- + +hydromodel.app.calibrate\_xaj module +------------------------------------ + +.. automodule:: hydromodel.app.calibrate_xaj + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: hydromodel.app + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/hydromodel.calibrate.rst b/docs/source/hydromodel.calibrate.rst new file mode 100644 index 0000000..5afd543 --- /dev/null +++ b/docs/source/hydromodel.calibrate.rst @@ -0,0 +1,37 @@ +hydromodel.calibrate package +============================ + +Submodules +---------- + +hydromodel.calibrate.calibrate\_ga module +----------------------------------------- + +.. automodule:: hydromodel.calibrate.calibrate_ga + :members: + :undoc-members: + :show-inheritance: + +hydromodel.calibrate.calibrate\_sceua module +-------------------------------------------- + +.. automodule:: hydromodel.calibrate.calibrate_sceua + :members: + :undoc-members: + :show-inheritance: + +hydromodel.calibrate.stat module +-------------------------------- + +.. automodule:: hydromodel.calibrate.stat + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: hydromodel.calibrate + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/hydromodel.models.rst b/docs/source/hydromodel.models.rst new file mode 100644 index 0000000..713d8f0 --- /dev/null +++ b/docs/source/hydromodel.models.rst @@ -0,0 +1,45 @@ +hydromodel.models package +========================= + +Submodules +---------- + +hydromodel.models.gr4j module +----------------------------- + +.. automodule:: hydromodel.models.gr4j + :members: + :undoc-members: + :show-inheritance: + +hydromodel.models.hymod module +------------------------------ + +.. automodule:: hydromodel.models.hymod + :members: + :undoc-members: + :show-inheritance: + +hydromodel.models.xaj module +---------------------------- + +.. automodule:: hydromodel.models.xaj + :members: + :undoc-members: + :show-inheritance: + +hydromodel.models.xaj\_river module +----------------------------------- + +.. automodule:: hydromodel.models.xaj_river + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: hydromodel.models + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/hydromodel.rst b/docs/source/hydromodel.rst new file mode 100644 index 0000000..6a7018e --- /dev/null +++ b/docs/source/hydromodel.rst @@ -0,0 +1,22 @@ +hydromodel package +================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hydromodel.app + hydromodel.calibrate + hydromodel.models + hydromodel.utils + hydromodel.visual + +Module contents +--------------- + +.. automodule:: hydromodel + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/hydromodel.utils.rst b/docs/source/hydromodel.utils.rst new file mode 100644 index 0000000..91c6a05 --- /dev/null +++ b/docs/source/hydromodel.utils.rst @@ -0,0 +1,21 @@ +hydromodel.utils package +======================== + +Submodules +---------- + +hydromodel.utils.hydro\_utils module +------------------------------------ + +.. automodule:: hydromodel.utils.hydro_utils + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: hydromodel.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/hydromodel.visual.rst b/docs/source/hydromodel.visual.rst new file mode 100644 index 0000000..2831c81 --- /dev/null +++ b/docs/source/hydromodel.visual.rst @@ -0,0 +1,21 @@ +hydromodel.visual package +========================= + +Submodules +---------- + +hydromodel.visual.pyspot\_plots module +-------------------------------------- + +.. automodule:: hydromodel.visual.pyspot_plots + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: hydromodel.visual + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/img/xaj.jpg b/docs/source/img/xaj.jpg new file mode 100644 index 0000000..2a3f659 Binary files /dev/null and b/docs/source/img/xaj.jpg differ diff --git a/docs/source/img/xaj_.jpg b/docs/source/img/xaj_.jpg new file mode 100644 index 0000000..ea29adf Binary files /dev/null and b/docs/source/img/xaj_.jpg differ diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..eaebc8b --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,20 @@ +.. hydro-model-xaj documentation master file, created by + sphinx-quickstart on Fri Dec 10 08:29:52 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to hydro-model-xaj's documentation! 
+=========================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + modules + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 0000000..ca1bdf5 --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +hydromodel +========== + +.. toctree:: + :maxdepth: 4 + + hydromodel diff --git a/environment-dev.yml b/environment-dev.yml new file mode 100644 index 0000000..81764f5 --- /dev/null +++ b/environment-dev.yml @@ -0,0 +1,21 @@ +name: xaj-dev +channels: + - conda-forge + - defaults +dependencies: + - python=3.10 + - ipykernel + - numpy + - numba + - pandas + - scikit-learn + - deap + - spotpy=1.5.14 + - seaborn + - tqdm + - sphinx + - pytest + - black + - pip + - pip: + - hydrodataset diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..02544ac --- /dev/null +++ b/environment.yml @@ -0,0 +1,19 @@ +name: xaj +channels: + - conda-forge + - defaults +dependencies: + - python=3.10 + - ipykernel + - numpy + - numba + - pandas + - scikit-learn + - deap + - spotpy=1.5.14 + - seaborn + - tqdm + - pytest + - pip + - pip: + - hydrodataset diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..98e257b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,23 @@ +numpy~=1.24.4 +pandas~=2.1.0 +bmipy~=2.0 +numba~=0.57.1 +scipy~=1.11.2 +pyyaml~=6.0.1 +deap~=1.4.1 +tqdm~=4.66.1 +spotpy~=1.5.14 +pytest~=7.4.2 +geopandas~=0.14.0 +sqlalchemy~=2.0.21 +shapely~=2.0.1 +setuptools~=68.2.2 +hydrodataset~=0.1.3 +scikit-learn~=1.3.0 +requests~=2.31.0 +grpc4bmi~=0.4.0 +pyogrio +cdsapi +whitebox +hydromodel-calibrate-base +xgboost \ No newline at end of file diff --git a/runbmiserver.py b/runbmiserver.py new file mode 100644 index 0000000..3148dda --- /dev/null +++ b/runbmiserver.py @@ -0,0 +1,11 @@ + +# import grpc +# from grpc4bmi.bmi_grpc_client import BmiClient + +# mymodel = BmiClient(grpc.insecure_channel("localhost:5000")) +# print(mymodel.get_component_name()) + +#不用跑runbmiserver,BmiClientSubProcess函数包含run-bmi-server +from grpc4bmi.bmi_client_subproc import BmiClientSubProcess +mymodel = BmiClientSubProcess(path = "/home/wangjingyi/code/hydro-model-xaj",module_name = "xaj.xaj_bmi.xajBmi") +print(mymodel.get_component_name()) \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..29b5c04 --- /dev/null +++ b/setup.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- + +from setuptools import setup, find_packages + +setup( + name="", # 输入项目名称 + version="", # 输入版本号 + keywords=[""], # 输入关键词 + description="", # 输入概述 + long_description="", # 输入描述 + + url="", # 输入项目Github仓库的链接 + author="", # 输入作者名字 + author_email="", # 输入作者邮箱 + license="", # 此为声明文件,一般填写 MIT_license + + packages=find_packages(), + include_package_data=True, + platforms="any", + install_requires=[""], # 输入项目所用的包 + python_requires='>= ', # Python版本要求 +) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/runxaj.yaml b/test/runxaj.yaml new file mode 100644 index 0000000..909e352 --- /dev/null +++ b/test/runxaj.yaml @@ -0,0 +1,43 @@ +# xaj model configuration +# The current model runs on the daily time step + start_time_str: "1990-01-01" + end_time_str: "2009-12-31" + time_units: "days" + # The current model runs on the hourly time step + # basin_area: "66400" + # start_time_str: "2021-06-01 00:00:00" + # end_time_str: 
"2021-08-31 23:00:00" + # time_units: "hours" +# initial condition + # forcing_data: "/example/01013500_lump_p_pe_q.txt" + # json_file: "/example/data_info.json" + # npy_file: "/example/basins_lump_p_pe_q.npy" + warmup_length: 24 + basin_area: 2097 + train_period: ["1990-01-01", "2000-12-31"] + test_period: ["2001-01-01", "2009-12-31"] + period: ["1990-01-01", "2009-12-31"] + cv_fold: 1 + algorithm: "SCE_UA" + +#model_info + model_name: "xaj_mz" + source_type: "sources" + source_book: "HF" + +#algorithm_SCE_UA + # algorithm_name: "SCE_UA" + # random_seed: 1234 + # rep: 2 + # ngs: 2 + # kstop: 1 + # peps: 0.001 + # pcento: 0.001 +#algorithm_GA + algorithm_name: "GA" + random_seed: 1234 + run_counts: 3 + pop_num: 50 + cross_prob: 0.5 + mut_prob: 0.5 + save_freq: 1 \ No newline at end of file diff --git a/test/test_clear_biliu_history_data.py b/test/test_clear_biliu_history_data.py new file mode 100644 index 0000000..9dce809 --- /dev/null +++ b/test/test_clear_biliu_history_data.py @@ -0,0 +1,49 @@ +import os.path + +import pandas as pd + +import definitions + + +def test_clear_biliu_history_data(): + history_path = os.path.join(definitions.ROOT_DIR, 'example/biliu_history_data') + st_rain_0_df = pd.read_csv(os.path.join(history_path, 'st_rain_c.CSV'), engine='c') + st_rain_1_df = pd.read_csv(os.path.join(history_path, 'st_rain_c_1.CSV'), names=st_rain_0_df.columns, engine='c') + st_rain_2_df = pd.read_csv(os.path.join(history_path, 'st_rain_c_2.CSV'), names=st_rain_0_df.columns, engine='c') + st_rain_3_df = pd.read_csv(os.path.join(history_path, 'st_rain_c_3.CSV'), names=st_rain_0_df.columns, engine='c') + st_rain_df = pd.concat([st_rain_0_df, st_rain_1_df, st_rain_2_df, st_rain_3_df], axis=0).reset_index().drop(columns=['index', + 'collecttime']) + st_water_0_df = pd.read_csv(os.path.join(history_path, 'st_water_c.CSV'), engine='c') + st_water_1_df = pd.read_csv(os.path.join(history_path, 'st_water_c_1.CSV'), names=st_water_0_df.columns, engine='c') + st_water_df = pd.concat([st_water_0_df, st_water_1_df], axis=0).reset_index().drop(columns=['index', 'collecttime']) + stpara_df = pd.read_csv(os.path.join(history_path, 'st_stpara_r.CSV'), engine='c', encoding='gbk') + splited_path = os.path.join(definitions.ROOT_DIR, 'example/biliu_history_data/history_data_splited') + for para_id in stpara_df['paraid']: + para_name = (stpara_df['paraname'][stpara_df['paraid'] == para_id]).values[0] + if para_name != '电压': + rain_para_df = st_rain_df[st_rain_df['paraid'] == para_id] + if len(rain_para_df) > 0: + stid = (stpara_df['stid'][stpara_df['paraid'] == para_id]).values[0] + rain_para_df.to_csv(os.path.join(splited_path, str(stid)+'_'+str(para_name)+'.csv')) + water_para_df = st_water_df[st_water_df['paraid'] == para_id] + if len(water_para_df) > 0: + stid = (stpara_df['stid'][stpara_df['paraid'] == para_id]).values[0] + water_para_df.to_csv(os.path.join(splited_path, str(stid)+'_'+str(para_name)+'.csv')) + + +def test_resample_biliu_data(): + splited_path = os.path.join(definitions.ROOT_DIR, 'example/biliu_history_data/history_data_splited') + splited_hourly_path = os.path.join(definitions.ROOT_DIR, 'example/biliu_history_data/history_data_splited_hourly') + for dir_name, sub_dir, files in os.walk(splited_path): + for file in files: + csv_path = os.path.join(splited_path, file) + prcp_df = pd.read_csv(csv_path, engine='c', parse_dates=['systemtime']).set_index(['systemtime']).drop(columns=['Unnamed: 0']) + if '雨量' in file: + # resample的时间点是向下取整,例如4:30和4:40的数据会被整合到4:00 + sum_series = 
prcp_df['paravalue'].resample('H').sum() + sum_df = pd.DataFrame({'paravalue': sum_series}) + sum_df.to_csv(os.path.join(splited_hourly_path, file.split('.')[0]+'_hourly.csv')) + if '水位' in file: # '水位' = water level: average to hourly means + mean_series = prcp_df['paravalue'].resample('H').mean() + mean_df = pd.DataFrame({'paravalue': mean_series}) + mean_df.to_csv(os.path.join(splited_hourly_path, file.split('.')[0]+'_hourly.csv')) diff --git a/test/test_cmp_rain_datas_vision.py b/test/test_cmp_rain_datas_vision.py new file mode 100644 index 0000000..dc01993 --- /dev/null +++ b/test/test_cmp_rain_datas_vision.py @@ -0,0 +1,52 @@ +import os.path +import pathlib + +import matplotlib.pyplot as plt +import pandas as pd + +import definitions + + +def test_compare_rain_by_pics(): + origin_rain_pics = os.path.join(definitions.ROOT_DIR, 'example/biliu_history_data/origin_biliu_pics') + filtered_by_time_pics = os.path.join(definitions.ROOT_DIR, 'example/filtered_rain_pics_by_time') + filtered_by_space_pics = os.path.join(definitions.ROOT_DIR, 'example/filtered_rain_pics_by_space') + biliu_rain_path = os.path.join(definitions.ROOT_DIR, 'example/biliu_history_data/history_data_splited_hourly') + for dir_name, sub_dirs, files in os.walk(biliu_rain_path): + for file in files: + if '水位' not in file: # skip water-level ('水位') files + csv_path = os.path.join(biliu_rain_path, file) + df = pd.read_csv(csv_path, engine='c') + df.plot(x='systemtime', y='paravalue', xlabel='time', ylabel='rain') + plt.savefig(os.path.join(origin_rain_pics, file.split('.')[0]+'.png')) + plt.close() # close each figure; df.plot opens a new one per file, so they pile up otherwise + sl_rain_path = os.path.join(definitions.ROOT_DIR, 'example/rain_datas') + for dir_name, sub_dirs, files in os.walk(sl_rain_path): + for file in files: + csv_path = os.path.join(sl_rain_path, file) + df = pd.read_csv(csv_path, engine='c') + df.plot(x='TM', y='DRP', xlabel='time', ylabel='rain') + plt.savefig(os.path.join(origin_rain_pics, file.split('.')[0]+'.png')) + plt.close() + total_rain_filtered_by_time_path = os.path.join(definitions.ROOT_DIR, 'example/filtered_data_by_time') + for dir_name, sub_dirs, files in os.walk(total_rain_filtered_by_time_path): + for file in files: + csv_path = os.path.join(total_rain_filtered_by_time_path, file) + df = pd.read_csv(csv_path, engine='c') + if ('systemtime' in df.columns) & ('paravalue' in df.columns): + df.plot(x='systemtime', y='paravalue', xlabel='time', ylabel='rain') + elif ('TM' in df.columns) & ('DRP' in df.columns): + df.plot(x='TM', y='DRP', xlabel='time', ylabel='rain') + plt.savefig(os.path.join(filtered_by_time_pics, file.split('.')[0]+'.png')) + plt.close() + total_rain_filtered_by_space_path = os.path.join(definitions.ROOT_DIR, 'example/filtered_rain_between_sl_biliu') + for dir_name, sub_dirs, files in os.walk(total_rain_filtered_by_space_path): + for file in files: + csv_path = os.path.join(total_rain_filtered_by_space_path, file) + df = pd.read_csv(csv_path, engine='c') + if ('systemtime' in df.columns) & ('paravalue' in df.columns): + df.plot(x='systemtime', y='paravalue', xlabel='time', ylabel='rain') + elif ('TM' in df.columns) & ('DRP' in df.columns): + df.plot(x='TM', y='DRP', xlabel='time', ylabel='rain') + plt.savefig(os.path.join(filtered_by_space_pics, file.split('_')[0]+'.png')) + plt.close() + + diff --git a/test/test_cmp_sl_era_rain.py b/test/test_cmp_sl_era_rain.py new file mode 100644 index 0000000..c4f61ea --- /dev/null +++ b/test/test_cmp_sl_era_rain.py @@ -0,0 +1,123 @@ +import datetime +import os + +import numpy as np +import pandas as pd +import xarray as xr +from geopandas import GeoDataFrame +from shapely import Point +import geopandas
as gpd + +import definitions + +gdf_biliu_shp: GeoDataFrame = gpd.read_file(os.path.join(definitions.ROOT_DIR, 'example/biliuriver_shp' + '/碧流河流域.shp'), engine='pyogrio') + + +# step1: kgo8gd/tnld77/tdi9atr3mir1e3g6 +def test_compare_era5_biliu_yr(): + rain_path = os.path.join(definitions.ROOT_DIR, 'example/rain_datas/') + sl_dict = {} + for root, dirs, files in os.walk(rain_path): + for file in files: + stcd = file.split('_')[0] + rain_table = pd.read_csv(os.path.join(rain_path, file), engine='c', parse_dates=['TM']) + file_yr_list = [] + for year in range(2018, 2023): + rain_sum_yr = rain_table['DRP'][rain_table['TM'].dt.year == year].sum() + file_yr_list.append(rain_sum_yr) + sl_dict[stcd] = file_yr_list + gdf_rain_stations = intersect_rain_stations().reset_index() + rain_coords = [(point.x, point.y) for point in gdf_rain_stations.geometry] + era5_dict = get_era5_history_dict(rain_coords, stcd_array=gdf_rain_stations['STCD']) + sl_df = pd.DataFrame(sl_dict, index=np.arange(2018, 2023, 1)).T + era5_df = pd.DataFrame(era5_dict, index=np.arange(2018, 2023, 1)).T + sl_np = sl_df.to_numpy() + era5_np = era5_df.to_numpy() + diff_np = np.round((era5_np - sl_np) / sl_np, 3) + diff_df = pd.DataFrame(data=diff_np, index=sl_df.index, columns=np.arange(2018, 2023, 1)) + sl_df.to_csv(os.path.join(definitions.ROOT_DIR, 'example/sl.csv')) + era5_df.to_csv(os.path.join(definitions.ROOT_DIR, 'example/era5_sl.csv')) + diff_df.to_csv(os.path.join(definitions.ROOT_DIR, 'example/era5_sl_diff.csv')) + + +def test_cmp_biliu_era_rain(): + history_rain_path = os.path.join(definitions.ROOT_DIR, 'example/biliu_history_data/history_data_splited_hourly') + biliu_dict = {} + for root, dirs, files in os.walk(history_rain_path): + for file in files: + stcd = file.split('_')[0] + if '雨量' in file: + rain_table = pd.read_csv(os.path.join(history_rain_path, file), engine='c', parse_dates=['systemtime']) + file_yr_list = [] + for year in range(2018, 2023): + rain_sum_yr = rain_table['paravalue'][rain_table['systemtime'].dt.year == year].sum() + file_yr_list.append(rain_sum_yr) + biliu_dict[stcd] = file_yr_list + biliu_stas = pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/biliu_history_data/st_stbprp_b.CSV'), encoding='gbk') + # In the Biliu historical table st_stbprp_b, sttp is 2 for rain gauges and 1 for water-level stations + # 玉石水库 (station 152) has no data in the st_water_c table, so it was removed from st_stpara_r.CSV and st_stbprp_b.CSV + # 小宋屯 (station 138) appears in st_stpara_r as both a rain gauge and a water-level station, but st_stbprp_b lists only a water-level station, + # so for convenience its sttp in st_stbprp_b.CSV was changed to 2 + stcd_array = biliu_stas['stid'][biliu_stas['sttp'] == 2].tolist() + biliu_lons = biliu_stas['lgtd'][biliu_stas['sttp'] == 2].reset_index()['lgtd'] + biliu_lats = biliu_stas['lttd'][biliu_stas['sttp'] == 2].reset_index()['lttd'] + rain_coords = [(biliu_lons[i], biliu_lats[i]) for i in range(0, len(stcd_array))] + era5_dict = get_era5_history_dict(rain_coords, stcd_array) + biliu_df = pd.DataFrame(biliu_dict, index=np.arange(2018, 2023, 1)).T + era5_df = pd.DataFrame(era5_dict, index=np.arange(2018, 2023, 1)).T + biliu_np = biliu_df.to_numpy() + era5_np = era5_df.to_numpy() + diff_np = np.round((era5_np - biliu_np) / biliu_np, 3) + diff_df = pd.DataFrame(data=diff_np, index=biliu_df.index, columns=np.arange(2018, 2023, 1)) + biliu_df.to_csv(os.path.join(definitions.ROOT_DIR, 'example/biliu.csv')) + era5_df.to_csv(os.path.join(definitions.ROOT_DIR, 'example/era5_biliu.csv')) + diff_df.to_csv(os.path.join(definitions.ROOT_DIR, 'example/era5_biliu_diff.csv')) + + +def get_era5_history_dict(rain_coords, stcd_array): + era_path = os.path.join(definitions.ROOT_DIR, 'example/era5_xaj/') + 
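 + # Station coordinates are rounded to one decimal place below so that .sel() hits + # grid nodes exactly; this assumes the downloaded ERA5-Land files keep the native + # 0.1 degree regular grid, e.g. (122.37, 39.84) -> (122.4, 39.8). + 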
rain_round_coords = [(round(coord[0], 1), round(coord[1], 1)) for coord in rain_coords] + era5_dict = {} + for i in range(0, len(rain_round_coords)): + stcd = stcd_array[i] + coord = rain_round_coords[i] + year_sum_list = [] + for year in range(2018, 2023): + year_sum = 0 + for month in range(4, 11): + if month < 10: + path_era_file = os.path.join(era_path, 'era5_datas_' + str(year) + str(0) + str(month) + '.nc') + else: + path_era_file = os.path.join(era_path, 'era5_datas_' + str(year) + str(month) + '.nc') + era_ds = xr.open_dataset(path_era_file) + # 'tp' is total precipitation in the ERA5 data + month_rain = era_ds.sel(longitude=coord[0], latitude=coord[1])['tp'] + # there is a date offset here (the 00:00 value of each day is the accumulation of the + # previous day), but at the annual scale the error is small enough to tolerate + month_rain_daily = month_rain.loc[month_rain.time.dt.time == datetime.time(0, 0)] + # ERA5 stores precipitation in metres, so multiply by 1000 to get mm + month_rain_sum = (month_rain_daily.sum().to_numpy()) * 1000 + year_sum += month_rain_sum + year_sum_list.append(year_sum) + era5_dict[stcd] = year_sum_list + return era5_dict + + +def intersect_rain_stations(): + pp_df = pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/rain_stations.csv'), engine='c').drop( + columns=['Unnamed: 0']) + geo_list = [] + stcd_list = [] + stnm_list = [] + for i in range(0, len(pp_df)): + xc = pp_df['LGTD'][i] + yc = pp_df['LTTD'][i] + stcd_list.append(pp_df['STCD'][i]) + stnm_list.append(pp_df['STNM'][i]) + geo_list.append(Point(xc, yc)) + gdf_pps: GeoDataFrame = gpd.GeoDataFrame({'STCD': stcd_list, 'STNM': stnm_list}, geometry=geo_list) + gdf_rain_stations = gpd.sjoin(gdf_pps, gdf_biliu_shp, 'inner', 'intersects') + gdf_rain_stations = gdf_rain_stations[~(gdf_rain_stations['STCD'] == '21422950')] + gdf_rain_stations.to_file( + os.path.join(definitions.ROOT_DIR, 'example/biliuriver_shp/biliu_basin_rain_stas.shp')) + return gdf_rain_stations diff --git a/test/test_compare_biliu_songliao.py b/test/test_compare_biliu_songliao.py new file mode 100644 index 0000000..9a24b53 --- /dev/null +++ b/test/test_compare_biliu_songliao.py @@ -0,0 +1,60 @@ +import os +import pandas as pd +from geopandas import GeoDataFrame + +import definitions +import geopandas as gpd +from matplotlib import pyplot as plt + + +def test_compare_bs_average(): + # selected stations: 金店, 桂云花, 天益, 转山湖, 大姜屯 + test_dict = {'4002': '21423132', '4003': '21423100', '4006': '21423000', + '4010': '21423050', '4015': '21422600'} + gdf_biliu_shp: GeoDataFrame = gpd.read_file(os.path.join(definitions.ROOT_DIR, 'example/biliuriver_shp' + '/碧流河流域.shp'), engine='pyogrio') + biliu_series_dict = {} + sl_series_dict = {} + start_date = '2022-08-07' + end_date = '2022-08-25' + test_range = pd.date_range(start_date, end_date, freq='D') + test_range_time = pd.date_range('2022-08-07 00:00:00', '2022-08-25 00:00:00', freq='D') + exam_biliu_data = os.path.join(definitions.ROOT_DIR, 'example/biliu_rain_daily_datas') + exam_sl_data = os.path.join(definitions.ROOT_DIR, 'example/songliao_exam_stas') + for key in test_dict.keys(): + biliu_rain = pd.read_csv(os.path.join(exam_biliu_data, key+'_biliu_rain.csv'), engine='c', parse_dates=['InsertTime']) + sl_rain = pd.read_csv(os.path.join(exam_sl_data, test_dict[key]+'_rain.csv'), engine='c', parse_dates=['TM']) + biliu_rain['InsertTime'] = biliu_rain['InsertTime'].dt.date + biliu_rain = biliu_rain.set_index('InsertTime') + sl_rain = sl_rain.set_index('TM') + biliu_rain_list = biliu_rain.loc[test_range, 'Rainfall'].to_list() + sl_time_list = sl_rain.loc[test_range_time, 'DRP'].fillna(0).to_list() + biliu_series_dict[key] = biliu_rain_list + sl_series_dict[test_dict[key]] = sl_time_list + biliu_df =
pd.DataFrame(biliu_series_dict) + sl_df = pd.DataFrame(sl_series_dict) + voronoi_gdf = gpd.read_file(os.path.join(definitions.ROOT_DIR, 'example/biliuriver_shp/碧流河流域_副本.shp')) + voronoi_gdf = voronoi_gdf.set_index('STCD') + stcd_area_dict = {} + for stcd in test_dict.values(): + polygon = voronoi_gdf.loc[stcd]['geometry'] + area = polygon.area*12100 + stcd_area_dict[stcd] = area + rain_aver_list_biliu = [] + for i in range(0, len(biliu_df)): + rain_aver = 0 + for stcd in biliu_df.columns: + rain_aver += (biliu_df.iloc[i])[stcd] * stcd_area_dict[test_dict[stcd]]/gdf_biliu_shp['area'][0] + rain_aver_list_biliu.append(rain_aver) + rain_aver_list_sl = [] + for i in range(0, len(sl_df)): + rain_aver = 0 + for stcd in sl_df.columns: + rain_aver += (sl_df.iloc[i])[stcd] * stcd_area_dict[stcd]/gdf_biliu_shp['area'][0] + rain_aver_list_sl.append(rain_aver) + result = pd.DataFrame({'Date': test_range, 'Biliu': rain_aver_list_biliu, 'SL': rain_aver_list_sl}, columns=['Date', 'Biliu', 'SL']) + # result = result.set_index('Date') + result.plot(marker='o') + plt.xlabel('Date') + plt.ylabel('Rainfall') + plt.show() \ No newline at end of file diff --git a/test/test_data.py b/test/test_data.py new file mode 100644 index 0000000..842fa86 --- /dev/null +++ b/test/test_data.py @@ -0,0 +1,79 @@ +""" +Author: Wenyu Ouyang +Date: 2022-10-25 21:16:22 +LastEditTime: 2022-11-28 14:50:30 +LastEditors: Wenyu Ouyang +Description: Test for data preprocess +FilePath: \hydro-model-xaj\test\test_data.py +Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved. +""" +import os +from collections import OrderedDict + +import numpy as np +import pandas as pd +import pytest + +import definitions +from hydromodel.utils import hydro_utils + + +@pytest.fixture() +def txt_file(): + return os.path.join( + definitions.ROOT_DIR, "hydromodel", "example", "01013500_lump_p_pe_q.txt" + ) + + +@pytest.fixture() +def json_file(): + return os.path.join(definitions.ROOT_DIR, "hydromodel", "example", "data_info.json") + + +@pytest.fixture() +def npy_file(): + return os.path.join( + definitions.ROOT_DIR, "hydromodel", "example", "basins_lump_p_pe_q.npy" + ) + + +def test_save_data(txt_file, json_file, npy_file): + data = pd.read_csv(txt_file) + print(data.columns) + # Note: The units are all mm/day! 
For streamflow, data is divided by basin's area + variables = ["prcp(mm/day)", "petfao56(mm/day)", "streamflow(mm/day)"] + data_info = OrderedDict( + { + "time": data["date"].values.tolist(), + "basin": ["01013500"], + "variable": variables, + "area": [2252.7], + } + ) + hydro_utils.serialize_json(data_info, json_file) + # 1 ft3 = 0.02831685 m3 + ft3tom3 = 2.831685e-2 + # 1 km2 = 10^6 m2 + km2tom2 = 1e6 + # 1 m = 1000 mm + mtomm = 1000 + # 1 day = 24 * 3600 s + daytos = 24 * 3600 + # trans ft3/s to mm/day + basin_area = 2252.7 + data[variables[-1]] = ( + data[["streamflow(ft3/s)"]].values + * ft3tom3 + / (basin_area * km2tom2) + * mtomm + * daytos + ) + df = data[variables] + hydro_utils.serialize_numpy(np.expand_dims(df.values, axis=1), npy_file) + + +def test_load_data(txt_file, npy_file): + data_ = pd.read_csv(txt_file) + df = data_[["prcp(mm/day)", "petfao56(mm/day)"]] + data = hydro_utils.unserialize_numpy(npy_file)[:, :, :2] + np.testing.assert_array_equal(data, np.expand_dims(df.values, axis=1)) diff --git a/test/test_draw_sessions.py b/test/test_draw_sessions.py new file mode 100644 index 0000000..e75d0fd --- /dev/null +++ b/test/test_draw_sessions.py @@ -0,0 +1,49 @@ + + +def test_plot_sessions(): + ''' + wjy_calibrate_time = pd.read_excel(os.path.join(definitions.ROOT_DIR, 'example/洪水率定时间.xlsx')) + wjy_calibrate_time['starttime'] = pd.to_datetime(wjy_calibrate_time['starttime'], format='%Y/%m/%d %H:%M:%S') + wjy_calibrate_time['endtime'] = pd.to_datetime(wjy_calibrate_time['endtime'], format='%Y/%m/%d %H:%M:%S') + for i in range(14, 25): + start_time = wjy_calibrate_time['starttime'][i] + end_time = wjy_calibrate_time['endtime'][i] + x = pd.date_range(start_time, end_time, freq='H') + fig, ax = plt.subplots(figsize=(9, 6)) + p = ax.twinx() + filtered_rain_aver_df.index = pd.to_datetime(filtered_rain_aver_df.index) + flow_mm_h.index = pd.to_datetime(flow_mm_h.index) + y_rain = filtered_rain_aver_df[start_time: end_time] + y_flow = flow_mm_h[start_time:end_time] + ax.bar(x, y_rain.to_numpy().flatten(), color='red', edgecolor='k', alpha=0.6, width=0.04) + ax.set_ylabel('rain(mm)') + ax.invert_yaxis() + p.plot(x, y_flow, color='green', linewidth=2) + p.set_ylabel('flow(mm/h)') + plt.savefig(os.path.join(definitions.ROOT_DIR, 'example/rain_flow_event_'+str(start_time).split(' ')[0]+'_wy.png')) + # XXX_FLOW and XXX_RAIN differ in length; the cause is not known yet, it may be a problem + # with the data itself (e.g. from interpolation) or units that were never adjusted + plt.figure() + x = time + rain_event_array = np.zeros(shape=len(time)) + flow_event_array = np.zeros(shape=len(time)) + for i in range(0, len(BEGINNING_RAIN)): + rain_event = filtered_rain_aver_df['rain'][BEGINNING_RAIN[i]: END_RAIN[i]] + beginning_index = np.argwhere(time == BEGINNING_RAIN[i])[0][0] + end_index = np.argwhere(time == END_RAIN[i])[0][0] + rain_event_array[beginning_index: end_index + 1] = rain_event + for i in range(0, len(BEGINNING_FLOW)): + flow_event = flow_mm_h[BEGINNING_FLOW[i]: END_FLOW[i]] + beginning_index = np.argwhere(time == BEGINNING_FLOW[i])[0][0] + end_index = np.argwhere(time == END_FLOW[i])[0][0] + flow_event_array[beginning_index: end_index + 1] = flow_event + y_rain = rain_event_array + y_flow = flow_event_array + fig, ax = plt.subplots(figsize=(16, 12)) + p = ax.twinx() + ax.bar(x, y_rain, color='red', alpha=0.6) + ax.set_ylabel('rain(mm)') + ax.invert_yaxis() + p.plot(x, y_flow, color='green', linewidth=2) + p.set_ylabel('flow(mm/h)') + plt.savefig(os.path.join(definitions.ROOT_DIR, 'example/rain_flow_events.png')) + ''' \ No newline at end of file diff --git a/test/test_era5_pet.py
b/test/test_era5_pet.py new file mode 100644 index 0000000..e885c74 --- /dev/null +++ b/test/test_era5_pet.py @@ -0,0 +1,82 @@ +import os +from datetime import datetime + +import cdsapi +import calendar + +import numpy as np +import pandas as pd +import xarray as xr + +import definitions + +c = cdsapi.Client()  # create the CDS API client + +# request dictionary describing the data to download +dic = { + 'product_type': 'reanalysis-era5-land',  # product type + 'format': 'netcdf',  # data format + 'variable': 'potential_evaporation',  # variable name + 'year': '',  # year, filled in per request + 'month': '',  # month, filled in per request + 'day': [],  # days, filled in per request + 'time': [  # hours + '00:00', '01:00', '02:00', '03:00', '04:00', '05:00', + '06:00', '07:00', '08:00', '09:00', '10:00', '11:00', + '12:00', '13:00', '14:00', '15:00', '16:00', '17:00', + '18:00', '19:00', '20:00', '21:00', '22:00', '23:00' + ], + 'area': [41, 122, 39, 123] +} + + +def test_download_era5(): + # download every month from 2013 to 2023 in a loop + for y in range(2013, 2024):  # loop over years + for m in range(1, 13):  # loop over months + day_num = calendar.monthrange(y, m)[1]  # number of days in this month + # update year, month and day in the request dict + dic['year'] = str(y) + dic['month'] = str(m).zfill(2) + dic['day'] = [str(d).zfill(2) for d in range(1, day_num + 1)] + filename = os.path.join(definitions.ROOT_DIR, 'example/era5_data/', 'era5_datas_' + str(y) + str(m).zfill(2) + '.nc')  # output file path + c.retrieve('reanalysis-era5-land', dic, filename) + + +def test_average_pet(): + path = os.path.join(definitions.ROOT_DIR, 'example/era5_data/') + ds = xr.open_dataset(path + 'era5/201201.nc') + lats = ds['latitude'].values + lons = ds['longitude'].values + grid = pd.read_excel(path + 'grid.xlsx') + test = pd.DataFrame({'num': range(1, 34), 'latitude': np.nan, 'longitude': np.nan}) + test = test.astype(float) + for i in range(len(grid['lat'])): + # .loc avoids the chained assignment of the original, which did not write back + test.loc[i, 'latitude'] = np.where(lats == grid['lat'][i])[0][0] + test.loc[i, 'longitude'] = np.where(lons == grid['lon'][i])[0][0] + filenames = ['era5/' + f for f in os.listdir(path + 'era5/')] + pev_ = pd.DataFrame({'num': range(1, 96433), 'pev': np.nan}) + pev_ = pev_.astype(float) + for i in range(len(test['num'])): + sum_time = 0 + for f in filenames: + ds = xr.open_dataset(path + f) + pev = ds['pev'].values + # unit='s' assumes the raw time values are hours since 1900-01-01 + times = pd.to_datetime(ds['time'].values * 3600, unit='s', origin='1900-01-01') + for t in range(len(times)): + # select the row with .loc (the original indexed column-first, a KeyError), + # and cast the stored grid indices back to int before positional indexing; + # ERA5-Land pev is negative and in metres, the -100 factor flips the sign + # with the scaling the authors chose + pev_.loc[t + sum_time, 'pev'] = pev[int(test['longitude'][i]), int(test['latitude'][i]), t] * -100 + pev_[pev_ < 0] = 0 + sum_time += len(times) + pev_.to_excel(path + 'pet_calc/pev_' + + str(grid['lat'][i]) + '_' + str(grid['lon'][i]) + '.xlsx') + start = datetime(2012, 1, 1, 8) + end = datetime(2023, 1, 1, 7) + timesteps = pd.date_range(start, end, freq='H') + pre = pd.DataFrame(index=timesteps, columns=range(1, 34)) + for i in range(len(grid['FID'])): + data = pd.read_excel(path + 'pet_calc/pev_' + + str(grid['lat'][i]) + '_' + str(grid['lon'][i]) + '.xlsx') + # .to_numpy() so the hourly values are not re-aligned on the mismatched index + pre.iloc[:, i] = data['pev'].to_numpy() + pev_jieguo = pd.DataFrame({'time': timesteps}) + pev_jieguo['pre'] = pre.mean(axis=1).to_numpy() + pev_jieguo.to_excel(path + 'pet_calc/PET_result.xlsx') diff --git a/test/test_filter_abnormal_and_basin_average.py b/test/test_filter_abnormal_and_basin_average.py new file mode 100644 index 0000000..b0903d9 --- /dev/null +++ b/test/test_filter_abnormal_and_basin_average.py @@ -0,0 +1,530 @@ +import os +import shutil + +import geopandas as gpd +import hydromodel.models.xaj +import numpy as np +import pandas as pd +import whitebox +from geopandas import GeoDataFrame +from hydromodel.calibrate.calibrate_ga import calibrate_by_ga +from hydromodel.utils.dmca_esr import step1_step2_tr_and_fluctuations_timeseries, step3_core_identification, \
step4_end_rain_events, \ + step5_beginning_rain_events, step6_checks_on_rain_events, step7_end_flow_events, step8_beginning_flow_events, \ + step9_checks_on_flow_events, step10_checks_on_overlapping_events +from hydromodel.utils.stat import statRmse +from matplotlib import pyplot as plt +from pandas import DataFrame +from shapely import distance, Point + +import definitions + +sl_stas_table: GeoDataFrame = gpd.read_file( + os.path.join(definitions.ROOT_DIR, 'example/biliuriver_shp/biliu_basin_rain_stas.shp'), engine='pyogrio') +biliu_stas_table = pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/biliu_history_data/st_stbprp_b.CSV'), + encoding='gbk') +gdf_biliu_shp: GeoDataFrame = gpd.read_file(os.path.join(definitions.ROOT_DIR, 'example/biliuriver_shp/碧流河流域.shp'), + engine='pyogrio') +# In the Biliu historical data, stations 128, 138, 139 and 158 deviate strongly from the ERA5 data and are discarded +# In the Songliao Commission data, the 2022 records deviate strongly from ERA5, probably because April, May, September and October are missing +# In the Biliu historical data, stations 126, 127, 129, 130, 133, 141, 142 and 154 show extremes above ten thousand and need separate handling or outright removal +# 134, 137, 143 and 144 show extremes in the thousands and need further screening +filter_station_list = [128, 138, 139, 158] + + +class my: + data_dir = '.' + + @classmethod + def my_callback(cls, value): + if "*" not in value and "%" not in value: + print(value) + if "Elapsed Time" in value: + print('--------------') + + @classmethod + def my_callback_home(cls, value): + if "*" not in value and "%" not in value: + print(value) + if "Output file written" in value: + os.chdir(cls.data_dir) + + +def voronoi_from_shp(src, des, data_dir='.'): + my.data_dir = os.path.abspath(data_dir) + src = os.path.abspath(src) + des = os.path.abspath(des) + wbt = whitebox.WhiteboxTools() + wbt.voronoi_diagram(src, des, callback=my.my_callback) + + +def test_calc_filter_station_list(): + # this could be chained to the comparison tests above instead of reading the csv files + era5_sl_diff_df = pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/era5_sl_diff.csv')).rename( + columns={'Unnamed: 0': 'STCD'}) + era5_biliu_diff_df = pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/era5_biliu_diff.csv')).rename( + columns={'Unnamed: 0': 'STCD'}) + biliu_hourly_splited_path = os.path.join(definitions.ROOT_DIR, + 'example/biliu_history_data/history_data_splited_hourly') + biliu_hourly_filtered_path = os.path.join(definitions.ROOT_DIR, 'example/filtered_data_by_time') + sl_hourly_path = os.path.join(definitions.ROOT_DIR, 'example/rain_datas') + station_vari_dict = {} + station_vari_dict_by_time = {} + filter_station_list = [] + for i in range(0, len(era5_biliu_diff_df)): + if np.inf in era5_biliu_diff_df.iloc[i].to_numpy(): + filter_station_list.append(era5_biliu_diff_df['STCD'][i]) + for i in range(0, len(era5_sl_diff_df)): + if np.inf in era5_sl_diff_df.iloc[i].to_numpy(): + filter_station_list.append(era5_sl_diff_df['STCD'][i]) + for dir_name, sub_dirs, files in os.walk(biliu_hourly_splited_path): + for file in files: + stcd = file.split('_')[0] + csv_path = os.path.join(biliu_hourly_splited_path, file) + biliu_df = pd.read_csv(csv_path, engine='c') + if int(stcd) not in filter_station_list: + para_std = biliu_df['paravalue'].std() + para_aver = biliu_df['paravalue'].mean() + vari_corr = para_std / para_aver
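 + # std/mean is the coefficient of variation of the hourly series; stations whose + # CV exceeds 3 are flagged as suspect (threshold as chosen by the authors) + 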
station_vari_dict[stcd] = vari_corr + if vari_corr > 3: + filter_station_list.append(int(stcd)) + for dir_name, sub_dirs, files in os.walk(sl_hourly_path): + for file in files: + stcd = file.split('_')[0] + csv_path = os.path.join(sl_hourly_path, file) + sl_df = pd.read_csv(csv_path, engine='c') + if int(stcd) not in filter_station_list: + para_std = sl_df['DRP'].std() + para_aver = sl_df['DRP'].mean() + vari_corr = para_std / para_aver + station_vari_dict[stcd] = vari_corr + if vari_corr > 3: + filter_station_list.append(int(stcd)) + for dir_name, sub_dirs, files in os.walk(biliu_hourly_filtered_path): + for file in files: + stcd = file.split('.')[0] + csv_path = os.path.join(biliu_hourly_filtered_path, file) + data_df = pd.read_csv(csv_path, engine='c') + if int(stcd) not in filter_station_list: + if 'DRP' in data_df.columns: + para_std = data_df['DRP'].std() + para_aver = data_df['DRP'].mean() + vari_corr = para_std / para_aver + station_vari_dict_by_time[stcd] = vari_corr + if vari_corr > 3: + filter_station_list.append(int(stcd)) + elif 'paravalue' in data_df.columns: + para_std = data_df['paravalue'].std() + para_aver = data_df['paravalue'].mean() + vari_corr = para_std / para_aver + station_vari_dict_by_time[stcd] = vari_corr + if vari_corr > 3: + filter_station_list.append(int(stcd)) + print(filter_station_list) + print(station_vari_dict) + print(station_vari_dict_by_time) + return filter_station_list + + +def test_filter_abnormal_sl_and_biliu(): + biliu_his_stas_path = os.path.join(definitions.ROOT_DIR, 'example/biliu_history_data/history_data_splited_hourly') + sl_biliu_stas_path = os.path.join(definitions.ROOT_DIR, 'example/rain_datas') + time_df_dict_biliu_his = get_filter_data_by_time(biliu_his_stas_path, filter_station_list) + time_df_dict_sl_biliu = get_filter_data_by_time(sl_biliu_stas_path) + time_df_dict_sl_biliu.update(time_df_dict_biliu_his) + space_df_dict = get_filter_data_by_space(time_df_dict_sl_biliu, filter_station_list) + for key in space_df_dict.keys(): + space_df_dict[key].to_csv( + os.path.join(definitions.ROOT_DIR, 'example/filtered_rain_between_sl_biliu', key + '_filtered.csv')) + + +def get_filter_data_by_time(data_path, filter_list=None): + if filter_list is None: + filter_list = [] + time_df_dict = {} + test_filtered_by_time_path = os.path.join(definitions.ROOT_DIR, 'example/filtered_data_by_time') + for dir_name, sub_dir, files in os.walk(data_path): + for file in files: + stcd = file.split('_')[0] + feature = file.split('_')[1] + cached_csv_path = os.path.join(test_filtered_by_time_path, stcd + '.csv') + if int(stcd) not in filter_list and not os.path.exists(cached_csv_path) and feature != '水位': + drop_list = [] + csv_path = os.path.join(data_path, file) + table = pd.read_csv(csv_path, engine='c') + # filter indices by a 200 mm maximum hourly threshold and by hourly-rainfall consistency + # the Songliao data are not strictly hourly, so for simplicity everything is resampled to hours + if 'DRP' in table.columns: + table['TM'] = pd.to_datetime(table['TM'], format='%Y-%m-%d %H:%M:%S') + table = table.drop(columns=['Unnamed: 0']).drop(index=table.index[table['DRP'].isna()]) + # station 21422722 contains a record at 2021-04-02 11:36 + # for records already on the hour, resampling and summing leaves the values unchanged + table = table.set_index('TM').resample('H').sum() + cached_time_array = table.index[table['STCD'] != 0].to_numpy() + cached_drp_array = table['DRP'][table['STCD'] != 0].to_numpy() + table['STCD'] = int(stcd) + table['DRP'] = np.nan + table['DRP'][cached_time_array] = cached_drp_array + table = table.fillna(-1).reset_index() + for i in range(0, len(table['DRP'])): + if table['DRP'][i] > 200: + drop_list.append(i) + if i >= 5: + hour_slice = table['DRP'][i - 5:i + 1].to_numpy() + # a six-hour window of identical nonzero values is assumed to be a stuck + # gauge; the original `hour_slice.all() == np.mean(hour_slice)` compared + # a boolean with the mean and never implemented this check + if np.all(hour_slice == hour_slice[0]) and hour_slice[0] != 0: + drop_list.extend(list(range(i - 5, i + 1))) + drop_array = np.unique(np.array(drop_list, dtype=int)) + table = table.drop(index=drop_array) + drop_array_minus = table.index[table['DRP'] == -1] + table = table.drop(index=drop_array_minus) + if 'paravalue' in table.columns: + for i in range(0, len(table['paravalue'])): + if table['paravalue'][i] > 200: + drop_list.append(i)
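 + # same consistency screen as the DRP branch above: a six-hour window of + # identical nonzero values, e.g. [1.2, 1.2, 1.2, 1.2, 1.2, 1.2], is treated + # as a stuck gauge and the whole window is dropped + 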
if i >= 5: + hour_slice = table['paravalue'][i - 5:i + 1].to_numpy() + if np.all(hour_slice == hour_slice[0]) and hour_slice[0] != 0: + drop_list.extend(list(range(i - 5, i + 1))) + drop_array = np.unique(np.array(drop_list, dtype=int)) + table = table.drop(index=drop_array) + time_df_dict[stcd] = table + table.to_csv(cached_csv_path) + elif int(stcd) not in filter_list and os.path.exists(cached_csv_path) and feature != '水位': + table = pd.read_csv(cached_csv_path, engine='c') + time_df_dict[stcd] = table + return time_df_dict + + +def get_filter_data_by_space(time_df_dict, filter_list): + neighbor_stas_dict = find_neighbor_dict(sl_stas_table, biliu_stas_table, filter_list)[0] + gdf_stid_total = find_neighbor_dict(sl_stas_table, biliu_stas_table, filter_list)[1] + space_df_dict = {} + for key in time_df_dict: + time_drop_list = [] + neighbor_stas = neighbor_stas_dict[key] + table = time_df_dict[key] + if 'DRP' in table.columns: + table = table.set_index('TM') + if 'paravalue' in table.columns: + table = table.set_index('systemtime') + for time in table.index: + rain_time_dict = {} + for neighbor in neighbor_stas: + neighbor_df = time_df_dict[str(neighbor)] + if 'DRP' in neighbor_df.columns: + neighbor_df = neighbor_df.set_index('TM') + if time in neighbor_df.index: + rain_time_dict[str(neighbor)] = neighbor_df['DRP'][time] + if 'paravalue' in neighbor_df.columns: + neighbor_df = neighbor_df.set_index('systemtime') + if time in neighbor_df.index: + rain_time_dict[str(neighbor)] = neighbor_df['paravalue'][time] + if len(rain_time_dict) == 0: + continue + elif 0 < len(rain_time_dict) < 12: + weight_rain = 0 + weight_dis = 0 + for sta in rain_time_dict.keys(): + point = gdf_stid_total.geometry[gdf_stid_total['STCD'] == str(sta)].values[0] + point_self = gdf_stid_total.geometry[gdf_stid_total['STCD'] == str(key)].values[0] + dis = distance(point, point_self) + # inverse-distance weighting has to use the neighbors' rainfall; the original + # accumulated the station's own value, so interp_rain always equalled it and + # no record could ever be dropped + weight_rain += rain_time_dict[sta] / (dis ** 2) + weight_dis += 1 / (dis ** 2) + interp_rain = weight_rain / weight_dis + if 'DRP' in table.columns: + if abs(interp_rain - table['DRP'][time]) > 4: + time_drop_list.append(time) + elif 'paravalue' in table.columns: + if abs(interp_rain - table['paravalue'][time]) > 4: + time_drop_list.append(time) + elif len(rain_time_dict) >= 12: + rain_time_series = pd.Series(rain_time_dict.values()) + quantile_25 = rain_time_series.quantile(q=0.25) + quantile_75 = rain_time_series.quantile(q=0.75) + average = rain_time_series.mean() + if 'DRP' in table.columns: + MA_Tct = (table['DRP'][time] - average) / (quantile_75 - quantile_25) + if MA_Tct > 4: + time_drop_list.append(time) + elif 'paravalue' in table.columns: + MA_Tct = (table['paravalue'][time] - average) / (quantile_75 - quantile_25) + if MA_Tct > 4: + time_drop_list.append(time) + table = table.drop(index=time_drop_list).drop(columns=['Unnamed: 0']) + space_df_dict[key] = table + return space_df_dict + + +def find_neighbor_dict(sl_biliu_gdf, biliu_stbprp_df, filter_list): + biliu_stbprp_df = biliu_stbprp_df[biliu_stbprp_df['sttp'] == 2].reset_index().drop(columns=['index']) + point_list = [] + for i in range(0, len(biliu_stbprp_df)): + point_x = biliu_stbprp_df['lgtd'][i] + point_y = biliu_stbprp_df['lttd'][i] + point = Point(point_x, point_y) + point_list.append(point) + gdf_biliu = GeoDataFrame({'STCD': biliu_stbprp_df['stid'], 'STNM': biliu_stbprp_df['stname']}, geometry=point_list)
sl_biliu_gdf_splited = sl_biliu_gdf[['STCD', 'STNM', 'geometry']] + # only rain gauges should be kept here + gdf_stid_total = GeoDataFrame(pd.concat([gdf_biliu, sl_biliu_gdf_splited], axis=0)) + gdf_stid_total = gdf_stid_total.set_index('STCD').drop(index=filter_list).reset_index() + gdf_stid_total['STCD'] = gdf_stid_total['STCD'].astype('str') + neighbor_dict = {} + for i in range(0, len(gdf_stid_total.geometry)): + stcd = gdf_stid_total['STCD'][i] + gdf_stid_total['distance'] = gdf_stid_total.apply(lambda x: + distance(gdf_stid_total.geometry[i], x.geometry), axis=1) + nearest_stas = gdf_stid_total[(gdf_stid_total['distance'] > 0) & (gdf_stid_total['distance'] <= 0.2)] + nearest_stas_list = nearest_stas['STCD'].to_list() + neighbor_dict[stcd] = nearest_stas_list + gdf_stid_total = gdf_stid_total.drop(columns=['distance']) + return neighbor_dict, gdf_stid_total + + +def get_voronoi_total(): + node_shp = os.path.join(definitions.ROOT_DIR, 'example/biliuriver_shp/biliu_basin_rain_stas_total.shp') + dup_basin_shp = os.path.join(definitions.ROOT_DIR, 'example/biliuriver_shp/碧流河流域_副本.shp') + origin_basin_shp = os.path.join(definitions.ROOT_DIR, 'example/biliuriver_shp/碧流河流域.shp') + if not os.path.exists(node_shp): + shutil.copyfile(origin_basin_shp, dup_basin_shp) + gdf_stid_total = find_neighbor_dict(sl_stas_table, biliu_stas_table, filter_list=filter_station_list)[1] + gdf_stid_total.to_file(node_shp) + voronoi_from_shp(src=node_shp, des=dup_basin_shp) + voronoi_gdf = gpd.read_file(dup_basin_shp, engine='pyogrio') + return voronoi_gdf + + +def test_rain_average_filtered(start_date='2014-01-01 00:00:00', end_date='2022-09-01 00:00:00'): + start_date = pd.to_datetime(start_date, format='%Y-%m-%d %H:%M:%S') + end_date = pd.to_datetime(end_date, format='%Y-%m-%d %H:%M:%S') + voronoi_gdf = get_voronoi_total() + # polygon areas come out in square degrees; 12100 (about 110 km x 110 km per degree + # square) converts them roughly to km2 at this latitude + voronoi_gdf['real_area'] = voronoi_gdf.apply(lambda x: x.geometry.area * 12100, axis=1) + rain_path = os.path.join(definitions.ROOT_DIR, 'example/filtered_rain_between_sl_biliu') + table_dict = {} + for root, dirs, files in os.walk(rain_path): + for file in files: + stcd = file.split('_')[0] + rain_table = pd.read_csv(os.path.join(rain_path, file), engine='c') + if 'TM' in rain_table.columns: + rain_table['TM'] = pd.to_datetime(rain_table['TM']) + elif 'systemtime' in rain_table.columns: + rain_table['systemtime'] = pd.to_datetime(rain_table['systemtime']) + table_dict[stcd] = rain_table + # the series are ragged, so instead of slicing one long time range we look up every + # timestamp, check which stations have data, and then average + rain_aver_dict = {} + for time in pd.date_range(start_date, end_date, freq='H'): + time_rain_records = {} + for stcd in table_dict.keys(): + rain_table = table_dict[stcd] + if 'DRP' in rain_table.columns: + if time in rain_table['TM'].to_numpy(): + drp = rain_table['DRP'][rain_table['TM'] == time] + time_rain_records[stcd] = drp.values[0] + else: + drp = 0 + time_rain_records[stcd] = drp + elif 'paravalue' in rain_table.columns: + if time in rain_table['systemtime'].to_numpy(): + drp = rain_table['paravalue'][rain_table['systemtime'] == time] + time_rain_records[stcd] = drp.values[0] + else: + drp = 0 + time_rain_records[stcd] = drp + else: + continue + rain_aver = 0 + for stcd in time_rain_records.keys(): + voronoi_gdf['STCD'] = voronoi_gdf['STCD'].astype('str') + rain_aver += time_rain_records[stcd] * voronoi_gdf['real_area'][voronoi_gdf['STCD'] == stcd].values[0] / \ + gdf_biliu_shp['area'][0] + rain_aver_dict[time] = rain_aver + rain_aver_df = pd.DataFrame({'TM': rain_aver_dict.keys(), 'rain': rain_aver_dict.values()}) + rain_aver_df.to_csv(os.path.join(definitions.ROOT_DIR, 'example/filtered_rain_average.csv')) + return rain_aver_dict
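 + # The areal average above is a Thiessen-polygon weighting: + #     P_basin(t) = sum_i P_i(t) * A_i / A_basin + # where A_i is the Voronoi-cell area of gauge i and A_basin the basin area, so the + # weights of the reporting gauges sum to at most one. + 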
 + +def get_infer_inq(): + inq_csv_path = os.path.join(definitions.ROOT_DIR, 'example/biliu_inq_interpolated.csv') + if os.path.exists(inq_csv_path): + new_df = pd.read_csv(inq_csv_path, engine='c', parse_dates=['TM']).set_index('TM') + else: + biliu_flow_df = pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/biliuriver_rsvr.csv'), + engine='c', parse_dates=['TM']) + # square degrees to km2, same 12100 factor as above + biliu_area = gdf_biliu_shp.geometry[0].area * 12100 + biliu_flow_df: DataFrame = biliu_flow_df.fillna(-1) + inq_array = biliu_flow_df['INQ'].to_numpy() + otq_array = biliu_flow_df['OTQ'].to_numpy() + w_array = biliu_flow_df['W'].to_numpy() + tm_array = biliu_flow_df['TM'].to_numpy() + for i in range(1, len(biliu_flow_df)): + if (int(inq_array[i]) == -1) & (int(otq_array[i]) != -1): + # wrapping in np.timedelta64 avoids a TypeError ('str' - 'str') when the + # timestamps come in as strings + time_div = np.timedelta64(tm_array[i] - tm_array[i - 1]) / np.timedelta64(1, 'h') + inq_array[i] = otq_array[i] + (w_array[i] - w_array[i - 1]) / time_div + # interpolation according to the time interval is still needed + new_df = pd.DataFrame({'TM': tm_array, 'INQ': inq_array, 'OTQ': otq_array}) + new_df = new_df.set_index('TM').resample('H').interpolate() + # unit conversion for discharge: m3/s * 3.6 / area(km2) gives mm/h + new_df['INQ_mm/h'] = new_df['INQ'].apply(lambda x: x * 3.6 / biliu_area) + new_df.to_csv(inq_csv_path) + return new_df['INQ'], new_df['INQ_mm/h'] + + +def biliu_rain_flow_division(): + # the rain and flow indices should line up as closely as possible + # 2014-01-01 00:00:00 to 2022-09-01 00:00:00 + filtered_rain_aver_df = (pd.read_csv(os.path.join(definitions.ROOT_DIR, + 'example/filtered_rain_average.csv'), engine='c'). + set_index('TM').drop(columns=['Unnamed: 0'])) + filtered_rain_aver_array = filtered_rain_aver_df['rain'].to_numpy() + flow_m3_s = (get_infer_inq()[0])[filtered_rain_aver_df.index] + flow_mm_h = (get_infer_inq()[1])[filtered_rain_aver_df.index] + time = filtered_rain_aver_df.index + rain_min = 0.01 + max_window = 100 + Tr, fluct_rain_Tr, fluct_flow_Tr, fluct_bivariate_Tr = step1_step2_tr_and_fluctuations_timeseries( + filtered_rain_aver_array, flow_mm_h, + rain_min, + max_window) + beginning_core, end_core = step3_core_identification(fluct_bivariate_Tr) + end_rain = step4_end_rain_events(beginning_core, end_core, filtered_rain_aver_array, fluct_rain_Tr, rain_min) + beginning_rain = step5_beginning_rain_events(beginning_core, end_rain, filtered_rain_aver_array, fluct_rain_Tr, + rain_min) + beginning_rain_checked, end_rain_checked, beginning_core, end_core = step6_checks_on_rain_events(beginning_rain, + end_rain, + filtered_rain_aver_array, + rain_min, + beginning_core, + end_core) + end_flow = step7_end_flow_events(end_rain_checked, beginning_core, end_core, filtered_rain_aver_array, + fluct_rain_Tr, fluct_flow_Tr, Tr) + beginning_flow = step8_beginning_flow_events(beginning_rain_checked, end_rain_checked, filtered_rain_aver_array, + beginning_core, + fluct_rain_Tr, fluct_flow_Tr) + beginning_flow_checked, end_flow_checked = step9_checks_on_flow_events(beginning_rain_checked, end_rain_checked, + beginning_flow, + end_flow, fluct_flow_Tr) + BEGINNING_RAIN, END_RAIN, BEGINNING_FLOW, END_FLOW = step10_checks_on_overlapping_events(beginning_rain_checked, + end_rain_checked, + beginning_flow_checked, + end_flow_checked, time) + print(len(BEGINNING_RAIN), len(END_RAIN), len(BEGINNING_FLOW), len(END_FLOW)) + print('_________________________') + print(BEGINNING_RAIN, END_RAIN) + print('_________________________') + print(BEGINNING_FLOW, END_FLOW)
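 + # steps 1-10 above are the DMCA-ESR event-separation routine; rain events and flow + # events are identified independently and only matched at step 10, so the four + # printed lists are the place to sanity-check the division before trusting it + 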
# manually pick a few events from the automatic division results + session_times = [('2017/8/1 15:00:00', '2017/8/7 07:00:00'), ('2018/8/19 12:00:00', '2018/8/23 09:00:00'), + ('2020/8/31 04:00:00', '2020/9/4 15:00:00'), ('2022/7/6 10:00:00', '2022/7/10 00:00:00'), + ('2022/8/6 10:00:00', '2022/8/11 00:00:00')] + session_df_list = [] + for session in session_times: + start_time = pd.to_datetime(session[0]) + end_time = pd.to_datetime(session[1]) + filtered_rain_aver_df.index = pd.to_datetime(filtered_rain_aver_df.index) + rain_session = filtered_rain_aver_df[start_time: end_time] + flow_session_mm_h = flow_mm_h[start_time: end_time] + flow_session_m3_s = flow_m3_s[start_time: end_time] + session_df = pd.DataFrame( + {'TM': pd.date_range(start_time, end_time, freq='H'), 'RAIN_SESSION': rain_session.to_numpy().flatten() + , 'FLOW_SESSION_MM_H': flow_session_mm_h.to_numpy(), 'FLOW_SESSION_M3_S': flow_session_m3_s.to_numpy()}) + session_df_list.append(session_df) + return session_df_list + + +def get_deap_dir_by_session(df: DataFrame): + top_deap_dir = os.path.join(definitions.ROOT_DIR, 'example/deap_dir/') + time_index = df.index[0].strftime('%Y-%m-%d-%H-%M-%S') + deap_dir = os.path.join(top_deap_dir, time_index) + if not os.path.exists(deap_dir): + os.makedirs(deap_dir) + return deap_dir + + +# need fusion with the last test +def test_calibrate_flow(): + # pet_df holds potential evapotranspiration + pet_df = pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/era5_data/pet_calc/PET_result.CSV'), engine='c', + parse_dates=['time']).set_index('time') + session_df_list = biliu_rain_flow_division() + for session_df in session_df_list: + # session_df holds the rain and the flood flow + session_df = session_df.set_index('TM') + deap_dir = get_deap_dir_by_session(session_df) + session_pet = pet_df.loc[session_df.index].to_numpy().flatten() + calibrate_df = pd.DataFrame({'PRCP': session_df['RAIN_SESSION'].to_numpy(), 'PET': session_pet, + 'streamflow': session_df['FLOW_SESSION_MM_H'].to_numpy()}) + calibrate_np = calibrate_df.to_numpy() + calibrate_np = np.expand_dims(calibrate_np, axis=0) + calibrate_np = np.swapaxes(calibrate_np, 0, 1) + observed_output = np.expand_dims(calibrate_np[:, :, -1], axis=0) + observed_output = np.swapaxes(observed_output, 0, 1) + pop = calibrate_by_ga(input_data=calibrate_np[:, :, 0:2], observed_output=observed_output, deap_dir=deap_dir, + warmup_length=24) + print(pop) + + +def test_compare_paras(): + # the genetic algorithm was calibrated in mm/h + ''' + test_session_times = [('2017/8/1 15:00:00', '2017/8/7 07:00:00'), ('2018/8/19 12:00:00', '2018/8/23 09:00:00'), + ('2020/8/31 04:00:00', '2020/9/4 15:00:00'), ('2022/7/6 10:00:00', '2022/7/10 00:00:00'), + ('2022/8/6 10:00:00', '2022/8/11 00:00:00')] + filtered_rain_aver_df = (pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/filtered_rain_average.csv'), + engine='c', parse_dates=['TM']).set_index('TM').drop(columns=['Unnamed: 0'])) + flow_m3_s = (get_infer_inq()[0])[filtered_rain_aver_df.index] + pet_df = pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/era5_data/pet_calc/PET_result.CSV'), engine='c', + parse_dates=['time']).set_index('time') + test_session_dict = {} + for test_session_time in test_session_times: + start_time = pd.to_datetime(test_session_time[0]) + end_time = pd.to_datetime(test_session_time[1]) + rain_session = filtered_rain_aver_df[start_time: end_time] + session_pet = pet_df.loc[start_time: end_time].to_numpy().flatten() + flow_session_m3_s = flow_m3_s[start_time: end_time] + test_session_df = pd.DataFrame({'RAIN_SESSION': rain_session.to_numpy().flatten(), + 'PET': session_pet, + 'FLOW_SESSION_M3_S': flow_session_m3_s.to_numpy()}) + test_session_np = test_session_df.to_numpy() + 
test_session_np = np.expand_dims(test_session_np, axis=0) + test_session_np = np.swapaxes(test_session_np, 0, 1) + test_session_dict[start_time.strftime('%Y-%m-%d-%H-%M-%S')] = test_session_np + ''' + pet_df = pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/era5_data/pet_calc/PET_result.CSV'), engine='c', + parse_dates=['time']).set_index('time') + sessions_list = biliu_rain_flow_division() + for session_df in sessions_list: + session_df = session_df.set_index('TM') + deap_dir = get_deap_dir_by_session(session_df) + time_dir = os.path.basename(deap_dir) + pkl_epoch5_path = os.path.join(deap_dir, 'epoch5.pkl') + pkl_xaj = np.load(pkl_epoch5_path, allow_pickle=True) + warmup_length = 24 + pet_session = pet_df.loc[session_df.index] + session_df = pd.concat([session_df['RAIN_SESSION'], pet_session, session_df['FLOW_SESSION_MM_H'], session_df['FLOW_SESSION_M3_S']], axis=1) + session_np = session_df.to_numpy() + session_np = np.expand_dims(session_np, axis=1) + qsim, es = hydromodel.models.xaj.xaj(p_and_e=session_np[:, :, 0:2], + params=np.array(pkl_xaj['halloffame'][0]).reshape(1, -1), + warmup_length=warmup_length, name='xaj_mz') + # mm/h -> m3/s: multiply by the basin area in m2 (taken here as about 2.097e9 m2, + # i.e. ~2097 km2), divide by 1000 (mm -> m) and by 3600 (h -> s) + qsim = qsim * 2097000 / 3600 + y_flow_obs = session_np[:, :, -1].flatten()[warmup_length:] + rmse = statRmse(qsim.flatten(), y_flow_obs) + x = session_df.index[warmup_length:] + rain_session = session_df['RAIN_SESSION'] + y_rain_obs = rain_session.to_numpy().flatten() + fig, ax = plt.subplots(figsize=(16, 12)) + p = ax.twinx() + ax.bar(x, y_rain_obs[warmup_length:], color='red', alpha=0.6, width=0.04) + ax.set_ylabel('rain(mm)') + ax.invert_yaxis() + p.plot(x, y_flow_obs, color='green', linewidth=2) + p.plot(x, qsim.flatten(), color='yellow', linewidth=2) + p.set_ylabel('flow(m3/s)') + plt.savefig(os.path.join(definitions.ROOT_DIR, 'example/deap_dir/calibrated_xaj_cmp', time_dir+'.png')) + print(rmse) diff --git a/test/test_gr4j.py b/test/test_gr4j.py new file mode 100644 index 0000000..d5df64f --- /dev/null +++ b/test/test_gr4j.py @@ -0,0 +1,98 @@ +import os + +import numpy as np +import pandas as pd +import pytest +import spotpy +from matplotlib import pyplot as plt +import definitions +from hydromodel.calibrate.calibrate_sceua import calibrate_by_sceua, SpotSetup +from hydromodel.models.gr4j import gr4j +from hydromodel.visual.pyspot_plots import show_calibrate_result + + +@pytest.fixture() +def basin_area(): + # the area of basin 01013500, unit km2 + # basin_area = 2252.7 + return 1.783 + + +@pytest.fixture() +def warmup_length(): + return 30 + + +@pytest.fixture() +def the_data(): + root_dir = definitions.ROOT_DIR + # test_data = pd.read_csv(os.path.join(root_dir, "hydromodel", "example", '01013500_lump_p_pe_q.txt')) + return pd.read_csv( + os.path.join(root_dir, "hydromodel", "example", "hymod_input.csv"), sep=";" + ) + + +@pytest.fixture() +def p_and_e(the_data): + # p_and_e_df = test_data[['prcp(mm/day)', 'petfao56(mm/day)']] + # three dims: sequence (time), batch (basin), feature (variable) + # p_and_e = np.expand_dims(p_and_e_df.values, axis=1) + p_and_e_df = the_data[["rainfall[mm]", "TURC [mm d-1]"]] + return np.expand_dims(p_and_e_df.values, axis=1) + + +@pytest.fixture() +def qobs(basin_area, the_data): + # 1 ft3 = 0.02831685 m3 + ft3tom3 = 2.831685e-2 + # 1 km2 = 10^6 m2 + km2tom2 = 1e6 + # 1 m = 1000 mm + mtomm = 1000 + # 1 day = 24 * 3600 s + daytos = 24 * 3600 + # qobs_ = np.expand_dims(test_data[['streamflow(ft3/s)']].values, axis=1) + # trans ft3/s to mm/day + # return qobs_ * ft3tom3 / (basin_area * km2tom2) * mtomm * daytos + + qobs_ =
np.expand_dims(the_data[["Discharge[ls-1]"]].values, axis=1) + # trans l/s to mm/day + return qobs_ * 1e-3 / (basin_area * km2tom2) * mtomm * daytos + + +@pytest.fixture() +def params(): + # all parameters are in range [0,1] + return np.tile([0.5], (1, 4)) + + +def test_gr4j(p_and_e, params): + qsim = gr4j(p_and_e, params, warmup_length=10) + np.testing.assert_array_equal(qsim.shape, (1817, 1, 1)) + + +def test_calibrate_gr4j_sceua(p_and_e, qobs, warmup_length): + calibrate_by_sceua( + p_and_e, + qobs, + warmup_length, + model="gr4j", + random_seed=2000, + rep=5000, + ngs=7, + kstop=3, + peps=0.1, + pcento=0.1, + ) + + +def test_show_calibrate_sceua_result(p_and_e, qobs, warmup_length): + spot_setup = SpotSetup( + p_and_e, + qobs, + warmup_length, + model="gr4j", + obj_func=spotpy.objectivefunctions.rmse, + ) + show_calibrate_result(spot_setup, "SCEUA_gr4j") + plt.show() diff --git a/test/test_hymod.py b/test/test_hymod.py new file mode 100644 index 0000000..2067495 --- /dev/null +++ b/test/test_hymod.py @@ -0,0 +1,94 @@ +import os + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import pytest +import spotpy + +import sys +import definitions +from hydromodel.calibrate.calibrate_sceua import calibrate_by_sceua, SpotSetup +from hydromodel.models.hymod import hymod +from hydromodel.visual.pyspot_plots import show_calibrate_result + + +@pytest.fixture() +def basin_area(): + # # the area of basin 01013500 is 2252.7; unit km2 + # the area of a basin from hymod example, unit km2 + return 1.783 + + +@pytest.fixture() +def the_data(): + root_dir = definitions.ROOT_DIR + return pd.read_csv( + os.path.join(root_dir, "hydromodel", "example", "hymod_input.csv"), sep=";" + ) + # return pd.read_csv(os.path.join(root_dir, "hydromodel","example", '01013500_lump_p_pe_q.txt')) + + +@pytest.fixture() +def qobs(the_data, basin_area): + # 1 ft3 = 0.02831685 m3 + ft3tom3 = 2.831685e-2 + # 1 km2 = 10^6 m2 + km2tom2 = 1e6 + # 1 m = 1000 mm + mtomm = 1000 + # 1 day = 24 * 3600 s + daytos = 24 * 3600 + # qobs_ = np.expand_dims(test_data[['streamflow(ft3/s)']].values, axis=1) + # trans ft3/s to mm/day + # return qobs_ * ft3tom3 / (basin_area * km2tom2) * mtomm * daytos + + qobs_ = np.expand_dims(the_data[["Discharge[ls-1]"]].values, axis=1) + # trans l/s to mm/day + return qobs_ * 1e-3 / (basin_area * km2tom2) * mtomm * daytos + + +@pytest.fixture() +def p_and_e(the_data): + p_and_e_df = the_data[["rainfall[mm]", "TURC [mm d-1]"]] + # p_and_e_df = test_data[['prcp(mm/day)', 'petfao56(mm/day)']] + # three dims: batch (basin), sequence (time), feature (variable) + return np.expand_dims(p_and_e_df.values, axis=1) + + +@pytest.fixture() +def params(): + return np.array([0.39359, 0.01005, 0.20831, 0.75010, 0.48652]).reshape(1, 5) + + +def test_hymod(p_and_e, params): + qsim = hymod(p_and_e, params, warmup_length=10) + np.testing.assert_almost_equal( + qsim[:5, 0, 0], [0.0003, 0.0003, 0.0002, 0.0002, 0.0002], decimal=4 + ) + + +def test_calibrate_hymod_sceua(p_and_e, qobs, basin_area): + calibrate_by_sceua( + p_and_e, + qobs, + model="hymod", + random_seed=2000, + rep=5000, + ngs=7, + kstop=3, + peps=0.1, + pcento=0.1, + ) + + +def test_show_hymod_calibrate_sceua_result(p_and_e, qobs, basin_area): + spot_setup = SpotSetup( + p_and_e, + qobs, + warmup_length=10, + model="hymod", + obj_func=spotpy.objectivefunctions.rmse, + ) + show_calibrate_result(spot_setup, "SCEUA_hymod", "l s-1") + plt.show() diff --git a/test/test_read_db_by_json.py b/test/test_read_db_by_json.py new file mode 100644 index 
0000000..d0ea916 --- /dev/null +++ b/test/test_read_db_by_json.py @@ -0,0 +1,31 @@ +import json +import os.path + +import sqlalchemy as sqa +from sqlalchemy import MetaData, Table +from sqlalchemy.orm import sessionmaker, scoped_session + +import definitions + + +def test_read_database_json(): + json_path = os.path.join(definitions.ROOT_DIR, 'example/database.json') + with open(json_path, 'r') as jsonp: + json_obj = json.load(jsonp) + sl_dict = json_obj['biliu_old'] + sl_database_link = sl_dict['class'] + '+' + sl_dict['driver'] + '://' + sl_dict['id'] + ':' + sl_dict[ + 'password'] + '@' + sl_dict['ip'] + ':' + sl_dict['port'] + '/' + sl_dict['database'] + sl_engine = sqa.create_engine(sl_database_link) + ''' + query = "select * from STInfo" + ST_PPTN_STID = pd.read_sql(query, sl_engine) + ST_PPTN_STID.to_csv('biliu_total_stas.csv') + ''' + md = MetaData() + md.reflect(bind=sl_engine) + # md.tables already holds reflected Table objects; the original wrapped one in + # Table() again, passing a Table where a table name is expected + table_obj = md.tables['STInfo'] + session = scoped_session(sessionmaker(bind=sl_engine)) + result = session.query(table_obj).filter_by(STID=4000).all() + print(result) + diff --git a/test/test_xaj.py b/test/test_xaj.py new file mode 100644 index 0000000..7a9dffa --- /dev/null +++ b/test/test_xaj.py @@ -0,0 +1,254 @@ +import os + +import numpy as np +import pandas as pd +import pytest +import sys +sys.path.append("HYDRO_MODEL_XAJ") +import definitions + +from hydromodel.calibrate.calibrate_sceua import calibrate_by_sceua +from hydromodel.calibrate.calibrate_ga import calibrate_by_ga +from hydromodel.data.data_postprocess import read_save_sceua_calibrated_params +from hydromodel.utils import hydro_constant, hydro_utils +from hydromodel.visual.pyspot_plots import show_calibrate_result, show_test_result +from hydromodel.models.xaj import xaj, uh_gamma, uh_conv + + +@pytest.fixture() +def basin_area(): + # the area of basin 01013500, unit km2 + # basin_area = 2252.7 + return 1.783 + + +@pytest.fixture() +def db_name(): + db_name = os.path.join(definitions.ROOT_DIR, "test", "SCEUA_xaj_mz") + return db_name + + +@pytest.fixture() +def warmup_length(): + return 30 + + +@pytest.fixture() +def the_data(): + root_dir = definitions.ROOT_DIR + # test_data = pd.read_csv(os.path.join(root_dir, "hydromodel", "example", '01013500_lump_p_pe_q.txt')) + return pd.read_csv( + os.path.join(root_dir, "hydromodel", "example", "hymod_input.csv") + ) + + +@pytest.fixture() +def p_and_e(the_data): + # p_and_e_df = test_data[['prcp(mm/day)', 'petfao56(mm/day)']] + # three dims: sequence (time), batch (basin), feature (variable) + # p_and_e = np.expand_dims(p_and_e_df.values, axis=1) + p_and_e_df = the_data[["rainfall[mm]", "TURC [mm d-1]"]] + return np.expand_dims(p_and_e_df.values, axis=1) + + +@pytest.fixture() +def qobs(basin_area, the_data): + # 1 ft3 = 0.02831685 m3 + ft3tom3 = 2.831685e-2 + # 1 km2 = 10^6 m2 + km2tom2 = 1e6 + # 1 m = 1000 mm + mtomm = 1000 + # 1 day = 24 * 3600 s + daytos = 24 * 3600 + # qobs_ = np.expand_dims(test_data[['streamflow(ft3/s)']].values, axis=1) + # trans ft3/s to mm/day + # return qobs_ * ft3tom3 / (basin_area * km2tom2) * mtomm * daytos + + qobs_ = np.expand_dims(the_data[["Discharge[ls-1]"]].values, axis=1) + # trans l/s to mm/day + return qobs_ * 1e-3 / (basin_area * km2tom2) * mtomm * daytos + + +@pytest.fixture() +def params(): + # all parameters are in range [0,1] + return np.tile([0.5], (1, 15)) + + +def test_uh_gamma(): + # repeat for 20 periods and add one dim as feature: time_seq=20, batch=10, feature=1 + routa = np.tile(2.5, 
(20, 10, 1)) + routb = np.tile(3.5, (20, 10, 1)) + uh = uh_gamma(routa, routb, len_uh=15) + np.testing.assert_almost_equal( + uh[:, 0, :], + np.array( + [ + [0.0069], + [0.0314], + [0.0553], + [0.0738], + [0.0860], + [0.0923], + [0.0939], + [0.0919], + [0.0875], + [0.0814], + [0.0744], + [0.0670], + [0.0597], + [0.0525], + [0.0459], + ] + ), + decimal=3, + ) + + +def test_uh(): + uh_from_gamma = np.tile(1, (5, 3, 1)) + # uh_from_gamma = np.arange(15).reshape(5, 3, 1) + rf = np.arange(30).reshape(10, 3, 1) / 100 + qs = uh_conv(rf, uh_from_gamma) + np.testing.assert_almost_equal( + np.array( + [ + [0.0000, 0.0100, 0.0200], + [0.0300, 0.0500, 0.0700], + [0.0900, 0.1200, 0.1500], + [0.1800, 0.2200, 0.2600], + [0.3000, 0.3500, 0.4000], + [0.4500, 0.5000, 0.5500], + [0.6000, 0.6500, 0.7000], + [0.7500, 0.8000, 0.8500], + [0.9000, 0.9500, 1.0000], + [1.0500, 1.1000, 1.1500], + ] + ), + qs[:, :, 0], + decimal=3, + ) + + +def test_xaj(p_and_e, params, warmup_length): + qsim, e = xaj( + p_and_e, + params, + warmup_length=warmup_length, + name="xaj", + source_book="HF", + source_type="sources", + ) + results = pd.DataFrame({ + 'discharge': qsim.flatten(), + 'ET': e.flatten(), + }) + + # np.testing.assert_array_equal(qsim.shape[0], p_and_e.shape[0] - warmup_length) + + +def test_xaj_mz(p_and_e, params, warmup_length): + qsim, e = xaj( + p_and_e, + np.tile([0.5], (1, 16)), + warmup_length=warmup_length, + name="xaj_mz", + source_book="HF", + source_type="sources", + ) + np.testing.assert_array_equal(qsim.shape[0], p_and_e.shape[0] - warmup_length) + + +def test_calibrate_xaj_sceua(p_and_e, qobs, warmup_length, db_name): + # just for testing, so the hyper-param is chosen for quick running + calibrate_by_sceua( + p_and_e, + qobs, + db_name, + warmup_length, + model={ + "name": "xaj_mz", + "source_type": "sources", + "source_book": "HF", + }, + algorithm={ + "name": "SCE_UA", + "random_seed": 1234, + "rep": 5, + "ngs": 7, + "kstop": 3, + "peps": 0.1, + "pcento": 0.1, + }, + ) + + +def test_show_calibrate_sceua_result(p_and_e, qobs, warmup_length, db_name, basin_area): + sampler = calibrate_by_sceua( + p_and_e, + qobs, + db_name, + warmup_length, + model={ + "name": "xaj_mz", + "source_type": "sources", + "source_book": "HF", + }, + algorithm={ + "name": "SCE_UA", + "random_seed": 1234, + "rep": 5, + "ngs": 7, + "kstop": 3, + "peps": 0.1, + "pcento": 0.1, + }, + ) + train_period = hydro_utils.t_range_days(["2012-01-01", "2017-01-01"]) + show_calibrate_result( + sampler.setup, + db_name, + warmup_length=warmup_length, + save_dir=db_name, + basin_id="basin_id", + train_period=train_period, + basin_area=basin_area, + ) + + +def test_show_test_result(p_and_e, qobs, warmup_length, db_name, basin_area): + params = read_save_sceua_calibrated_params("basin_id", db_name, db_name) + qsim, _ = xaj( + p_and_e, + params, + warmup_length=warmup_length, + name="xaj_mz", + source_type="sources", + source_book="HF", + ) + + qsim = hydro_constant.convert_unit( + qsim, + unit_now="mm/day", + unit_final=hydro_constant.unit["streamflow"], + basin_area=basin_area, + ) + qobs = hydro_constant.convert_unit( + qobs[warmup_length:, :, :], + unit_now="mm/day", + unit_final=hydro_constant.unit["streamflow"], + basin_area=basin_area, + ) + test_period = hydro_utils.t_range_days(["2012-01-01", "2017-01-01"]) + show_test_result( + "basin_id", test_period[warmup_length:], qsim, qobs, save_dir=db_name + ) + + +def test_calibrate_xaj_ga(p_and_e, qobs, warmup_length): + calibrate_by_ga( + p_and_e, + qobs, + warmup_length, + ) diff --git 
a/test/test_xaj_bmi.py b/test/test_xaj_bmi.py new file mode 100644 index 0000000..0ee679b --- /dev/null +++ b/test/test_xaj_bmi.py @@ -0,0 +1,308 @@ +import logging + +import definitions +from xaj.configuration import read_config +from xaj.xaj_bmi import xajBmi + +logging.basicConfig(level=logging.INFO) + +import pandas as pd +import os +from pathlib import Path +import numpy as np +import fnmatch +import socket +from datetime import datetime + +from hydromodel.utils import hydro_utils +from hydromodel.data.data_preprocess import ( + cross_valid_data, + split_train_test, +) +from xaj.calibrate_sceua_xaj_bmi import calibrate_by_sceua +from xaj.calibrate_ga_xaj_bmi import ( + calibrate_by_ga, + show_ga_result, +) +from hydromodel.visual.pyspot_plots import show_calibrate_result, show_test_result +from hydromodel.data.data_postprocess import ( + renormalize_params, + read_save_sceua_calibrated_params, + save_streamflow, + summarize_metrics, + summarize_parameters, +) +from hydromodel.utils import hydro_constant + + +def test_bmi(): + ''' + model = xajBmi() + print(model.get_component_name()) + model.initialize("runxaj.yaml") + print("Start time:", model.get_start_time()) + print("End time:", model.get_end_time()) + print("Current time:", model.get_current_time()) + print("Time step:", model.get_time_step()) + print("Time units:", model.get_time_units()) + print(model.get_input_var_names()) + print(model.get_output_var_names()) + + discharge = [] + ET = [] + time = [] + while model.get_current_time() <= model.get_end_time(): + time.append(model.get_current_time()) + model.update() + + discharge=model.get_value("discharge") + ET=model.get_value("ET") + + results = pd.DataFrame({ + 'discharge': discharge.flatten(), + 'ET': ET.flatten(), + }) + results.to_csv('/home/wangjingyi/code/hydro-model-xaj/scripts/xaj.csv') + model.finalize() + ''' + # model calibration + config = read_config(os.path.relpath("runxaj.yaml")) + forcing_data = Path(str(definitions.ROOT_DIR) + str(config['forcing_data'])) + train_period = config['train_period'] + test_period = config['test_period'] + # period = config['period'] + json_file = Path(str(definitions.ROOT_DIR) + str(config['json_file'])) + npy_file = Path(str(definitions.ROOT_DIR) + str(config['npy_file'])) + cv_fold = config['cv_fold'] + warmup_length = config['warmup_length'] + # model_info + model_name = config['model_name'] + source_type = config['source_type'] + source_book = config['source_book'] + # algorithm + algorithm_name = config['algorithm_name'] + + # periods must exist for cross_valid_data, so it is computed unconditionally; the + # original guarded it with `if not (cv_fold > 1)`, which raised NameError whenever + # cv_fold > 1 + periods = np.sort( + [train_period[0], train_period[1], test_period[0], test_period[1]] + ) + if cv_fold > 1: + cross_valid_data(json_file, npy_file, periods, warmup_length, cv_fold) + else: + # no cross validation + split_train_test(json_file, npy_file, train_period, test_period) + + kfold = [ + int(f_name[len("data_info_fold"): -len("_test.json")]) + for f_name in os.listdir(os.path.dirname(forcing_data)) + if fnmatch.fnmatch(f_name, "*_fold*_test.json") + ] + kfold = np.sort(kfold) + for fold in kfold: + print(f"Start to calibrate the {fold}-th fold") + train_data_info_file = os.path.join( + os.path.dirname(forcing_data), f"data_info_fold{str(fold)}_train.json" + ) + train_data_file = os.path.join( + os.path.dirname(forcing_data), f"basins_lump_p_pe_q_fold{str(fold)}_train.npy" + ) + test_data_info_file = os.path.join( + os.path.dirname(forcing_data), f"data_info_fold{str(fold)}_test.json" + ) + test_data_file = os.path.join( + os.path.dirname(forcing_data), 
f"basins_lump_p_pe_q_fold{str(fold)}_test.npy" + ) + if ( + os.path.exists(train_data_info_file) is False + or os.path.exists(train_data_file) is False + or os.path.exists(test_data_info_file) is False + or os.path.exists(test_data_file) is False + ): + raise FileNotFoundError( + "The data files are not found, please run datapreprocess4calibrate.py first." + ) + data_train = hydro_utils.unserialize_numpy(train_data_file) + data_test = hydro_utils.unserialize_numpy(test_data_file) + data_info_train = hydro_utils.unserialize_json_ordered(train_data_info_file) + data_info_test = hydro_utils.unserialize_json_ordered(test_data_info_file) + current_time = datetime.now().strftime("%b%d_%H-%M-%S") + # one directory for one model + one hyperparam setting and one basin + save_dir = os.path.join(os.path.dirname(forcing_data), current_time + "_" + socket.gethostname() + "_fold" + str(fold)) + if algorithm_name == "SCE_UA": + random_seed = config['random_seed'] + rep = config['rep'] + ngs = config['ngs'] + kstop = config['kstop'] + peps = config['peps'] + pcento = config['pcento'] + for i in range(len(data_info_train["basin"])): + basin_id = data_info_train["basin"][i] + basin_area = data_info_train["area"][i] + # one directory for one model + one hyperparam setting and one basin + spotpy_db_dir = os.path.join( + save_dir, + basin_id, + ) + + if not os.path.exists(spotpy_db_dir): + os.makedirs(spotpy_db_dir) + db_name = os.path.join(spotpy_db_dir, "SCEUA_" + model_name) + sampler = calibrate_by_sceua( + data_train[:, i: i + 1, 0:2], + data_train[:, i: i + 1, -1:], + db_name, + warmup_length=warmup_length, + model={ + 'name': model_name, + 'source_type': source_type, + 'source_book': source_book + }, + algorithm={ + 'name': algorithm_name, + 'random_seed': random_seed, + 'rep': rep, + 'ngs': ngs, + 'kstop': kstop, + 'peps': peps, + 'pcento': pcento + }, + ) + + show_calibrate_result( + sampler.setup, + db_name, + warmup_length=warmup_length, + save_dir=spotpy_db_dir, + basin_id=basin_id, + train_period=data_info_train["time"], + basin_area=basin_area, + ) + params = read_save_sceua_calibrated_params( + basin_id, spotpy_db_dir, db_name + ) + + model = xajBmi() + model.initialize(os.path.relpath("runxaj.yaml"), params, data_test[:, i: i + 1, 0:2]) + j = 0 + while j <= len(data_info_test["time"]): + model.update() + j += 1 + q_sim = model.get_value("discharge") + + qsim = hydro_constant.convert_unit( + q_sim, + unit_now="mm/day", + unit_final=hydro_constant.unit["streamflow"], + basin_area=basin_area, + ) + + qobs = hydro_constant.convert_unit( + data_test[warmup_length:, i: i + 1, -1:], + unit_now="mm/day", + unit_final=hydro_constant.unit["streamflow"], + basin_area=basin_area, + ) + test_result_file = os.path.join( + spotpy_db_dir, + "test_qsim_" + model_name + "_" + str(basin_id) + ".csv", + ) + pd.DataFrame(qsim.reshape(-1, 1)).to_csv( + test_result_file, + sep=",", + index=False, + header=False, + ) + test_date = pd.to_datetime( + data_info_test["time"][warmup_length:] + ).values.astype("datetime64[D]") + show_test_result( + basin_id, test_date, qsim, qobs, save_dir=spotpy_db_dir + ) + elif algorithm_name == "GA": + random_seed = config['random_seed'] + run_counts = config['run_counts'] + pop_num = config['pop_num'] + cross_prob = config['cross_prob'] + mut_prob = config['mut_prob'] + save_freq = config['save_freq'] + for i in range(len(data_info_train["basin"])): + basin_id = data_info_train["basin"][i] + basin_area = data_info_train["area"][i] + # one directory for one model + one hyperparam 
+                test_result_file = os.path.join(
+                    spotpy_db_dir,
+                    "test_qsim_" + model_name + "_" + str(basin_id) + ".csv",
+                )
+                pd.DataFrame(qsim.reshape(-1, 1)).to_csv(
+                    test_result_file,
+                    sep=",",
+                    index=False,
+                    header=False,
+                )
+                test_date = pd.to_datetime(
+                    data_info_test["time"][warmup_length:]
+                ).values.astype("datetime64[D]")
+                show_test_result(
+                    basin_id, test_date, qsim, qobs, save_dir=spotpy_db_dir
+                )
+        elif algorithm_name == "GA":
+            random_seed = config['random_seed']
+            run_counts = config['run_counts']
+            pop_num = config['pop_num']
+            cross_prob = config['cross_prob']
+            mut_prob = config['mut_prob']
+            save_freq = config['save_freq']
+            for i in range(len(data_info_train["basin"])):
+                basin_id = data_info_train["basin"][i]
+                basin_area = data_info_train["area"][i]
+                # one directory for one model + one hyperparam setting and one basin
+                deap_db_dir = os.path.join(save_dir, basin_id)
+                os.makedirs(deap_db_dir, exist_ok=True)
+                calibrate_by_ga(
+                    data_train[:, i: i + 1, 0:2],
+                    data_train[:, i: i + 1, -1:],
+                    deap_db_dir,
+                    warmup_length=warmup_length,
+                    model={
+                        'name': model_name,
+                        'source_type': source_type,
+                        'source_book': source_book
+                    },
+                    ga_param={
+                        'name': algorithm_name,
+                        'random_seed': random_seed,
+                        'run_counts': run_counts,
+                        'pop_num': pop_num,
+                        'cross_prob': cross_prob,
+                        'mut_prob': mut_prob,
+                        'save_freq': save_freq
+                    },
+                )
+                show_ga_result(
+                    deap_db_dir,
+                    warmup_length=warmup_length,
+                    basin_id=basin_id,
+                    the_data=data_train[:, i: i + 1, :],
+                    the_period=data_info_train["time"],
+                    basin_area=basin_area,
+                    model_info={
+                        'name': model_name,
+                        'source_type': source_type,
+                        'source_book': source_book
+                    },
+                    train_mode=True,
+                )
+                show_ga_result(
+                    deap_db_dir,
+                    warmup_length=warmup_length,
+                    basin_id=basin_id,
+                    the_data=data_test[:, i: i + 1, :],
+                    the_period=data_info_test["time"],
+                    basin_area=basin_area,
+                    model_info={
+                        'name': model_name,
+                        'source_type': source_type,
+                        'source_book': source_book
+                    },
+                    train_mode=False,
+                )
+        else:
+            raise NotImplementedError(
+                "Calibration method not supported! Choose 'SCE_UA' or 'GA'."
+            )
+        model_info = {
+            'name': model_name,
+            'source_type': source_type,
+            'source_book': source_book,
+        }
+        summarize_parameters(save_dir, model_info=model_info)
+        renormalize_params(save_dir, model_info=model_info)
+        summarize_metrics(save_dir, model_info=model_info)
+        save_streamflow(save_dir, model_info=model_info, fold=fold)
+        print(f"Finish calibrating the {fold}-th fold")
diff --git a/test/test_xgb_find_abnormal.py b/test/test_xgb_find_abnormal.py
new file mode 100644
index 0000000..c8f5d2e
--- /dev/null
+++ b/test/test_xgb_find_abnormal.py
@@ -0,0 +1,61 @@
+import datetime
+import math
+import os
+
+import joblib as jl
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import xarray as xr
+from sklearn import metrics
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeRegressor
+
+import definitions
+
+
+def test_dt_find_abnormal_otq():
+    era_path = os.path.join(definitions.ROOT_DIR, 'example/era5_xaj/')
+    # despite the file name, a plain DecisionTreeRegressor serves as the baseline
+    dt_reg = DecisionTreeRegressor()
+    # days elapsed since the 2000-01-01 00:00 reference epoch
+    date_x = (pd.date_range('2018-1-1 00:00:00', '2022-12-31 23:00:00', freq='D') -
+              pd.to_datetime('2000-01-01 00:00:00')) / np.timedelta64(1, 'D')
+    rain_y = np.array([])
+    for year in range(2018, 2023):
+        for month in range(1, 13):
+            path_era_file = os.path.join(era_path, f'era5_datas_{year}{month:02d}.nc')
+            era_ds = xr.open_dataset(path_era_file)
+            # In ERA5-Land, 'sro' is surface runoff; it is an accumulated
+            # variable, so the value stamped at 00:00 holds the previous
+            # day's total.
+            month_rain = era_ds.sel(longitude=122.5, latitude=39.8)['sro']
+            month_rain_daily = month_rain.loc[month_rain.time.dt.time == datetime.time(0, 0)]
+            rain_y = np.append(rain_y, month_rain_daily.to_numpy() * 1.21e8 / 86400)
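+    # On the conversion above: 'sro' is a depth in meters, and 1.21e8 is
+    # presumably the catchment area in m^2 (~121 km^2), so
+    #   depth [m/day] * area [m^2] / 86400 [s/day]  ->  m^3/s,
+    # matching the units of the observed outflow used below.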
+    X_train, X_test, y_train, y_test = train_test_split(date_x, rain_y, test_size=0.3)
+    dt_path = os.path.join(definitions.ROOT_DIR, 'example/dt_reg_test')
+    if os.path.exists(dt_path):
+        dt_reg = jl.load(dt_path)
+    else:
+        dt_reg.fit(X=np.expand_dims(X_train, 1), y=np.expand_dims(y_train, 1))
+        jl.dump(dt_reg, dt_path)
+    pred_era5 = dt_reg.predict(np.expand_dims(X_test, 1))
+    # sklearn's metrics expect (y_true, y_pred)
+    r2_era5 = metrics.r2_score(y_test, pred_era5)
+    rmse_era5 = math.sqrt(metrics.mean_squared_error(y_test, pred_era5))
+    print(r2_era5, rmse_era5)
+    biliu_flow_df = pd.read_csv(os.path.join(definitions.ROOT_DIR, 'example/biliuriver_rsvr.csv'),
+                                engine='c', parse_dates=['TM'])
+    # one shared mask keeps predictions and observations aligned; the 08:00
+    # epoch presumably shifts local (UTC+8) stamps onto the UTC training epoch
+    otq_mask = (~biliu_flow_df['OTQ'].isna()) & (biliu_flow_df['TM'] > pd.to_datetime('2018-01-01 00:00:00'))
+    predict_range = (biliu_flow_df['TM'][otq_mask]
+                     - pd.to_datetime('2000-01-01 08:00:00')) / np.timedelta64(1, 'D')
+    pred_y = dt_reg.predict(np.expand_dims(predict_range, 1))
+    obs_y = biliu_flow_df['OTQ'][otq_mask].to_numpy()
+    # RMSE over a sliding 10-record window; large values flag stretches where
+    # the observed outflow departs from the ERA5-trained baseline
+    rmse_array = [math.sqrt(metrics.mean_squared_error(pred_y[i:i + 10], obs_y[i:i + 10]))
+                  for i in range(0, len(pred_y) - 10)]
+    plt.plot(rmse_array)
+    plt.xlabel('slice')
+    plt.ylabel('RMSE')
+    plt.show()
+    print(rmse_array)
+    normal_rmse_array = np.argwhere(np.array(rmse_array) <= 15)
+    print(normal_rmse_array)
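+    # A sketch of mapping the complementary (abnormal) windows back to dates;
+    # the 15 m^3/s threshold mirrors the filter above and is an assumption to
+    # tune per basin. Each index marks the start of a 10-record window:
+    #   abnormal_idx = np.argwhere(np.array(rmse_array) > 15).flatten()
+    #   abnormal_dates = biliu_flow_df['TM'][otq_mask].iloc[abnormal_idx]
+    #   print(abnormal_dates)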