hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit f36430e38a6714f7d563425bf4a9b280c4e40f52
parent b0787fa64b41c3c5da99b378aa6926031f5af446
Author: Drew DeVault <sir@cmpwn.com>
Date:   Sat, 13 Mar 2021 16:28:36 -0500

Merge remote-tracking branch 'stdlib/master'

Diffstat:
M.build.yml | 1-
ACOPYING | 373+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AREADME.md | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aascii/ctype.ha | 102+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aascii/strcmp.ha | 36++++++++++++++++++++++++++++++++++++
Abufio/buffered.ha | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abufio/dynamic.ha | 190+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abufio/fixed.ha | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abytes/contains.ha | 5+++++
Abytes/copy.ha | 15+++++++++++++++
Abytes/equal.ha | 24++++++++++++++++++++++++
Abytes/index.ha | 108+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abytes/reverse.ha | 25+++++++++++++++++++++++++
Abytes/tokenize.ha | 163+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acrypto/math/bits.ha | 22++++++++++++++++++++++
Acrypto/random/+linux.ha | 40++++++++++++++++++++++++++++++++++++++++
Acrypto/random/random.ha | 42++++++++++++++++++++++++++++++++++++++++++
Acrypto/sha256/+test.ha | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acrypto/sha256/sha256.ha | 211+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adirs/xdg.ha | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aencoding/hex/hex.ha | 25+++++++++++++++++++++++++
Aencoding/utf8/decode.ha | 151++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aencoding/utf8/encode.ha | 41+++++++++++++++++++++++++++++++++++++++++
Aencoding/utf8/rune.ha | 27+++++++++++++++++++++++++++
Aendian/big.ha | 31+++++++++++++++++++++++++++++++
Aendian/endian.ha | 17+++++++++++++++++
Aendian/host+aarch64.ha | 2++
Aendian/host+x86_64.ha | 2++
Aendian/little.ha | 31+++++++++++++++++++++++++++++++
Afmt/fmt.ha | 432+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aformat/elf/types.ha | 446+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afs/fs.ha | 159+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afs/types.ha | 227+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afs/util.ha | 98+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agetopt/getopts.ha | 307+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahare/ast/types.ha | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahare/ast/unparse.ha | 21+++++++++++++++++++++
Ahare/lex/+test.ha | 262+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahare/lex/lex.ha | 518+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahare/lex/token.ha | 299+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahare/module/context.ha | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahare/module/scan.ha | 290+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahare/module/types.ha | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahare/parse/+test.ha | 167+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahare/parse/parse.ha | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahare/parse/types.ha | 16++++++++++++++++
Ahare/parse/util.ha | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahash/fnv/fnv.ha | 183+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahash/hash.ha | 46++++++++++++++++++++++++++++++++++++++++++++++
Aio/+test/copy.ha | 93+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aio/+test/limit.ha | 36++++++++++++++++++++++++++++++++++++
Aio/+test/stream.ha | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Aio/+test/strings.ha | 44++++++++++++++++++++++++++++++++++++++++++++
Aio/arch+aarch64.ha | 1+
Aio/arch+x86_64.ha | 1+
Aio/copy.ha | 28++++++++++++++++++++++++++++
Aio/limit.ha | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aio/println.ha | 24++++++++++++++++++++++++
Aio/stream.ha | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aio/strings.ha | 34++++++++++++++++++++++++++++++++++
Aio/tee.ha | 34++++++++++++++++++++++++++++++++++
Aio/types.ha | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amath/random/random.ha | 35+++++++++++++++++++++++++++++++++++
Aos/+linux/dirfdfs.ha | 356+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aos/+linux/environ.ha | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aos/+linux/errors.ha | 16++++++++++++++++
Aos/+linux/exit.ha | 4++++
Aos/+linux/fdstream.ha | 131+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aos/+linux/fs.ha | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Aos/+linux/open.ha | 27+++++++++++++++++++++++++++
Aos/+linux/stdfd.ha | 11+++++++++++
Aos/exec/+linux.ha | 24++++++++++++++++++++++++
Aos/exec/cmd+linux.ha | 94+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aos/exec/cmd.ha | 119+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aos/exec/process+linux.ha | 86+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aos/exec/types.ha | 27+++++++++++++++++++++++++++
Aos/fs.ha | 46++++++++++++++++++++++++++++++++++++++++++++++
Aos/stdfd.ha | 10++++++++++
Apath/+linux.ha | 2++
Apath/iter.ha | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apath/join.ha | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apath/names.ha | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apath/util.ha | 10++++++++++
Art/+aarch64/jmp.ha | 1+
Art/+aarch64/longjmp.s | 20++++++++++++++++++++
Art/+aarch64/restore.s | 11+++++++++++
Art/+aarch64/setjmp.s | 18++++++++++++++++++
Art/+linux/+aarch64.ha | 22++++++++++++++++++++++
Art/+linux/+x86_64.ha | 22++++++++++++++++++++++
Art/+linux/abort.ha | 25+++++++++++++++++++++++++
Art/+linux/env.ha | 3+++
Art/+linux/errno.ha | 423+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Art/+linux/segmalloc.ha | 26++++++++++++++++++++++++++
Art/+linux/signal.ha | 44++++++++++++++++++++++++++++++++++++++++++++
Art/+linux/start+aarch64.s | 8++++++++
Art/+linux/start+x86_64.s | 6++++++
Art/+linux/start.ha | 7+++++++
Art/+linux/stat.ha | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Art/+linux/syscall+aarch64.s | 111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Art/+linux/syscall+x86_64.s | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Art/+linux/syscallno+aarch64.ha | 293+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Art/+linux/syscallno+x86_64.ha | 347+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Art/+linux/syscalls.ha | 363+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Art/+linux/types.ha | 485+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Art/+test/+linux.ha | 13+++++++++++++
Art/+test/abort.ha | 16++++++++++++++++
Art/+test/cstring.ha | 16++++++++++++++++
Art/+test/start.ha | 124+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Art/+test/ztos.ha | 38++++++++++++++++++++++++++++++++++++++
Art/+x86_64/jmp.ha | 1+
Art/+x86_64/longjmp.s | 15+++++++++++++++
Art/+x86_64/restore.s | 11+++++++++++
Art/+x86_64/setjmp.s | 16++++++++++++++++
Art/ensure.ha | 36++++++++++++++++++++++++++++++++++++
Art/hare.sc | 36++++++++++++++++++++++++++++++++++++
Art/jmp.ha | 9+++++++++
Art/malloc.ha | 163+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Art/memcpy.ha | 6++++++
Art/memset.ha | 6++++++
Art/start.ha | 24++++++++++++++++++++++++
Art/strcmp.ha | 18++++++++++++++++++
Aslice/reverse.ha | 22++++++++++++++++++++++
Asort/+test.ha | 17+++++++++++++++++
Asort/search.ha | 26++++++++++++++++++++++++++
Astrconv/+test/stoi.ha | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astrconv/+test/stou.ha | 41+++++++++++++++++++++++++++++++++++++++++
Astrconv/itos.ha | 107+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astrconv/numeric.ha | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astrconv/stoi.ha | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astrconv/stou.ha | 149+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astrconv/types.ha | 22++++++++++++++++++++++
Astrconv/utos.ha | 135+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astrings/concat.ha | 36++++++++++++++++++++++++++++++++++++
Astrings/contains.ha | 15+++++++++++++++
Astrings/cstrings.ha | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Astrings/dup.ha | 41+++++++++++++++++++++++++++++++++++++++++
Astrings/index.ha | 25+++++++++++++++++++++++++
Astrings/iter.ha | 105+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astrings/sub.ha | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Astrings/suffix.ha | 48++++++++++++++++++++++++++++++++++++++++++++++++
Astrings/tokenize.ha | 120+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astrings/utf8.ha | 42++++++++++++++++++++++++++++++++++++++++++
Astrio/dynamic.ha | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astrio/fixed.ha | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astrio/ops.ha | 110+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atemp/+linux.ha | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atypes/arch+aarch64.ha | 23+++++++++++++++++++++++
Atypes/arch+x86_64.ha | 23+++++++++++++++++++++++
Atypes/classes.ha | 42++++++++++++++++++++++++++++++++++++++++++
Atypes/limits.ha | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
150 files changed, 12481 insertions(+), 1 deletion(-)

diff --git a/.build.yml b/.build.yml @@ -18,7 +18,6 @@ tasks: - hare: | cd hare cp config.example.mk config.mk - ln -s ../stdlib ./ make - tests: | cd hare diff --git a/COPYING b/COPYING @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. 
"Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/README.md b/README.md @@ -0,0 +1,71 @@ +# The Hare standard library [![builds.sr.ht status](https://builds.sr.ht/~sircmpwn/stdlib/commits.svg)](https://builds.sr.ht/~sircmpwn/stdlib/commits?) + +This is the standard library for the [Hare](https://harelang.org) programming +language. + +## Hare stdlib mandate + +The Hare standard library shall provide: + +1. Useful features to complement Hare language features +2. An interface to the host operating system +3. Implementations of broadly useful algorithms +4. Implementations of broadly useful formats and protocols +5. Introspective meta-features for Hare-aware programs + +Each of these services shall: + +1. Have a concise and straightforward interface +2. Correctly and completely implement the useful subset of the required behavior* +3. Provide complete documentation for each exported symbol +4. Be sufficiently tested to provide confidence in the implementation + +\* This means read the RFC before you start writing the code + +Some examples of on-topic features include: + +### Language features + +- Memory allocation +- High-level string manipulation (e.g. concat, replace, split) +- High-level slice manipulation (e.g. 
sort) +- Test harness and testing support code + +### Introspection + +- Hare lexing, parsing (and unparsing), and type checking +- ELF, DWARF +- Stack unwinding + +### Operating system interface + +- I/O support +- Filesystem access +- Sockets + +### Useful algorithms + +- Sorting, searching +- Cryptography +- Hashing +- Compression +- Date & time support +- Regex + +### Useful formats & protocols + +- Internet protocol suite +- JSON, XML, INI, HTML +- tar, zip, cpio +- MIME, RFC 2822 + +## Conventions + +See also the [Hare style guide](https://harelang.org/style/) + +1. Tagged unions should be written from most to least common case, which + generally puts the error cases last. +2. Prefer to design APIs which avoid allocation if it can be done without being + at the expense of good API design. +3. Whatever the semantics, document the allocation and lifetime behaviors and + expectations of each function to which they apply. diff --git a/ascii/ctype.ha b/ascii/ctype.ha @@ -0,0 +1,102 @@ +def U: u8 = 0o1; +def L: u8 = 0o2; +def N: u8 = 0o4; +def S: u8 = 0o10; +def P: u8 = 0o20; +def C: u8 = 0o40; +def B: u8 = 0o100; +def X: u8 = 0o200; + +// LUT of bitfields with character attributes +const cclass: []u8 = [ +// 0 1 2 3 4 5 6 7 + C, C, C, C, C, C, C, C, // 0 + C, S|C, S|C, S|C, S|C, S|C, C, C, // 10 + C, C, C, C, C, C, C, C, // 20 + C, C, C, C, C, C, C, C, // 30 + S|B, P, P, P, P, P, P, P, // 40 + P, P, P, P, P, P, P, P, // 50 + N|X, N|X, N|X, N|X, N|X, N|X, N|X, N|X, // 60 + N|X, N|X, P, P, P, P, P, P, // 70 + P, U|X, U|X, U|X, U|X, U|X, U|X, U, // 100 + U, U, U, U, U, U, U, U, // 110 + U, U, U, U, U, U, U, U, // 120 + U, U, U, P, P, P, P, P, // 130 + P, L|X, L|X, L|X, L|X, L|X, L|X, L, // 140 + L, L, L, L, L, L, L, L, // 150 + L, L, L, L, L, L, L, L, // 160 + L, L, L, P, P, P, P, C, // 170 +]; + +// True if an ASCII character is a letter +export fn isalpha(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&(U|L) > 0; + +// True if an ASCII character is 
uppercase +export fn isupper(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&U > 0; + +// True if an ASCII character is lowercase +export fn islower(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&L > 0; + +// True if an ASCII character is a digit +export fn isdigit(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&N > 0; + +// True if an ASCII character is a hexadecimal digit +export fn isxdigit(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&X > 0; + +// True if an ASCII character is a space. +export fn isspace(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&S > 0; + +// True if an ASCII character is punctuation. +export fn ispunct(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&P > 0; + +// True if an ASCII character is alphanumeric. +export fn isalnum(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&(U|L|N) > 0; + +// True if an ASCII character is printable. +export fn isprint(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&(P|U|L|N|B) > 0; + +// True if an ASCII character is any printable character other than space. +export fn isgraph(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&(P|U|L|N) > 0; + +// True if an ASCII character is a control character. +export fn iscntrl(c: rune) bool = + if (!isascii(c)) false else cclass[c: u32]&C > 0; + +// True if a rune is a valid ASCII character. +export fn isascii(c: rune) bool = c: u32 <= 0o177; + +// Returns the uppercase form of an ASCII character, or the original character +// if it was not a lowercase letter. +export fn toupper(c: rune) rune = { + return if (islower(c)) { + (c: u32 - ('a': u32) + ('A': u32)): rune; + } else c; +}; + +// Returns the lowercase form of an ASCII character, or the original character +// if it was not an uppercase letter. 
+export fn tolower(c: rune) rune = {
+	return if (isupper(c)) {
+		// Shift the rune from the 'A'..'Z' range into 'a'..'z'.
+		(c: u32 - ('A': u32) + ('a': u32)): rune;
+	} else c;
+};
+
+@test fn ctype() void = {
+	// Just some simple tests
+	assert(isspace(' ') && !isspace('x') && !isspace('こ'));
+	assert(isalnum('a') && isalnum('8') && !isalnum('こ'));
+	assert(!ispunct('\0') && iscntrl('\b'));
+	assert(isascii('a') && isascii('\0') && isascii('\x7F'));
+	assert(!isascii('\x80') && !isascii('こ'));
+	assert(tolower('A') == 'a' && tolower('こ') == 'こ');
+};
diff --git a/ascii/strcmp.ha b/ascii/strcmp.ha
@@ -0,0 +1,36 @@
+use strings;
+
+// Compares two strings by their ASCII sort order. Zero is returned if the
+// strings are equal, a negative value if a is less than b, or a positive value
+// if a is greater than b. If a non-ASCII rune is encountered while comparing,
+// void is returned.
+//
+// The comparison stops at the first differing rune or at the end of the shorter
+// string, so non-ASCII runes located after that point are not detected.
+export fn strcmp(a: str, b: str) (int | void) = {
+	let a = strings::iter(a), b = strings::iter(b);
+	for (true) {
+		let ra = match (strings::next(&a)) {
+			// a is exhausted: equal if b is exhausted too, otherwise
+			// a is a strict prefix of b and sorts first.
+			void => return match (strings::next(&b)) {
+				void => 0,
+				rune => -1,
+			},
+			r: rune => r,
+		};
+		let rb = match (strings::next(&b)) {
+			// b is a strict prefix of a.
+			void => return 1,
+			r: rune => r,
+		};
+		if (!isascii(ra) || !isascii(rb)) {
+			return;
+		};
+		if (ra != rb) {
+			return ra: u32: int - rb: u32: int;
+		};
+	};
+};
+
+@test fn strcmp() void = {
+	assert(strcmp("ABC", "ABC") as int == 0);
+	assert(strcmp("ABC", "AB") as int == 1);
+	assert(strcmp("AB", "ABC") as int == -1);
+	assert(strcmp("BCD", "ABC") as int == 1);
+	assert(strcmp("ABC", "こんにちは") is void);
+};
diff --git a/bufio/buffered.ha b/bufio/buffered.ha
@@ -0,0 +1,85 @@
+use io;
+
+// State of a buffered stream. The embedded io::stream must remain the first
+// field: the code below casts between *io::stream and *bufstream.
+type bufstream = struct {
+	stream: io::stream,
+	source: *io::stream,
+	rbuffer: []u8,
+	wbuffer: []u8,
+	rfilled: []u8,
+	wfilled: []u8,
+	flush: []u8,
+};
+
+// Creates a stream which buffers reads and writes for the underlying stream.
+// This is generally used to improve performance of small reads/writes for
+// sources where I/O operations are costly, such as if they invoke a syscall or
+// take place over the network.
+//
+// The caller should supply one or both of a read and write buffer as a slice of
+// the desired buffer size, or empty slices if read or write functionality is
+// disabled (in which case the 'mode' argument must be set accordingly). The
+// program aborts if the buffer for a requested mode is empty.
+//
+// When the buffered stream is closed, the underlying stream is also closed. The
+// provided buffers are not freed.
+export fn buffered(
+	src: *io::stream,
+	rbuf: []u8,
+	wbuf: []u8,
+	mode: io::mode,
+) *io::stream = {
+	let s = alloc(bufstream {
+		stream = io::stream {
+			name = src.name,
+			closer = &buffered_close,
+			// Leave reader/writer (and any other callbacks) unset
+			// until the mode checks below install them.
+			...
+		},
+		source = src,
+		rbuffer = rbuf,
+		wbuffer = wbuf,
+		rfilled = rbuf[..0],
+		wfilled = wbuf[..0],
+		flush = ['\n': u32: u8],
+	}): *io::stream;
+	if (mode & io::mode::READ == io::mode::READ) {
+		assert(len(rbuf) != 0);
+		s.reader = &buffered_read;
+	};
+	if (mode & io::mode::WRITE == io::mode::WRITE) {
+		assert(len(wbuf) != 0);
+		s.writer = &buffered_write;
+	};
+	return s;
+};
+
+// Flushes pending writes to the underlying stream. The stream must have been
+// created with [buffered]. Not implemented yet: after validating the stream,
+// this function currently aborts.
+export fn flush(s: *io::stream) void = {
+	assert(s.closer == &buffered_close,
+		"bufio::flush used on non-buffered stream");
+	let s = s: *bufstream;
+	abort(); // TODO
+};
+
+// Sets the list of bytes which will cause the stream to flush when written. By
+// default, the stream will flush when a newline (\n) is written.
+export fn set_flush_bytes(s: *io::stream, b: []u8) void = {
+	assert(s.closer == &buffered_close,
+		"bufio::set_flush_bytes used on non-buffered stream");
+	let s = s: *bufstream;
+	s.flush = b;
+};
+
+// Closes the underlying stream and frees the bufstream itself. The read and
+// write buffers are not freed here.
+fn buffered_close(s: *io::stream) void = {
+	assert(s.closer == &buffered_close);
+	let s = s: *bufstream;
+	io::close(s.source);
+	free(s);
+};
+
+// Placeholder reader: buffered reads are not implemented yet.
+fn buffered_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
+	assert(s.reader == &buffered_read);
+	return io::unsupported; // TODO
+};
+
+// Placeholder writer: buffered writes are not implemented yet.
+fn buffered_write(s: *io::stream, buf: const []u8) (size | io::error) = {
+	assert(s.writer == &buffered_write);
+	return io::unsupported; // TODO
+};
diff --git a/bufio/dynamic.ha b/bufio/dynamic.ha
@@ -0,0 +1,190 @@
+use bytes;
+use io;
+
+// State of a dynamic stream: the backing buffer and the cursor shared by reads,
+// writes and seeks.
+type dynamic_stream = struct {
+	stream: io::stream,
+	buf: []u8,
+	pos: size,
+};
+
+// Creates an [io::stream] which dynamically allocates a buffer to store writes
+// into. Subsequent reads will consume the buffered data. Upon failure to
+// allocate sufficient memory to store writes, the program aborts.
+//
+// Calling [io::close] on this stream will free the buffer. Call [bufio::finish]
+// instead to free up resources associated with the stream, but transfer
+// ownership of the buffer to the caller.
+export fn dynamic(mode: io::mode) *io::stream = dynamic_from([], mode);
+
+// Like [dynamic], but takes an existing slice as input. Writes are appended to
+// it and reads consume bytes from the initial buffer, plus any additional
+// writes. Like [dynamic], calling [io::close] will free the buffer, and
+// [bufio::finish] can be used to return ownership of the buffer to the caller.
+export fn dynamic_from(in: []u8, mode: io::mode) *io::stream = {
+	let s = alloc(dynamic_stream {
+		stream = io::stream {
+			closer = &dynamic_close,
+			seeker = &dynamic_seek,
+			...
+		},
+		buf = in,
+		pos = 0,
+	}): *io::stream;
+	if (mode & io::mode::READ == io::mode::READ) {
+		s.reader = &dynamic_read;
+	};
+	if (mode & io::mode::WRITE == io::mode::WRITE) {
+		s.writer = &dynamic_write;
+	};
+	return s;
+};
+
+// Inserts buf at the current position and advances the position past it.
+// Writing at the end of the buffer is a plain append.
+fn dynamic_write(s: *io::stream, buf: const []u8) (size | io::error) = {
+	let s = s: *dynamic_stream;
+	if (s.pos == len(s.buf)) {
+		append(s.buf, ...buf);
+	} else {
+		// TODO: update this after we add insert
+		// NOTE(review): the slice assignments below assume that
+		// alloc([], n) yields a slice of length n; confirm.
+		let new: []u8 = alloc([], len(s.buf) + len(buf));
+		new[..s.pos] = s.buf[..s.pos];
+		new[s.pos..s.pos + len(buf)] = buf[..];
+		new[s.pos + len(buf)..] = s.buf[s.pos..];
+		free(s.buf);
+		s.buf = new;
+	};
+
+	s.pos += len(buf);
+	return len(buf);
+};
+
+// Copies up to len(buf) bytes starting at the current position into buf and
+// advances the position. Returns io::EOF when the position is at the end of
+// the buffer and buf is not empty.
+fn dynamic_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
+	let s = s: *dynamic_stream;
+	if (len(s.buf) == s.pos && len(buf) != 0) {
+		return io::EOF;
+	};
+	const n = if (len(s.buf) - s.pos < len(buf)) {
+		len(s.buf) - s.pos;
+	} else {
+		len(buf);
+	};
+	buf[..n] = s.buf[s.pos..s.pos + n];
+	s.pos += n;
+	return n;
+};
+
+// Moves the stream position and returns the new position. The resulting
+// position must lie within [0, len(buf)]; the program aborts with "invalid
+// offset" otherwise.
+fn dynamic_seek(
+	s: *io::stream,
+	off: io::off,
+	w: io::whence
+) (io::off | io::error) = {
+	let stream = s: *dynamic_stream;
+	switch (w) {
+		io::whence::SET => {
+			// A negative off becomes a huge size and is rejected.
+			if (len(stream.buf) < off: size) {
+				abort("invalid offset");
+			};
+			stream.pos = off: size;
+		},
+		io::whence::CUR => {
+			// Relies on unsigned wrap-around: a valid negative off
+			// lands before pos, an invalid one wraps past the end.
+			if (stream.pos + off: size > len(stream.buf)) {
+				abort("invalid offset");
+			};
+			stream.pos += off: size;
+		},
+		io::whence::END => {
+			// Only offsets in [-len(buf), 0] stay inside the buffer:
+			// off may not point past the end, and its magnitude may
+			// not exceed the buffer length.
+			if (off > 0 || (-off): size > len(stream.buf)) {
+				abort("invalid offset");
+			};
+			stream.pos = len(stream.buf) - (-off): size;
+		},
+	};
+	return stream.pos: io::off;
+};
+
+// Frees both the buffer and the stream.
+fn dynamic_close(s: *io::stream) void = {
+	const s = s: *dynamic_stream;
+	free(s.buf);
+	free(s);
+};
+
+// Closes the stream without freeing the buffer, instead transferring ownership
+// of it to the caller.
+export fn finish(s: *io::stream) []u8 = { + if (s.closer != &dynamic_close) { + abort("bufio::finish called on non-bufio stream"); + }; + let s = s: *dynamic_stream; + let buf = s.buf; + free(s); + return buf; +}; + +// Returns the current buffer. +export fn buffer(s: *io::stream) []u8 = { + if (s.closer != &dynamic_close) { + abort("bufio::buffer called on non-bufio stream"); + }; + let s = s: *dynamic_stream; + return s.buf; +}; + +// Resets the buffer's length to zero, but keeps the allocated memory around for +// future writes. +export fn reset(s: *io::stream) void = { + if (s.closer != &dynamic_close) { + abort("bufio::reset called on non-bufio stream"); + }; + const s = s: *dynamic_stream; + s.pos = 0; + s.buf = s.buf[..0]; +}; + +// Truncates the buffer, freeing memory associated with it and setting its +// length to zero. +export fn truncate(s: *io::stream) (void | io::unsupported) = { + if (s.closer != &dynamic_close) { + return io::unsupported; + }; + let s = s: *dynamic_stream; + s.pos = 0; + delete(s.buf[..]); +}; + +@test fn dynamic() void = { + // TODO: slice/array equality + let s = dynamic(io::mode::RDWR); + assert(io::write(s, [1, 2, 3]) as size == 3); + assert(bytes::equal(buffer(s), [1, 2, 3])); + assert(io::write(s, [4, 5]) as size == 2); + assert(bytes::equal(buffer(s), [1, 2, 3, 4, 5])); + let buf: [2]u8 = [0...]; + assert(io::seek(s, 0, io::whence::SET) as io::off == 0: io::off); + assert(io::read(s, buf[..]) as size == 2 && bytes::equal(buf, [1, 2])); + assert(io::read(s, buf[..]) as size == 2 && bytes::equal(buf, [3, 4])); + assert(io::read(s, buf[..]) as size == 1 && buf[0] == 5); + assert(io::read(s, buf[..]) is io::EOF); + assert(io::write(s, [6, 7, 8]) as size == 3); + assert(bytes::equal(buffer(s), [1, 2, 3, 4, 5, 6, 7, 8])); + reset(s); + assert(len(buffer(s)) == 0); + assert(io::write(s, [1, 2, 3]) as size == 3); + assert(truncate(s) is void); + assert(len(buffer(s)) == 0); + + let sl: []u8 = alloc([1, 2, 3]); + let s = 
dynamic_from(sl, io::mode::WRITE); + assert(io::seek(s, 0, io::whence::END) as io::off == 3: io::off); + assert(io::write(s, [4, 5, 6]) as size == 3); + assert(bytes::equal(buffer(s), [1, 2, 3, 4, 5, 6])); + // TODO: this should check for io::unsupported (harec bug prevents that) + assert(io::read(s, buf[..]) is io::error); + io::close(s); + + sl = alloc([1, 2]); + let s = dynamic_from(sl, io::mode::READ); + assert(io::read(s, buf[..1]) as size == 1 && buf[0] == 1); + assert(io::seek(s, 1, io::whence::CUR) as io::off == 2: io::off); + assert(io::read(s, buf[..]) is io::EOF); + // TODO: this should check for io::unsupported (harec bug prevents that) + assert(io::write(s, [1, 2]) is io::error); +}; diff --git a/bufio/fixed.ha b/bufio/fixed.ha @@ -0,0 +1,62 @@ +use bytes; +use io; +use strings; + +type fixed_stream = struct { + stream: io::stream, + buf: []u8, +}; + +// Creates an [io::stream] for a fixed, caller-supplied buffer. Supports either +// read or write, but not both. The program aborts if writes would exceed the +// buffer's capacity. +export fn fixed(in: []u8, mode: io::mode) *io::stream = { + let s = alloc(fixed_stream { + stream = io::stream { + name = "<bufio::fixed>", + ... 
+ }, + buf = in, + }); + if (mode & io::mode::READ == io::mode::READ) { + assert(mode & io::mode::WRITE != io::mode::WRITE); + s.stream.reader = &fixed_read; + }; + if (mode & io::mode::WRITE == io::mode::WRITE) { + assert(mode & io::mode::READ != io::mode::READ); + s.stream.writer = &fixed_write; + }; + return &s.stream; +}; + +fn fixed_read(s: *io::stream, buf: []u8) (size | io::error | io::EOF) = { + let stream = s: *fixed_stream; + if (len(stream.buf) == 0) { + return io::EOF; + }; + const n = if (len(buf) > len(stream.buf)) len(stream.buf) else len(buf); + buf[..n] = stream.buf[..n]; + stream.buf = stream.buf[n..]; + return n; +}; + +fn fixed_write(s: *io::stream, buf: const []u8) (size | io::error) = { + let stream = s: *fixed_stream; + if (len(stream.buf) == 0) { + abort("bufio::fixed buffer exceeded"); + }; + const n = if (len(buf) > len(stream.buf)) len(stream.buf) else len(buf); + stream.buf[..n] = buf[..n]; + stream.buf = stream.buf[n..]; + return n; +}; + +@test fn fixed() void = { + // TODO: add a read test too + static let buf: [1024]u8 = [0...]; + let stream = fixed(buf, io::mode::WRITE); + let n = 0z; + n += io::write(stream, strings::to_utf8("hello ")) as size; + n += io::write(stream, strings::to_utf8("world")) as size; + assert(bytes::equal(buf[..n], strings::to_utf8("hello world"))); +}; diff --git a/bytes/contains.ha b/bytes/contains.ha @@ -0,0 +1,5 @@ +// Returns true if a byte slice contains a byte or a sequence of bytes. +export fn contains(haystack: []u8, needle: (u8 | []u8)) bool = match (needle) { + b: u8 => !(index_byte(haystack, b) is void), + b: []u8 => !(index_slice(haystack, b) is void), +}; diff --git a/bytes/copy.ha b/bytes/copy.ha @@ -0,0 +1,15 @@ +// Copies bytes from src to dest. dest must have the same length as src. 
+export fn copy(dest: []u8, src: []u8) void = { + assert(len(dest) == len(src), + "Destination slice must have same length as source slice"); + for (let i = 0z; i < len(dest); i += 1) { + dest[i] = src[i]; + }; +}; + +@test fn copy() void = { + let a: [4]u8 = [1, 3, 3, 7]; + let b: [4]u8 = [0...]; + copy(b[..], a[..]); + assert(equal(a, b)); +}; diff --git a/bytes/equal.ha b/bytes/equal.ha @@ -0,0 +1,24 @@ +// Returns true if the two byte sequences are identical. +export fn equal(a: []u8, b: []u8) bool = { + if (len(a) != len(b)) { + return false; + }; + for (let i = 0z; i < len(a); i += 1) { + if (a[i] != b[i]) { + return false; + }; + }; + return true; +}; + +@test fn equal() void = { + let a: []u8 = [1, 2, 3]; + let b: []u8 = [1, 2, 3]; + let c: []u8 = [1, 4, 5]; + let d: []u8 = [1, 2, 3, 4]; + let e: []u8 = [1, 2]; + assert(equal(a, b)); + assert(!equal(a, c)); + assert(!equal(a, d)); + assert(!equal(a, e)); +}; diff --git a/bytes/index.ha b/bytes/index.ha @@ -0,0 +1,108 @@ +// Returns the offset of the first instance of 'needle' in a 'haystack' of +// bytes, or void if it is not found. +export fn index(haystack: []u8, needle: (u8 | []u8)) (size | void) = { + return match (needle) { + b: u8 => index_byte(haystack, b), + b: []u8 => index_slice(haystack, b), + }; +}; + +fn index_byte(haystack: []u8, needle: u8) (size | void) = { + for (let i = 0z; i < len(haystack); i += 1) { + if (haystack[i] == needle) { + return i; + }; + }; +}; + +fn index_slice(haystack: []u8, needle: []u8) (size | void) = { + for (let i = 0z; i + len(needle) <= len(haystack); i += 1) { + if (equal(haystack[i..i + len(needle)], needle)) { + return i; + }; + }; +}; + + +// Returns the offset of the last instance of 'needle' in a 'haystack' of +// bytes, or void if it is not found. 
+export fn rindex(haystack: []u8, needle: (u8 | []u8)) (size | void) = { + return match (needle) { + b: u8 => rindex_byte(haystack, b), + b: []u8 => rindex_slice(haystack, b), + }; +}; + +fn rindex_byte(haystack: []u8, needle: u8) (size | void) = { + for (let i = len(haystack); i > 0; i -= 1) { + if (haystack[i - 1] == needle) { + return i - 1; + }; + }; +}; + +fn rindex_slice(haystack: []u8, needle: []u8) (size | void) = { + for (let i = 0z; i + len(needle) <= len(haystack); i += 1) { + let r = len(haystack) - i; + if (equal(haystack[r - len(needle)..r], needle)) { + return r - len(needle); + }; + }; +}; + + +@test fn index() void = { + // Bytes + const a: [4]u8 = [1, 3, 3, 7]; + match (index(a, 7)) { + n: size => assert(n == 3), + void => abort(), + }; + match (index(a, 42)) { + size => abort(), + void => void, + }; + match (index([], 42)) { + size => abort(), + void => void, + }; + + match (rindex(a, 3)) { + n: size => assert(n == 2), + void => abort(), + }; + match (rindex(a, 42)) { + n: size => abort(), + void => void, + }; + match (rindex([], 42)) { + size => abort(), + void => void, + }; + + + // Slices + match (index(a, [3, 3])) { + n: size => assert(n == 1), + void => abort(), + }; + match (index(a, [])) { + n: size => assert(n == 0), + void => abort(), + }; + match(index(a, [4, 2])) { + size => abort(), + void => void, + }; + + const special: []u8 = [1, 1, 1, 2]; + match (index(special, [1, 1, 2])) { + n: size => assert(n == 1), + void => abort(), + }; + + match (rindex(special, [1, 1])) { + n: size => assert(n == 1), + void => abort(), + }; +}; diff --git a/bytes/reverse.ha b/bytes/reverse.ha @@ -0,0 +1,25 @@ +// Reverses a slice of bytes. 
+export fn reverse(b: []u8) void = { + if (len(b) == 0) { + return; + }; + for (let s = 0z, e = len(b) - 1; s < e) { + let x = b[s]; + b[s] = b[e]; + b[e] = x; + s += 1; + e -= 1; + }; +}; + +@test fn reverse() void = { + let a: [4]u8 = [1, 3, 3, 7]; + reverse(a); + assert(equal(a, [7, 3, 3, 1])); + let b: [5]u8 = [1, 2, 3, 4, 5]; + reverse(b); + assert(equal(b, [5, 4, 3, 2, 1])); + let c: []u8 = []; + reverse(c); + assert(equal(c, [])); +}; diff --git a/bytes/tokenize.ha b/bytes/tokenize.ha @@ -0,0 +1,163 @@ +use types; + +// The state for a tokenizer. +export type tokenizer = struct { s: []u8, d: []u8, p: size }; + +// Returns a tokenizer which yields sub-slices tokenized by a delimiter. +// Caller should ensure delim is not an empty slice +export fn tokenize(s: []u8, delim: []u8) tokenizer = { + assert(len(delim) > 0); + if (len(s) == 0) { + delim = []; + }; + return tokenizer { + s = s, + d = delim, + p = types::SIZE_MAX, + }; +}; + +// Returns the next slice from a tokenizer, and advances the cursor. Returns +// void if there are no tokens left and on all subsequent invocations. If a +// string starts with, or ends with, a token, an empty slice is returned at the +// beginning or end of the sequence, respectively. +export fn next_token(s: *tokenizer) ([]u8 | void) = match (peek_token(s)) { + b: []u8 => { + if (s.p == len(s.s)) { + s.d = s.d[..0]; + s.s = s.s[..0]; + } else { + s.s = s.s[s.p + len(s.d)..]; + }; + s.p = types::SIZE_MAX; + return b; + }, + void => void, +}; + +// Same as next_token(), but does not advance the cursor +export fn peek_token(s: *tokenizer) ([]u8 | void) = { + if (len(s.d) == 0) { + return; + }; + if (s.p > len(s.s)) { + s.p = match (index(s.s, s.d)) { + i: size => i, + void => len(s.s), + }; + }; + return s.s[..s.p]; +}; + + +// Returns the remainder of the slice associated with a tokenizer, without doing +// any further tokenization. 
+export fn remaining_tokens(s: *tokenizer) []u8 = { + return s.s; +}; + +@test fn tokenize() void = { + // Two-byte delimiter; a lone 24 that is not followed by 42 stays inside + // the token ([3, 24]). + const input: [_]u8 = [1, 2, 24, 42, 3, 24, 24, 42, 4, 5]; + let t = tokenize(input, [24, 42]); + + let p = peek_token(&t) as []u8; + let n = next_token(&t) as []u8; + assert(equal(p, n)); + assert(equal([1, 2], n)); + + p = peek_token(&t) as []u8; + n = next_token(&t) as []u8; + assert(equal(p, n)); + assert(equal([3, 24], n)); + + assert(equal(peek_token(&t) as []u8, peek_token(&t) as []u8)); + match (next_token(&t)) { + b: []u8 => assert(equal([4, 5], b)), + void => abort(), + }; + + assert(peek_token(&t) is void); + assert(next_token(&t) is void); + + // Leading and trailing delimiters yield an empty token at each end. + const input2: [_]u8 = [24, 42, 1, 24, 42]; + t = tokenize(input2, [24, 42]); + + assert(equal(peek_token(&t) as []u8, peek_token(&t) as []u8)); + match (next_token(&t)) { + b: []u8 => assert(equal([], b)), + void => abort(), + }; + + assert(equal(peek_token(&t) as []u8, peek_token(&t) as []u8)); + match (next_token(&t)) { + b: []u8 => assert(equal([1], b)), + void => abort(), + }; + + //assert(equal(peek_token(&t) as []u8, peek_token(&t) as []u8)); + //assert(false); + match (next_token(&t)) { + b: []u8 => assert(equal([], b)), + void => abort(), + }; + + assert(peek_token(&t) is void); + assert(next_token(&t) is void); + + // The delimiter is matched at its first occurrence: [1, 1], [1], [2]. + const input3: [_]u8 = [1, 1, 1, 2, 1, 1, 2, 2]; + t = tokenize(input3, [1, 2]); + + match (next_token(&t)) { + b: []u8 => assert(equal([1, 1], b)), + void => abort(), + }; + + match (next_token(&t)) { + b: []u8 => assert(equal([1], b)), + void => abort(), + }; + + match (next_token(&t)) { + b: []u8 => assert(equal([2], b)), + void => abort(), + }; + + assert(next_token(&t) is void); + + // An input equal to the delimiter yields two empty tokens. + const input4: [_]u8 = [1, 2]; + t = tokenize(input4, [1, 2]); + + match (next_token(&t)) { + b: []u8 => assert(equal([], b)), + void => abort(), + }; + + match (next_token(&t)) { + b: []u8 => assert(equal([], b)), + void => abort(), + }; + + assert(peek_token(&t) is void); + assert(next_token(&t) is void); + +
// remaining_tokens returns the not-yet-tokenized rest of the input, and + // peeking leaves it unchanged. + const input5: [_]u8 = [24, 42, 1, 24, 42, 2, 3, 4]; + t = tokenize(input5, [24, 42]); + + match (next_token(&t)) { + b: []u8 => assert(equal([], b)), + void => abort(), + }; + + match (next_token(&t)) { + b: []u8 => assert(equal([1], b)), + void => abort(), + }; + + assert(equal(remaining_tokens(&t), [2, 3, 4])); + assert(equal(peek_token(&t) as []u8, [2, 3, 4])); + assert(equal(remaining_tokens(&t), [2, 3, 4])); + + // An empty input yields no tokens at all. + t = tokenize([]: []u8, [42]); + assert(peek_token(&t) is void); + assert(next_token(&t) is void); +}; diff --git a/crypto/math/bits.ha b/crypto/math/bits.ha @@ -0,0 +1,22 @@ +// crypto::math provides constant-time mathematical operations useful for +// cryptographic algorithms. + +// Rotates a 32-bit unsigned integer left by k bits. k may be negative to rotate +// right instead, or see [rotr32]. Only the low five bits of k are used, so the +// rotation amount is taken modulo 32. +export fn rotl32(x: u32, k: int) u32 = { + const n = 32u32; + const s = k: u32 & (n - 1); + // NOTE(review): when s == 0 the right shift below is by n (32) bits, the + // full width of x; confirm the language defines that shift. + return x << s | x >> (n - s); +}; + +// Rotates a 32-bit unsigned integer right by k bits. k may be negative to +// rotate left instead, or see [rotl32]. +export fn rotr32(x: u32, k: int) u32 = rotl32(x, -k); + +@test fn lrot32() void = { + let a = 0b11110000111100001111000011110000u32; + assert(rotl32(a, 2) == 0b11000011110000111100001111000011u32); + assert(rotl32(a, -2) == 0b00111100001111000011110000111100u32); + assert(rotl32(a, 32) == 0b11110000111100001111000011110000u32); + assert(rotl32(a, 64) == 0b11110000111100001111000011110000u32); +}; diff --git a/crypto/random/+linux.ha b/crypto/random/+linux.ha @@ -0,0 +1,40 @@ +use rt; +use io; + +// Fills the given buffer with cryptographically random data. If the system is +// unable to provide random data, abort. If you need to handle errors or want to +// use whatever random data the system can provide, even if less than the +// requested amount, use [stream] instead. 
+export fn buffer(buf: []u8) void = {
+	let n = 0z;
+	for (n < len(buf)) {
+		// Ask only for the bytes still missing: the pointer refers to
+		// buf[n..], which has len(buf) - n bytes left. Passing len(buf)
+		// would let the kernel write past the end of the buffer after
+		// a partial read.
+		match (rt::getrandom(buf[n..]: *[*]u8, len(buf) - n, 0)) {
+			err: rt::errno => switch (err) {
+				// Interrupted by a signal: retry.
+				rt::EINTR => void,
+				* => abort(),
+			},
+			z: size => n += z,
+		};
+	};
+};
+
+// Reader for the random stream: a single getrandom call, which may return
+// fewer bytes than requested.
+fn rand_reader(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
+	assert(s == stream);
+	return match (rt::getrandom(buf: *[*]u8, len(buf), 0)) {
+		err: rt::errno => errno_to_io(err),
+		n: size => n,
+	};
+};
+
+// Recovers the errno stored in an io::os_error's data pointer and returns its
+// description.
+fn io_errstr(data: *void) str = {
+	const errno = data: uintptr: int: rt::errno;
+	return rt::errstr(errno);
+};
+
+// Wraps an errno in an io::error; the errno value itself is carried in the
+// data pointer and decoded by io_errstr.
+fn errno_to_io(err: rt::errno) io::error = {
+	let e = io::os_error {
+		string = &io_errstr,
+		data = err: uintptr: *void,
+	};
+	return e: io::error;
+};
diff --git a/crypto/random/random.ha b/crypto/random/random.ha
@@ -0,0 +1,42 @@
+use io;
+use rt;
+
+export let _stream: io::stream = io::stream {
+	name = "<random>",
+	reader = &rand_reader,
+	...
+};
+
+// An [io::stream] which returns cryptographically random data on reads. Be
+// aware, it may return less than you asked for!
+export let stream: *io::stream = &_stream; + +@test fn buffer() void = { + let buf: [4096]u8 = [0...]; + buffer(buf[..]); + + let sum = 0z; + for (let i = 0z; i < len(buf); i += 1) { + sum += buf[i]; + }; + let avg = sum / len(buf); + assert(avg < 0xA0 && avg > 0x60); +}; + +@test fn reader() void = { + let buf: [4096]u8 = [0...]; + let test: []u8 = []; + match (io::read(stream, buf[..])) { + (io::error | io::EOF) => abort(), + n: size => test = buf[..n], + }; + + assert(len(test) > 0); + + let sum = 0z; + for (let i = 0z; i < len(test); i += 1) { + sum += test[i]; + }; + let avg = sum / len(test); + assert(avg < 0xA0 && avg > 0x60); +}; diff --git a/crypto/sha256/+test.ha b/crypto/sha256/+test.ha @@ -0,0 +1,66 @@ +use fmt; +use hash; +use io; +use strings; +use strio; + +@test fn sha256() void = { + let sha = sha256(); + defer hash::finish(sha); + + const vectors = [ + ("", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"), + ("abc", "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"), + ("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1"), + ("abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1"), + ("hello world", "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"), + ("Hare is a cool language", "3f6fe31611580448e33af475ce0e66c7d55a156c6ec43c794225cc3084e04635"), + ("'UNIX was not designed to stop its users from doing stupid things, as that would also stop them from doing clever things' - Doug Gwyn", "5cfa9eccaafa0a7d9d965e36b0a54cc1dd97dd1dff7e11d5e631bdea7f2ef328"), + ("'Life is too short to run proprietary software' - Bdale Garbee", "79ecc26605c1fa5156821c5da9ebc959d8a46050ee49f47da57bf9391a558ceb"), + ("'The central enemy of reliability is complexity.' 
- Geer et al", "80b2fd9ae9e9c2ccd801c923f5e3684d56c6b05edc2eb480634b0af10f9c810b"), + ("'A language that doesn’t have everything is actually easier to program in than some that do.' - Dennis Ritchie", "10ebb04c1ddd55528d0c8db05a1f5fad6c04ebc20cfc4a53308f9a05a90cc438"), + ]; + + for (let i = 0z; i < len(vectors); i += 1) { + const vector = vectors[i]; + hash::reset(sha); + hash::write(sha, strings::to_utf8(vector.0)); + let sum = hash::sum(sha); + defer free(sum); + + let hex = strio::dynamic(); + defer io::close(hex); + for (let i = 0z; i < SIZE; i += 1) { + fmt::fprintf(hex, "{:02x}", sum[i]); + }; + + if (strio::string(hex) != vector.1) { + fmt::errorfln("Vector {}: {} != {}", + i, strio::string(hex), vector.1); + abort(); + }; + }; + + // Uncomment this to run the 1G test vector (I promise it works): + + //const input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmno"; + //const expected = "50e72a0e26442fe2552dc3938ac58658228c0cbfb1d2ca872ae435266fcd055e"; + //hash::reset(sha); + //for (let i = 0z; i < 16777216; i += 1) { + // hash::write(sha, strings::to_utf8(input)); + //}; + //let sum = hash::sum(sha); + //defer free(sum); + + //let hex = strio::dynamic(); + //defer io::close(hex); + //for (let i = 0z; i < SIZE; i += 1) { + // fmt::fprintf(hex, "{:02x}", sum[i]); + //}; + + //if (strio::string(hex) != expected) { + // fmt::errorfln("Biggo vector: {} != {}", + // strio::string(hex), expected); + // abort(); + //}; +}; diff --git a/crypto/sha256/sha256.ha b/crypto/sha256/sha256.ha @@ -0,0 +1,211 @@ +use crypto::math; +use endian; +use hash; +use io; + +// The size, in bytes, of a SHA-256 digest. 
+export def SIZE: size = 32;
+
+// Loosely based on the Go implementation
+// Size of one SHA-256 input block, in bytes.
+def chunk: size = 64;
+// Initial hash values, installed by reset().
+def init0: u32 = 0x6A09E667;
+def init1: u32 = 0xBB67AE85;
+def init2: u32 = 0x3C6EF372;
+def init3: u32 = 0xA54FF53A;
+def init4: u32 = 0x510E527F;
+def init5: u32 = 0x9B05688C;
+def init6: u32 = 0x1F83D9AB;
+def init7: u32 = 0x5BE0CD19;
+
+// Round constants, one per round of the compression loop in block().
+const k: [_]u32 = [
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
+	0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
+	0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+	0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
+	0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+	0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+];
+
+// Hash state: h holds the eight working hash words, x buffers a partial block
+// of nx bytes, and ln counts the total number of bytes written.
+type state = struct {
+	hash: hash::hash,
+	h: [8]u32,
+	x: [chunk]u8,
+	nx: size,
+	ln: size,
+};
+
+// Creates a new SHA-256 hash. The state is heap-allocated and freed when the
+// hash's stream is closed.
+export fn sha256() *hash::hash = {
+	let sha = alloc(state {
+		hash = hash::hash {
+			stream = io::stream {
+				writer = &write,
+				closer = &close,
+				...
+			},
+			sum = &sum,
+			reset = &reset,
+			sz = SIZE,
+			...
+		},
+	});
+	let hash = &sha.hash;
+	hash::reset(hash);
+	return hash;
+};
+
+// Restores the initial hash words and discards any buffered input.
+fn reset(h: *hash::hash) void = {
+	let h = h: *state;
+	h.h[0] = init0;
+	h.h[1] = init1;
+	h.h[2] = init2;
+	h.h[3] = init3;
+	h.h[4] = init4;
+	h.h[5] = init5;
+	h.h[6] = init6;
+	h.h[7] = init7;
+	h.nx = 0;
+	h.ln = 0;
+};
+
+// Feeds buf into the hash and returns len(buf). Whole blocks are compressed
+// immediately; a trailing partial block is kept in h.x until more data arrives.
+fn write(st: *io::stream, buf: const []u8) (size | io::error) = {
+	let h = st: *state;
+	let b: []u8 = buf;
+	let n = len(b);
+	h.ln += n;
+	if (h.nx > 0) {
+		// Top up the partially filled block first.
+		let n = if (len(b) > len(h.x) - h.nx) {
+			len(h.x) - h.nx;
+		} else len(b);
+		// Bound the destination to exactly n bytes so that both sides
+		// of the slice assignment have the same length even when the
+		// input does not fill the rest of the block.
+		h.x[h.nx..h.nx + n] = b[..n];
+		h.nx += n;
+		if (h.nx == chunk) {
+			block(h, h.x[..]);
+			h.nx = 0;
+		};
+		b = b[n..];
+	};
+	if (len(b) >= chunk) {
+		// Compress as many whole blocks as possible straight from b.
+		let n = len(b) & ~(chunk - 1);
+		block(h, b[..n]);
+		b = b[n..];
+	};
+	if (len(b) > 0) {
+		// Stash the remainder for the next write.
+		let n = len(b);
+		h.x[..n] = b[..];
+		h.nx = n;
+	};
+	return n;
+};
+
+fn close(st: *io::stream) void = {
+	free(st);
+};
+
+// Returns the digest of the data written so far as a newly allocated slice of
+// SIZE bytes, which the caller must free. Padding is applied to a copy of the
+// state, so the hash itself is left unchanged.
+fn sum(h: *hash::hash) []u8 = {
+	let h = h: *state;
+	let copy = *h;
+	let h = &copy;
+
+	// Add padding
+	let ln = h.ln;
+	let tmp: [64]u8 = [0...];
+	tmp[0] = 0x80;
+	const n = if ((ln % 64z) < 56z) 56z - ln % 64z
+		else 64z + 56z - ln % 64z;
+	write(&h.hash.stream, tmp[..n]);
+
+	// Append the message length in bits as a big-endian 64-bit integer.
+	ln <<= 3;
+	endian::beputu64(tmp, ln: u64);
+	write(&h.hash.stream, tmp[..8]);
+
+	assert(h.nx == 0);
+
+	let digest: [SIZE]u8 = [0...];
+	endian::beputu32(digest[0..], h.h[0]);
+	endian::beputu32(digest[4..], h.h[1]);
+	endian::beputu32(digest[8..], h.h[2]);
+	endian::beputu32(digest[12..], h.h[3]);
+	endian::beputu32(digest[16..], h.h[4]);
+	endian::beputu32(digest[20..], h.h[5]);
+	endian::beputu32(digest[24..], h.h[6]);
+	endian::beputu32(digest[28..], h.h[7]);
+
+	let slice: []u8 = alloc([], SIZE);
+	append(slice, ...digest);
+	return slice;
+};
+
+// Compresses every whole chunk-sized block in buf into the hash words of h.
+// Trailing bytes that do not fill a block are ignored.
+// TODO: Rewrite me in assembly
+fn block(h: *state, buf: []u8) void = {
+	let w: [64]u32 = [0...];
+	let h0 = h.h[0], h1 = h.h[1], h2 = h.h[2], h3 = h.h[3],
+		h4 = h.h[4], h5 = h.h[5], h6 = h.h[6], h7 = h.h[7];
+	for (len(buf) >= chunk) {
+		// Load the block as sixteen big-endian 32-bit words.
+		for (let i = 0; i < 16; i += 1) {
+			let j = i * 4;
+			w[i] = buf[j]: u32 << 24
+				| buf[j+1]: u32 << 16
+				| buf[j+2]: u32 << 8
+				| buf[j+3]: u32;
+		};
+
+		// Extend the sixteen words into the 64-entry message schedule.
+		for (let i = 16; i < 64; i += 1) {
+			let v1 = w[i - 2];
+			let t1 = (math::rotr32(v1, 17))
+				^ (math::rotr32(v1, 19))
+				^ (v1 >> 10);
+			let v2 = w[i - 15];
+			let t2 = (math::rotr32(v2, 7))
+				^ (math::rotr32(v2, 18))
+				^ (v2 >> 3);
+			w[i] = t1 + w[i - 7] + t2 + w[i - 16];
+		};
+
+		// Note: the local 'h' below shadows the state pointer for the
+		// remainder of this loop body.
+		let a = h0, b = h1, c = h2, d = h3,
+			e = h4, f = h5, g = h6, h = h7;
+		for (let i = 0; i < 64; i += 1) {
+			let t1 = h + ((math::rotr32(e, 6))
+				^ (math::rotr32(e, 11))
+				^ (math::rotr32(e, 25)))
+				+ ((e & f) ^ (~e & g)) + k[i] + w[i];
+
+			let t2 = ((math::rotr32(a, 2))
+				^ (math::rotr32(a, 13))
+				^ (math::rotr32(a, 22)))
+				+ ((a & b) ^ (a & c) ^ (b & c));
+			h = g;
+			g = f;
+			f = e;
+			e = d + t1;
+			d = c;
+			c = b;
+			b = a;
+			a = t1 + t2;
+		};
+
+		h0 += a;
+		h1 += b;
+		h2 += c;
+		h3 += d;
+		h4 += e;
+		h5 += f;
+		h6 += g;
+		h7 += h;
+
+		buf = buf[chunk..];
+	};
+
+	h.h[0] = h0;
+	h.h[1] = h1;
+	h.h[2] = h2;
+	h.h[3] = h3;
+	h.h[4] = h4;
+	h.h[5] = h5;
+	h.h[6] = h6;
+	h.h[7] = h7;
+};
diff --git a/dirs/xdg.ha b/dirs/xdg.ha
@@ -0,0 +1,58 @@
+use fs;
+use os;
+use path;
+use io;
+
+// Resolves the directory for 'prog' under the directory named by the
+// environment variable 'var'. If the variable is unset, or the resulting path
+// exists but is not a directory, falls back to $HOME/default/prog. The
+// directory is created when it is missing. Aborts if the fallback is needed
+// and $HOME is unset.
+fn lookup(prog: str, var: str, default: str) str = {
+	match (os::getenv(var)) {
+		s: str => {
+			let path = path::join(s, prog);
+			match (os::stat(path)) {
+				err: fs::error => {
+					os::mkdirs(path) as void;
+					return path;
+				},
+				st: fs::filestat => {
+					if (fs::is_dir(st.mode)) {
+						return path;
+					};
+				},
+			};
+		},
+		void => void,
+	};
+
+	let home = match (os::getenv("HOME")) {
+		s: str => s,
+		void => abort("$HOME unset"), // TODO: Try reading /etc/passwd
+	};
+
+	let path = path::join(home, default, prog);
+	os::mkdirs(path) as void;
+	return path;
+};
+
+// Returns a directory suitable for storing config files. If 'prog' is given, a
+// unique path for this program to store data will be returned.
+export fn config(prog: str) str = lookup(prog, "XDG_CONFIG_HOME", ".config"); + +// Returns an [fs::fs] for storing config files. If 'prog' is given, a unique +// path for this program to store data will be returned. +export fn config_fs(prog: str) *fs::fs = os::diropen(config(prog)) as *fs::fs; + +// Returns a directory suitable for cache files. If 'prog' is given, a unique +// path for this program to store data will be returned. +export fn cache(prog: str) str = lookup(prog, "XDG_CACHE_HOME", ".cache"); + +// Returns an [fs::fs] for cache files. If 'prog' is given, a unique path for +// this program to store data will be returned. +export fn cache_fs(prog: str) *fs::fs = os::diropen(cache(prog)) as *fs::fs; + +// Returns a directory suitable for persistent data files. If 'prog' is given, a +// unique path for this program to store data will be returned. +export fn data(prog: str) str = + lookup(prog, "XDG_DATA_HOME", path::join(".local", "share")); + +// Returns an [fs::fs] for persistent data files. If 'prog' is given, a unique +// path for this program to store data will be returned. +export fn data_fs(prog: str) *fs::fs = os::diropen(data(prog)) as *fs::fs; diff --git a/encoding/hex/hex.ha b/encoding/hex/hex.ha @@ -0,0 +1,25 @@ +use io; +use strconv; +use strings; +use strio; + +// Encodes a byte slice to hex and writes it to a string. The caller must free +// this string. 
+export fn encode(b: []u8) str = { + let buf = strio::dynamic(); + for (let i = 0z; i < len(b); i += 1) { + let s = strconv::u8tosb(b[i], strconv::base::HEX_LOWER); + // A one-character result gets a leading '0' so that every input byte + // produces exactly two characters. + if (len(s) == 1) { + io::write(buf, ['0': u32: u8]); + }; + io::write(buf, strings::to_utf8(s)) as size; + }; + return strio::finish(buf); +}; + +@test fn encode() void = { + let in: [_]u8 = [0xCA, 0xFE, 0xBA, 0xBE, 0xDE, 0xAD, 0xF0, 0x0D]; + let s = encode(in); + defer free(s); + assert(s == "cafebabedeadf00d"); +}; diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha @@ -0,0 +1,151 @@ +use types; + +// Reinterprets a str as a []u8 through a pointer cast; no copy is made. +fn to_utf8(in: str) []u8 = *(&in: *[]u8); + +// The state for the UTF-8 decoder. +export type decoder = struct { + // Index into src of the next byte [next] will read + offs: size, + // The bytes being decoded + src: []u8, +}; + +// Initializes a new UTF-8 decoder. +export fn decode(src: (str | []u8)) decoder = match (src) { + s: str => decoder { src = to_utf8(s), ... }, + b: []u8 => decoder { src = b, ... }, +}; + +// Indicates that more data is needed, i.e. that a partial UTF-8 sequence was +// encountered. +export type more = void; + +// An error indicating that an invalid UTF-8 sequence was found. +export type invalid = void!; + +// Returns the next rune from a decoder. If the slice ends with a complete UTF-8 +// sequence, void is returned. If an incomplete sequence is encountered, more is +// returned. And if an invalid sequence is encountered, invalid is returned. +export fn next(d: *decoder) (rune | void | more | invalid) = { + assert(d.offs <= len(d.src)); + if (d.offs == len(d.src)) { + return; + }; + + // XXX: It would be faster if we decoded and measured at the same time. 
+ const n = utf8sz(d.src[d.offs]); + if (n == types::SIZE_MAX) { + return invalid; + } else if (d.offs + n > len(d.src)) { + return more; + }; + let bytes = d.src[d.offs..d.offs+n]; + d.offs += n; + + let r = 0u32; + if (bytes[0] < 128) { + // ASCII + return bytes[0]: u32: rune; + }; + + const mask = masks[n - 1]; + r = bytes[0] & mask; + for (let i = 1z; i < len(bytes); i += 1) { + r <<= 6; + r |= bytes[i] & 0x3F; + }; + return r: rune; +}; + +// Returns the rune which ends at the decoder's current offset and moves the +// offset back to the first byte of that rune. If the decoder is at the start +// of the slice, void is returned. If the start of the slice is reached after +// seeing only continuation bytes, more is returned. If the leading byte does +// not match the length of the sequence, or the run of continuation bytes is +// too long for any sequence, invalid is returned. +export fn prev(d: *decoder) (rune | void | more | invalid) = { + if (d.offs == 0) { + return; + }; + + let n = 0z; + let r = 0u32; + + for (let i = 0z; i < d.offs; i += 1) { + // No sequence is longer than len(masks) bytes. Stop here rather than + // index masks out of bounds (and over-shift r) on a long run of + // continuation bytes. + if (i >= len(masks)) { + return invalid; + }; + if ((d.src[d.offs - i - 1] & 0xC0) == 0x80) { + let tmp: u32 = d.src[d.offs - i - 1] & 0x3F; + r |= tmp << (i * 6): u32; + } else { + n = i + 1; + let tmp: u32 = d.src[d.offs - i - 1] & masks[i]; + r |= tmp << (i * 6): u32; + break; + }; + }; + if (n == 0) { + return more; + }; + d.offs -= n; + if (n != utf8sz(d.src[d.offs])) { + return invalid; + }; + return r: rune; +}; + +@test fn decode() void = { + const input: [_]u8 = [ + 0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, + 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF, 0x00, + ]; + const expected = ['こ', 'ん', 'に', 'ち', 'は', '\0']; + let decoder = decode(input); + for (let i = 0z; i < len(expected); i += 1) { + match (next(&decoder)) { + (invalid | more | void) => abort(), + r: rune => assert(r == expected[i]), + }; + }; + assert(next(&decoder) is void); + assert(decoder.offs == len(decoder.src)); + for (let i = 0z; i < len(expected); i += 1) { + match (prev(&decoder)) { + (invalid | more | void) => abort(), + r: rune => assert(r == expected[len(expected) - i - 1]), + }; + }; + assert(prev(&decoder) is void); + + // TODO: Test more invalid sequences + const 
invalid: [_]u8 = [0xA0, 0xA1]; + decoder = decode(invalid); + assert(next(&decoder) is invalid); + decoder.offs = 2; + assert(prev(&decoder) is more); + + const incomplete: [_]u8 = [0xE3, 0x81]; + decoder = decode(incomplete); + assert(next(&decoder) is more); + decoder.offs = 2; + assert(prev(&decoder) is invalid); +}; + +// Returns true if a given string or byte slice contains only valid UTF-8 +// sequences. Note that Hare strings (str) are always valid UTF-8 - if this +// returns false for a str type, something funny is going on. A trailing +// incomplete sequence also yields false. +export fn valid(src: (str | []u8)) bool = { + let decoder = decode(src); + for (true) { + match (next(&decoder)) { + void => return true, + invalid => return false, + more => return false, + rune => void, + }; + }; + abort(); +}; + +// Returns the expected length of a UTF-8 character in bytes, judged from its +// first byte 'c'. Returns [types::SIZE_MAX] if 'c' matches none of the +// leading-byte patterns in 'sizes'. +export fn utf8sz(c: u8) size = { + for (let i = 0z; i < len(sizes); i += 1) { + if (c & sizes[i].mask == sizes[i].result) { + return sizes[i].octets; + }; + }; + return types::SIZE_MAX; +}; diff --git a/encoding/utf8/encode.ha b/encoding/utf8/encode.ha @@ -0,0 +1,41 @@ +// Encodes a rune as UTF-8 and returns the result as a slice. The result is +// statically allocated; duplicate it if you aren't using it right away. 
+export fn encode_rune(r: rune) []u8 = { + let ch = r: u32, n = 0z, first = 0u8; + if (ch < 0x80) { + first = 0; + n = 1; + } else if (ch < 0x800) { + first = 0xC0; + n = 2; + } else if (ch < 0x10000) { + first = 0xE0; + n = 3; + } else { + first = 0xF0; + n = 4; + }; + + static let buf: [6]u8 = [0...]; + // Fill the continuation bytes from the last one backwards, six payload + // bits at a time; what is left of ch goes into the leading byte. + for (let i = n - 1; i > 0; i -= 1) { + buf[i] = ch: u8 & 0x3F | 0x80; + ch >>= 6; + }; + buf[0] = ch: u8 | first; + return buf[..n]; +}; + +@test fn encode() void = { + const expected: [_][]u8 = [ + [0], + [0x25], + [0xE3, 0x81, 0x93], + ]; + const inputs = ['\0', '%', 'こ']; + for (let i = 0z; i < len(inputs); i += 1) { + const out = encode_rune(inputs[i]); + for (let j = 0z; j < len(expected[i]); j += 1) { + assert(out[j] == expected[i][j]); + }; + }; +}; diff --git a/encoding/utf8/rune.ha b/encoding/utf8/rune.ha @@ -0,0 +1,27 @@ +use types; + +// Payload-bit masks for the leading byte of a sequence, indexed by the length +// of the sequence minus one. +const masks: [_]u8 = [0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01]; + +// A leading-byte pattern: a byte c starts a sequence of 'octets' bytes when +// c & mask == result. +type rsize = struct { + mask: u8, + result: u8, + octets: size, +}; + +// Leading-byte patterns, searched in order. The 6-octet pattern is 1111110x, +// i.e. result 0xFC under mask 0xFE; with result 0xF8 the entry could never +// match, because every such byte is already claimed by the 5-octet entry. +const sizes: [_]rsize = [ + rsize { mask = 0x80, result = 0x00, octets = 1 }, + rsize { mask = 0xE0, result = 0xC0, octets = 2 }, + rsize { mask = 0xF0, result = 0xE0, octets = 3 }, + rsize { mask = 0xF8, result = 0xF0, octets = 4 }, + rsize { mask = 0xFC, result = 0xF8, octets = 5 }, + rsize { mask = 0xFE, result = 0xFC, octets = 6 }, +]; + +// Returns the size of a rune, in octets, when encoded as UTF-8. +export fn runesz(r: rune) size = { + const ch = r: u32; + return if (ch < 0x80) 1 + else if (ch < 0x800) 2 + else if (ch < 0x10000) 3 + else 4; +}; diff --git a/endian/big.ha b/endian/big.ha @@ -0,0 +1,31 @@ +// Writes a u32 into a buffer in big-endian order. +export fn beputu32(buf: []u8, in: u32) void = { + buf[3] = (in): u8; + buf[2] = (in >> 8): u8; + buf[1] = (in >> 16): u8; + buf[0] = (in >> 24): u8; +}; + +// Writes a u64 into a buffer in big-endian order. 
+export fn beputu64(buf: []u8, in: u64) void = { + buf[7] = (in >> 0): u8; + buf[6] = (in >> 8): u8; + buf[5] = (in >> 16): u8; + buf[4] = (in >> 24): u8; + buf[3] = (in >> 32): u8; + buf[2] = (in >> 40): u8; + buf[1] = (in >> 48): u8; + buf[0] = (in >> 56): u8; +}; + +@test fn big() void = { + let buf: [8]u8 = [0...]; + beputu32(buf, 0x12345678); + assert(buf[0] == 0x12 && buf[1] == 0x34 + && buf[2] == 0x56 && buf[3] == 0x78); + beputu64(buf, 0x1234567887654321); + assert(buf[0] == 0x12 && buf[1] == 0x34 + && buf[2] == 0x56 && buf[3] == 0x78 + && buf[4] == 0x87 && buf[5] == 0x65 + && buf[6] == 0x43 && buf[7] == 0x21); +}; diff --git a/endian/endian.ha b/endian/endian.ha @@ -0,0 +1,17 @@ +// The set of functions used for endian-aware encoding. +export type endian = struct { + putu32: *fn(buf: []u8, in: u32) void, + putu64: *fn(buf: []u8, in: u64) void, +}; + +// Big endian; MSB first. +export const big: endian = endian { + putu32 = &beputu32, + putu64 = &beputu64, +}; + +// Little endian; LSB first. +export const little: endian = endian { + putu32 = &leputu32, + putu64 = &leputu64, +}; diff --git a/endian/host+aarch64.ha b/endian/host+aarch64.ha @@ -0,0 +1,2 @@ +// The [endian] functions which map to the host architecture. +export const host: *endian = &little; diff --git a/endian/host+x86_64.ha b/endian/host+x86_64.ha @@ -0,0 +1,2 @@ +// The [endian] functions which map to the host architecture. +export const host: *endian = &little; diff --git a/endian/little.ha b/endian/little.ha @@ -0,0 +1,31 @@ +// Writes a u32 into a buffer in little-endian order. +export fn leputu32(buf: []u8, in: u32) void = { + buf[0] = (in): u8; + buf[1] = (in >> 8): u8; + buf[2] = (in >> 16): u8; + buf[3] = (in >> 24): u8; +}; + +// Writes a u64 into a buffer in little-endian order. 
+export fn leputu64(buf: []u8, in: u64) void = { + buf[0] = (in >> 0): u8; + buf[1] = (in >> 8): u8; + buf[2] = (in >> 16): u8; + buf[3] = (in >> 24): u8; + buf[4] = (in >> 32): u8; + buf[5] = (in >> 40): u8; + buf[6] = (in >> 48): u8; + buf[7] = (in >> 56): u8; +}; + +@test fn little() void = { + let buf: [8]u8 = [0...]; + leputu32(buf, 0x12345678); + assert(buf[0] == 0x78 && buf[1] == 0x56 + && buf[2] == 0x34 && buf[3] == 0x12); + leputu64(buf, 0x1234567887654321); + assert(buf[0] == 0x21 && buf[1] == 0x43 + && buf[2] == 0x65 && buf[3] == 0x87 + && buf[4] == 0x78 && buf[5] == 0x56 + && buf[6] == 0x34 && buf[7] == 0x12); +}; diff --git a/fmt/fmt.ha b/fmt/fmt.ha @@ -0,0 +1,432 @@ +// A format string consists of a string of literal characters, to be printed +// verbatim, and format sequences, which describe how to format arguments from +// a set of variadic parameters for printing. +// +// A format sequence is enclosed in curly braces '{}'. An empty sequence takes +// the next argument from the parameter list, in order. A specific parameter may +// be selected by indexing it from zero: '{0}', '{1}', and so on. To print '{', +// use '{{', and for '}', use '}}'. +// +// You may use a colon to add format modifiers; for example, '{:x}' will format +// an argument in hexadecimal, and '{3:-10}' will left-align the argument at +// index 3 to at least 10 characters. +// +// The format modifiers take the form of an optional flag character: +// +// 0: Numeric values are zero-padded up to the required width. +// -: The value shall be left-aligned, and spaces inserted on the right to meet +// the required width. '-' takes precedence over '0' if both are used. +// : (a space) insert a space before positive numbers, where '-' would be if it +// were negative. +// +: insert a '+' before positive numbers, where '-' would be if it were +// negative. '+' takes precedence over ' ' if both are used. 
+// +// Following the flag, an optional decimal number shall specify the minimum +// width of this field. If '0' or '-' were not given, the default behavior shall +// be to pad with spaces to achieve the necessary width. +// +// Following the width, an optional precision may be given as a decimal number +// following a '.' character. For integer types, this gives the minimum number +// of digits to include. For floating types, this gives the number of digits +// following the radix to include. +// +// Following the precision, an optional character controls the output format: +// +// x, X: print in lowercase or uppercase hexadecimal +// o, b: print in octal or binary +// +// TODO: Expand this with more format modifiers +// +// NOTE(review): the precision and the ' ' and '+' flags are parsed into the +// modifiers, but the formatting code in this file does not appear to read +// them yet - confirm before relying on them. +use ascii; +use bufio; +use encoding::utf8; +use io; +use os; +use strconv; +use strings; +use types; + +// Tagged union of all types which are formattable. +export type formattable = + (...types::numeric | uintptr | str | rune | bool | nullable *void); + +// Formats text for printing and writes it to [os::stdout]. +export fn printf(fmt: str, args: formattable...) (io::error | size) = + fprintf(os::stdout, fmt, args...); + +// Formats text for printing and writes it to [os::stdout], followed by a line +// feed. +export fn printfln(fmt: str, args: formattable...) (io::error | size) = + fprintfln(os::stdout, fmt, args...); + +// Formats text for printing and writes it to [os::stderr]. +export fn errorf(fmt: str, args: formattable...) (io::error | size) = + fprintf(os::stderr, fmt, args...); + +// Formats text for printing and writes it to [os::stderr], followed by a line +// feed. +export fn errorfln(fmt: str, args: formattable...) (io::error | size) = + fprintfln(os::stderr, fmt, args...); + +// Formats text for printing and writes it into a heap-allocated string. The +// caller must free the return value. +export fn asprintf(fmt: str, args: formattable...) str = { + let buf = bufio::dynamic(io::mode::WRITE); + assert(fprintf(buf, fmt, args...) 
is size); + return strings::from_utf8_unsafe(bufio::finish(buf)); +}; + +// Formats text for printing and writes it into a caller supplied buffer. The +// returned string is borrowed from this buffer. +export fn bsprintf(buf: []u8, fmt: str, args: formattable...) str = { + let sink = bufio::fixed(buf, io::mode::WRITE); + let l = fprintf(sink, fmt, args...) as size; + return strings::from_utf8_unsafe(buf[..l]); +}; + +// Formats text for printing and writes it to [os::stderr], followed by a line +// feed, then exits the program with an error status. +export @noreturn fn fatal(fmt: str, args: formattable...) void = { + // The result of the write is not checked; the program exits either way. + fprintfln(os::stderr, fmt, args...); + os::exit(1); +}; + +// Formats text for printing and writes it to an [io::stream], followed by a +// line feed. +export fn fprintfln( + s: *io::stream, + fmt: str, + args: formattable... +) (io::error | size) = { + return fprintf(s, fmt, args...)? + io::write(s, ['\n': u32: u8])?; +}; + +// Formats values for printing using the default format modifiers and writes +// them to [os::stdout] separated by spaces. +export fn print(args: formattable...) (io::error | size) = + fprint(os::stdout, args...); + +// Formats values for printing using the default format modifiers and writes +// them to [os::stdout] separated by spaces and followed by a line feed. +export fn println(args: formattable...) (io::error | size) = + fprintln(os::stdout, args...); + +// Formats values for printing using the default format modifiers and writes +// them to [os::stderr] separated by spaces. +export fn error(args: formattable...) (io::error | size) = + fprint(os::stderr, args...); + +// Formats values for printing using the default format modifiers and writes +// them to [os::stderr] separated by spaces and followed by a line feed. +export fn errorln(args: formattable...) 
(io::error | size) = + fprintln(os::stderr, args...); + +// Formats values for printing using the default format modifiers and writes +// them into a heap-allocated string separated by spaces. The caller must free +// the return value. +export fn asprint(args: formattable...) str = { + let buf = bufio::dynamic(io::mode::WRITE); + assert(fprint(buf, args...) is size); + return strings::from_utf8_unsafe(bufio::finish(buf)); +}; + +// Formats values for printing using the default format modifiers and writes +// them into a caller supplied buffer separated by spaces. The returned string +// is borrowed from this buffer and covers only the bytes which were written. +export fn bsprint(buf: []u8, args: formattable...) str = { + let sink = bufio::fixed(buf, io::mode::WRITE); + // Slice the buffer to the number of bytes written, as bsprintf does; + // returning all of buf would include whatever follows the output. + let n = fprint(sink, args...) as size; + return strings::from_utf8_unsafe(buf[..n]); +}; + +// Formats values for printing using the default format modifiers and writes +// them to an [io::stream] separated by spaces and followed by a line feed +export fn fprintln(s: *io::stream, args: formattable...) (io::error | size) = { + return fprint(s, args...)? + io::write(s, ['\n': u32: u8])?; +}; + +// Formats values for printing using the default format modifiers and writes +// them to an [io::stream] separated by spaces +export fn fprint(s: *io::stream, args: formattable...) (io::error | size) = { + let mod = modifiers { base = strconv::base::DEC, ... 
}; + let n = 0z; + for (let i = 0z; i < len(args); i += 1) { + n += format(s, args[i], &mod)?; + if (i != len(args) - 1) { + n += io::write(s, [' ': u32: u8])?; + }; + }; + return n; +}; + +// What is placed in the sign position of a non-negative number, as selected +// by the ' ' and '+' flags. +type negation = enum { + NONE, + SPACE, + PLUS, +}; + +// How a value shorter than the requested width is padded. +type padding = enum { + ALIGN_RIGHT, + ALIGN_LEFT, + ZEROES, +}; + +// The parsed modifiers of a single format sequence. +type modifiers = struct { + padding: padding, + negation: negation, + width: uint, + precision: uint, + base: strconv::base, +}; + +// Bit set of the flag characters seen while scanning a format sequence. +type modflags = enum uint { + NONE = 0, + ZERO = 1 << 0, + MINUS = 1 << 1, + SPACE = 1 << 2, + PLUS = 1 << 3, +}; + +// Formats text for printing and writes it to an [io::stream]. Aborts if the +// format string is malformed. +export fn fprintf( + s: *io::stream, + fmt: str, + args: formattable... +) (io::error | size) = { + let n = 0z, i = 0z; + let iter = strings::iter(fmt); + for (true) { + let r: rune = match (strings::next(&iter)) { + void => break, + r: rune => r, + }; + + if (r == '{') { + r = match (strings::next(&iter)) { + void => abort("Invalid format string (unterminated '{')"), + r: rune => r, + }; + + const arg = if (r == '{') { + n += io::write(s, utf8::encode_rune('{'))?; + continue; + } else if (ascii::isdigit(r)) { + // Explicit index: '{N...}' + strings::push(&iter, r); + args[scan_uint(&iter)]; + } else { + // Implicit index: take the next argument in order + strings::push(&iter, r); + i += 1; + args[i - 1]; + }; + + let mod = modifiers { base = strconv::base::DEC, ... 
}; + r = match (strings::next(&iter)) { + void => abort("Invalid format string (unterminated '{')"), + r: rune => r, + }; + switch (r) { + ':' => scan_modifiers(&iter, &mod), + '}' => void, + * => abort("Invalid format string"), + }; + + n += format(s, arg, &mod)?; + } else if (r == '}') { + match (strings::next(&iter)) { + void => abort("Invalid format string (hanging '}')"), + r: rune => assert(r == '}', "Invalid format string (hanging '}')"), + }; + + n += io::write(s, utf8::encode_rune('}'))?; + } else { + n += io::write(s, utf8::encode_rune(r))?; + }; + }; + + return n; +}; + +// Writes 'arg' to 'out', padded to at least mod.width, and returns the total +// size written. The value is first written to io::empty to measure it. Errors +// from writing the value or the padding are returned to the caller. +fn format(out: *io::stream, arg: formattable, mod: *modifiers) (size | io::error) = { + let z = format_raw(io::empty, arg, mod)?; + + // Keep the pad byte in a local. utf8::encode_rune returns a statically + // allocated slice which format_raw overwrites when it formats a rune, so + // a slice obtained from it must not be held across those calls. + let padch = ' ': u32: u8; + if (mod.padding == padding::ZEROES) { + padch = '0': u32: u8; + }; + + if (mod.padding == padding::ALIGN_LEFT) { + format_raw(out, arg, mod)?; + }; + + for (z < mod.width: size) { + z += io::write(out, [padch])?; + }; + + if (mod.padding != padding::ALIGN_LEFT) { + format_raw(out, arg, mod)?; + }; + + return z; +}; + +// Writes the unpadded representation of 'arg' to 'out' and returns the size +// written. Numbers use mod.base; non-null pointers are written in lowercase +// hexadecimal with a "0x" prefix, and null is written as "(null)". +fn format_raw( + out: *io::stream, + arg: formattable, + mod: *modifiers, +) (size | io::error) = match (arg) { + s: str => io::write(out, strings::to_utf8(s)), + r: rune => io::write(out, utf8::encode_rune(r)), + b: bool => io::write(out, strings::to_utf8(if (b) "true" else "false")), + n: types::numeric => { + let s = strconv::numerictosb(n, mod.base); + io::write(out, strings::to_utf8(s)); + }, + p: uintptr => { + let s = strconv::uptrtosb(p, mod.base); + io::write(out, strings::to_utf8(s)); + }, + v: nullable *void => match (v) { + v: *void => { + let s = strconv::uptrtosb(v: uintptr, + strconv::base::HEX_LOWER); + let n = io::write(out, strings::to_utf8("0x"))?; + n += io::write(out, strings::to_utf8(s))?; + n; + }, + null => format(out, "(null)", mod), + }, +}; + + +// Reads consecutive ASCII digits from the iterator and returns their value. +// The first non-digit rune is pushed back. Aborts if the input ends first or +// the digits do not form a valid uint. +fn scan_uint(iter: *strings::iterator) uint = { + let num: []u8 = []; + defer free(num); + for 
(true) { + let r = match (strings::next(iter)) { + void => abort("Invalid format string (unterminated '{')"), + r: rune => r, + }; + + if (ascii::isdigit(r)) { + append(num, r: u32: u8); + } else { + strings::push(iter, r); + match (strconv::stou(strings::from_utf8(num))) { + (strconv::invalid | strconv::overflow) => + abort("Invalid format string (invalid index)"), + u: uint => return u, + }; + }; + }; + abort("unreachable"); +}; + +// Consumes any run of flag characters ('0', '-', ' ', '+') and derives +// mod.padding and mod.negation from them: '-' wins over '0', and '+' wins over +// ' '. The first non-flag rune is pushed back. +fn scan_modifier_flags(iter: *strings::iterator, mod: *modifiers) void = { + let flags = modflags::NONE; + + for (true) { + let r = match (strings::next(iter)) { + void => abort("Invalid format string (unterminated '{')"), + r: rune => r, + }; + + switch (r) { + '0' => flags |= modflags::ZERO, + '-' => flags |= modflags::MINUS, + ' ' => flags |= modflags::SPACE, + '+' => flags |= modflags::PLUS, + * => { + strings::push(iter, r); + break; + }, + }; + }; + + mod.padding = if (flags & modflags::MINUS != 0) + padding::ALIGN_LEFT + else if (flags & modflags::ZERO != 0) + padding::ZEROES + else + padding::ALIGN_RIGHT; + + mod.negation = if (flags & modflags::PLUS != 0) + negation::PLUS + else if (flags & modflags::SPACE != 0) + negation::SPACE + else + negation::NONE; +}; + +// Reads the optional minimum width: if the next rune is a digit, the number +// which follows is stored in mod.width; otherwise nothing is consumed. +fn scan_modifier_width(iter: *strings::iterator, mod: *modifiers) void = { + let r = match (strings::next(iter)) { + void => abort("Invalid format string (unterminated '{')"), + r: rune => r, + }; + + let is_digit = ascii::isdigit(r); + strings::push(iter, r); + + if (is_digit) { + mod.width = scan_uint(iter); + }; +}; + +// Reads the optional precision: a '.' followed by a number, which is stored in +// mod.precision. Any other rune is pushed back. +fn scan_modifier_precision(iter: *strings::iterator, mod: *modifiers) void = { + let r = match (strings::next(iter)) { + void => abort("Invalid format string (unterminated '{')"), + r: rune => r, + }; + + if (r == '.') { + mod.precision = scan_uint(iter); + } else { + strings::push(iter, r); + }; +}; + +// Reads the optional output format character ('x', 'X', 'o' or 'b') and sets +// mod.base accordingly. Any other rune is pushed back. +fn scan_modifier_base(iter: *strings::iterator, mod: *modifiers) void = { + let r = match (strings::next(iter)) { + void => abort("Invalid format 
string (unterminated '{')"), + r: rune => r, + }; + + switch (r) { + 'x' => mod.base = strconv::base::HEX_LOWER, + 'X' => mod.base = strconv::base::HEX_UPPER, + 'o' => mod.base = strconv::base::OCT, + 'b' => mod.base = strconv::base::BIN, + * => strings::push(iter, r), + }; +}; + +// Parses the modifiers which follow the ':' of a format sequence, in the fixed +// order flags, width, precision, base, then consumes the closing '}'. Aborts +// if the sequence is not terminated by '}'. +fn scan_modifiers(iter: *strings::iterator, mod: *modifiers) void = { + scan_modifier_flags(iter, mod); + scan_modifier_width(iter, mod); + scan_modifier_precision(iter, mod); + scan_modifier_base(iter, mod); + + // eat '}' + let terminated = match (strings::next(iter)) { + void => false, + r: rune => r == '}', + }; + assert(terminated, "Invalid format string (unterminated '{')"); +}; + +@test fn fmt() void = { + let buf: [1024]u8 = [0...]; + assert(bsprintf(buf, "hello world") == "hello world"); + assert(bsprintf(buf, "{} {}", "hello", "world") == "hello world"); + assert(bsprintf(buf, "{0} {1}", "hello", "world") == "hello world"); + assert(bsprintf(buf, "{0} {0}", "hello", "world") == "hello hello"); + assert(bsprintf(buf, "{1} {0} {1}", "hello", "world") == "world hello world"); + assert(bsprintf(buf, "x: {:08x}", 0xBEEF) == "x: 0000beef"); + assert(bsprintf(buf, "x: {:8X}", 0xBEEF) == "x: BEEF"); + assert(bsprintf(buf, "x: {:-8X}", 0xBEEF) == "x: BEEF "); + assert(bsprintf(buf, "x: {:o}", 0o755) == "x: 755"); + assert(bsprintf(buf, "x: {:b}", 0b11011) == "x: 11011"); + assert(bsprintf(buf, "{} {} {} {}", true, false, null, 'x') + == "true false (null) x"); +}; diff --git a/format/elf/types.ha b/format/elf/types.ha @@ -0,0 +1,446 @@ +// An implementation of the ELF64 file format. Best accompanied with a reading +// of the ELF-64 Object Format (Version 1.5). 
+// +// TODO: +// - Flesh out ELF32 structures +export def MAGIC: str = "\x7FELF"; +export def EI_MAG0: uint = 0; +export def EI_MAG1: uint = 1; +export def EI_MAG2: uint = 2; +export def EI_MAG3: uint = 3; +export def EI_CLASS: uint = 4; +export def EI_DATA: uint = 5; +export def EI_VERSION: uint = 6; +export def EI_OSABI: uint = 7; +export def EI_ABIVERSION: uint = 8; +export def EI_PAD: uint = 9; +// Size of [header64.e_ident] +export def EI_NIDENT: uint = 16; +export def EV_CURRENT: u32 = 1; + +// ELF header for ELF64 +export type header64 = struct { + // ELF identification + e_ident: [EI_NIDENT]u8, + // Object file type + e_type: elf_type, + // Machine type + e_machine: u16, + // Object file version ([EV_CURRENT]) + e_version: u32, + // Entry point address + e_entry: u64, + // Program header offset + e_phoff: u64, + // Section header offset + e_shoff: u64, + // Processor-specific flags + e_flags: u32, + // ELF header size + e_ehsize: u16, + // Size of program header entry + e_phentsize: u16, + // Number of program header entries + e_phnum: u16, + // Size of section header entry + e_shentsize: u16, + // Number of section header entries + e_shnum: u16, + // Section name string table index, or [shn::UNDEF] + e_shstrndx: u16, +}; + +// Section header for ELF64 +export type section64 = struct { + // Section name + sh_name: u32, + // Section type + sh_type: u32, + // Section attributes + sh_flags: u64, + // Virtual address in memory + sh_addr: u64, + // Offset in file + sh_offset: u64, + // Size of section + sh_size: u64, + // Link to other section + sh_link: u32, + // Miscellaneous information + sh_info: u32, + // Address alignment boundary + sh_addralign: u64, + // Size of entries, if section has table + sh_entsize: u64, +}; + +// ELF file class +export type ident_class = enum u8 { + // 32-bit objects + ELF32 = 1, + // 64-bit objects + ELF64 = 2, +}; + +// Byte ordering +export type ident_data = enum u8 { + // Object file data structures are little-endian + LSB = 1, + // Object file data 
structures are big-endian + MSB = 2, +}; + +// Application binary interface +export type ident_abi = enum u8 { + // System-V ABI + SYSV = 0, + // HP-UX operating system + HPUX = 1, + // Standalone (embedded) application + STANDALONE = 255, +}; + +// ELF file type +export type elf_type = enum u16 { + // No file type + NONE = 0, + // Relocatable object file + REL = 1, + // Executable file + EXEC = 2, + // Shared object file + DYN = 3, + // Core file + CORE = 4, + // Environment-specific use + LOOS = 0xFE00, + // Environment-specific use + HIOS = 0xFEFF, + // Processor-specific use + LOPROC = 0xFF00, + // Processor-specific use + HIPROC = 0xFFFF, +}; + +// Special section indices +export type shn = enum u16 { + // Used to mark an undefined or meaningless section reference + UNDEF = 0, + // Processor-specific use + LOPROC = 0xFF00, + // Processor-specific use + HIPROC = 0xFF1F, + // Environment-specific-use + LOOS = 0xFF20, + // Environment-specific-use + HIOS = 0xFF3F, + // Indicates that the corresponding reference is an absolute value + ABS = 0xFFF1, + // Indicates a symbol that has been declared as a common block + COMMON = 0xFFF2, +}; + +// Section type +export type sht = enum u32 { + // Marks an unused section header + NULL = 0, + // Contains information defined by the program + PROGBITS = 1, + // Contains a linker symbol table + SYMTAB = 2, + // Contains a string table + STRTAB = 3, + // Contains "Rela" type relocation entries + RELA = 4, + // Contains a symbol hash table + HASH = 5, + // Contains dynamic linking tables + DYNAMIC = 6, + // Contains note information + NOTE = 7, + // Contains uninitialized space; does not occupy any space in the file + NOBITS = 8, + // Contains "Rel" type relocation entries + REL = 9, + // Reserved + SHLIB = 10, + // Contains a dynamic loader symbol table + DYNSYM = 11, + // Environment-specific use + LOOS = 0x60000000, + // Environment-specific use + HIOS = 0x6FFFFFFF, + // Processor-specific use (eight hex digits: the range starts above HIOS) + LOPROC = 0x70000000, + // 
Processor-specific use + HIPROC = 0x7FFFFFFF, +}; + +// Section flags +export type shf = enum u32 { + // Section contains writable data + WRITE = 0x1, + // Section is allocated in memory image of program + ALLOC = 0x2, + // Section contains executable instructions + EXECINSTR = 0x4, + // Environment-specific use + MASKOS = 0x0F000000, + // Processor-specific use + MASKPROC = 0xF0000000, +}; + +// Symbol table entry +export type sym64 = struct { + // Symbol name offset + st_name: u32, + // Type and binding attributes + st_info: u8, + // Reserved + st_other: u8, + // Section table index + st_shndx: u16, + // Symbol value + st_value: u64, + // Size of object + st_size: u64, +}; + +// Symbol bindings +export type stb = enum u8 { + // Not visible outside the object file + LOCAL = 0, + // Global symbol, visible to all object files + GLOBAL = 1, + // Global scope, but with lower precedence than global symbols + WEAK = 2, + // Environment-specific use + LOOS = 10, + // Environment-specific use + HIOS = 12, + // Processor-specific use + LOPROC = 13, + // Processor-specific use + HIPROC = 15, +}; + +// Obtains the binding part of [sym64.st_info]. +// +// Equivalent to the ELF64_ST_BIND macro. +export fn st_bind(i: u8) stb = (i >> 4): stb; + +// Symbol types +export type stt = enum u8 { + // No type specified (e.g. an absolute symbol) + NOTYPE = 0, + // Data object + OBJECT = 1, + // Function entry point + FUNC = 2, + // Symbol is associated with a section + SECTION = 3, + // Source file associated with the object + FILE = 4, + // Environment-specific use + LOOS = 10, + // Environment-specific use + HIOS = 12, + // Processor-specific use + LOPROC = 13, + // Processor-specific use + HIPROC = 15, +}; + +// Obtains the type part of [sym64.st_info]. +// +// Equivalent to the ELF64_ST_TYPE macro. +export fn st_type(i: u8) stt = (i & 0xF): stt; + +// Converts symbol bindings and type into [sym64.st_info]. +// +// Equivalent to the ELF64_ST_INFO macro. 
+export fn st_info(b: stb, t: stt) u8 = + // Parenthesized on purpose: '+' binds tighter than '<<' and '&' binds + // looser than both, so without the parentheses this would compute + // (b << (4 + t)) & 0xF rather than (b << 4) + (t & 0xF). + (b: u8 << 4) + (t: u8 & 0xF); + +// Relocation entry +export type rel64 = struct { + // Address of reference + r_offset: u64, + // Symbol table index and type of relocation + r_info: u64, +}; + +// Relocation entry with explicit addend +export type rela64 = struct { + // Address of reference + r_offset: u64, + // Symbol table index and type of relocation + r_info: u64, + // Constant part of expression + r_addend: i64, +}; + +// Obtains the symbol table index part of [rel64.r_info]. +// +// Equivalent to the ELF64_R_SYM macro. +export fn r64_sym(info: u64) u64 = info >> 32; + +// Obtains the relocation type part of [rel64.r_info]. +// +// Equivalent to the ELF64_R_TYPE macro. +export fn r64_type(info: u64) u64 = info & 0xFFFFFFFF; + +// Converts symbol table index and a relocation type into [rel64.r_info]. +// +// Equivalent to the ELF64_R_INFO macro. +export fn r64_info(sym: u64, stype: u64) u64 = + (sym << 32) | (stype & 0xFFFFFFFF); + +// Program header table entry (segment) +export type phdr64 = struct { + // Type of segment + p_type: pt, + // Segment attributes + p_flags: u32, + // Offset in file + p_offset: u64, + // Virtual address in memory + p_vaddr: u64, + // Reserved + p_paddr: u64, + // Size of segment in file + p_filesz: u64, + // Size of segment in memory + p_memsz: u64, + // Alignment of segment + p_align: u64, +}; + +// Segment types +export type pt = enum u32 { + // Unused entry + NULL = 0, + // Loadable segment + LOAD = 1, + // Dynamic linking tables + DYNAMIC = 2, + // Program interpreter path name + INTERP = 3, + // Note sections + NOTE = 4, + // Reserved + SHLIB = 5, + // Program header table + PHDR = 6, + // Environment-specific use + LOOS = 0x60000000, + // Environment-specific use + HIOS = 0x6FFFFFFF, + // Processor-specific use + LOPROC = 0x70000000, + // Processor-specific use + HIPROC = 0x7FFFFFFF, +}; + +// Segment attributes +export type pf = enum u32 { + // Execute permission + X = 0x1, + // Write 
permission + W = 0x2, + // Read permission + R = 0x4, + // Reserved for environment-specific use + MASKOS = 0x00FF0000, + // Reserved for processor-specific use + MASKPROC = 0xFF000000, +}; + +// Dynamic table entry +export type dyn64 = struct { + // The type of this entry + d_tag: dt, + // Additional data associated with this entry. The value which is valid + // is selected based on the entry type. + union { + d_val: u64, + d_ptr: u64, + }, +}; + +// Dynamic table entry type +export type dt = enum i64 { + // Marks the end of the dynamic array. + NULL = 0, + // The string table offset of the name of a needed library. + NEEDED = 1, + // Total size, in bytes, of the relocation entries associated with the + // procedure linkage table. + PLTRELSZ = 2, + // Contains an address associated with the linkage table. The specific + // meaning of this field is processor-dependent. + PLTGOT = 3, + // Address of the symbol hash table. + HASH = 4, + // Address of the dynamic string table. + STRTAB = 5, + // Address of the dynamic symbol table. + SYMTAB = 6, + // Address of a relocation table with rela64 entries. + RELA = 7, + // Total size, in bytes, of the RELA relocation table. + RELASZ = 8, + // Size, in bytes, of each RELA relocation entry. + // NOTE(review): the ELF specification names this tag DT_RELAENT, so + // 'REALENT' looks like a transposition typo - confirm before renaming, + // since callers may already use this spelling. + REALENT = 9, + // Total size, in bytes, of the string table. + STRSZ = 10, + // Size, in bytes, of each symbol table entry. + SYMENT = 11, + // Address of the initialization function. + INIT = 12, + // Address of the termination function. + FINI = 13, + // The string table offset of the name of this shared object. + SONAME = 14, + // The string table offset of a shared library search path string. + RPATH = 15, + // The presence of this dynamic table entry modifies the symbol + // resolution algorithm for references within the library. Symbols + // defined within the library are used to resolve references before the + // dynamic linker searches the usual search path. + SYMBOLIC = 16, + // Address of a relocation table with rel64 entries. 
+ REL = 17, + // Total size, in bytes, of the REL relocation table. + RELSZ = 18, + // Size, in bytes, of each REL relocation entry. + RELENT = 19, + // Type of relocation entry used for the procedure linkage table. The + // d_val member contains either [dt::REL] or [dt::RELA]. + PLTREL = 20, + // Reserved for debugger use. + DEBUG = 21, + // The presence of this dynamic table entry signals that the relocation + // table contains relocations for a non-writable segment. + TEXTREL = 22, + // Address of the relocations associated with the procedure linkage + // table. + JMPREL = 23, + // The presence of this dynamic table entry signals that the dynamic + // loader should process all relocations for this object before + // transferring control to the program. + BIND_NOW = 24, + // Pointer to an array of initialiation functions. + INIT_ARRAY = 25, + // Pointer to an array of termination functions. + FINI_ARRAY = 26, + // Size, in bytes, of the array of initialization functions. + INIT_ARRAYSZ = 27, + // Size, in bytes, of the array of termination functions. + FINI_ARRAYSZ = 28, + // Reserved for environment-specific use. + LOOS = 0x60000000, + // Reserved for environment-specific use. + HIOS = 0x6FFFFFFF, + // Reserved for processor-specific use. + LOPROC = 0x70000000, + // Reserved for processor-specific use. + HIPROC = 0x7FFFFFFF, +}; diff --git a/fs/fs.ha b/fs/fs.ha @@ -0,0 +1,159 @@ +use io; +use path; + +// Closes a filesystem. The fs cannot be used after this function is called. +export fn close(fs: *fs) void = { + match (fs.close) { + null => void, + f: *closefunc => f(fs), + }; +}; + +// Opens a file. If no flags are provided, the default read/write mode is +// RDONLY. +export fn open(fs: *fs, path: str, flags: flags...) (*io::stream | error) = { + return match (fs.open) { + null => io::unsupported, + f: *openfunc => f(fs, path, flags...), + }; +}; + +// Creates a new file and opens it for writing. 
If no flags are provided, the +// default read/write mode is WRONLY. +// +// Only the permission bits of the mode are used. If other bits are set, they +// are discarded. +export fn create( + fs: *fs, + path: str, + mode: mode, + flags: flags... +) (*io::stream | error) = { + mode = mode & 0o777; + return match (fs.create) { + null => io::unsupported, + f: *createfunc => f(fs, path, mode, flags...), + }; +}; + +// Removes a file. +export fn remove(fs: *fs, path: str) (void | error) = { + return match (fs.remove) { + null => io::unsupported, + f: *removefunc => f(fs, path), + }; +}; + +// Returns an iterator for a path, which yields the contents of a directory. +// Pass empty string to yield from the root. The order in which entries are +// returned is undefined. +export fn iter(fs: *fs, path: str) (*iterator | error) = { + return match (fs.iter) { + null => io::unsupported, + f: *iterfunc => f(fs, path), + }; +}; + +// Obtains information about a file or directory. If the target is a symlink, +// information is returned about the link, not its target. +export fn stat(fs: *fs, path: str) (filestat | error) = { + return match (fs.stat) { + null => io::unsupported, + f: *statfunc => f(fs, path), + }; +}; + +// Opens a new filesystem for a subdirectory. The subdirectory must be closed +// separately from the parent filesystem, and its lifetime can outlive that of +// its parent. +export fn subdir(fs: *fs, path: str) (*fs | error) = { + return match (fs.subdir) { + null => io::unsupported, + f: *subdirfunc => f(fs, path), + }; +}; + +// Creates a directory. +export fn mkdir(fs: *fs, path: str) (void | error) = { + return match (fs.mkdir) { + null => io::unsupported, + f: *mkdirfunc => f(fs, path), + }; +}; + +// Makes a directory, and all non-extant directories in its path. 
+export fn mkdirs(fs: *fs, path: str) (void | error) = { + let parent = path::dirname(path); + if (path != parent) { + match (mkdirs(fs, parent)) { + exists => void, + err: error => return err, + void => void, + }; + }; + return mkdir(fs, path); +}; + +// Removes a directory. The target directory must be empty; see [rmdirall] to +// remove its contents as well. +export fn rmdir(fs: *fs, path: str) (void | error) = { + return match (fs.rmdir) { + null => io::unsupported, + f: *rmdirfunc => f(fs, path), + }; +}; + +// Removes a directory, and anything in it. +export fn rmdirall(fs: *fs, path: str) (void | error) = { + let it = iter(fs, path)?; + for (true) match (next(it)) { + ent: dirent => { + if (ent.name == "." || ent.name == "..") { + continue; + }; + switch (ent.ftype) { + mode::DIR => { + let p = path::join(path, ent.name); + defer free(p); + rmdirall(fs, p)?; + }, + * => { + let p = path::join(path, ent.name); + defer free(p); + remove(fs, p)?; + }, + }; + }, + void => break, + }; + return rmdir(fs, path); +}; + +// Creates a directory and returns a subdir for it. Some filesystems support +// doing this operation atomically, but if not, a fallback is used. +export fn mksubdir(fs: *fs, path: str) (*fs | error) = { + return match (fs.mksubdir) { + null => { + mkdir(fs, path)?; + subdir(fs, path); + }, + f: *mksubdirfunc => f(fs, path), + }; +}; + +// Resolves a path to its absolute, normalized value. This consolidates ./ and +// ../ sequences, roots the path, and returns a new path. The caller must free +// the return value. +export fn resolve(fs: *fs, path: str) str = { + match (fs.resolve) { + f: *resolvefunc => return f(fs, path), + null => void, + }; + abort(); // TODO +}; + +// Returns the next directory entry from an iterator, or void if none remain. +// It is a programming error to call this again after it has returned void. 
The +// file stat returned may only have the type bits set on the file mode; callers +// should call [fs::stat] to obtain the detailed file mode. +export fn next(iter: *iterator) (dirent | void) = iter.next(iter); diff --git a/fs/types.ha b/fs/types.ha @@ -0,0 +1,227 @@ +use io; +use strings; +use path; + +// An entry was requested which does not exist. +export type noentry = void!; + +// An attempt was made to create a file or directory which already exists. +export type exists = void!; + +// The user does not have permission to use this resource. +export type noaccess = void!; + +// An entry of a particular type was sought, but is something else in practice. +// For example, opening a file with [iter]. +export type wrongtype = void!; + +// All possible fs error types. +export type error = (noentry | noaccess | exists | wrongtype | io::error)!; + +// File mode information. These bits do not necessarily reflect the underlying +// operating system's mode representation, though they were chosen to be +// consistent with typical Unix file permissions. All implementations shall +// support at least USER_RW, DIR, and REG. 
+export type mode = enum uint { + // Read, write, and execute permissions for the file owner + USER_RWX = 0o700, + // Read and write permissions for the file owner + USER_RW = 0o600, + // Read and execute permissions for the file owner + USER_RX = 0o500, + // Read permissions for the file owner + USER_R = 0o400, + // Write permissions for the file owner + USER_W = 0o200, + // Execute permissions for the file owner + USER_X = 0o100, + + // Read, write, and execute permissions for group members + GROUP_RWX = 0o070, + // Read and write permissions for group members + GROUP_RW = 0o060, + // Read and execute permissions for group members + GROUP_RX = 0o050, + // Read permissions for group members + GROUP_R = 0o040, + // Write permissions for group members + GROUP_W = 0o020, + // Execute permissions for group members + GROUP_X = 0o010, + + // Read, write, and execute permissions for other users + OTHER_RWX = 0o007, + // Read and write permissions for other users + OTHER_RW = 0o006, + // Read and execute permissions for other users + OTHER_RX = 0o005, + // Read permissions for other users + OTHER_R = 0o004, + // Write permissions for other users + OTHER_W = 0o002, + // Execute permissions for other users + OTHER_X = 0o001, + + // Entry has the set-uid bit set + SETUID = 0o4000, + // Entry has the set-gid bit set + SETGID = 0o2000, + // Entry has the sticky bit set + STICKY = 0o1000, + + // Entry is of an unknown type + UNKNOWN = 0, + // Entry is a FIFO (named pipe) + FIFO = 0o010000, + // Entry is a directory + DIR = 0o040000, + // Entry is a character device + CHR = 0o020000, + // Entry is a block device + BLK = 0o060000, + // Entry is a regular file + REG = 0o100000, + // Entry is a symbolic link + LINK = 0o120000, + // Entry is a Unix socket + SOCK = 0o140000, +}; + +// A mask defining what items are populated in the stat structure. 
+export type stat_mask = enum uint { + UID = 1 << 0, + GID = 1 << 1, + SIZE = 1 << 2, + INODE = 1 << 3, +}; + +// Information about a file or directory. The mask field defines what other +// fields are set; mode is always set. +export type filestat = struct { + mask: stat_mask, + mode: mode, + uid: uint, + gid: uint, + sz: size, + inode: u64, + // TODO: atime et al +}; + +// An entry in a directory. This may be borrowed from the filesystem's internal +// state; if you want to keep this around beyond one call to [next], use +// [dirent_dup]. +export type dirent = struct { + // The name of this entry. Not fully qualified: for example, + // "foo/bar/baz.txt" would store "baz.txt" here. + name: str, + + // The type of this entry. The permission bits may be unset. + ftype: mode, +}; + +// Duplicates a [dirent] object. Call [dirent_free] to get rid of it later. +export fn dirent_dup(e: *dirent) dirent = { + let new = *e; + new.name = strings::dup(e.name); + return new; +}; + +// Frees a [dirent] object which was duplicated with [dirent_dup]. +export fn dirent_free(e: *dirent) void = free(e.name); + +// Flags to use for opening a file. Not all operating systems support all flags; +// at a minimum, RDONLY, WRONLY, RDWR, and CREATE will be supported. 
+export type flags = enum int { + RDONLY = 0, + WRONLY = 1, + RDWR = 2, + CREATE = 0o100, + EXCL = 0o200, + NOCTTY = 0o400, + TRUNC = 0o1000, + APPEND = 0o2000, + NONBLOCK = 0o4000, + DSYNC = 0o10000, + SYNC = 0o4010000, + RSYNC = 0o4010000, + DIRECTORY = 0o200000, + NOFOLLOW = 0o400000, + CLOEXEC = 0o2000000, + TMPFILE = 0o20200000, +}; + +export type closefunc = fn(fs: *fs) void; +export type removefunc = fn(fs: *fs, path: str) (void | error); +export type iterfunc = fn(fs: *fs, path: str) (*iterator | error); +export type statfunc = fn(fs: *fs, path: str) (filestat | error); +export type subdirfunc = fn(fs: *fs, path: str) (*fs | error); +export type mkdirfunc = fn(fs: *fs, path: str) (void | error); +export type rmdirfunc = fn(fs: *fs, path: str) (void | error); +export type mksubdirfunc = fn(fs: *fs, path: str) (*fs | error); +export type resolvefunc = fn(fs: *fs, path: str) str; + +export type openfunc = fn( + fs: *fs, + path: str, + flags: flags... +) (*io::stream | error); + +export type createfunc = fn( + fs: *fs, + path: str, + mode: mode, + flags: flags... +) (*io::stream | error); + +// An abstract implementation of a filesystem. To create a custom stream, embed +// this type as the first member of a struct with user-specific data and fill +// out these fields as appropriate. +export type fs = struct { + // Frees resources associated with this filesystem. + close: nullable *closefunc, + + // Opens a file. + open: nullable *openfunc, + + // Creates a new file. + create: nullable *createfunc, + + // Removes a file. + remove: nullable *removefunc, + + // Returns an iterator for a path, which yields the contents of a + // directory. Pass empty string to yield from the root. + // + // The iterator must return all entries without error. If an error would + // occur, it should be identified here and returned upfront. + iter: nullable *iterfunc, + + // Obtains information about a file or directory. 
If the target is a + // symlink, information is returned about the link, not its target. + stat: nullable *statfunc, + + // Opens a new filesystem for a subdirectory. + subdir: nullable *subdirfunc, + + // Creates a directory. + mkdir: nullable *mkdirfunc, + + // Removes a directory. The target directory must be empty. + rmdir: nullable *rmdirfunc, + + // Creates a directory and returns a subdir for it. + mksubdir: nullable *mksubdirfunc, + + // Resolves a path to its absolute, normalized value. If the fs + // implementation does not provide this, [resolve] presumes that + // relative paths are rooted (i.e. "foo" == "/foo"). + resolve: nullable *resolvefunc, +}; + +export type nextfunc = fn(iter: *iterator) (dirent | void); + +export type iterator = struct { + // Returns the next member of the directory, or void if there are none + // remaining. + next: *nextfunc, +}; diff --git a/fs/util.ha b/fs/util.ha @@ -0,0 +1,98 @@ +use io; +use path; +use strings; + +// Returns a human-friendly representation of an error. Every member of [error] +// is handled; [io::error] values are described by [io::errstr]. +export fn errstr(err: error) const str = match (err) { + noentry => "File or directory not found", + noaccess => "Permission denied", + exists => "File or directory already exists", + wrongtype => "Entry is not of the expected type", + err: io::error => io::errstr(err), +}; + +// Converts a mode into a Unix-like mode string (e.g. "-rw-r--r--"). The string +// is statically allocated, use [strings::dup] to duplicate it or it will be +// overwritten on subsequent calls. 
+export fn mode_str(m: mode) const str = { + static let buf: [10]u8 = [0...]; + buf = [ + // The file type values overlap bit-wise (SOCK and BLK both + // contain the DIR bit, LINK and BLK both contain the CHR bit), + // so the values with the most bits set must be tested first. + (if (m & mode::SOCK == mode::SOCK) 's' + else if (m & mode::LINK == mode::LINK) 'l' + else if (m & mode::BLK == mode::BLK) 'b' + else if (m & mode::DIR == mode::DIR) 'd' + else if (m & mode::CHR == mode::CHR) 'c' + else if (m & mode::FIFO == mode::FIFO) 'p' + else '-'): u32: u8, + (if (m & mode::USER_R == mode::USER_R) 'r' else '-'): u32: u8, + (if (m & mode::USER_W == mode::USER_W) 'w' else '-'): u32: u8, + (if (m & mode::SETUID == mode::SETUID) 's' + else if (m & mode::USER_X == mode::USER_X) 'x' + else '-'): u32: u8, + (if (m & mode::GROUP_R == mode::GROUP_R) 'r' else '-'): u32: u8, + (if (m & mode::GROUP_W == mode::GROUP_W) 'w' else '-'): u32: u8, + (if (m & mode::SETGID == mode::SETGID) 's' + else if (m & mode::GROUP_X == mode::GROUP_X) 'x' + else '-'): u32: u8, + (if (m & mode::OTHER_R == mode::OTHER_R) 'r' else '-'): u32: u8, + (if (m & mode::OTHER_W == mode::OTHER_W) 'w' else '-'): u32: u8, + (if (m & mode::STICKY == mode::STICKY) 't' + else if (m & mode::OTHER_X == mode::OTHER_X) 'x' + else '-'): u32: u8, + ]; + return strings::from_utf8(buf); +}; + +@test fn mode_str() void = { + assert(mode_str(0o777: mode) == "-rwxrwxrwx"); + assert(mode_str(mode::DIR | 0o755: mode) == "drwxr-xr-x"); + assert(mode_str(0o755: mode | mode::SETUID) == "-rwsr-xr-x"); + assert(mode_str(0o644: mode) == "-rw-r--r--"); + assert(mode_str(0: mode) == "----------"); + // Types whose values share bits with DIR must not be reported as 'd'. + assert(mode_str(mode::SOCK | 0o755: mode) == "srwxr-xr-x"); + assert(mode_str(mode::BLK | 0o660: mode) == "brw-rw----"); + assert(mode_str(mode::LINK | 0o777: mode) == "lrwxrwxrwx"); +}; + +// Returns the permission bits of a file mode. +export fn mode_perm(m: mode) mode = (m: uint & 0o777u): mode; + +// Returns the type bits of a file mode. +export fn mode_type(m: mode) mode = (m: uint & ~0o777u): mode; + +// Returns true if this item is a regular file. +export fn is_file(mode: mode) bool = mode & mode::REG == mode::REG; + +// Returns true if this item is a FIFO (named pipe). +export fn is_fifo(mode: mode) bool = mode & mode::FIFO == mode::FIFO; + +// Returns true if this item is a directory. 
+export fn is_dir(mode: mode) bool = mode & mode::DIR == mode::DIR; + +// Returns true if this item is a character device. +export fn is_chdev(mode: mode) bool = mode & mode::CHR == mode::CHR; + +// Returns true if this item is a block device. +export fn is_blockdev(mode: mode) bool = mode & mode::BLK == mode::BLK; + +// Returns true if this item is a symbolic link. +export fn is_link(mode: mode) bool = mode & mode::LINK == mode::LINK; + +// Returns true if this item is a Unix socket. +export fn is_socket(mode: mode) bool = mode & mode::SOCK == mode::SOCK; + +// Reads all entries from a directory. The caller must free the return value +// with [dirents_free]. +export fn readdir(fs: *fs, path: str) ([]dirent | error) = { + let i = iter(fs, path)?; + let ents: []dirent = []; + for (true) { + match (next(i)) { + d: dirent => append(ents, dirent_dup(&d)), + void => break, + }; + }; + return ents; +}; + +// Frees a slice of [dirent]s. +export fn dirents_free(d: []dirent) void = { + for (let i = 0z; i < len(d); i += 1) { + dirent_free(&d[i]); + }; +}; diff --git a/getopt/getopts.ha b/getopt/getopts.ha @@ -0,0 +1,307 @@ +// getopt provides an interface for parsing command line arguments and +// automatically generates a brief help message explaining the command usage. +// See [parse] for the main entry point. +// +// The help text is brief and should serve only as a reminder. It is recommended +// that your command line program be accompanied by a man page to provide +// detailed usage information. +use encoding::utf8; +use fmt; +use io; +use os; +use strings; + +// A flag which does not take a parameter, e.g. "-a". +export type flag = rune; + +// An option with an included parameter, e.g. "-a foo". +export type parameter = str; + +// A command line option. +export type option = (flag, parameter); + +// The result of parsing the set of command line arguments, including any +// options specified and the list of non-option arguments. 
+export type command = struct { + opts: []option, + args: []str, +}; + +// Help text providing a short, one-line summary of the command; or providing +// the name of an argument. +export type cmd_help = str; + +// Help text for a flag, formatted as "-a: help text". +export type flag_help = (flag, str); + +// Help text for a parameter, formatted as "-a param: help text" where "param" +// is the first string and "help text" is the second string. +export type parameter_help = (flag, str, str); + +// Help text for a command or option. +// +// cmd_help, flag_help, and parameter_help compose such that the help output for +// +// [ +// "foo bars in order", +// ('a', "a help text"), +// ('b', "b help text"), +// ('c', "cflag", "c help text"), +// ('d', "dflag", "d help text"), +// "files...", +// ] +// +// is: +// +// foo: foo bars in order +// +// Usage: foo [-ab] [-c <cflag>] [-d <dflag>] files... +// +// -a: a help text +// -b: b help text +// -c <cflag>: c help text +// -d <dflag>: d help text +export type help = (cmd_help | flag_help | parameter_help); + +// Parses command line arguments and returns a tuple of the options specified, +// and the remaining arguments. If an error occurs, details are printed to +// [os::stderr] and [os::exit] is called with a nonzero exit status. The +// argument list must include the command name as the first item; [os::args] +// fulfills this criteria. +// +// The caller provides [help] arguments to specify which command line flags and +// parameters are supported, and to provide some brief help text which describes +// their use. Provide [flag_help] to add a flag which does not take a parameter, +// and [parameter_help] to add a flag with a required parameter. The first +// [cmd_help] is used as a short, one-line summary of the command's purpose, and +// any later [cmd_help] arguments are used to provide the name of any arguments +// which follow the options list. 
+// +// By convention, the caller should sort the list of options, first providing +// all flags, then all parameters, alpha-sorted within each group by the flag +// rune. +// +// // Usage for sed +// let cmd = getopt::parse(os::args, +// "stream editor", +// ('E', "use extended regular expressions"), +// ('s', "treat files as separate, rather than one continuous stream"), +// ('i', "edit files in place"), +// ('z', "separate lines by NUL characters"), +// ('e', "script", "execute commands from script"), +// ('f', "file", "execute commands from a file"), +// "files...", +// ); +// defer getopt::finish(&cmd); +// +// for (let i = 0z; i < len(cmd.opts); i += 1) { +// let opt = cmd.opts[i]; +// switch (opt.0) { +// 'E' => extended = true, +// 's' => continuous = false, +// // ... +// 'e' => script = opt.1, +// 'f' => file = opt.1, +// }; +// }; +// +// for (let i = 0z; i < len(cmd.args); i += 1) { +// let arg = cmd.args[i]; +// // ... +// }; +// +// If "-h" is not among the options defined by the caller, the "-h" option will +// cause a summary of the command usage to be printed to stderr, and +// [os::exit] will be called with a successful exit status. +export fn parse(args: []str, help: help...) 
command = { + let opts: []option = []; + let i = 1z; + :arg for (i < len(args); i += 1) { + const arg = args[i]; + // Option parsing stops at the first empty argument, a lone "-", or + // an argument which does not begin with "-". + if (len(arg) == 0 || arg == "-" + || !strings::has_prefix(arg, "-")) { + break; + }; + // "--" ends the options list and is not itself kept as an argument. + if (arg == "--") { + i += 1; + break; + }; + + let d = utf8::decode(arg); + assert(utf8::next(&d) as rune == '-'); + let next = utf8::next(&d); + :flag for (next is rune; next = utf8::next(&d)) { + const r = next as rune; + :help for (let j = 0z; j < len(help); j += 1) { + let p: parameter_help = match (help[j]) { + cmd_help => continue :help, + f: flag_help => if (r == f.0) { + append(opts, (r, "")); + continue :flag; + } else continue :help, + p: parameter_help => if (r == p.0) p + else continue :help, + }; + // The parameter is the rest of this argument, or the + // next argument if nothing follows the option rune. + if (len(d.src) == d.offs) { + if (i + 1 >= len(args)) { + errmsg(args[0], "option requires an argument: ", + r, help); + os::exit(1); + }; + i += 1; + append(opts, (r, args[i])); + } else { + let s = strings::from_utf8(d.src[d.offs..]); + append(opts, (r, s)); + }; + continue :arg; + }; + // Reached only when no caller-defined option matched this rune. + if (r == 'h') { + print_help(os::stderr, args[0], help); + os::exit(0); + }; + errmsg(args[0], "unrecognized option: ", r, help); + os::exit(1); + }; + match (next) { + rune => abort(), // Unreachable + void => void, + (utf8::more | utf8::invalid) => { + // Report under the command name (args[0]), as the + // other diagnostics in this function do. + errmsg(args[0], "invalid UTF-8 in arguments", + void, help); + os::exit(1); + }, + }; + }; + return command { + opts = opts, + args = args[i..], + }; +}; + +// Frees resources associated with the return value of [parse]. 
+export fn finish(cmd: *command) void = { + if (cmd == null) return; + free(cmd.opts); +}; + +fn _print_usage(s: *io::stream, name: str, indent: bool, help: []help) size = { + let z = fmt::fprint(s, "Usage:", name) as size; + + let started_flags = false; + for (let i = 0z; i < len(help); i += 1) if (help[i] is flag_help) { + if (!started_flags) { + z += fmt::fprint(s, " [-") as size; + started_flags = true; + }; + const help = help[i] as flag_help; + z += fmt::fprint(s, help.0: rune) as size; + }; + if (started_flags) { + z += fmt::fprint(s, "]") as size; + }; + + for (let i = 0z; i < len(help); i += 1) if (help[i] is parameter_help) { + const help = help[i] as parameter_help; + if (indent) { + z += fmt::fprintf(s, "\n\t") as size; + }; + z += fmt::fprintf(s, " [-{} <{}>]", help.0: rune, help.1) as size; + }; + if (indent) { + z += fmt::fprintf(s, "\n\t") as size; + }; + for (let i = 1z; i < len(help); i += 1) if (help[i] is cmd_help) { + z += fmt::fprintf(s, " {}", help[i] as cmd_help: str) as size; + }; + + return z + fmt::fprint(s, "\n") as size; +}; + +// Prints command usage to the provided stream. +export fn print_usage(s: *io::stream, name: str, help: []help) void = { + let z = _print_usage(io::empty, name, false, help); + _print_usage(s, name, if (z > 72) true else false, help); +}; + +// Prints command help to the provided stream. 
+export fn print_help(s: *io::stream, name: str, help: []help) void = { + // The summary line is only printed when the first help item is a + // cmd_help. The length is checked first because help may be empty. + if (len(help) > 0 && help[0] is cmd_help) { + fmt::fprintfln(s, "{}: {}\n", name, help[0] as cmd_help: str); + }; + + print_usage(s, name, help); + + for (let i = 0z; i < len(help); i += 1) match (help[i]) { + cmd_help => void, + (flag_help | parameter_help) => { + // Only print this if there are flags to show + fmt::fprint(s, "\n"); + break; + }, + }; + + for (let i = 0z; i < len(help); i += 1) match (help[i]) { + cmd_help => void, + f: flag_help => { + fmt::fprintfln(s, "-{}: {}", f.0: rune, f.1); + }, + p: parameter_help => { + fmt::fprintfln(s, "-{} <{}>: {}", p.0: rune, p.1, p.2); + }, + }; +}; + +fn errmsg(name: str, err: str, opt: (rune | void), help: []help) void = { + fmt::errorfln("{}: {}{}", name, err, match (opt) { + r: rune => r, + void => "", + }); + print_usage(os::stderr, name, help); +}; + +@test fn parse() void = { + let args: []str = ["cat", "-v", "a.out"]; + let cat = parse(args, + "concatenate files", + ('v', "cause Rob Pike to make a USENIX presentation"), + "files...", + ); + defer finish(&cat); + assert(len(cat.args) == 1 && cat.args[0] == "a.out"); + assert(len(cat.opts) == 1 && cat.opts[0].0 == 'v' && cat.opts[0].1 == ""); + + args = ["ls", "-Fahs", "--", "-j"]; + let ls = parse(args, + "list files", + ('F', "Do some stuff"), + ('h', "Do some other stuff"), + ('s', "Do a third type of stuff"), + ('a', "Do a fourth type of stuff"), + "files...", + ); + defer finish(&ls); + assert(len(ls.args) == 1 && ls.args[0] == "-j"); + assert(len(ls.opts) == 4); + assert(ls.opts[0].0 == 'F' && ls.opts[0].1 == ""); + assert(ls.opts[1].0 == 'a' && ls.opts[1].1 == ""); + assert(ls.opts[2].0 == 'h' && ls.opts[2].1 == ""); + assert(ls.opts[3].0 == 's' && ls.opts[3].1 == ""); + + args = ["sed", "-e", "s/C++//g", "-f/tmp/turing.sed", "-"]; + let sed = parse(args, + "edit streams", + ('e', "script", "Add the editing commands specified by the " + "script option to the end of the script of editing " + 
"commands"), + ('f', "script_file", "Add the editing commands in the file " + "script_file to the end of the script of editing " + "commands"), + "files...", + ); + defer finish(&sed); + assert(len(sed.args) == 1 && sed.args[0] == "-"); + assert(len(sed.opts) == 2); + assert(sed.opts[0].0 == 'e' && sed.opts[0].1 == "s/C++//g"); + assert(sed.opts[1].0 == 'f' && sed.opts[1].1 == "/tmp/turing.sed"); +}; diff --git a/hare/ast/types.ha b/hare/ast/types.ha @@ -0,0 +1,74 @@ +// Identifies a single object, e.g. foo::bar::baz. +export type ident = []str; + +// Maximum length of an identifier, as the sum of the lengths of its parts plus +// one for each namespace deliniation. +// +// In other words, the length of "a::b::c" is 5. +export def IDENT_MAX: size = 255; + +// Frees resources associated with an identifier. +export fn ident_free(ident: ident) void = { + for (let i = 0z; i < len(ident); i += 1) { + free(ident[i]); + }; + free(ident); +}; + +// Returns true if two idents are identical. +export fn ident_eq(a: ident, b: ident) bool = { + if (len(a) != len(b)) { + return false; + }; + for (let i = 0z; i < len(a); i += 1) { + if (a[i] != b[i]) { + return false; + }; + }; + return true; +}; + +// A sub-unit, typically representing a single source file. +export type subunit = struct { + imports: []import, + declarations: []declaration, +}; + +// use module; +export type import_module = ident; + +// use alias = module; +export type import_alias = struct { + ident: ident, + alias: str, +}; + +// use module::{foo, bar, baz}; +export type import_objects = struct { + ident: ident, + objects: []str, +}; + +// An imported module +export type import = (import_module | import_alias | import_objects); + +// Frees resources associated with an import. 
+export fn import_free(import: import) void = { + match (import) { + m: import_module => ident_free(m: ident), + a: import_alias => { + ident_free(a.ident); + free(a.alias); + }, + o: import_objects => { + ident_free(o.ident); + for (let i = 0z; i < len(o.objects); i += 1) { + free(o.objects[i]); + }; + free(o.objects); + }, + }; +}; + +// TODO +export type declaration = void; diff --git a/hare/ast/unparse.ha b/hare/ast/unparse.ha @@ -0,0 +1,21 @@ +use fmt; +use io; +use strio; + +// Unparses an identifier. +export fn ident_unparse(out: *io::stream, ident: ident) (size | io::error) = { + let n = 0z; + for (let i = 0z; i < len(ident); i += 1) { + n += fmt::fprintf(out, "{}{}", ident[i], + if (i + 1 < len(ident)) "::" + else "")?; + }; + return n; +}; + +// Unparses an identifier into a string. The caller must free the return value. +export fn ident_unparse_s(ident: ident) str = { + let buf = strio::dynamic(); + ident_unparse(buf, ident); + return strio::finish(buf); +}; diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha @@ -0,0 +1,262 @@ +use bufio; +use fmt; +use io; +use io::{mode}; +use strings; + +@test fn unget() void = { + let buf = bufio::fixed(strings::to_utf8("z"), mode::READ); + let lexer = init(buf, "<test>"); + unget(&lexer, 'x'); + unget(&lexer, 'y'); + assert(next(&lexer) as rune == 'y'); + assert(next(&lexer) as rune == 'x'); + assert(next(&lexer) as rune == 'z'); + assert(next(&lexer) is io::EOF); + unget(&lexer, io::EOF); + assert(next(&lexer) is io::EOF); +}; + +@test fn unlex() void = { + let lexer = init(io::empty, "<test>"); + unlex(&lexer, (btoken::IF, location { + path = "<test>", + line = 1234, + col = 1234, + })); + let t = lex(&lexer) as (token, location); + assert(t.0 is btoken); + assert(t.0 as btoken == btoken::IF); + assert(t.1.path == "<test>"); + assert(t.1.line == 1234 && t.1.col == 1234); +}; + +fn litassert(expected: literal, actual: literal) void = match (expected) { + e: u8 => assert(actual as u8 == e), + e: u16 => assert(actual 
as u16 == e), + e: u32 => assert(actual as u32 == e), + e: u64 => assert(actual as u64 == e), + e: uint => assert(actual as uint == e), + e: uintptr => assert(actual as uintptr == e), + e: i8 => assert(actual as i8 == e), + e: i16 => assert(actual as i16 == e), + e: i32 => assert(actual as i32 == e), + e: i64 => assert(actual as i64 == e), + e: int => assert(actual as int == e), + e: iconst => assert(actual as iconst == e), + e: f32 => assert(actual as f32 == e), + e: f64 => assert(actual as f64 == e), + e: fconst => assert(actual as fconst == e), + e: rune => assert(actual as rune == e), + e: str => assert(actual as str == e), +}; + +fn lextest(in: str, expected: [](uint, uint, token)) void = { + let buf = bufio::fixed(strings::to_utf8(in), mode::READ); + let lexer = init(buf, "<test>"); + for (let i = 0z; i < len(expected); i += 1) { + let eline = expected[i].0, ecol = expected[i].1, + etok = expected[i].2; + let tl = match (lex(&lexer)) { + tl: (token, location) => tl, + io::EOF => { + fmt::errorfln("unexpected EOF at {}", i); + abort(); + }, + err: error => { + fmt::errorfln("{}: {}", i, errstr(err)); + abort(); + }, + }; + let tok = tl.0, loc = tl.1; + match (tok) { + b: btoken => if (!(etok is btoken) || etok as btoken != b) { + fmt::errorfln("bad token at {}: got {}, wanted {}", + i, tokstr(tok), tokstr(etok)); + abort(); + }, + n: name => if (!(etok is name) || etok as name != n) { + fmt::errorfln("bad token at {}: got {}, wanted {}", + i, tokstr(tok), tokstr(etok)); + abort(); + }, + l: literal => if (!(etok is literal)) { + fmt::errorfln("bad token at {}: got {}, wanted {}", + i, tokstr(tok), tokstr(etok)); + abort(); + } else { + litassert(l, etok as literal); + }, + * => abort("TODO"), + }; + assert(loc.path == "<test>"); + if (loc.line != eline || loc.col != ecol) { + fmt::errorfln("bad line/col at {}: got {},{}; wanted {},{}", + i, loc.line, loc.col, eline, ecol); + abort(); + }; + }; + assert(lex(&lexer) is io::EOF); +}; + +@test fn lex1() void = { + 
const in = "~,{[(}]);"; + const expected: [_](uint, uint, token) = [ + (1, 1, btoken::BNOT), + (1, 2, btoken::COMMA), + (1, 3, btoken::LBRACE), + (1, 4, btoken::LBRACKET), + (1, 5, btoken::LPAREN), + (1, 6, btoken::RBRACE), + (1, 7, btoken::RBRACKET), + (1, 8, btoken::RPAREN), + (1, 9, btoken::SEMICOLON), + ]; + lextest(in, expected); +}; + +@test fn lex2() void = { + // Ends with = to test =, EOF + const in = "^ ^^ ^= * *= % %= + += - -= : :: & && &= | || |= = == / /= ="; + const expected: [_](uint, uint, token) = [ + (1, 1, btoken::BXOR), + (1, 3, btoken::LXOR), + (1, 6, btoken::BXOREQ), + (1, 9, btoken::TIMES), + (1, 11, btoken::TIMESEQ), + (1, 14, btoken::MODULO), + (1, 16, btoken::MODEQ), + (1, 19, btoken::PLUS), + (1, 21, btoken::PLUSEQ), + (1, 24, btoken::MINUS), + (1, 26, btoken::MINUSEQ), + (1, 29, btoken::COLON), + (1, 31, btoken::DOUBLE_COLON), + (1, 34, btoken::BAND), + (1, 36, btoken::LAND), + (1, 39, btoken::ANDEQ), + (1, 42, btoken::BOR), + (1, 44, btoken::LOR), + (1, 47, btoken::OREQ), + (1, 50, btoken::EQUAL), + (1, 52, btoken::LEQUAL), + (1, 55, btoken::DIV), + (1, 57, btoken::DIVEQ), + (1, 60, btoken::EQUAL), + ]; + lextest(in, expected); +}; + +@test fn lex3() void = { + const in = ". .. ... 
< << <= <<= > >> >= >>= >>"; + const expected: [_](uint, uint, token) = [ + (1, 1, btoken::DOT), + (1, 3, btoken::SLICE), + (1, 6, btoken::ELLIPSIS), + (1, 10, btoken::LESS), + (1, 12, btoken::LSHIFT), + (1, 15, btoken::LESSEQ), + (1, 18, btoken::LSHIFTEQ), + (1, 22, btoken::GREATER), + (1, 24, btoken::RSHIFT), + (1, 27, btoken::GREATEREQ), + (1, 30, btoken::RSHIFTEQ), + (1, 34, btoken::RSHIFT), + ]; + lextest(in, expected); +}; + +@test fn lexname() void = { + const in = "hello world return void foobar"; + const expected: [_](uint, uint, token) = [ + (1, 1, "hello": name), + (1, 7, "world": name), + (1, 13, btoken::RETURN), + (1, 20, btoken::VOID), + (1, 25, "foobar": name), + ]; + lextest(in, expected); +}; + +@test fn keywords() void = { + let keywords = bmap[..btoken::LAST_KEYWORD+1]; + for (let i = 0z; i < len(keywords); i += 1) { + let lexer = init(bufio::fixed( + strings::to_utf8(keywords[i]), mode::READ), + "<test>"); + let tl = match (lex(&lexer)) { + tl: (token, location) => tl, + * => abort(), + }; + let tok = tl.0; + assert(tok is btoken); + assert(tok as btoken == i: btoken); + }; +}; + +@test fn comments() void = { + const in = "hello world // foo\nbar"; + const expected: [_](uint, uint, token) = [ + (1, 1, "hello": name), + (1, 7, "world": name), + (2, 1, "bar": name), + ]; + lextest(in, expected); +}; + +@test fn runes() void = { + const in = "'a' 'b' '\\a' '\\b' '\\f' '\\n' '\\r' '\\t' '\\v' '\\0' " + "'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U12345678'"; + const expected: [_](uint, uint, token) = [ + (1, 1, 'a'), + (1, 5, 'b'), + (1, 9, '\a'), + (1, 14, '\b'), + (1, 19, '\f'), + (1, 24, '\n'), + (1, 29, '\r'), + (1, 34, '\t'), + (1, 39, '\v'), + (1, 44, '\0'), + (1, 49, '\\'), + (1, 54, '\''), + (1, 59, '\x0A'), + (1, 66, '\u1234'), + (1, 75, '\U12345678'), + ]; + lextest(in, expected); +}; + +@test fn strings() void = { + const in = "\"a\" \"b\" \"\\a\" \"\\b\" \"\\f\" \"\\n\" \"\\r\" " + "\"\\t\" \"\\v\" \"\\0\" \"\\\\\" \"\\\'\""; + const expected: 
[_](uint, uint, token) = [ + (1, 1, "a": literal), + (1, 5, "b": literal), + (1, 9, "\a": literal), + (1, 14, "\b": literal), + (1, 19, "\f": literal), + (1, 24, "\n": literal), + (1, 29, "\r": literal), + (1, 34, "\t": literal), + (1, 39, "\v": literal), + (1, 44, "\0": literal), + (1, 49, "\\": literal), + (1, 54, "\'": literal), + ]; + // TODO: test \x and \u and \U + lextest(in, expected); + const in = "\"ab\\a\\b\\f\\n\\r\\t\\v\\0\\\\\\'\""; + const expected: [_](uint, uint, token) = [ + (1, 1, "ab\a\b\f\n\r\t\v\0\\\'": literal), + ]; + lextest(in, expected); + const in = "\"hello world\" \"こんにちは\" \"return\" \"foo\""; + const expected: [_](uint, uint, token) = [ + (1, 1, "hello world": literal), + (1, 15, "こんにちは": literal), + (1, 23, "return": literal), + (1, 32, "foo": literal), + ]; + lextest(in, expected); +}; diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha @@ -0,0 +1,518 @@ +// hare::lex provides a lexer for Hare source code. +use ascii; +use encoding::utf8; +use fmt; +use io; +use sort; +use strconv; +use strings; + +// State associated with a lexer. +export type lexer = struct { + in: *io::stream, + path: str, + loc: (uint, uint), + un: ((token, location) | void), + rb: [2](rune | io::EOF | void), +}; + +// A syntax error +export type syntax = (location, str)!; + +// All possible lexer errors +export type error = (io::error | syntax)!; + +// Returns a human-friendly string for a given error +export fn errstr(err: error) const str = { + static let buf: [2048]u8 = [0...]; + return match (err) { + err: io::error => io::errstr(err), + s: syntax => fmt::bsprintf(buf, "{}:{},{}: Syntax error: {}", + s.0.path, s.0.line, s.0.col, s.1), + }; +}; + +// Initializes a new lexer for the given input stream. The path is borrowed. +export fn init(in: *io::stream, path: str) lexer = lexer { + in = in, + path = path, + loc = (1, 1), + un = void, + rb = [void...], +}; + +// Returns the next token from the lexer. 
+export fn lex(lex: *lexer) ((token, location) | io::EOF | error) = { + match (lex.un) { + tok: (token, location) => { + lex.un = void; + return tok; + }, + void => void, + }; + + let loc = location { ... }; + let r: rune = match (nextw(lex)?) { + io::EOF => return io::EOF, + r: (rune, location) => { + loc = r.1; + r.0; + }, + }; + + if (is_name(r, false)) { + unget(lex, r); + return lex_name(lex, loc); + }; + if (ascii::isdigit(r)) { + unget(lex, r); + abort(); // TODO: Literals + }; + + let tok: token = switch (r) { + * => return syntaxerr(loc, "invalid character"), + '"', '\'' => { + unget(lex, r); + return lex_rn_str(lex, loc); + }, + '.', '<', '>' => return lex3(lex, loc, r), + '^', '*', '%', '/', '+', '-', ':', '!', '&', '|', '=' => { + return lex2(lex, loc, r); + }, + '~' => btoken::BNOT, + ',' => btoken::COMMA, + '{' => btoken::LBRACE, + '[' => btoken::LBRACKET, + '(' => btoken::LPAREN, + '}' => btoken::RBRACE, + ']' => btoken::RBRACKET, + ')' => btoken::RPAREN, + ';' => btoken::SEMICOLON, + }; + return (tok, loc); +}; + +fn is_name(r: rune, num: bool) bool = + ascii::isalpha(r) || r == '_' || r == '@' || (num && ascii::isdigit(r)); + +fn ncmp(a: const *void, b: const *void) int = { + let a = a: const *str, b = b: const *str; + return match (ascii::strcmp(*a, *b)) { + void => abort("non-ascii name"), // TODO: Bubble me up + i: int => i, + }; +}; + +fn lex_unicode(lex: *lexer, loc: location, n: size) (rune | error) = { + assert(n < 9); + let buf: [9]u8 = [0...]; + for (let i = 0z; i < n; i += 1z) { + let r = match (next(lex)?) 
{ + io::EOF => return syntaxerr(loc, + "unexpected EOF scanning for escape"), + r: rune => r, + }; + if (!ascii::isxdigit(r)) { + return syntaxerr(loc, + "unexpected rune scanning for escape"); + }; + buf[i] = r: u32: u8; + }; + let s = strings::from_utf8_unsafe(buf[..n]); + return match (strconv::stou32b(s, strconv::base::HEX)) { + (strconv::overflow | strconv::invalid) => abort(), // Invariant + u: u32 => u: rune, + }; +}; + +fn lex_rune(lex: *lexer, loc: location) (rune | error) = { + let r = match (next(lex)?) { + io::EOF => return syntaxerr(loc, + "unexpected EOF scanning for rune"), + r: rune => r, + }; + if (r != '\\') { + return r; + }; + r = match (next(lex)?) { + io::EOF => return syntaxerr(loc, + "unexpected EOF scanning for escape"), + r: rune => r, + }; + return switch (r) { + '\\' => '\\', + '\'' => '\'', + '0' => '\0', + 'a' => '\a', + 'b' => '\b', + 'f' => '\f', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'v' => '\v', + '"' => '\"', + 'x' => lex_unicode(lex, loc, 2), + 'u' => lex_unicode(lex, loc, 4), + 'U' => lex_unicode(lex, loc, 8), + }; +}; + +fn lex_string( + lex: *lexer, + loc: location, +) ((token, location) | io::EOF | error) = { + let chars: []u8 = []; + for (true) match (next(lex)?) { + io::EOF => return syntaxerr(loc, "unexpected EOF scanning string literal"), + r: rune => + if (r == '"') break + else { + unget(lex, r); + r = lex_rune(lex, loc)?; + append(chars, ...utf8::encode_rune(r)); + }, + }; + return (strings::from_utf8(chars): literal, loc); +}; + +fn lex_rn_str( + lex: *lexer, + loc: location, +) ((token, location) | io::EOF | error) = { + let r = match (next(lex)) { + r: rune => r, + (io::EOF | io::error) => abort(), + }; + switch (r) { + '\"' => return lex_string(lex, loc), + '\'' => void, + * => abort(), // Invariant + }; + + // Rune literal + let ret: (token, location) = (lex_rune(lex, loc)?: literal, loc); + match (next(lex)?) 
{ + io::EOF => + return syntaxerr(loc, "unexpected EOF"), + n: rune => if (n != '\'') + return syntaxerr(loc, "expected \"\'\""), + }; + return ret; +}; + +fn lex_name( + lex: *lexer, + loc: location, +) ((token, location) | io::EOF | error) = { + let chars: []u8 = []; + match (next(lex)) { + r: rune => { + assert(is_name(r, false)); + append(chars, ...utf8::encode_rune(r)); + }, + (io::EOF | io::error) => abort(), + }; + + for (true) match (next(lex)?) { + io::EOF => break, + r: rune => { + if (!is_name(r, true)) { + unget(lex, r); + break; + }; + append(chars, ...utf8::encode_rune(r)); + }, + }; + + let n = strings::from_utf8(chars); + return match (sort::search(bmap[..btoken::LAST_KEYWORD+1], + size(str), &n, &ncmp)) { + // TODO: Validate that names are ASCII + null => (n: name: token, loc), + v: *void => { + let tok = v: uintptr - &bmap[0]: uintptr; + tok /= size(str): uintptr; + (tok: btoken: token, loc); + }, + }; +}; + +fn lex2( + lexr: *lexer, + loc: location, + r: rune, +) ((token, location) | io::EOF | error) = { + let n = match (next(lexr)?) { + io::EOF => io::EOF, + r: rune => r, + }; + let tok: token = switch (r) { + '^' => match (n) { + r: rune => switch (r) { + '^' => return (btoken::LXOR: token, loc), + '=' => return (btoken::BXOREQ: token, loc), + * => btoken::BXOR, + }, + io::EOF => btoken::BXOR, + }, + '*' => match (n) { + r: rune => switch (r) { + '=' => return (btoken::TIMESEQ: token, loc), + * => btoken::TIMES, + }, + io::EOF => btoken::TIMES, + }, + '/' => match (n) { + r: rune => switch (r) { + '=' => return (btoken::DIVEQ: token, loc), + '/' => { + // Comment + for (true) match (next(lexr)?) 
{ + io::EOF => break, + r: rune => if (r == '\n') { + break; + }, + }; + return lex(lexr); + }, + * => btoken::DIV, + }, + io::EOF => btoken::DIV, + }, + '%' => match (n) { + r: rune => switch (r) { + '=' => return (btoken::MODEQ: token, loc), + * => btoken::MODULO, + }, + io::EOF => btoken::MODULO, + }, + '+' => match (n) { + r: rune => switch (r) { + '=' => return (btoken::PLUSEQ: token, loc), + * => btoken::PLUS, + }, + io::EOF => btoken::PLUS, + }, + '-' => match (n) { + r: rune => switch (r) { + '=' => return (btoken::MINUSEQ: token, loc), + * => btoken::MINUS, + }, + io::EOF => btoken::MINUS, + }, + ':' => match (n) { + r: rune => switch (r) { + ':' => return (btoken::DOUBLE_COLON: token, loc), + * => btoken::COLON, + }, + io::EOF => btoken::COLON, + }, + '&' => match (n) { + r: rune => switch (r) { + '&' => return (btoken::LAND: token, loc), + '=' => return (btoken::ANDEQ: token, loc), + * => btoken::BAND, + }, + io::EOF => btoken::BAND, + }, + '|' => match (n) { + r: rune => switch (r) { + '|' => return (btoken::LOR: token, loc), + '=' => return (btoken::OREQ: token, loc), + * => btoken::BOR, + }, + io::EOF => btoken::BOR, + }, + '=' => match (n) { + r: rune => switch (r) { + '=' => return (btoken::LEQUAL: token, loc), + * => btoken::EQUAL, + }, + io::EOF => btoken::EQUAL, + }, + * => return syntaxerr(loc, "unknown token sequence"), + }; + unget(lexr, n); + return (tok, loc); +}; + +fn lex3( + lex: *lexer, + loc: location, + r: rune, +) ((token, location) | io::EOF | error) = { + let n = match (next(lex)?) { + io::EOF => return switch (r) { + '.' => (btoken::DOT: token, loc), + '<' => (btoken::LESS: token, loc), + '>' => (btoken::GREATER: token, loc), + }, + r: rune => r, + }; + return switch (r) { + '.' 
=> lex3dot(lex, loc, n), + '<' => lex3lt(lex, loc, n), + '>' => lex3gt(lex, loc, n), + * => syntaxerr(loc, "unknown token sequence"), + }; +}; + +fn lex3dot( + lex: *lexer, + loc: location, + n: rune, +) ((token, location) | io::EOF | error) = { + let tok: token = switch (n) { + '.' => { + let q = match (next(lex)?) { + io::EOF => io::EOF, + r: rune => r, + }; + let t = match (q) { + r: rune => switch (r) { + '.' => return (btoken::ELLIPSIS: token, loc), + * => btoken::SLICE, + }, + io::EOF => btoken::SLICE, + }; + unget(lex, q); + t; + }, + * => { + unget(lex, n); + btoken::DOT; + } + }; + return (tok, loc); +}; + +fn lex3lt( + lex: *lexer, + loc: location, + n: rune, +) ((token, location) | io::EOF | error) = { + let tok: token = switch (n) { + '<' => { + let q = match (next(lex)?) { + io::EOF => io::EOF, + r: rune => r, + }; + let t = match (q) { + r: rune => switch (r) { + '=' => return (btoken::LSHIFTEQ: token, loc), + * => btoken::LSHIFT, + }, + io::EOF => btoken::LSHIFT, + }; + unget(lex, q); + t; + }, + '=' => btoken::LESSEQ, + * => { + unget(lex, n); + btoken::LESS; + } + }; + return (tok, loc); +}; + +fn lex3gt( + lex: *lexer, + loc: location, + n: rune, +) ((token, location) | io::EOF | error) = { + let tok: token = switch (n) { + '>' => { + let q = match (next(lex)?) { + io::EOF => io::EOF, + r: rune => r, + }; + let t = match (q) { + r: rune => switch (r) { + '=' => return (btoken::RSHIFTEQ: token, loc), + * => btoken::RSHIFT, + }, + io::EOF => btoken::RSHIFT, + }; + unget(lex, q); + t; + }, + '=' => btoken::GREATEREQ, + * => { + unget(lex, n); + btoken::GREATER; + } + }; + return (tok, loc); +}; + +// Unlex a single token. The next call to [lex] will return this token, location +// pair. Only one unlex is supported at a time; you must call [lex] before +// calling [unlex] again. 
+export fn unlex(lex: *lexer, tok: (token, location)) void = { + assert(lex.un is void, "attempted to unlex more than one token"); + lex.un = tok; +}; + +fn next(lex: *lexer) (rune | io::EOF | io::error) = { + match (lex.rb[0]) { + void => void, + r: (rune | io::EOF) => { + lex.rb[0] = lex.rb[1]; + lex.rb[1] = void; + return r; + }, + }; + + for (true) { + return match (io::getrune(lex.in)) { + e: (io::EOF | io::error) => e, + r: rune => { + lexloc(lex, r); + r; + }, + }; + }; + + abort("unreachable"); +}; + +fn nextw(lex: *lexer) ((rune, location) | io::EOF | io::error) = { + for (true) { + let loc = mkloc(lex); + match (next(lex)) { + e: (io::error | io::EOF) => return e, + r: rune => if (!ascii::isspace(r)) { + return (r, loc); + }, + }; + }; + abort(); +}; + +fn lexloc(lex: *lexer, r: rune) void = { + switch (r) { + '\n' => { + lex.loc.0 += 1; + lex.loc.1 = 1; + }, + '\t' => lex.loc.1 += 8, + * => lex.loc.1 += 1, + }; +}; + +fn unget(lex: *lexer, r: (rune | io::EOF)) void = { + if (!(lex.rb[0] is void)) { + assert(lex.rb[1] is void, "ungot too many runes"); + lex.rb[1] = lex.rb[0]; + }; + lex.rb[0] = r; +}; + +fn mkloc(lex: *lexer) location = location { + path = lex.path, + line = lex.loc.0, + col = lex.loc.1, +}; + +fn syntaxerr(loc: location, why: str) error = (loc, why); diff --git a/hare/lex/token.ha b/hare/lex/token.ha @@ -0,0 +1,299 @@ +use encoding::utf8; +use strings; + +// A token with no additional context, such as '+' +export type btoken = enum { + // Keep ordered with bmap + // Alpha shorted + ATTR_FINI, + ATTR_INIT, + ATTR_NORETURN, + ATTR_OFFSET, + ATTR_SYMBOL, + ATTR_TEST, + UNDERSCORE, + ABORT, + ALLOC, + APPEND, + AS, + ASSERT, + BOOL, + BREAK, + CHAR, + CONST, + CONTINUE, + DEF, + DEFER, + ELSE, + ENUM, + EXPORT, + F32, + F64, + FALSE, + FN, + FOR, + FREE, + I16, + I32, + I64, + I8, + IF, + INT, + IS, + LEN, + LET, + MATCH, + NULL, + NULLABLE, + OFFSET, + RETURN, + RUNE, + SIZE, + STATIC, + STR, + STRUCT, + SWITCH, + TRUE, + TYPE, + U16, + 
U32, + U64, + U8, + UINT, + UINTPTR, + UNION, + USE, + VOID, + LAST_KEYWORD = VOID, + + // Operators + ANDEQ, + BAND, + BNOT, + BOR, + CASE, + COLON, + COMMA, + DIV, + DIVEQ, + DOT, + DOUBLE_COLON, + ELLIPSIS, + EQUAL, + GREATER, + GREATEREQ, + LAND, + LBRACE, + LBRACKET, + LEQUAL, + LESS, + LESSEQ, + LNOT, + LOR, + LPAREN, + LSHIFT, + LSHIFTEQ, + LXOR, + MINUS, + MINUSEQ, + MINUSMINUS, + MODEQ, + MODULO, + NEQUAL, + OREQ, + PLUS, + PLUSEQ, + PLUSPLUS, + RBRACE, + RBRACKET, + RPAREN, + RSHIFT, + RSHIFTEQ, + SEMICOLON, + SLICE, + TIMES, + TIMESEQ, + BXOR, + BXOREQ, +}; + +const bmap: [_]str = [ + // Keep ordered with btoken + "@fini", + "@init", + "@noreturn", + "@offset", + "@symbol", + "@test", + "_", + "abort", + "alloc", + "append", + "as", + "assert", + "bool", + "break", + "char", + "const", + "continue", + "def", + "defer", + "else", + "enum", + "export", + "f32", + "f64", + "false", + "fn", + "for", + "free", + "i16", + "i32", + "i64", + "i8", + "if", + "int", + "is", + "len", + "let", + "match", + "null", + "nullable", + "offset", + "return", + "rune", + "size", + "static", + "str", + "struct", + "switch", + "true", + "type", + "u16", + "u32", + "u64", + "u8", + "uint", + "uintptr", + "union", + "use", + "void", + "&=", + "&", + "~", + "|", + "=>", + ":", + ",", + "/", + "/=", + ".", + "::", + "...", + "=", + ">", + ">=", + "&&", + "{", + "[", + "==", + "<", + "<=", + "!", + "||", + "(", + "<<", + "<<=", + "^^", + "-", + "-=", + "--", + "%=", + "%", + "!=", + "|=", + "+", + "+=", + "++", + "}", + "]", + ")", + ">>", + ">>=", + ";", + "..", + "*", + "*=", + "^", + "^=", +]; + +// A loop label, such as ':example' +export type label = str; + +// A name, such as 'example' +export type name = str; + +// The type of a literal token, such as '1337u32' (U32) +export type literal_type = enum { + U8, + U16, + U32, + U64, + UINT, + UINTPTR, + I8, + I16, + I32, + I64, + INT, + ICONST, + F32, + F64, + FCONST, + RUNE, + STR, +}; + +export type iconst = i64; +export type 
fconst = f64; + +// A token for a literal value, such as '1337u32' +export type literal = (u8 | u16 | u32 | u64 | uint | uintptr | i8 | i16 | i32 | + i64 | int | iconst | f32 | f64 | fconst | rune | str); + +// A location within a source file. +export type location = struct { + path: str, + line: uint, + col: uint +}; + +// A single lexical token. +export type token = (btoken | label | name | literal); + +// Converts a token to its string representation +export fn tokstr(tok: token) const str = match (tok) { + b: btoken => bmap[b: int], + n: name => n: str, + l: literal => match (l) { + u8 => "u8", + u16 => "u16", + u32 => "u32", + u64 => "u64", + uint => "uint", + uintptr => "uintptr", + i8 => "i8", + i16 => "i16", + i32 => "i32", + i64 => "i64", + int => "int", + iconst => "iconst", + f32 => "f32", + f64 => "f64", + fconst => "fconst", + rune => "rune", + str => "str", + }, + * => abort(), // TODO +}; diff --git a/hare/module/context.ha b/hare/module/context.ha @@ -0,0 +1,84 @@ +use dirs; +use fs; +use hare::ast; +use os; +use path; +use strings; + +// TODO: Specify this at build time once harec supports -D +def DEFAULT_HAREPATH: str = "/usr/src/hare"; + +export type context = struct { + // Filesystem to use for the cache and source files. + fs: *fs::fs, + // List of paths to search, generally populated from HAREPATH plus some + // baked-in default. + paths: []str, + // Path to the Hare cache, generally populated from HARECACHE and + // defaulting to $XDG_CACHE_HOME/hare. + cache: str, + // Build tags to apply to this context. + tags: []tag, +}; + +// Initializes a new context with the system default configuration. The tag list +// is borrowed from the caller. 
+export fn context_init(tags: []tag) context = { + let ctx = context { + fs = os::cwd, + tags = tags, + paths: []str = match (os::getenv("HAREPATH")) { + void => { + let path: []str = alloc([ + strings::dup(DEFAULT_HAREPATH), + dirs::data("hare"), + ]); + path; + }, + s: str => { + let sl = strings::split(s, ":"); + let path: []str = alloc([], len(sl) + 1); + for (let i = 0z; i < len(sl); i += 1) { + append(path, strings::dup(sl[i])); + }; + append(path, strings::dup(".")); + free(sl); + path; + }, + }, + cache: str = match (os::getenv("HARECACHE")) { + void => dirs::cache("hare"), + s: str => strings::dup(s), + }, + ... + }; + return ctx; +}; + +// Frees resources associated with this context. +export fn context_finish(ctx: *context) void = { + for (let i = 0z; i < len(ctx.paths); i += 1) { + free(ctx.paths[i]); + }; + free(ctx.paths); + free(ctx.cache); +}; + +// Converts an identifier to a partial path (e.g. foo::bar becomes foo/bar). The +// return value must be freed by the caller. +export fn ident_path(name: ast::ident) str = { + let p = path::join(name[0]); + for (let i = 1z; i < len(name); i += 1) { + let q = path::join(p, name[i]); + free(p); + p = q; + }; + return p; +}; + +@test fn ident_path() void = { + let ident: ast::ident = ["foo", "bar", "baz"]; + let p = ident_path(ident); + defer free(p); + assert(p == "foo/bar/baz"); +}; diff --git a/hare/module/scan.ha b/hare/module/scan.ha @@ -0,0 +1,290 @@ +use ascii; +use bytes; +use crypto::sha256; +use encoding::utf8; +use fs; +use hare::ast; +use hare::lex; +use hare::parse; +use hash; +use io; +use path; +use slice; +use strings; +use strio; + +// Scans the files in a directory for eligible build inputs and returns a +// [version] which includes all applicable files and their dependencies. +export fn scan(ctx: *context, path: str) (version | error) = { + let sha = sha256::sha256(); + //defer! 
hash::close(sha); + let iter = match (fs::iter(ctx.fs, path)) { + fs::wrongtype => { + // Single file case + let inputs: []input = []; + let deps: []ast::ident = []; + let ft = match (type_for_ext(path)) { + void => return module_not_found, + ft: filetype => ft, + }; + let st = fs::stat(ctx.fs, path)?; + let in = input { + path = fs::resolve(ctx.fs, path), + stat = st, + ft = ft, + hash = scan_file(ctx, path, &deps)?, + ... + }; + append(inputs, in); + hash::write(sha, in.hash); + return version { + hash = hash::finish(sha), + basedir = path::dirname(fs::resolve(ctx.fs, path)), + depends = deps, + inputs = inputs, + }; + }, + err: fs::error => return err, + iter: *fs::iterator => iter, + }; + let ver = version { + basedir = strings::dup(path), + ... + }; + scan_directory(ctx, &ver, sha, path, iter)?; + ver.hash = hash::finish(sha); + return ver; +}; + +fn scan_directory( + ctx: *context, + ver: *version, + sha: *hash::hash, + path: str, + iter: *fs::iterator, +) (void | error) = { + for (true) match (fs::next(iter)) { + void => break, + ent: fs::dirent => switch (ent.ftype) { + fs::mode::LINK => abort(), // TODO + fs::mode::DIR => { + let d = strings::to_utf8(ent.name); + if (len(d) == 0 || ( + !strings::has_prefix(ent.name, "+") && + !strings::has_prefix(ent.name, "-"))) { + continue; + }; + if (!eligible(ctx, ent.name, true)) { + continue; + }; + let p = path::join(path, ent.name); + let iter = fs::iter(ctx.fs, p)?; + scan_directory(ctx, ver, sha, p, iter)?; + }, + fs::mode::REG => if (eligible(ctx, ent.name, false)) { + let p = path::join(path, ent.name); + let st = fs::stat(ctx.fs, p)?; + let in = input { + path = fs::resolve(ctx.fs, p), + stat = st, + ft = type_for_ext(ent.name) as filetype, + hash = scan_file(ctx, p, &ver.depends)?, + ... + }; + append(ver.inputs, in); + hash::write(sha, in.hash); + }, + * => void, + }, + }; +}; + +// Looks up a module by its identifier from HAREPATH, and returns a [version] +// which includes all eligible build inputs. 
+export fn lookup(ctx: *context, name: ast::ident) (version | error) = { + let ipath = ident_path(name); + for (let i = len(ctx.paths); i > 0; i -= 1) { + let cand = path::join(ctx.paths[i - 1], ipath); + defer free(cand); + match (scan(ctx, cand)) { + v: version => return v, + e: error => void, + }; + }; + return module_not_found; +}; + +fn eligible(ctx: *context, name: str, dir: bool) bool = { + if (!dir) { + let eligible = false; + const ext = path::extension(name); + static const exts = [".ha", ".s"]; + for (let i = 0z; i < len(exts); i += 1) { + if (exts[i] == ext) { + eligible = true; + break; + }; + }; + if (!eligible) { + return false; + }; + }; + + // XXX: It might be nice if the stdlib offered search functions which + // support multiple needles + let p = strings::index(name, '+'); + let m = strings::index(name, '-'); + if (p is void && m is void) { + return true; + }; + let i: size = + if (p is void && m is size) m: size + else if (m is void && p is size) p: size + else if (m: size < p: size) m: size + else p: size; + let tags = match (strings::index(name, '.')) { + void => strings::sub(name, i, strings::end), + e: size => strings::sub(name, i, e), + }; + let tags = match (parse_tags(tags)) { + void => return false, + t: []tag => t, + }; + defer tags_free(tags); + return tags_compat(ctx.tags, tags); +}; + +@test fn eligible() void = { + let ctx = context { + tags = [ + tag { name = "incl", mode = tag_mode::INCLUSIVE }, + tag { name = "excl", mode = tag_mode::EXCLUSIVE }, + ], + }; + assert(eligible(&ctx, "foo.ha", false)); + assert(eligible(&ctx, "foo.s", false)); + assert(eligible(&ctx, "foo+incl.ha", false)); + assert(eligible(&ctx, "foo-excl.ha", false)); + assert(eligible(&ctx, "foo-other.ha", false)); + assert(eligible(&ctx, "foo+incl-excl.ha", false)); + + assert(!eligible(&ctx, "foo.txt", false)); + assert(!eligible(&ctx, "foo-incl.ha", false)); + assert(!eligible(&ctx, "foo+excl.ha", false)); + assert(!eligible(&ctx, "foo+other.ha", false)); + 
assert(!eligible(&ctx, "foo-incl+excl.ha", false)); + + assert(eligible(&ctx, "+incl", true)); + assert(eligible(&ctx, "-excl", true)); + assert(!eligible(&ctx, "-incl", true)); + assert(!eligible(&ctx, "+excl", true)); +}; + +fn type_for_ext(name: str) (filetype | void) = { + const ext = path::extension(name); + return + if (ext == ".ha") filetype::HARE + else if (ext == ".s") filetype::ASSEMBLY + else void; +}; + +fn scan_file( + ctx: *context, + path: str, + deps: *[]ast::ident, +) ([]u8 | error) = { + let f = fs::open(ctx.fs, path)?; + defer io::close(f); + let sha = sha256::sha256(); + //defer! hash::close(sha); + let tee = io::tee(f, hash::writer(sha)); + defer io::close(tee); + + let lexer = lex::init(tee, path); + let imports = parse::imports(&lexer)?; + for (let i = 0z; i < len(imports); i += 1) { + let ident = match (imports[i]) { + m: ast::import_module => m: ast::ident, + a: ast::import_alias => a.ident, + o: ast::import_objects => o.ident, + }; + if (!have_ident(deps, ident)) { + append(*deps, ident); + }; + }; + + io::copy(io::empty, tee)?; // Finish spooling out the file for the SHA + return hash::finish(sha); +}; + +fn have_ident(sl: *[]ast::ident, id: ast::ident) bool = { + // XXX: We shouldn't have to deref sl here + for (let i = 0z; i < len(*sl); i += 1) { + if (ast::ident_eq(sl[i], id)) { + return true; + }; + }; + return false; +}; + +// Parses a set of build tags, returning void if the string is an invalid tag +// set. The caller must free the return value with [tags_free]. +export fn parse_tags(in: str) ([]tag | void) = { + let tags: []tag = []; + // defer! tags_free(tags); + let iter = strings::iter(in); + for (true) { + let t = tag { ... 
}; + let m = match (strings::next(&iter)) { + void => break, + r: rune => r, + }; + t.mode = switch (m) { + * => return, + '+' => tag_mode::INCLUSIVE, + '-' => tag_mode::EXCLUSIVE, + }; + let buf = strio::dynamic(); + for (true) match (strings::next(&iter)) { + void => break, + r: rune => { + if (ascii::isalnum(r) || r == '_') { + strio::append_rune(buf, r); + } else { + strings::push(&iter, r); + break; + }; + }, + }; + t.name = strio::finish(buf); + append(tags, t); + }; + return tags; +}; + +// Frees a set of tags. +export fn tags_free(tags: []tag) void = { + for (let i = 0z; i < len(tags); i += 1) { + free(tags[i].name); + }; + free(tags); +}; + +// Compares two tag sets and tells you if they are compatible. +export fn tags_compat(have: []tag, want: []tag) bool = { + // XXX: O(n²), lame + for (let i = 0z; i < len(want); i += 1) { + let present = false; + for (let j = 0z; j < len(have); j += 1) { + if (have[j].name == want[i].name) { + present = have[j].mode == tag_mode::INCLUSIVE; + break; + }; + }; + switch (want[i].mode) { + tag_mode::INCLUSIVE => if (!present) return false, + tag_mode::EXCLUSIVE => if (present) return false, + }; + }; + return true; +}; diff --git a/hare/module/types.ha b/hare/module/types.ha @@ -0,0 +1,57 @@ +use fs; +use hare::ast; +use hare::parse; +use io; + +// The inclusive/exclusive state for a build tag. +export type tag_mode = enum { + INCLUSIVE, + EXCLUSIVE, +}; + +// A build tag, e.g. +x86_64. +export type tag = struct { + name: str, + mode: tag_mode, +}; + +// The manifest for a particular module, with some number of inputs, and +// versions. +export type manifest = struct { + inputs: []input, + versions: []version, +}; + +// A module version: a set of possible input files for that module. +export type version = struct { + hash: []u8, + basedir: str, + depends: []ast::ident, + inputs: []input, +}; + +export type filetype = enum { + HARE, + ASSEMBLY, +}; + +// An input to a module, generally a source file. 
+export type input = struct {
+	hash: []u8,
+	path: str,
+	ft: filetype,
+	stat: fs::filestat,
+};
+
+// The requested module could not be found.
+export type module_not_found = void!;
+
+// All possible error types.
+export type error = (fs::error | io::error | parse::error | module_not_found)!;
+
+// Converts an error into a human-friendly string.
+export fn errstr(err: error) const str = match (err) {
+	err: fs::error => fs::errstr(err),
+	err: io::error => io::errstr(err),
+	err: parse::error => parse::errstr(err),
+	module_not_found => "Module not found",
+};
diff --git a/hare/parse/+test.ha b/hare/parse/+test.ha
@@ -0,0 +1,167 @@
+use bufio;
+use fmt;
+use hare::ast;
+use hare::lex;
+use io::{mode};
+use strings;
+
+@test fn ident() void = {
+	{
+		const in = "foo";
+		let buf = bufio::fixed(strings::to_utf8(in), mode::READ);
+		let lexer = lex::init(buf, "<test>");
+		let ident = ident(&lexer) as ast::ident;
+		defer ast::ident_free(ident);
+		assert(len(ident) == 1);
+		assert(ident[0] == "foo");
+		assert(lex::lex(&lexer) is io::EOF);
+	};
+
+	{
+		const in = "foo::bar";
+		let buf = bufio::fixed(strings::to_utf8(in), mode::READ);
+		let lexer = lex::init(buf, "<test>");
+		let ident = ident(&lexer) as ast::ident;
+		defer ast::ident_free(ident);
+		assert(len(ident) == 2);
+		assert(ident[0] == "foo" && ident[1] == "bar");
+		assert(lex::lex(&lexer) is io::EOF);
+	};
+
+	{
+		const in = "foo::bar::baz";
+		let buf = bufio::fixed(strings::to_utf8(in), mode::READ);
+		let lexer = lex::init(buf, "<test>");
+		let ident = ident(&lexer) as ast::ident;
+		defer ast::ident_free(ident);
+		assert(len(ident) == 3);
+		assert(ident[0] == "foo" && ident[1] == "bar"
+			&& ident[2] == "baz");
+		assert(lex::lex(&lexer) is io::EOF);
+	};
+
+	{
+		// The token following the identifier must be left in the lexer.
+		const in = "foo::bar;";
+		let buf = bufio::fixed(strings::to_utf8(in), mode::READ);
+		let lexer = lex::init(buf, "<test>");
+		let ident = ident(&lexer) as ast::ident;
+		defer ast::ident_free(ident);
+		assert(len(ident) == 2);
+		assert(ident[0] == "foo" && ident[1] == "bar");
+		let tok = lex::lex(&lexer) as (lex::token, lex::location);
+		assert(tok.0 as lex::btoken == lex::btoken::SEMICOLON);
+	};
+};
+
+@test fn imports() void = {
+	{
+		const in = "use foo;";
+		let buf = bufio::fixed(strings::to_utf8(in), mode::READ);
+		let lexer = lex::init(buf, "<test>");
+		let mods = imports(&lexer) as []ast::import;
+		defer for (let i = 0z; i < len(mods); i += 1) {
+			ast::import_free(mods[i]);
+		};
+
+		assert(len(mods) == 1);
+		assert(mods[0] is ast::import_module);
+
+		let mod = mods[0] as ast::import_module;
+		assert(len(mod) == 1 && mod[0] == "foo");
+		assert(lex::lex(&lexer) is io::EOF);
+	};
+
+	{
+		const in =
+			"use foo;\n"
+			"use bar;\n"
+			"use baz::bat;\n\n"
+			"export fn main() void = void;";
+		let buf = bufio::fixed(strings::to_utf8(in), mode::READ);
+		let lexer = lex::init(buf, "<test>");
+		let mods = imports(&lexer) as []ast::import;
+		defer for (let i = 0z; i < len(mods); i += 1) {
+			ast::import_free(mods[i]);
+		};
+
+		assert(len(mods) == 3);
+		let expected: [_][]str = [["foo"], ["bar"], ["baz", "bat"]];
+
+		for (let i = 0z; i < len(mods); i += 1) {
+			assert(mods[i] is ast::import_module);
+			let mod = mods[i] as ast::import_module;
+			assert(len(mod) == len(expected[i]));
+			for (let j = 0z; j < len(expected[i]); j += 1z) {
+				assert(mod[j] == expected[i][j]);
+			};
+		};
+
+		let tok = lex::lex(&lexer) as (lex::token, lex::location);
+		assert(tok.0 as lex::btoken == lex::btoken::EXPORT);
+	};
+
+	{
+		const in =
+			"use foo = bar;\n"
+			"use baz = bat;\n"
+			"use qux = quux::corge;\n"
+			"export fn main() void = void;";
+		let buf = bufio::fixed(strings::to_utf8(in), mode::READ);
+		let lexer = lex::init(buf, "<test>");
+		let mods = imports(&lexer) as []ast::import;
+		defer for (let i = 0z; i < len(mods); i += 1) {
+			ast::import_free(mods[i]);
+		};
+
+		assert(len(mods) == 3);
+		let expected: [_](str, []str) = [
+			("foo", ["bar"]),
+			("baz", ["bat"]),
+			("qux", ["quux", "corge"])
+		];
+
+		for (let i = 0z; i < len(mods); i += 1) {
+			assert(mods[i] is ast::import_alias);
+			let mod = mods[i] as ast::import_alias;
+			assert(mod.alias == expected[i].0);
+			assert(len(mod.ident) == len(expected[i].1));
+			for (let j = 0z; j < len(expected[i].1); j += 1z) {
+				assert(mod.ident[j] == expected[i].1[j]);
+			};
+		};
+	};
+
+	{
+		const in =
+			"use foo::{bar};\n"
+			"use baz::{bat, qux};\n"
+			"use quux::corge::{grault, garply,};\n"
+			"export fn main() void = void;";
+		let buf = bufio::fixed(strings::to_utf8(in), mode::READ);
+		let lexer = lex::init(buf, "<test>");
+		let mods = imports(&lexer) as []ast::import;
+		defer for (let i = 0z; i < len(mods); i += 1) {
+			ast::import_free(mods[i]);
+		};
+
+		assert(len(mods) == 3);
+		let expected: [_]([]str, []str) = [
+			(["foo"], ["bar"]),
+			(["baz"], ["bat", "qux"]),
+			(["quux", "corge"], ["grault", "garply"])
+		];
+
+		for (let i = 0z; i < len(mods); i += 1) {
+			assert(mods[i] is ast::import_objects);
+			let mod = mods[i] as ast::import_objects;
+			assert(len(mod.objects) == len(expected[i].1));
+			// Bound by the object list (.1), not the module ident (.0),
+			// so that every expected object is compared.
+			for (let j = 0z; j < len(expected[i].1); j += 1z) {
+				assert(mod.objects[j] == expected[i].1[j]);
+			};
+			assert(len(mod.ident) == len(expected[i].0));
+			for (let j = 0z; j < len(expected[i].0); j += 1z) {
+				assert(mod.ident[j] == expected[i].0[j]);
+			};
+		};
+	};
+};
diff --git a/hare/parse/parse.ha b/hare/parse/parse.ha
@@ -0,0 +1,96 @@
+use hare::ast;
+use hare::lex;
+use hare::lex::{btoken};
+use slice;
+
+// Parses names separated by ::. The bool is true when parsing stopped because
+// no name followed (either after a :: or at the very start), and false when it
+// stopped after a name that was not followed by ::.
+fn ident_trailing(lexer: *lex::lexer) ((ast::ident, bool) | error) = {
+	let ident: []str = [];
+	let z = 0z;
+	for (true) {
+		let name = match (try_name(lexer)?) {
+			n: lex::name => n,
+			void => return (ident: ast::ident, true),
+		};
+		append(ident, name: str);
+		z += len(name);
+		match (try_btoken(lexer, btoken::DOUBLE_COLON)?) {
+			void => break,
+			* => void, // Grab the next ident
+		};
+		z += 1;
+	};
+	if (z > ast::IDENT_MAX) {
+		ast::ident_free(ident: ast::ident);
+		return syntaxerr(mkloc(lexer),
+			"Identifier exceeds maximum length");
+	};
+	return (ident: ast::ident, false);
+};
+
+// Parses a single identifier, i.e. foo::bar::baz
+export fn ident(lexer: *lex::lexer) (ast::ident | error) = {
+	let ident = ident_trailing(lexer)?;
+	// Propagate the syntax error; without ? the check has no effect.
+	synassert(mkloc(lexer), !ident.1, "Unexpected trailing :: in ident")?;
+	return ident.0;
+};
+
+// Parses a comma-separated list of names terminated by }. A trailing comma
+// before the closing brace is accepted.
+fn parse_name_list(lexer: *lex::lexer) ([]str | error) = {
+	let names: []str = [];
+	for (true) {
+		append(names, want_name(lexer)?: str);
+		switch (want_btoken(lexer, btoken::COMMA, btoken::RBRACE)?) {
+			btoken::COMMA => match (try_btoken(lexer, btoken::RBRACE)?) {
+				void => void,
+				* => return names,
+			},
+			btoken::RBRACE => return names,
+			* => abort(), // Unreachable
+		};
+	};
+	abort();
+};
+
+// Parses the import list for a sub-unit
+export fn imports(lexer: *lex::lexer) ([]ast::import | error) = {
+	let imports: []ast::import = [];
+	for (true) {
+		match (try_btoken(lexer, btoken::USE)?) {
+			void => break,
+			* => void,
+		};
+
+		let name = ident_trailing(lexer)?;
+
+		switch (want_btoken(lexer, btoken::SEMICOLON, btoken::LBRACE,
+				btoken::EQUAL)?) {
+			btoken::SEMICOLON => {
+				synassert(mkloc(lexer), !name.1,
+					"Unexpected trailing :: in ident")?;
+				append(imports, name.0: ast::import_module);
+			},
+			btoken::LBRACE => {
+				synassert(mkloc(lexer), name.1,
+					"Expected trailing :: in ident")?;
+				let objects = parse_name_list(lexer)?;
+				append(imports, ast::import_objects {
+					ident = name.0,
+					objects = objects,
+				});
+				want_btoken(lexer, btoken::SEMICOLON)?;
+			},
+			btoken::EQUAL => {
+				synassert(mkloc(lexer),
+					len(name.0) == 1 && !name.1,
+					"Expected name, not ident")?;
+				let ident = ident(lexer)?;
+				append(imports, ast::import_alias {
+					ident = ident,
+					alias = name.0[0],
+				});
+				want_btoken(lexer, btoken::SEMICOLON)?;
+			},
+			* => abort(), // Unreachable
+		};
+	};
+	return imports;
+};
diff --git a/hare/parse/types.ha b/hare/parse/types.ha
@@ -0,0 +1,16 @@
+use hare::lex;
+
+// All possible error types
+export type error = lex::error!;
+
+// Convert an error into a human-friendly string
+export fn errstr(err: error) const str = lex::errstr(err: lex::error);
+
+fn syntaxerr(loc: lex::location, why: str) lex::error =
+	(loc, why): lex::syntax: lex::error;
+
+fn mkloc(lex: *lex::lexer) lex::location = lex::location {
+	path = lex.path,
+	line = lex.loc.0,
+	col = lex.loc.1,
+};
diff --git a/hare/parse/util.ha b/hare/parse/util.ha
@@ -0,0 +1,89 @@
+use hare::ast;
+use hare::lex;
+
+// Requires the next token to be a name. Returns that name, or an error.
+fn want_name(lexer: *lex::lexer) (lex::name | error) = {
+	match (lex::lex(lexer)?) {
+		io::EOF => return syntaxerr(mkloc(lexer),
+			"Expected name, found EOF"),
+		t: (lex::token, lex::location) => match (t.0) {
+			n: lex::name => return n,
+			// TODO: Use fmt+lex::tokstr here:
+			* => return syntaxerr(mkloc(lexer),
+				"Expected name, got <something else>"),
+		},
+	};
+};
+
+// Looks for a matching name from the lexer, and if not present, unlexes the
+// token and returns void. If found, the token is consumed from the lexer and is
+// returned.
+fn try_name(lexer: *lex::lexer) (lex::name | error | void) = {
+	let tuple = match (lex::lex(lexer)?) {
+		io::EOF => return,
+		t: (lex::token, lex::location) => match (t.0) {
+			n: lex::name => return n,
+			* => t,
+		},
+	};
+	lex::unlex(lexer, tuple);
+};
+
+// Requires the next token to be one of the given btokens. Returns the btoken
+// which was found, or a syntax error if the next token is anything else.
+fn want_btoken(
+	lexer: *lex::lexer,
+	want: lex::btoken...
+) (lex::btoken | error) = {
+	match (lex::lex(lexer)?) {
+		io::EOF => return syntaxerr(mkloc(lexer),
+			"Expected token, found EOF"),
+		t: (lex::token, lex::location) => match (t.0) {
+			b: lex::btoken => {
+				for (let i = 0z; i < len(want); i += 1) {
+					if (b == want[i]) {
+						return b;
+					};
+				};
+				// TODO: Use fmt+lex::tokstr here:
+				return syntaxerr(mkloc(lexer),
+					"Expected <something>, got <something else>");
+			},
+			// TODO: Use fmt+lex::tokstr here:
+			* => return syntaxerr(mkloc(lexer),
+				"Expected <something>, got <something else>"),
+		},
+	};
+};
+
+// Looks for a matching btoken from the lexer, and if not present, unlexes the
+// token and returns void. If found, the token is consumed from the lexer and is
+// returned.
+fn try_btoken(
+	lexer: *lex::lexer,
+	want: lex::btoken...
+) (lex::btoken | error | void) = {
+	let tok = lex::lex(lexer);
+	let tuple = match (tok?) {
+		io::EOF => return,
+		t: (lex::token, lex::location) => {
+			match (t.0) {
+				b: lex::btoken =>
+					for (let i = 0z; i < len(want); i += 1) {
+						if (b == want[i]) {
+							return b;
+						};
+					},
+				* => void,
+			};
+			t;
+		},
+	};
+	lex::unlex(lexer, tuple);
+};
+
+// Returns a syntax error if cond is false and void otherwise
+fn synassert(loc: lex::location, cond: bool, msg: str) (void | error) = {
+	if (!cond) {
+		return syntaxerr(loc, msg);
+	};
+};
diff --git a/hash/fnv/fnv.ha b/hash/fnv/fnv.ha
@@ -0,0 +1,183 @@
+// Implements the Fowler–Noll–Vo (FNV) hash function. This hash is recommended
+// for hash map keys and similar applications. It is a non-cryptographic hash.
+use endian; +use hash; +use io; +use strings; + +// Multipliers (prime*) and initial values (basis*) used by the 32-bit and +// 64-bit hash states below. +def prime32: u32 = 16777619; +def prime64: u64 = 1099511628211; +def basis32: u32 = 2166136261; +def basis64: u64 = 14695981039346656037; + +// State of a 32-bit hash: the [hash::hash] interface followed by the running +// value. +type state32 = struct { + hash: hash::hash, + v: u32, +}; + +// State of a 64-bit hash: the [hash::hash] interface followed by the running +// value. +type state64 = struct { + hash: hash::hash, + v: u64, +}; + +// Creates a [hash::hash] which computes the FNV-1 32-bit hash function. +// +// Unless you have a reason to use this, [fnv32a] is recommended instead. +export fn fnv32() *hash::hash = alloc(state32 { + hash = hash::hash { + stream = io::stream { + writer = &fnv32_write, + closer = &fnv_close, + }, + sum = &fnv32_sum, + reset = &fnv32_reset, + sz = 4, + }, + v = basis32, +}): *hash::hash; + +// Creates a [hash::hash] which computes the FNV-1a 32-bit hash function. +export fn fnv32a() *hash::hash = alloc(state32 { + hash = hash::hash { + stream = io::stream { + writer = &fnv32a_write, + closer = &fnv_close, + }, + sum = &fnv32_sum, + reset = &fnv32_reset, + sz = 4, + }, + v = basis32, +}): *hash::hash; + +// Creates a [hash::hash] which computes the FNV-1 64-bit hash function. +// +// Unless you have a reason to use this, [fnv64a] is recommended instead. +export fn fnv64() *hash::hash = alloc(state64 { + hash = hash::hash { + stream = io::stream { + writer = &fnv64_write, + closer = &fnv_close, + }, + sum = &fnv64_sum, + reset = &fnv64_reset, + sz = 8, + }, + v = basis64, +}): *hash::hash; + +// Creates a [hash::hash] which computes the FNV-1a 64-bit hash function. 
+export fn fnv64a() *hash::hash = alloc(state64 { + hash = hash::hash { + stream = io::stream { + writer = &fnv64a_write, + closer = &fnv_close, + }, + sum = &fnv64_sum, + reset = &fnv64_reset, + sz = 8, + }, + v = basis64, +}): *hash::hash; + +// Closer shared by all variants: frees the stream pointer it is given. +fn fnv_close(s: *io::stream) void = free(s); + +// 32-bit FNV-1 writer: for each byte, multiply by the prime, then XOR the byte +// in. Always consumes the whole buffer. +fn fnv32_write(s: *io::stream, buf: const []u8) (size | io::error) = { + let s = s: *state32; + for (let i = 0z; i < len(buf); i += 1) { + s.v *= prime32; + s.v ^= buf[i]; + }; + return len(buf); +}; + +// 32-bit FNV-1a writer: for each byte, XOR the byte in, then multiply by the +// prime. Always consumes the whole buffer. +fn fnv32a_write(s: *io::stream, buf: const []u8) (size | io::error) = { + let s = s: *state32; + for (let i = 0z; i < len(buf); i += 1) { + s.v ^= buf[i]; + s.v *= prime32; + }; + return len(buf); +}; + +// Restores the running value of a 32-bit state to basis32. +fn fnv32_reset(h: *hash::hash) void = { + let h = h: *state32; + h.v = basis32; +}; + +// Returns the running 32-bit value as a newly allocated 4-byte slice, written +// with endian::host. +fn fnv32_sum(h: *hash::hash) []u8 = { + let h = h: *state32; + let buf: [4]u8 = [0...]; + endian::host.putu32(buf, h.v); + let sl: []u8 = alloc([], 4); + append(sl, ...buf); + return sl; +}; + +// 64-bit FNV-1 writer: for each byte, multiply by the prime, then XOR the byte +// in. Always consumes the whole buffer. +fn fnv64_write(s: *io::stream, buf: const []u8) (size | io::error) = { + let s = s: *state64; + for (let i = 0z; i < len(buf); i += 1) { + s.v *= prime64; + s.v ^= buf[i]; + }; + return len(buf); +}; + +// 64-bit FNV-1a writer: for each byte, XOR the byte in, then multiply by the +// prime. Always consumes the whole buffer. +fn fnv64a_write(s: *io::stream, buf: const []u8) (size | io::error) = { + let s = s: *state64; + for (let i = 0z; i < len(buf); i += 1) { + s.v ^= buf[i]; + s.v *= prime64; + }; + return len(buf); +}; + +// Restores the running value of a 64-bit state to basis64. +fn fnv64_reset(h: *hash::hash) void = { + let h = h: *state64; + h.v = basis64; +}; + +// Returns the running 64-bit value as a newly allocated 8-byte slice, written +// with endian::host. +fn fnv64_sum(h: *hash::hash) []u8 = { + let h = h: *state64; + let buf: [8]u8 = [0...]; + endian::host.putu64(buf, h.v); + let sl: []u8 = alloc([], 8); + append(sl, ...buf); + return sl; +}; + +// Returns the sum of a 32-bit FNV hash. +export fn sum32(h: *hash::hash) u32 = { + assert(h.reset == &fnv32_reset); + let h = h: *state32; + return h.v; +}; + +// Returns the sum of a 64-bit FNV hash. 
+export fn sum64(h: *hash::hash) u64 = { + // The reset pointer is used to check that h is a 64-bit state before + // it is cast. + assert(h.reset == &fnv64_reset); + let h = h: *state64; + return h.v; +}; + +// Checks the sums produced by [fnv32] against a table of (input, sum) pairs, +// resetting the same hash between inputs. +@test fn fnv32() void = { + // TODO: Expand these tests + // I am too tired + const vectors: [_](str, u32) = [ + ("", 2166136261), + ("hello world", 1418570095), + ("Hare is a cool language", 2663852071), + ("'UNIX was not designed to stop its users from doing stupid things, as that would also stop them from doing clever things' - Doug Gwyn", 1203174417), + ("'Life is too short to run proprietary software' - Bdale Garbee", 493463614), + ("'The central enemy of reliability is complexity.' - Geer et al", 3263526736), + ("'A language that doesn’t have everything is actually easier to program in than some that do.' - Dennis Ritchie", 3069348265), + ]; + let hash = fnv32(); + defer hash::close(hash); + for (let i = 0z; i < len(vectors); i += 1) { + let vec = vectors[i]; + hash::reset(hash); + hash::write(hash, strings::to_utf8(vec.0)); + assert(sum32(hash) == vec.1); + }; +}; diff --git a/hash/hash.ha b/hash/hash.ha @@ -0,0 +1,46 @@ +use io; +// TODO: Let caller supply the output buffer, to avoid the slice allocation + +// The general purpose interface for a hashing function. +export type hash = struct { + // A stream which only supports writes and never returns errors. + stream: io::stream, + + // Returns the current hash. + sum: *fn(hash: *hash) []u8, + + // Resets the hash function to its initial state. + reset: *fn(hash: *hash) void, + + // Size of the hash in bytes. + sz: size, +}; + +// Returns a writable [io::stream] for a given hash. +export fn writer(h: *hash) *io::stream = &h.stream; + +// Writes an input to the hash function. The result of [io::write] is asserted +// to be a size, so an error from the stream aborts. +export fn write(h: *hash, buf: const []u8) size = + io::write(&h.stream, buf) as size; + +// Finalizes the hash, frees resources associated with the hash, and returns the +// sum. The return value is heap allocated, the caller needs to free it. 
+export fn finish(h: *hash) []u8 = {
+	let sum = sum(h);
+	io::close(&h.stream);
+	return sum;
+};
+
+// Closes a hash, freeing its resources and discarding the checksum.
+export fn close(h: *hash) void = io::close(&h.stream);
+
+// Returns the current sum. The return value is heap allocated, the caller
+// needs to free it.
+export fn sum(h: *hash) []u8 = h.sum(h);
+
+// Resets the hash function to its initial state.
+export fn reset(h: *hash) void = h.reset(h);
+
+// Returns the size of the hash in bytes. This is consistent regardless
+// of the hash state.
+export fn sz(h: *hash) size = h.sz;
diff --git a/io/+test/copy.ha b/io/+test/copy.ha
@@ -0,0 +1,93 @@
+// Reader which yields a single 42-byte read and EOF on every later call.
+fn test_copier_read(s: *stream, buf: []u8) (size | EOF | error) = {
+	let stream = s: *test_stream;
+	if (stream.r == 0) {
+		assert(len(buf) > 42);
+		stream.nreads += 1;
+		stream.r = 42;
+		return 42;
+	} else {
+		return EOF;
+	};
+};
+
+fn test_copier_open() test_stream = {
+	let stream = test_stream_open();
+	stream.stream.reader = &test_copier_read;
+	return stream;
+};
+
+// Copier which records a marker value in the destination and reports 1337
+// bytes, so the test can tell that it was used instead of the fallback.
+fn test_copier_copy(a: *stream, b: *stream) (size | error) = {
+	assert(a != b);
+	assert(a.reader == &test_copier_read && b.reader == &test_copier_read);
+	let stream = a: *test_stream;
+	stream.w = 62893;
+	return 1337;
+};
+
+fn test_copy_unsupported(a: *stream, b: *stream) (size | error) = unsupported;
+
+fn io::println(msg: str) void;
+fn strconv::ztos(z: size) str;
+
+@test fn copy() void = {
+	let a = test_copier_open(), b = test_copier_open();
+	match (copy(&b.stream, &a.stream)) {
+		n: size => {
+			assert(n == 42);
+			assert(a.r == 42);
+			assert(b.w == 42);
+		},
+		error => abort(),
+	};
+	close(&a: *stream);
+	close(&b: *stream);
+
+	a = test_copier_open();
+	b = test_copier_open();
+	a.stream.copier = &test_copier_copy;
+	b.stream.copier = &test_copier_copy;
+	match (copy(&b.stream, &a.stream)) {
+		n: size => {
+			assert(n == 1337);
+			assert(b.w == 62893);
+		},
+		error => abort(),
+	};
+	close(&a: *stream);
+	close(&b: *stream);
+
+	// Fallback
+	a = test_copier_open();
+	b = test_copier_open();
+	a.stream.copier = &test_copy_unsupported;
+	b.stream.copier = &test_copy_unsupported;
+	match (copy(&b.stream, &a.stream)) {
+		n: size => {
+			assert(n == 42);
+			assert(a.r == 42);
+			assert(b.w == 42);
+		},
+		error => abort(),
+	};
+	close(&a: *stream);
+	close(&b: *stream);
+
+	// Fallback (+short writes)
+	a = test_copier_open();
+	b = test_copier_open();
+	a.stream.copier = &test_copy_unsupported;
+	b.stream.copier = &test_copy_unsupported;
+	b.stream.writer = &test_stream_write_short;
+	match (copy(&b.stream, &a.stream)) {
+		n: size => {
+			assert(n == 42);
+			assert(a.r == 42);
+			assert(a.nreads == 1);
+			assert(b.w == 42);
+			assert(b.nwrites > 1);
+		},
+		error => abort(),
+	};
+	close(&a: *stream);
+	close(&b: *stream);
+};
diff --git a/io/+test/limit.ha b/io/+test/limit.ha
@@ -0,0 +1,36 @@
+@test fn limit() void = {
+	let buf: [15z]u8 = [0...];
+	let source_stream = test_stream_open();
+	defer close(&source_stream.stream);
+
+	let r_stream = limit_reader(&source_stream.stream, 20);
+	match (write(r_stream, buf)) {
+		unsupported => void,
+		* => abort(),
+	};
+	match (read(r_stream, buf)) {
+		n: size => assert(n == 15),
+		error => abort(),
+	};
+	match (read(r_stream, buf)) {
+		n: size => assert(n == 5),
+		error => abort(),
+	};
+	close(r_stream);
+
+	let w_stream = limit_writer(&source_stream.stream, 20);
+	match (read(w_stream, buf)) {
+		unsupported => void,
+		* => abort(),
+	};
+	match (write(w_stream, buf)) {
+		n: size => assert(n == 15),
+		error => abort(),
+	};
+	match (write(w_stream, buf)) {
+		n: size => assert(n == 5),
+		error => abort(),
+	};
+	close(w_stream);
+
+};
diff --git a/io/+test/stream.ha b/io/+test/stream.ha
@@ -0,0 +1,51 @@
+use strings;
+
+// Stream used by the tests: counts the bytes and calls seen by its reader (r,
+// nreads) and writer (w, nwrites).
+type test_stream = struct {
+	stream: stream,
+	r: size,
+	nreads: size,
+	w: size,
+	nwrites: size,
+};
+
+fn test_stream_read(s: *stream, buf: []u8) (size | EOF | error) = {
+	let stream = s: *test_stream;
+	stream.r += len(buf);
+	stream.nreads += 1;
+	return len(buf);
+};
+
+fn test_stream_write(s: *stream, buf: const []u8) (size | error) = {
+	let stream = s: *test_stream;
+	stream.w += len(buf);
+	stream.nwrites += 1;
+	return len(buf);
+};
+
+// Writer which accepts only half of any buffer longer than 10 bytes.
+fn test_stream_write_short(s: *stream, buf: const []u8) (size | error) = {
+	let stream = s: *test_stream;
+	stream.nwrites += 1;
+	if (len(buf) > 10) {
+		stream.w += len(buf) / 2;
+		return len(buf) / 2;
+	} else {
+		stream.w += len(buf);
+		return len(buf);
+	};
+};
+
+fn test_stream_open() test_stream = test_stream {
+	stream = stream {
+		name = strings::dup("test_stream"),
+		reader = &test_stream_read,
+		writer = &test_stream_write,
+		closer = &test_stream_close,
+		...
+	},
+	...
+};
+
+fn test_stream_close(s: *stream) void = {
+	let stream = s: *test_stream;
+	free(stream.stream.name);
+};
diff --git a/io/+test/strings.ha b/io/+test/strings.ha
@@ -0,0 +1,44 @@
+use encoding::utf8;
+use rt;
+
+// Read-only stream over an in-memory buffer; buf shrinks as it is consumed.
+type bufstream = struct {
+	stream: stream,
+	buf: []u8,
+};
+
+@test fn getrune() void = {
+	let bs = bufstream {
+		stream = stream {
+			name = "buffer",
+			reader = &bs_read,
+			...
+		},
+		buf = [
+			0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81,
+			0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF, 0x00,
+		],
+	};
+	let in = &bs.stream;
+	const expected: [_](rune | utf8::invalid | EOF | error) = [
+		'こ', 'ん', 'に', 'ち', 'は', '\0', EOF,
+	];
+	for (let i = 0z; i < len(expected); i += 1) {
+		let want = expected[i];
+		match (getrune(in)) {
+			r: rune => assert(want is rune && want as rune == r),
+			EOF => assert(want is EOF),
+			* => abort(),
+		};
+	};
+};
+
+fn bs_read(s: *stream, buf: []u8) (size | error | EOF) = {
+	let stream = s: *bufstream;
+	if (len(stream.buf) == 0) {
+		return EOF;
+	};
+	const n = if (len(buf) > len(stream.buf)) len(stream.buf) else len(buf);
+	buf[..n] = stream.buf[..n];
+	stream.buf = stream.buf[n..];
+	return n;
+};
diff --git a/io/arch+aarch64.ha b/io/arch+aarch64.ha
@@ -0,0 +1 @@
+export type off = i64;
diff --git a/io/arch+x86_64.ha b/io/arch+x86_64.ha
@@ -0,0 +1 @@
+export type off = i64;
diff --git a/io/copy.ha b/io/copy.ha
@@ -0,0 +1,28 @@
+// Copies data from one stream into another. Note that this function will never
+// return if the source stream is infinite.
+export fn copy(dest: *stream, src: *stream) (error | size) = {
+	match (dest.copier) {
+		null => void,
+		c: *copier => match (c(dest, src)) {
+			err: error => match (err) {
+				unsupported => void, // Use fallback
+				* => return err,
+			},
+			s: size => return s,
+		},
+	};
+
+	let w = 0z;
+	static let buf: [4096]u8 = [0...];
+	for (true) {
+		match (read(src, buf[..])?) {
+			// Keep writing until the whole chunk is accepted, so
+			// that short writes do not lose data.
+			n: size => for (let i = 0z; i < n) {
+				let r = write(dest, buf[i..n])?;
+				w += r;
+				i += r;
+			},
+			EOF => break,
+		};
+	};
+	return w;
+};
diff --git a/io/limit.ha b/io/limit.ha
@@ -0,0 +1,60 @@
+use strings;
+
+// Overlay stream which forwards to source until limit bytes have been
+// transferred.
+type limited_stream = struct {
+	stream: stream,
+	source: *stream,
+	limit: size,
+};
+
+fn limited_stream_new(source: *stream, limit: size) *limited_stream = {
+	return alloc(limited_stream {
+		stream = stream {
+			name = strings::dup(source.name),
+			closer = &limited_close,
+			...
+		},
+		source = source,
+		limit = limit,
+	});
+};
+
+// Create an overlay stream that only allows a limited amount of bytes to be
+// read from the underlying stream.
+export fn limit_reader(source: *stream, limit: size) *stream = {
+	let stream = limited_stream_new(source, limit);
+	stream.stream.reader = &limited_read;
+	return &stream.stream;
+};
+
+// Create an overlay stream that only allows a limited amount of bytes to be
+// written to the underlying stream.
+export fn limit_writer(source: *stream, limit: size) *stream = {
+	let stream = limited_stream_new(source, limit);
+	stream.stream.writer = &limited_write;
+	return &stream.stream;
+};
+
+// Reads at most the remaining limit from the source. Only the number of bytes
+// the source actually returned is charged against the limit, so short reads,
+// EOF and errors do not consume it.
+fn limited_read(s: *stream, buf: []u8) (size | EOF | error) = {
+	let stream = s: *limited_stream;
+	if (len(buf) > stream.limit) {
+		buf = buf[..stream.limit];
+	};
+	let n = match (read(stream.source, buf)?) {
+		EOF => return EOF,
+		n: size => n,
+	};
+	stream.limit -= n;
+	return n;
+};
+
+// Writes at most the remaining limit to the source. Only the number of bytes
+// the source actually accepted is charged against the limit.
+fn limited_write(s: *stream, buf: const []u8) (size | error) = {
+	let stream = s: *limited_stream;
+	let slice = if (len(buf) > stream.limit) {
+		buf[..stream.limit];
+	} else {
+		buf[..];
+	};
+	let n = write(stream.source, slice)?;
+	stream.limit -= n;
+	return n;
+};
+
+// Frees the duplicated name and the overlay itself; the source stream is left
+// open.
+fn limited_close(s: *stream) void = {
+	free(s.name);
+	free(s);
+};
diff --git a/io/println.ha b/io/println.ha
@@ -0,0 +1,24 @@
+use rt;
+
+// TEMP: This is due to be rewritten to be less shit
+// Writes the given strings to file descriptor 1, separated by single spaces and
+// followed by a newline.
+export fn println(msgs: str...) void = {
+	for (let i = 0z; i < len(msgs); i += 1) {
+		let msg = msgs[i];
+		rt::write(1, msg: *const char, len(msg));
+		if (i + 1 < len(msgs)) {
+			rt::write(1, " ": *const char, 1);
+		};
+	};
+	rt::write(1, "\n": *const char, 1);
+};
+
+// Writes the given strings to file descriptor 2, separated by single spaces and
+// followed by a newline.
+export fn errorln(msgs: str...) void = {
+	for (let i = 0z; i < len(msgs); i += 1) {
+		let msg = msgs[i];
+		rt::write(2, msg: *const char, len(msg));
+		if (i + 1 < len(msgs)) {
+			rt::write(2, " ": *const char, 1);
+		};
+	};
+	rt::write(2, "\n": *const char, 1);
+};
diff --git a/io/stream.ha b/io/stream.ha
@@ -0,0 +1,84 @@
+// A stream of bytes which supports some subset of read, write, close, or seek
+// operations. To create a custom stream, embed this type as the first member of
+// a struct with user-specific data and fill out these fields as appropriate.
+//
+// 	type my_stream = struct {
+// 		stream: io::stream,
+// 		fd: int,
+// 	};
+//
+// 	fn open(path: str) *io::stream = {
+// 		let fd = // ...
+// 		let stream = alloc(my_stream {
+// 			stream = io::stream {
+// 				name = strings::dup(path),
+// 				reader = &my_stream_read,
+// 				writer = &my_stream_write,
+// 				closer = null,
+// 				...
+// 			},
+// 			fd = fd,
+// 		});
+// 		return &stream.stream;
+// 	};
+export type stream = struct {
+	name: str,
+	reader: nullable *reader,
+	writer: nullable *writer,
+	closer: nullable *closer,
+	copier: nullable *copier,
+	seeker: nullable *seeker,
+};
+
+// Reads up to len(buf) bytes from the reader into the given buffer, returning
+// the number of bytes read.
+export fn read(s: *stream, buf: []u8) (size | EOF | error) = {
+	return match (s.reader) {
+		null => unsupported,
+		r: *reader => r(s, buf),
+	};
+};
+
+// Writes up to len(buf) bytes to the stream from the given buffer, returning
+// the number of bytes written.
+export fn write(s: *stream, buf: const []u8) (size | error) = {
+	return match (s.writer) {
+		null => unsupported,
+		w: *writer => w(s, buf),
+	};
+};
+
+// Closes the stream.
+export fn close(s: *stream) (error | void) = {
+	return match (s.closer) {
+		null => unsupported,
+		c: *closer => c(s),
+	};
+};
+
+// Sets the offset within the stream.
+export fn seek(s: *stream, off: off, w: whence) (off | error) = { + return match (s.seeker) { + null => unsupported, + sk: *seeker => sk(s, off, w), + }; +}; + +// Returns the current offset within the stream. +export fn tell(s: *stream) (off | error) = { + // Implemented as a seek of zero bytes relative to the current offset. + return match (s.seeker) { + null => unsupported, + sk: *seeker => sk(s, 0, whence::CUR), + }; +}; + +// Backing storage for [empty]. +let _empty: io::stream = io::stream { + reader = &empty_read, + writer = &empty_write, + ... +}; + +// A [stream] which always reads EOF and discards any writes. +export let empty: *io::stream = &_empty; + +fn empty_read(s: *stream, buf: []u8) (size | EOF | error) = EOF; + +fn empty_write(s: *stream, buf: const []u8) (size | error) = len(buf); diff --git a/io/strings.ha b/io/strings.ha @@ -0,0 +1,34 @@ +use encoding::utf8; +use types; + +// TODO: Do we want some kind of io::text_stream? + +// Reads a rune from a UTF-8 stream. +export fn getrune(in: *io::stream) (rune | utf8::invalid | EOF | error) = { + let b: [4]u8 = [0...]; + // Read the first byte on its own; the sequence length is derived from it. + match (read(in, b[..1])) { + e: (error | EOF) => return e, + n: size => assert(n == 1), + }; + + const sz = utf8::utf8sz(b[0]); + if (sz == types::SIZE_MAX) { + return utf8::invalid; + }; + + if (sz == 1) { + return b[0]: u32: rune; + }; + + // Read the remaining sz - 1 bytes; a shorter read fails the assertion. + match (read(in, b[1..sz])) { + e: (error | EOF) => return e, + n: size => assert(n == sz - 1), + }; + + let dec = utf8::decode(b[..sz]); + return match (utf8::next(&dec)) { + r: rune => r, + utf8::invalid => utf8::invalid, + (void | utf8::more) => EOF, + }; +}; diff --git a/io/tee.ha b/io/tee.ha @@ -0,0 +1,34 @@ +type tee_stream = struct { + stream: stream, + source: *stream, + sink: *stream, +}; + +// Creates a reader which copies reads into a sink before forwarding them to the +// caller. Does not close the secondary streams when the tee stream is closed. +export fn tee(source: *stream, sink: *stream) *stream = { + return alloc(tee_stream { + stream = stream { + reader = &tee_read, + closer = &tee_close, + ... 
+ }, + source = source, + sink = sink, + }): *io::stream; +}; + +fn tee_read(s: *stream, buf: []u8) (size | EOF | error) = { + let s = s: *tee_stream; + let z = match (read(s.source, buf)) { + err: error => return err, + EOF => return EOF, + z: size => z, + }; + // Write all z bytes to the sink, continuing after short writes. + for (let n = 0z; n < z) { + n += write(s.sink, buf[n..z])?; + }; + return z; +}; + +// Frees the tee stream only; source and sink are left untouched. +fn tee_close(s: *stream) void = free(s); diff --git a/io/types.ha b/io/types.ha @@ -0,0 +1,78 @@ +// An error produced by the underlying source. +export type os_error = struct { + string: *fn(data: *void) str, + data: *void, +}!; + +// An error indicating that the underlying stream has been closed. +export type closed = void!; + +// An error indicating that the requested operation is not supported. +export type unsupported = void!; + +// Any error which may be returned from an I/O function. +export type error = (os_error | closed | unsupported)!; + +// Indicates an end-of-file condition. +export type EOF = void; + +// Converts an I/O error into a user-friendly string. +export fn errstr(err: error) str = { + return match (err) { + err: os_error => err.string(err.data), + unsupported => "The requested operation is not supported", + closed => "This stream has been closed", + }; +}; + +// Used to indicate if a stream should be used for reading, or writing, or both. +export type mode = enum u8 { + NONE = 0, + READ = 1 << 0, + WRITE = 1 << 1, + RDWR = READ | WRITE, +}; + +// From "whence" a seek operation should occur. +export type whence = enum { + SET = 0, + CUR = 1, + END = 2, +}; + +// The interface for a stream which can be read from. Reads up to len(buf) +// bytes from the reader into the given buffer, returning the number of bytes +// read or an error. +export type reader = fn(s: *stream, buf: []u8) (size | EOF | error); + +// The interface for a stream which can be written to. Writes up to len(buf) +// bytes to the writer from the given buffer, returning the number of bytes +// written or an error. 
+export type writer = fn(s: *stream, buf: const []u8) (size | error); + +// The interface for a stream which can be closed. This function should close +// the underlying resources and free everything except for the stream pointer +// itself. The other stream functions may be called after close is called; it is +// their responsibility to return [io::closed] in this case. +export type closer = fn(s: *stream) void; + +// The interface for a stream which has first-class support for copying data +// from another stream. Often this only works if the second stream is of the +// same underlying stream type. This is optional, [io::copy] still works even +// with a stream which does not implement this (it falls back to calling read +// and write in a loop). +// +// Returns the number of bytes copied, or an error if one occurred. Do not close +// either stream. If the operation is unsupported for this particular pair of +// streams, return [io::unsupported] to have [io::copy] proceed with its +// fallback implementation. +export type copier = fn(to: *stream, from: *stream) (size | error); + +// The interface for a stream which can be seeked. Sets the offset for the next +// read or write to offset, interpreted according to whence: +// whence::SET means relative to the start of the file, +// whence::CUR means relative to the current offset, and +// whence::END means relative to the end. +// +// Returns the new offset relative to the start or an error. +export type seeker = fn(s: *stream, off: off, w: whence) (off | error); diff --git a/math/random/random.ha b/math/random/random.ha @@ -0,0 +1,35 @@ +// math::random provides a pseudorandom number generator, which yields a +// deterministic sequence of pseudo-random numbers based on a seed value. +// +// Beware! This module is NOT suitable for generating genuinely random data for +// cryptographic use. See [crypto::random] for cryptographically secure random +// number generation. + +// State for a pseudorandom number generator. 
+export type random = u64; + +// Initializes a pseudorandom number generator with a given seed. This seed will +// yield the same sequence of psuedo-random numbers if used again. +export fn init(seed: u64) random = seed; + +// Returns a psuedo-random 64-bit unsigned integer. +export fn next(r: *random) u64 = { + // SplitMix64 + *r += 0x9e3779b97f4a7c15; + *r = (*r ^ *r >> 30) * 0xbf58476d1ce4e5b9; + *r = (*r ^ *r >> 27) * 0x94d049bb133111eb; + return *r ^ *r >> 31; +}; + +@test fn rng() void = { + let r = init(0); + let expected: [_]u64 = [ + 16294208416658607535, + 15501543990041496116, + 15737388954706874752, + 15091258616627000950, + ]; + for (let i = 0z; i < len(expected); i += 1) { + assert(next(&r) == expected[i]); + }; +}; diff --git a/os/+linux/dirfdfs.ha b/os/+linux/dirfdfs.ha @@ -0,0 +1,356 @@ +use bytes; +use encoding::utf8; +use fs; +use io; +use path; +use rt; +use strings; + +// Controls how symlinks are followed (or not) in a dirfd filesystem. Support +// for this feature varies, you should gate usage of this enum behind a build +// tag. +// +// Note that on Linux, specifying BENEATH or IN_ROOT will also disable magic +// symlinks. +export type resolve_flags = enum { + NORMAL, + + // Does not allow symlink resolution to occur for any symlinks which + // would refer to any anscestor of the fd directory. This disables all + // absolute symlinks, and any call to open or create with an absolute + // path. + BENEATH, + + // Treat the directory fd as the root directory. This affects + // open/create for absolute paths, as well as absolute path resolution + // of symlinks. The effects are similar to chroot. + IN_ROOT, + + // Disables symlink resolution entirely. + NO_SYMLINKS, + + // Disallows traversal of mountpoints during path resolution. This is + // not recommended for general use, as bind mounts are extensively used + // on many systems. 
+ NO_XDEV, +}; + +type os_filesystem = struct { + fs: fs::fs, + dirfd: int, + resolve: resolve_flags, +}; + +fn static_dirfdopen(fd: int, filesystem: *os_filesystem) *fs::fs = { + *filesystem = os_filesystem { + fs = fs::fs { + open = &fs_open, + create = &fs_create, + remove = &fs_remove, + iter = &fs_iter, + stat = &fs_stat, + subdir = &fs_subdir, + mkdir = &fs_mkdir, + rmdir = &fs_rmdir, + resolve = &fs_resolve, + ... + }, + dirfd = fd, + }; + return &filesystem.fs; +}; + +// Opens a file descriptor as an [fs::fs]. This file descriptor must be a +// directory file. The file will be closed when the fs is closed. +// +// If no other flags are provided to [fs::open] and [fs::create] when used with +// a dirfdfs, [fs::flags::NOCTTY] and [fs::flags::CLOEXEC] are used when opening +// the file. If you pass your own flags, it is recommended that you add these +// unless you know that you do not want them. +export fn dirfdopen(fd: int, resolve: resolve_flags...) *fs::fs = { + let ofs = alloc(os_filesystem { ... }); + let fs = static_dirfdopen(fd, ofs); + for (let i = 0z; i < len(resolve); i += 1) { + ofs.resolve |= resolve[i]; + }; + fs.close = &fs_close; + return fs; +}; + +// Clones a dirfd filesystem, optionally adding additional [resolve_flags] +// constraints. +export fn dirfs_clone(fs: *fs::fs, resolve: resolve_flags...) 
*fs::fs = { + assert(fs.open == &fs_open); + let fs = fs: *os_filesystem; + let new = alloc(*fs); + for (let i = 0z; i < len(resolve); i += 1) { + new.resolve |= resolve[i]; + }; + new.dirfd = rt::dup(new.dirfd) as int; + return &new.fs; +}; + +fn _fs_open( + fs: *fs::fs, + path: str, + mode: io::mode, + oh: *rt::open_how, +) (*io::stream | fs::error) = { + let fs = fs: *os_filesystem; + + oh.resolve = 0u64; + if (fs.resolve & resolve_flags::BENEATH == resolve_flags::BENEATH) { + oh.resolve |= rt::RESOLVE_BENEATH | rt::RESOLVE_NO_MAGICLINKS; + }; + if (fs.resolve & resolve_flags::IN_ROOT == resolve_flags::IN_ROOT) { + oh.resolve |= rt::RESOLVE_IN_ROOT | rt::RESOLVE_NO_MAGICLINKS; + }; + if (fs.resolve & resolve_flags::NO_SYMLINKS == resolve_flags::NO_SYMLINKS) { + oh.resolve |= rt::RESOLVE_NO_SYMLINKS; + }; + if (fs.resolve & resolve_flags::NO_XDEV == resolve_flags::NO_XDEV) { + oh.resolve |= rt::RESOLVE_NO_XDEV; + }; + + let fd = match (rt::openat2(fs.dirfd, path, oh, size(rt::open_how))) { + err: rt::errno => return errno_to_io(err), + fd: int => fd, + }; + + return fdopen(fd, path, mode); +}; + +fn fs_open( + fs: *fs::fs, + path: str, + flags: fs::flags... +) (*io::stream | fs::error) = { + let oflags = 0; + let iomode = io::mode::NONE; + if (len(flags) == 0z) { + oflags |= (fs::flags::NOCTTY + | fs::flags::CLOEXEC + | fs::flags::RDONLY): int; + }; + for (let i = 0z; i < len(flags); i += 1z) { + oflags |= flags[i]: int; + }; + + if (oflags: fs::flags & fs::flags::RDWR == fs::flags::RDWR) { + iomode = io::mode::RDWR; + } else if (oflags: fs::flags & fs::flags::RDONLY == fs::flags::RDONLY) { + iomode = io::mode::READ; + } else if (oflags: fs::flags & fs::flags::RDONLY == fs::flags::RDONLY) { + iomode = io::mode::WRITE; + }; + + let oh = rt::open_how { + flags = oflags: u64, + ... + }; + return _fs_open(fs, path, iomode, &oh); +}; + +fn fs_create( + fs: *fs::fs, + path: str, + mode: fs::mode, + flags: fs::flags... 
+) (*io::stream | fs::error) = { + // If no flags are given, NOCTTY, CLOEXEC and WRONLY are used. CREATE is + // always added, and mode is passed to openat2 through open_how. + let oflags = 0; + let iomode = io::mode::NONE; + if (len(flags) == 0z) { + oflags |= (fs::flags::NOCTTY + | fs::flags::CLOEXEC + | fs::flags::WRONLY): int; + }; + for (let i = 0z; i < len(flags); i += 1z) { + oflags |= flags[i]: int; + }; + oflags |= fs::flags::CREATE: int; + + // WRONLY is tested before RDONLY so that write-only files (the default + // here) receive io::mode::WRITE. RDONLY is presumably the zero-valued + // flag, which every value matches, so it has to be the last case (TODO + // confirm against fs::flags). + if (oflags: fs::flags & fs::flags::RDWR == fs::flags::RDWR) { + iomode = io::mode::RDWR; + } else if (oflags: fs::flags & fs::flags::WRONLY == fs::flags::WRONLY) { + iomode = io::mode::WRITE; + } else if (oflags: fs::flags & fs::flags::RDONLY == fs::flags::RDONLY) { + iomode = io::mode::READ; + }; + + let oh = rt::open_how { + flags = oflags: u64, + mode = mode: u64, + ... + }; + return _fs_open(fs, path, iomode, &oh); +}; + +// Unlinks path relative to the filesystem's dirfd. +fn fs_remove(fs: *fs::fs, path: str) (void | fs::error) = { + let fs = fs: *os_filesystem; + match (rt::unlinkat(fs.dirfd, path, 0)) { + err: rt::errno => return errno_to_io(err), + void => void, + }; +}; + +// Stats path relative to the filesystem's dirfd without following a trailing +// symlink (AT_SYMLINK_NOFOLLOW), and reports mode, uid, gid, size and inode. +fn fs_stat(fs: *fs::fs, path: str) (fs::filestat | fs::error) = { + let fs = fs: *os_filesystem; + let st = rt::st { ... }; + match (rt::fstatat(fs.dirfd, path, &st, rt::AT_SYMLINK_NOFOLLOW)) { + err: rt::errno => return errno_to_io(err), + void => void, + }; + return fs::filestat { + mask = fs::stat_mask::UID + | fs::stat_mask::GID + | fs::stat_mask::SIZE + | fs::stat_mask::INODE, + mode = st.mode: fs::mode, + uid = st.uid, + gid = st.gid, + sz = st.sz, + inode = st.ino, + }; +}; + +fn fs_subdir(fs: *fs::fs, path: str) (*fs::fs | fs::error) = { + let fs = fs: *os_filesystem; + let oh = rt::open_how { + flags = (rt::O_RDONLY | rt::O_CLOEXEC | rt::O_DIRECTORY): u64, + ... 
+ }; + + let fd: int = match (rt::openat2(fs.dirfd, path, + &oh, size(rt::open_how))) { + err: rt::errno => return errno_to_io(err), + n: int => n, + }; + + return dirfdopen(fd); +}; + +fn fs_rmdir(fs: *fs::fs, path: str) (void | fs::error) = { + let fs = fs: *os_filesystem; + match (rt::unlinkat(fs.dirfd, path, rt::AT_REMOVEDIR)) { + err: rt::errno => return errno_to_io(err), + void => void, + }; +}; + +fn fs_mkdir(fs: *fs::fs, path: str) (void | fs::error) = { + let fs = fs: *os_filesystem; + return match (rt::mkdirat(fs.dirfd, path, 0o755)) { + err: rt::errno => switch (err) { + rt::EEXIST => fs::exists, + * => errno_to_io(err), + }, + void => void, + }; +}; + +fn resolve_part(parts: *[]str, part: str) void = { + if (part == ".") { + // no-op + void; + } else if (part == "..") { + // XXX: We should not have to dereference this + if (len(*parts) != 0) { + delete(parts[len(*parts) - 1]); + }; + } else { + append(*parts, part); + }; +}; + +fn fs_resolve(fs: *fs::fs, path: str) str = { + let parts: []str = []; + if (!path::abs(path)) { + let iter = path::iter(getcwd()); + for (true) match (path::next(&iter)) { + void => break, + p: str => resolve_part(&parts, p), + }; + }; + let iter = path::iter(path); + for (true) match (path::next(&iter)) { + void => break, + p: str => resolve_part(&parts, p), + }; + return path::join(parts...); +}; + +fn fs_close(fs: *fs::fs) void = { + let fs = fs: *os_filesystem; + rt::close(fs.dirfd); +}; + +def BUFSIZ: size = 2048; + +// Based on musl's readdir +type os_iterator = struct { + iter: fs::iterator, + fd: int, + buf_pos: size, + buf_end: size, + buf: [BUFSIZ]u8, +}; + +fn fs_iter(fs: *fs::fs, path: str) (*fs::iterator | fs::error) = { + let fs = fs: *os_filesystem; + let oh = rt::open_how { + flags = (rt::O_RDONLY | rt::O_CLOEXEC | rt::O_DIRECTORY): u64, + ... 
+ }; + let fd: int = match (rt::openat2(fs.dirfd, path, + &oh, size(rt::open_how))) { + err: rt::errno => { + if (err: int == rt::ENOTDIR) { + return fs::wrongtype; + }; + return errno_to_io(err); + }, + n: int => n, + }; + + let iter = alloc(os_iterator { + iter = fs::iterator { + next = &iter_next, + }, + fd = fd, + ... + }); + return &iter.iter; +}; + +fn iter_next(iter: *fs::iterator) (fs::dirent | void) = { + let iter = iter: *os_iterator; + if (iter.buf_pos >= iter.buf_end) { + let n = rt::getdents64(iter.fd, &iter.buf, BUFSIZ) as size; + if (n == 0) { + rt::close(iter.fd); + free(iter); + return; + }; + iter.buf_end = n; + iter.buf_pos = 0; + }; + let de = &iter.buf[iter.buf_pos]: *rt::dirent64; + iter.buf_pos += de.d_reclen; + let name = strings::from_c(&de.d_name: *const char); + + let ftype: fs::mode = switch (de.d_type) { + rt::DT_UNKNOWN => fs::mode::UNKNOWN, + rt::DT_FIFO => fs::mode::FIFO, + rt::DT_CHR => fs::mode::CHR, + rt::DT_DIR => fs::mode::DIR, + rt::DT_BLK => fs::mode::BLK, + rt::DT_REG => fs::mode::REG, + rt::DT_LNK => fs::mode::LINK, + rt::DT_SOCK => fs::mode::SOCK, + * => fs::mode::UNKNOWN, + }; + return fs::dirent { + name = name, + ftype = ftype, + }; +}; diff --git a/os/+linux/environ.ha b/os/+linux/environ.ha @@ -0,0 +1,112 @@ +use bytes; +use rt; +use strings; +use types; + +// The command line arguments provided to the program. By convention, the first +// member is usually the name of the program. +export let args: []str = []; + +// Statically allocate arg strings if there are few enough arguments, saves a +// syscall if we don't need it. 
+let args_static: [32]str = [""...]; + +@init fn init_environ() void = { + if (rt::argc < len(args_static)) { + args = args_static[..rt::argc]; + for (let i = 0z; i < rt::argc; i += 1) { + args[i] = strings::from_c(rt::argv[i]); + }; + } else { + args = alloc([], rt::argc); + for (let i = 0z; i < rt::argc; i += 1) { + append(args, strings::from_c(rt::argv[i])); + }; + }; + +}; + +@fini fn fini_environ() void = { + if (rt::argc >= len(args_static)) { + free(args); + }; +}; + +// Looks up an environment variable and returns its value, or void if unset. +export fn getenv(name: const str) (str | void) = { + const name_b = strings::to_utf8(name); + for (let i = 0z; rt::envp[i] != null; i += 1) { + const item = rt::envp[i]: *[*]u8; + const eq: size = match (bytes::index(item[..], '=': u32: u8)) { + void => abort("Environment violates System-V invariants"), + i: size => i, + }; + if (bytes::equal(name_b, item[..eq])) { + const ln = strings::c_strlen(item: *const char); + return strings::from_utf8(item[eq+1..ln]); + }; + }; +}; + +// Looks up an environment variable and returns its value, or a default value if +// unset. +export fn tryenv(name: const str, default: str) str = match (getenv(name)) { + s: str => s, + void => default, +}; + +let envp: []str = []; + +// Returns a slice of the environment strings in the form KEY=VALUE. +export fn getenvs() []str = { + if (len(envp) != 0) { + return envp; + }; + for (let i = 0z; rt::envp[i] != null; i += 1) { + append(envp, strings::from_c(rt::envp[i]: *const char)); + }; + return envp; +}; + +let uts: rt::utsname = rt::utsname { ... 
}; +let uts_valid: bool = false; + +// Returns the host kernel name +export fn sysname() const str = { + if (!uts_valid) { + rt::uname(&uts) as void; + // Remember that uts is populated so later calls skip uname + uts_valid = true; + }; + return strings::from_c(&uts.sysname: *const char); +}; + +// Returns the host system hostname +export fn hostname() const str = { + if (!uts_valid) { + rt::uname(&uts) as void; + uts_valid = true; + }; + return strings::from_c(&uts.nodename: *const char); +}; + +// Returns the host kernel version +export fn release() const str = { + if (!uts_valid) { + rt::uname(&uts) as void; + uts_valid = true; + }; + return strings::from_c(&uts.release: *const char); +}; + +// Returns the host operating system version +export fn version() const str = { + if (!uts_valid) { + rt::uname(&uts) as void; + uts_valid = true; + }; + return strings::from_c(&uts.version: *const char); +}; + +// Returns the host CPU architecture +export fn machine() const str = { + if (!uts_valid) { + rt::uname(&uts) as void; + uts_valid = true; + }; + return strings::from_c(&uts.machine: *const char); +}; diff --git a/os/+linux/errors.ha b/os/+linux/errors.ha @@ -0,0 +1,16 @@ +use io; +use rt; + +fn io_errstr(data: *void) str = { + const errno = data: uintptr: int: rt::errno; + return rt::errstr(errno); +}; + +// TODO: Implement the inverse of this and make it public +fn errno_to_io(err: rt::errno) io::error = { + let err = io::os_error { + string = &io_errstr, + data = err: uintptr: *void, + }; + return err: io::error; +}; diff --git a/os/+linux/exit.ha b/os/+linux/exit.ha @@ -0,0 +1,4 @@ +use rt; + +// Exit the program with the provided status code. +export @noreturn fn exit(status: int) void = rt::exit(status); diff --git a/os/+linux/fdstream.ha b/os/+linux/fdstream.ha @@ -0,0 +1,131 @@ +use io; +use rt; +use strings; + +type fd_stream = struct { + stream: io::stream, + fd: int, +}; + +fn static_fdopen( + fd: int, name: str, mode: io::mode, stream: *fd_stream, +) *io::stream = { + *stream = fd_stream { + stream = io::stream { + name = name, + closer = &fd_close_static, + copier = &fd_copy, + seeker = &fd_seek, + ... 
+ }, + fd = fd, + }; + if (mode & io::mode::READ == io::mode::READ) { + stream.stream.reader = &fd_read; + }; + if (mode & io::mode::WRITE == io::mode::WRITE) { + stream.stream.writer = &fd_write; + }; + return &stream.stream; +}; + +// Opens a Unix file descriptor as an io::stream. The stream and a copy of its +// name are heap-allocated and released by [fd_close]. +export fn fdopen(fd: int, name: str, mode: io::mode) *io::stream = { + let stream = alloc(fd_stream { ... }); + static_fdopen(fd, strings::dup(name), mode, stream); + stream.stream.closer = &fd_close; + return &stream.stream; +}; + +// Reports whether any of the stream's callbacks is one of the fd_* handlers +// from this file. +fn is_fdstream(s: *io::stream) bool = { + return s.reader == &fd_read + || s.writer == &fd_write + || s.closer == &fd_close + || s.closer == &fd_close_static + || s.copier == &fd_copy; +}; + +// Returns the file descriptor for a given [io::stream]. If there is no fd +// associated with this stream, void is returned. +export fn streamfd(s: *io::stream) (int | void) = { + if (!is_fdstream(s)) { + return; + }; + let stream = s: *fd_stream; + return stream.fd; +}; + +// Reads into buf from the stream's fd; a zero-length read is reported as EOF. +fn fd_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = { + let stream = s: *fd_stream; + return match (rt::read(stream.fd, buf: *[*]u8, len(buf))) { + err: rt::errno => errno_to_io(err), + n: size => switch (n) { + 0 => io::EOF, + * => n, + }, + }; +}; + +// Writes buf to the stream's fd and returns the number of bytes written. +fn fd_write(s: *io::stream, buf: const []u8) (size | io::error) = { + let stream = s: *fd_stream; + return match (rt::write(stream.fd, buf: *const [*]u8, len(buf))) { + err: rt::errno => errno_to_io(err), + n: size => n, + }; +}; + +// Closer for streams created by [fdopen]: closes the fd and frees both the +// duplicated name and the heap-allocated stream. +fn fd_close(s: *io::stream) void = { + let stream = s: *fd_stream; + rt::close(stream.fd); + free(s.name); + free(stream); +}; + +// Closer for streams set up by static_fdopen. The stream storage and name are +// supplied by the caller (for example the static stdin/stdout/stderr +// objects), so only the fd is closed and nothing is freed. +fn fd_close_static(s: *io::stream) void = { + let stream = s: *fd_stream; + rt::close(stream.fd); +}; + +def SENDFILE_MAX: size = 2147479552z; + +fn fd_copy(to: *io::stream, from: *io::stream) (size | io::error) = { + if (!is_fdstream(from)) { + return io::unsupported; + }; + + let to = to: *fd_stream, from = from: *fd_stream; + let sum = 0z; + for (true) { + 
let n = match (rt::sendfile(to.fd, from.fd, + null, SENDFILE_MAX)) { + err: rt::errno => switch (err) { + rt::EINVAL => { + if (sum == 0) { + return io::unsupported; + }; + return errno_to_io(err); + }, + * => return errno_to_io(err), + }, + n: size => switch (n) { + 0 => return sum, + * => n, + }, + }; + sum += n; + }; + return sum; +}; + +fn fd_seek( + s: *io::stream, + off: io::off, + whence: io::whence, +) (io::off | io::error) = { + let stream = s: *fd_stream; + return match (rt::lseek(stream.fd, off: i64, whence: uint)) { + err: rt::errno => errno_to_io(err), + n: i64 => n: io::off, + }; +}; diff --git a/os/+linux/fs.ha b/os/+linux/fs.ha @@ -0,0 +1,50 @@ +use fs; +use path; +use rt; +use strings; + +@init fn init() void = { + static let root_fs = os_filesystem { ... }; + let dirfd = rt::open("/", + rt::O_RDONLY | rt::O_DIRECTORY | rt::O_CLOEXEC, + 0u) as int; + root = static_dirfdopen(dirfd, &root_fs); + + static let cwd_fs = os_filesystem { ... }; + cwd = static_dirfdopen(rt::AT_FDCWD, &cwd_fs); +}; + +// Returns the current working directory. The return value is statically +// allocated and must be duplicated (see [strings::dup]) before calling getcwd +// again. +export fn getcwd() str = strings::from_c(rt::getcwd() as *const char); + +// Change the current working directory. +export fn chdir(target: (*fs::fs | str)) (void | fs::error) = { + const path: str = match (target) { + fs: *fs::fs => { + assert(fs.open == &fs_open); + let fs = fs: *os_filesystem; + return match (rt::fchdir(fs.dirfd)) { + err: rt::errno => errno_to_io(err): fs::error, + void => void, + }; + }, + s: str => s, + }; + return match (rt::chdir(path)) { + err: rt::errno => errno_to_io(err): fs::error, + void => void, + }; +}; + +// Changes the root directory of the process. Generally requires the caller to +// have root or otherwise elevated permissions. +// +// This function is not appropriate for sandboxing. 
+export fn chroot(target: str) (void | fs::error) = { + return match (rt::chroot(target)) { + err: rt::errno => errno_to_io(err): fs::error, + void => void, + }; +}; diff --git a/os/+linux/open.ha b/os/+linux/open.ha @@ -0,0 +1,27 @@ +use fs; +use io; +use path; + +// Opens a file. +// +// If no flags are provided, [fs::flags::RDONLY], [fs::flags::NOCTTY], +// [fs::flags::CLOEXEC] are used when opening the file. If you pass your own +// flags, it is recommended that you add the latter two unless you know that you +// do not want them. +export fn open(path: str, flags: fs::flags...) (*io::stream | fs::error) = + fs::open(cwd, path, flags...); + +// Creates a new file and opens it for writing. +// +// If no flags are provided, [fs::flags::WRONLY], [fs::flags::NOCTTY], +// [fs::flags::CLOEXEC] are used when opening the file. If you pass your own +// flags, it is recommended that you add the latter two unless you know that you +// do not want them. +// +// Only the permission bits of the mode are used. If other bits are set, they +// are discarded. +export fn create( + path: str, + mode: fs::mode, + flags: fs::flags... +) (*io::stream | fs::error) = fs::create(cwd, path, mode, flags...); diff --git a/os/+linux/stdfd.ha b/os/+linux/stdfd.ha @@ -0,0 +1,11 @@ +use io; + +let static_stdin: fd_stream = fd_stream { ... }; +let static_stdout: fd_stream = fd_stream { ... }; +let static_stderr: fd_stream = fd_stream { ... }; + +@init fn init_stdfd() void = { + stdin = static_fdopen(0, "<stdin>", io::mode::READ, &static_stdin); + stdout = static_fdopen(1, "<stdout>", io::mode::WRITE, &static_stdout); + stderr = static_fdopen(2, "<stderr>", io::mode::WRITE, &static_stderr); +}; diff --git a/os/exec/+linux.ha b/os/exec/+linux.ha @@ -0,0 +1,24 @@ +// TODO: This file doesn't need to exist once we have working forward +// declarations + +export type platform_cmd = int; + +// Stores information about a child process. 
+export type process = int; + +// Stores information about an exited process. +export type status = struct { + status: int, + // Not all of these members are supported on all operating systems. + // Only utime and stime are guaranteed to be available. + rusage: struct { + // TODO: utime, stime + maxrss: u64, + minflt: u64, + majflt: u64, + inblock: u64, + oublock: u64, + nvcsw: u64, + nivcsw: u64, + }, +}; diff --git a/os/exec/cmd+linux.ha b/os/exec/cmd+linux.ha @@ -0,0 +1,94 @@ +use rt; +use strings; +use os; + +// Forks the current process, returning the pid of the child (to the parent) and +// void (to the child), or an error. +export fn fork() (int | void | error) = match (rt::fork()) { + err: rt::errno => errno_to_os(err), + i: (int | void) => i, +}; + +// Recovers the errno stored in an os_error's data pointer and returns its +// message. +fn errno_errstr(data: *void) str = { + const errno = data: uintptr: int: rt::errno; + return rt::errstr(errno); +}; + +// Wraps an errno in an os_error; the errno value itself is stored in the data +// pointer. +fn errno_to_os(err: rt::errno) os_error = { + return os_error { + string = &errno_errstr, + data = err: uintptr: *void, + }; +}; + +// Checks that path is executable with access(2), then opens it for later +// execution. Any failure of the access check is returned to the caller. +fn open(path: str) (platform_cmd | os_error) = { + match (rt::access(path, rt::X_OK)) { + // Return the error; without the return it is evaluated and discarded + err: rt::errno => return errno_to_os(err), + b: bool => if (!b) { + return errno_to_os(rt::EACCES); + }, + }; + // O_PATH is used because it allows us to use an executable for which we + // have execute permissions, but not read permissions. 
+ return match (rt::open(path, rt::O_PATH, 0u)) { + fd: int => fd, + err: rt::errno => errno_to_os(err), + }; +}; + +fn platform_finish(cmd: *command) void = rt::close(cmd.platform); + +// Builds null-terminated argv and envp arrays from the command and calls +// execveat on the command's file descriptor. Only returns if execveat fails. +// envp stays null when the command's environment is empty. +fn platform_exec(cmd: *command) os_error = { + // We don't worry about freeing the return values from strings::to_c + // because once we exec(2) our heap is fried anyway + let argv: []nullable *const char = alloc([], len(cmd.argv) + 1z); + for (let i = 0z; i < len(cmd.argv); i += 1z) { + append(argv, strings::to_c(cmd.argv[i])); + }; + append(argv, null); + + let envp: nullable *[*]nullable *const char = null; + if (len(cmd.env) != 0) { + let env: []nullable *const char = alloc([], len(cmd.env) + 1); + for (let i = 0z; i < len(cmd.env); i += 1) { + append(env, strings::to_c(cmd.env[i])); + }; + append(env, null); + envp = env: *[*]nullable *const char; + }; + + return errno_to_os(rt::execveat(cmd.platform, strings::c_empty, + argv: *[*]nullable *const char, envp, rt::AT_EMPTY_PATH)); +}; + +// Starts the command in a child process. A CLOEXEC pipe reports exec failure: +// the child writes its errno to the pipe if platform_exec returns, and the +// parent treats a zero-length read as a successful exec. +fn platform_start(cmd: *command) (os_error | process) = { + // TODO: Let the user configure clone more to their taste (e.g. SIGCHLD) + let pipe: [2]int = [0...]; + match (rt::pipe2(&pipe, rt::O_CLOEXEC)) { + err: rt::errno => return errno_to_os(err), + void => void, + }; + + match (rt::clone(null, 0, null, null, 0)) { + err: rt::errno => { + // No child was created; release both ends of the pipe + rt::close(pipe[0]); + rt::close(pipe[1]); + return errno_to_os(err); + }, + pid: int => { + rt::close(pipe[1]); + let errno: int = 0; + // Read first, then close the read end on every path so the + // parent does not leak a descriptor per started process + let res = rt::read(pipe[0], &errno, size(int)); + rt::close(pipe[0]); + return match (res) { + err: rt::errno => errno_to_os(err), + n: size => switch (n) { + size(int) => errno_to_os(errno), + * => abort("Unexpected rt::read result"), + 0 => pid, + }, + }; + }, + void => { + rt::close(pipe[0]); + let err = platform_exec(cmd); + let errno = err.data: uintptr: int; + rt::write(pipe[1], &errno, size(int)); + rt::exit(1); + }, + }; +}; diff --git a/os/exec/cmd.ha b/os/exec/cmd.ha @@ -0,0 +1,119 @@ +use ascii; +use os; +use strings; + +// Prepares a [command] based on its name and a list of arguments. 
The argument +// list should not start with the command name; it will be added for you. The +// argument list is borrowed from the strings you pass into this command. +// +// If 'name' does not contain a '/', the $PATH will be consulted to find the +// correct executable. If path resolution fails, nocmd is returned. +// +// let cmd = exec::cmd("echo", "hello world"); +// let proc = exec::start(&cmd); +// let status = exec::wait(&proc); +// assert(exec::status(status) == 0); +// +// By default, the new command will inherit the current process's environment. +export fn cmd(name: str, args: str...) (command | error) = { + let env = os::getenvs(); + let cmd = command { + platform: platform_cmd = + if (strings::contains(name, '/')) match (open(name)) { + err: os_error => return nocmd, + p: platform_cmd => p, + } else match (lookup(name)) { + void => return nocmd, + p: platform_cmd => p, + }, + argv = alloc([], len(args) + 1z), + env = alloc([], len(env)), + ... + }; + append(cmd.argv, name, ...args); + append(cmd.env, ...env); + return cmd; +}; + +// Sets the 0th value of argv for this command. It is uncommon to need this. +// The name is borrowed, like the other arguments. +export fn setname(cmd: *command, name: str) void = { + // argv[0] is the borrowed 'name' stored by [cmd] (or by an earlier + // setname), so it is replaced without being freed. + cmd.argv[0] = name; +}; + +// Frees state associated with a command. You only need to call this if you do +// not execute the command with [exec] or [start]; in those cases the state is +// cleaned up for you. +export fn finish(cmd: *command) void = { + platform_finish(cmd); + free(cmd.argv); + // Release the environment slice allocated by [cmd] as well. + // NOTE(review): strings appended by setenv are allocated with + // strings::concat and are presumably still leaked here -- confirm the + // intended ownership of env entries. + free(cmd.env); +}; + +// Executes a prepared command in the current address space, overwriting the +// running process with the new command. +export @noreturn fn exec(cmd: *command) void = { + defer finish(cmd); // Note: doesn't happen if exec succeeds + platform_exec(cmd); + abort("os::exec::exec failed"); +}; + +// Starts a prepared command in a new process. 
+export fn start(cmd: *command) (error | process) = { + defer finish(cmd); + return match (platform_start(cmd)) { + err: os_error => err, + proc: process => proc, + }; +}; + +// Empties the environment variables for the command. By default, the command +// inherits the environment of the parent process. +export fn clearenv(cmd: *command) void = { + cmd.env = []; +}; + +// Adds or sets a variable in the command environment. This does not affect the +// current process environment. The 'key' must be a valid environment variable +// name per POSIX definition 3.235. This includes underscores and alphanumeric +// ASCII characters, and cannot begin with a number. +export fn setenv(cmd: *command, key: str, value: str) void = { + let iter = strings::iter(key); + for (let i = 0z; true; i += 1) match (strings::next(&iter)) { + void => break, + r: rune => if (i == 0) assert(r == '_' || ascii::isalpha(r), + "Invalid environment variable") + else assert(r == '_' || ascii::isalnum(r), + "Invalid environment variable"), + }; + + // XXX: This can be a binary search + let fullkey = strings::concat(key, "="); + defer free(fullkey); + for (let i = 0z; i < len(cmd.env); i += 1) { + if (strings::has_prefix(cmd.env[i], fullkey)) { + delete(cmd.env[i]); + break; + }; + }; + append(cmd.env, strings::concat(fullkey, value)); +}; + +fn lookup(name: str) (platform_cmd | void) = { + const path = match (os::getenv("PATH")) { + void => return, + s: str => s, + }; + let tok = strings::tokenize(path, ":"); + for (true) { + const item = match (strings::next_token(&tok)) { + void => break, + s: str => s, + }; + let path = strings::concat(item, "/", name); + defer free(path); + match (open(path)) { + err: os_error => continue, + p: platform_cmd => return p, + }; + }; +}; diff --git a/os/exec/process+linux.ha b/os/exec/process+linux.ha @@ -0,0 +1,86 @@ +use rt; +use fmt; +// TODO: Add function to wait on all/any children + +fn rusage(st: *status, ru: *rt::rusage) void = { + st.rusage.maxrss = 
ru.ru_maxrss; + st.rusage.minflt = ru.ru_minflt; + st.rusage.majflt = ru.ru_majflt; + st.rusage.inblock = ru.ru_inblock; + st.rusage.oublock = ru.ru_oublock; + st.rusage.nvcsw = ru.ru_nvcsw; + st.rusage.nivcsw = ru.ru_nivcsw; +}; + +// Waits for a process to complete, then returns its status information. If +// wait4 fails, the error is returned instead. +export fn wait(proc: *process) (status | error) = { + let ru: rt::rusage = rt::rusage { ... }; + let st: status = status { ... }; + match (rt::wait4(*proc, &st.status, 0, &ru)) { + // Return the error; without the return it is discarded and a + // zeroed status is reported as if the wait had succeeded + err: rt::errno => return errno_to_os(err), + pid: int => assert(pid == *proc), + }; + rusage(&st, &ru); + return st; +}; + +// Checks for process completion, returning its status information on +// completion, or void if it is still running. If wait4 fails, the error is +// returned instead. +export fn peek(proc: *process) (status | void | error) = { + let ru: rt::rusage = rt::rusage { ... }; + let st: status = status { ... }; + // NOTE(review): wait4 is called with options 0, exactly as in wait, so + // this presumably blocks and the "0 => return void" case below looks + // unreachable; a non-blocking option such as WNOHANG is probably + // intended -- confirm that rt exposes it before changing the call. + match (rt::wait4(*proc, &st.status, 0, &ru)) { + err: rt::errno => return errno_to_os(err), + pid: int => switch (pid) { + 0 => return void, + * => assert(pid == *proc), + }, + }; + rusage(&st, &ru); + return st; +}; + +// The exit status code of a process. +export type exited = int; + +// The signal number which caused a process to terminate. +export type signaled = int; + +// The exit status of a process. +export type exit_status = (exited | signaled); + +// Returns a human friendly string describing the exit status. The result may +// be written into a static buffer which is reused by later calls. +export fn exitstr(status: exit_status) const str = { + static let buf: [1024]u8 = [0...]; + return match (status) { + i: exited => switch (i) { + 0 => "exited normally", + * => fmt::bsprintf(buf, "exited with status {}", i: int), + }, + // TODO: Add signal name + s: signaled => fmt::bsprintf(buf, "exited with signal {}", s: int), + }; +}; + +// Returns the exit status of a completed process. 
+export fn exit(stat: *status) exit_status = { + if (rt::wifexited(stat.status)) { + return rt::wexitstatus(stat.status): exited; + }; + if (rt::wifsignaled(stat.status)) { + return rt::wtermsig(stat.status): signaled; + }; + abort("Unexpected exit status"); +}; + +// Checks the exit status of a completed process, returning void if successful, +// or its status code as an error type if not. +export fn check(stat: *status) (void | exit_status!) = { + if (rt::wifexited(stat.status)) { + return switch (rt::wexitstatus(stat.status)) { + 0 => void, + * => exit(stat), + }; + }; + return exit(stat); +}; diff --git a/os/exec/types.ha b/os/exec/types.ha @@ -0,0 +1,27 @@ +// An executable command. +export type command = struct { + platform: platform_cmd, + argv: []str, + env: []str, +}; + +// Returned when path resolution fails to find a command by its name. +export type nocmd = void!; + +// An error provided by the operating system. +export type os_error = struct { + string: *fn(data: *void) str, + data: *void, +}!; + +// All errors that can be returned from os::exec. +export type error = (nocmd | os_error)!; + +// Returns a human-readable message for the given error. +export fn errstr(err: error) const str = { + return match (err) { + err: os_error => err.string(err.data), + nocmd => "Command not found", + }; +}; + diff --git a/os/fs.ha b/os/fs.ha @@ -0,0 +1,46 @@ +use fs; +use path; + +// Provides an implementation of [fs::fs] for the host filesystem. +export let root: *fs::fs = null: *fs::fs; + +// Provides an implementation of [fs::fs] for the current working directory. +export let cwd: *fs::fs = null: *fs::fs; + +// Removes a file. +export fn remove(path: str) (void | fs::error) = fs::remove(cwd, path); + +// Creates an [fs::iterator] for a given directory to read its contents. +export fn iterdir(path: str) (*fs::iterator | fs::error) = fs::iter(cwd, path); + +// Reads all entries from a directory. The caller must free the return value +// with [fs::dirents_free]. 
+export fn readdir(path: str) ([]fs::dirent | fs::error) = fs::readdir(cwd, path); + +// Returns file information for a given path. +export fn stat(path: str) (fs::filestat | fs::error) = fs::stat(cwd, path); + +// Opens a directory as a filesystem. +export fn diropen(path: str) (*fs::fs | fs::error) = fs::subdir(cwd, path); + +// Creates a directory. +export fn mkdir(path: str) (void | fs::error) = fs::mkdir(cwd, path); + +// Creates a directory, and all non-extant directories in its path. +export fn mkdirs(path: str) (void | fs::error) = fs::mkdirs(cwd, path); + +// Removes a directory. The target directory must be empty; see [rmdirall] to +// remove its contents as well. +export fn rmdir(path: str) (void | fs::error) = fs::rmdir(cwd, path); + +// Removes a directory, and anything in it. +export fn rmdirall(path: str) (void | fs::error) = fs::rmdirall(cwd, path); + +// Creates a directory and returns a subdir for it. Some filesystems support +// doing this operation atomically, but if not, a fallback is used. +export fn mksubdir(path: str) (*fs::fs | fs::error) = fs::mksubdir(cwd, path); + +// Resolves a path to its absolute, normalized value. This consolidates ./ and +// ../ sequences, roots the path, and returns a new path. The caller must free +// the return value. +export fn resolve(path: str) str = fs::resolve(cwd, path); diff --git a/os/stdfd.ha b/os/stdfd.ha @@ -0,0 +1,10 @@ +use io; + +// The standard input. +export let stdin: *io::stream = null: *io::stream; + +// The standard output. +export let stdout: *io::stream = null: *io::stream; + +// The standard error. +export let stderr: *io::stream = null: *io::stream; diff --git a/path/+linux.ha b/path/+linux.ha @@ -0,0 +1,2 @@ +// Path separator, platform-specific. 
+export def PATHSEP: u8 = '/': u32: u8; diff --git a/path/iter.ha b/path/iter.ha @@ -0,0 +1,69 @@ +use bytes; +use strings; + +export type iflags = enum uint { + NONE = 0, + ABSOLUTE = 1 << 0, +}; + +// An iterator which yields each component of a path. +export type iterator = struct { + tok: bytes::tokenizer, + flags: iflags, +}; + +let pathsep: []u8 = [PATHSEP]; + +// Returns an iterator which yields each component of a path. If the path is +// absolute, the first component will be the root path (e.g. /). +export fn iter(path: str) iterator = { + let flags = iflags::NONE; + let pb = strings::to_utf8(path); + if (len(pb) > 0 && pb[0] == PATHSEP) { + flags |= iflags::ABSOLUTE; + pb = pb[1..]; + }; + if (len(pb) > 1 && pb[len(pb) - 1] == PATHSEP) { + pb = pb[..len(pb) - 1]; + }; + + return iterator { + tok = bytes::tokenize(pb, pathsep), + flags = flags, + }; +}; + +// Returns the next path component from an iterator, or void if none remain. +export fn next(iter: *iterator) (str | void) = { + if (iter.flags & iflags::ABSOLUTE == iflags::ABSOLUTE) { + iter.flags &= ~iflags::ABSOLUTE; + static assert(PATHSEP <= 0x7F); + return strings::from_utf8_unsafe(pathsep); + }; + return match (bytes::next_token(&iter.tok)) { + b: []u8 => strings::from_utf8_unsafe(b), + void => void, + }; +}; + +@test fn iter() void = { + assert(PATHSEP == '/': u32: u8); // meh + let i = iter("/foo/bar/baz"); + assert(next(&i) as str == "/"); + assert(next(&i) as str == "foo"); + assert(next(&i) as str == "bar"); + assert(next(&i) as str == "baz"); + assert(next(&i) is void); + let i = iter("foo/bar/baz/"); + assert(next(&i) as str == "foo"); + assert(next(&i) as str == "bar"); + assert(next(&i) as str == "baz"); + assert(next(&i) is void); + let i = iter("foo"); + assert(next(&i) as str == "foo"); + assert(next(&i) is void); + + let i = iter("/"); + assert(next(&i) as str == "/"); + assert(next(&i) is void); +}; diff --git a/path/join.ha b/path/join.ha @@ -0,0 +1,65 @@ +use bytes; +use bufio; 
+use strings; +use io; + +// Joins together several path components with the path separator. The caller +// must free the return value. +export fn join(paths: str...) str = { + // TODO: Normalize inputs so that if they end with a / we don't double + // up on delimiters + let sink = bufio::dynamic(io::mode::WRITE); + let utf8 = true; + for (let i = 0z; i < len(paths); i += 1) { + let buf = strings::to_utf8(paths[i]); + let l = len(buf); + if (l == 0) continue; + for (l > 0 && buf[l - 1] == PATHSEP) { + l -= 1; + }; + for (let q = 0z; q < l) { + let w = io::write(sink, buf[q..l]) as size; + q += w; + }; + if (i + 1 < len(paths)) { + assert(io::write(sink, [PATHSEP]) as size == 1); + }; + }; + + return strings::from_utf8_unsafe(bufio::finish(sink)); +}; + +@test fn join() void = { + assert(PATHSEP == '/': u32: u8); // TODO: meh + let i = join("foo"); + defer free(i); + assert(i == "foo"); + + let p = join(i, "bar", "baz"); + defer free(p); + assert(p == "foo/bar/baz"); + + let q = join(p, "bat", "bad"); + defer free(q); + assert(q == "foo/bar/baz/bat/bad"); + + let r = join(p, q); + defer free(r); + assert(r == "foo/bar/baz/foo/bar/baz/bat/bad"); + + let p = join("foo/", "bar"); + defer free(p); + assert(p == "foo/bar"); + + let p = join("foo///", "bar"); + defer free(p); + assert(p == "foo/bar"); + + let p = join("foo", "", "bar"); + defer free(p); + assert(p == "foo/bar"); + + let p = join("/", "foo", "bar", "baz"); + defer free(p); + assert(p == "/foo/bar/baz"); +}; diff --git a/path/names.ha b/path/names.ha @@ -0,0 +1,79 @@ +use bytes; +use encoding::utf8; +use strings; + +// Returns the directory name for a given path. For a path to a file name, this +// returns the directory in which that file resides. For a path to a directory, +// this returns the path to its parent directory. The return value is borrowed +// from the input, use [dup] to extend its lifetime. 
+export fn dirname(path: str) str = { + let b = strings::to_utf8(path); + let i = match (bytes::rindex(b, PATHSEP)) { + void => return path, + z: size => z, + }; + if (i == 0) { + i += 1; + }; + return strings::from_utf8_unsafe(b[..i]); +}; + +@test fn dirname() void = { + assert(dirname("/foo/bar") == "/foo"); + assert(dirname("/foo") == "/"); + assert(dirname("/") == "/"); + assert(dirname("foo/bar") == "foo"); + assert(dirname("foo") == "foo"); +}; + +// Returns the final component of a given path. For a path to a file name, this +// returns the file name. For a path to a directory, this returns the directory +// name. The return value is borrowed from the input, use [dup] to extend its +// lifetime. +export fn basename(path: str) str = { + let b = strings::to_utf8(path); + let i = match (bytes::rindex(b, PATHSEP)) { + void => return path, + z: size => if (z + 1 < len(b)) z + 1z else 0z, + }; + return strings::from_utf8_unsafe(b[i..]); +}; + +@test fn basename() void = { + assert(basename("/foo/bar") == "bar"); + assert(basename("/foo") == "foo"); + assert(basename("/") == "/"); + assert(basename("foo/bar") == "bar"); + assert(basename("foo") == "foo"); +}; + +// Returns the file extension for a path. The return value is borrowed from the +// input, see [strings::dup] to extend its lifetime. +// +// The return value includes the '.' character. 
+// +// extension("foo/example") => "" +// extension("foo/example.txt") => ".txt" +// extension("foo/example.tar.gz") => ".tar.gz" +export fn extension(p: str) str = { + let b = strings::to_utf8(p); + // An empty path or one ending in a separator has no extension. basename() + // returns a separator-terminated path whole, so this has to be decided + // before calling it. + if (len(b) == 0 || b[len(b) - 1] == PATHSEP) { + return ""; + }; + let b = strings::to_utf8(basename(p)); + // Cut at the first '.' of the final component, so "bar.tar.gz" yields + // ".tar.gz" and a name that starts with '.' is returned in full. + let i = match (bytes::index(b, '.': u32: u8)) { + void => return "", + z: size => z, + }; + let e = b[i..]; + return strings::from_utf8_unsafe(e); +}; + +@test fn extension() void = { + assert(extension("") == ""); + assert(extension("foo/") == ""); + assert(extension("foo/bar") == ""); + assert(extension("foo/bar.txt") == ".txt"); + assert(extension("foo/bar.tar.gz") == ".tar.gz"); + assert(extension("foo.bar/baz.ha") == ".ha"); +}; diff --git a/path/util.ha b/path/util.ha @@ -0,0 +1,10 @@ +use strings; + +// Returns true if a path is an absolute path. +export fn abs(path: str) bool = { + let b = strings::to_utf8(path); + if (len(b) == 0) { + return false; + }; + return b[0] == PATHSEP; +}; diff --git a/rt/+aarch64/jmp.ha b/rt/+aarch64/jmp.ha @@ -0,0 +1 @@ +// 22 * 8 = 176 bytes. rt.setjmp and rt.longjmp address byte offsets 0 through +// 175 of the buffer they receive in x0 (presumably a value of this type; +// confirm against the callers). +type arch_jmpbuf = [22]u64; diff --git a/rt/+aarch64/longjmp.s b/rt/+aarch64/longjmp.s @@ -0,0 +1,20 @@ +.global rt.longjmp +.type rt.longjmp,@function +rt.longjmp: + // IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers + ldp x19, x20, [x0,#0] + ldp x21, x22, [x0,#16] + ldp x23, x24, [x0,#32] + ldp x25, x26, [x0,#48] + ldp x27, x28, [x0,#64] + ldp x29, x30, [x0,#80] + ldr x2, [x0,#104] + mov sp, x2 + ldp d8 , d9, [x0,#112] + ldp d10, d11, [x0,#128] + ldp d12, d13, [x0,#144] + ldp d14, d15, [x0,#160] + + // w0 = w1, or 1 when w1 is 0; then branch to the restored x30 + cmp w1, 0 + csinc w0, w1, wzr, ne + br x30 diff --git a/rt/+aarch64/restore.s b/rt/+aarch64/restore.s @@ -0,0 +1,11 @@ +// Copied from musl + +.global rt.restore +.global rt.restore_si +.type rt.restore,@function +.type rt.restore_si,@function +rt.restore: +rt.restore_si: + // 139 is SYS_rt_sigreturn in syscallno+aarch64.ha + mov x8,#139 + svc 0 + diff --git a/rt/+aarch64/setjmp.s b/rt/+aarch64/setjmp.s @@ -0,0 +1,18 @@ +.global rt.setjmp +.type 
rt.setjmp,@function +rt.setjmp: + // IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers + stp x19, x20, [x0,#0] + stp x21, x22, [x0,#16] + stp x23, x24, [x0,#32] + stp x25, x26, [x0,#48] + stp x27, x28, [x0,#64] + stp x29, x30, [x0,#80] + // sp is saved through x2 at offset 104; offsets 96-103 are not written + mov x2, sp + str x2, [x0,#104] + stp d8, d9, [x0,#112] + stp d10, d11, [x0,#128] + stp d12, d13, [x0,#144] + stp d14, d15, [x0,#160] + // the direct call returns 0 + mov x0, #0 + ret diff --git a/rt/+linux/+aarch64.ha b/rt/+linux/+aarch64.ha @@ -0,0 +1,22 @@ +// Returns the new PID to the parent, void to the child, or errno if something +// goes wrong. +export fn clone( + stack: nullable *void, + flags: int, + parent_tid: nullable *int, + child_tid: nullable *int, + tls: u64, +) (int | void | errno) = { + // The raw syscall arguments are passed as flags, stack, parent_tid, tls, + // child_tid, which is not the order of this function's parameters. In + // this (aarch64) variant tls comes before child_tid. + return match (wrap_return(syscall5(SYS_clone, + flags: u64, + stack: uintptr: u64, + parent_tid: uintptr: u64, + tls, + child_tid: uintptr: u64))) { + u: u64 => switch (u) { + 0 => void, + * => u: int, + }, + err: errno => err, + }; +}; diff --git a/rt/+linux/+x86_64.ha b/rt/+linux/+x86_64.ha @@ -0,0 +1,22 @@ +// Returns the new PID to the parent, void to the child, or errno if something +// goes wrong. 
+export fn clone( + stack: nullable *void, + flags: int, + parent_tid: nullable *int, + child_tid: nullable *int, + tls: u64, +) (int | void | errno) = { + // The raw syscall arguments are passed as flags, stack, parent_tid, + // child_tid, tls, which is not the order of this function's parameters. + // In this (x86_64) variant child_tid comes before tls. + return match (wrap_return(syscall5(SYS_clone, + flags: u64, + stack: uintptr: u64, + parent_tid: uintptr: u64, + child_tid: uintptr: u64, + tls))) { + u: u64 => switch (u) { + 0 => void, + * => u: int, + }, + err: errno => err, + }; +}; diff --git a/rt/+linux/abort.ha b/rt/+linux/abort.ha @@ -0,0 +1,25 @@ +// Writes "Abort: ", msg and a newline to file descriptor 2, then sends SIGABRT +// to the current process. Exported under the symbol name rt.abort. +export @noreturn @symbol("rt.abort") fn _abort(msg: str) void = { + const prefix = "Abort: "; + write(2, prefix: *const char, len(prefix)); + write(2, msg: *const char, len(msg)); + write(2, "\n": *const char, 1); + kill(getpid(), SIGABRT); +}; + +// See harec:include/gen.h +const reasons: [_]str = [ + "slice or array access out of bounds", // 0 + "type assertion failed", // 1 + "out of memory", // 2 +]; + +// Writes "Abort: ", loc, ": ", reasons[i] and a newline to file descriptor 2, +// then sends SIGABRT to the current process. i is used as an index into +// reasons without a range check in this function. +export @noreturn fn abort_fixed(loc: str, i: int) void = { + const prefix = "Abort: "; + const sep = ": "; + write(2, prefix: *const char, len(prefix)); + write(2, loc: *const char, len(loc)); + write(2, sep: *const char, len(sep)); + write(2, reasons[i]: *const char, len(reasons[i])); + write(2, "\n": *const char, 1); + kill(getpid(), SIGABRT); +}; diff --git a/rt/+linux/env.ha b/rt/+linux/env.ha @@ -0,0 +1,3 @@ +// Argument count, argument vector and environment vector. They start out as +// 0/null and are assigned by start_linux. +export let argc: size = 0; +export let argv: *[*]*char = null: *[*]*char; +export let envp: *[*]nullable *char = null: *[*]nullable *char; diff --git a/rt/+linux/errno.ha b/rt/+linux/errno.ha @@ -0,0 +1,423 @@ +// Represents an error returned from the Linux kernel. +export type errno = int!; + +// Given an integer error number, wraps it in an error type. +export fn wrap_errno(err: int) errno = err: errno; + +// Checks the return value from a Linux syscall and, if found to be in error, +// returns the appropriate error. Otherwise, returns the original value. 
+fn wrap_return(r: u64) (errno | u64) = { + // Read as a signed number, the values above -4096u64 are -4095 through + // -1; such a value is negated to recover the positive error number. + if (r > -4096u64) { + return (-(r: i64)): int: errno; + }; + return r; +}; + +// Obtains a human-friendly reading of an [errno] (e.g. "Operation not +// permitted"). +export fn errstr(err: errno) str = { + return switch (err: int) { + EPERM => "Operation not permitted", + ENOENT => "No such file or directory", + ESRCH => "No such process", + EINTR => "Interrupted system call", + EIO => "Input/output error", + ENXIO => "No such device or address", + E2BIG => "Argument list too long", + ENOEXEC => "Exec format error", + EBADF => "Bad file descriptor", + ECHILD => "No child processes", + EAGAIN => "Resource temporarily unavailable", + ENOMEM => "Cannot allocate memory", + EACCES => "Permission denied", + EFAULT => "Bad address", + ENOTBLK => "Block device required", + EBUSY => "Device or resource busy", + EEXIST => "File exists", + EXDEV => "Invalid cross-device link", + ENODEV => "No such device", + ENOTDIR => "Not a directory", + EISDIR => "Is a directory", + EINVAL => "Invalid argument", + ENFILE => "Too many open files in system", + EMFILE => "Too many open files", + ENOTTY => "Inappropriate ioctl for device", + ETXTBSY => "Text file busy", + EFBIG => "File too large", + ENOSPC => "No space left on device", + ESPIPE => "Illegal seek", + EROFS => "Read-only file system", + EMLINK => "Too many links", + EPIPE => "Broken pipe", + EDOM => "Numerical argument out of domain", + ERANGE => "Numerical result out of range", + EDEADLK => "Resource deadlock avoided", + ENAMETOOLONG => "File name too long", + ENOLCK => "No locks available", + ENOSYS => "Function not implemented", + ENOTEMPTY => "Directory not empty", + ELOOP => "Too many levels of symbolic links", + ENOMSG => "No message of desired type", + EIDRM => "Identifier removed", + ECHRNG => "Channel number out of range", + EL2NSYNC => "Level 2 not synchronized", + EL3HLT => "Level 3 halted", + EL3RST => "Level 3 reset", + ELNRNG => "Link number out of range", + EUNATCH => 
"Protocol driver not attached", + ENOCSI => "No CSI structure available", + EL2HLT => "Level 2 halted", + EBADE => "Invalid exchange", + EBADR => "Invalid request descriptor", + EXFULL => "Exchange full", + ENOANO => "No anode", + EBADRQC => "Invalid request code", + EBADSLT => "Invalid slot", + EBFONT => "Bad font file format", + ENOSTR => "Device not a stream", + ENODATA => "No data available", + ETIME => "Timer expired", + ENOSR => "Out of streams resources", + ENONET => "Machine is not on the network", + ENOPKG => "Package not installed", + EREMOTE => "Object is remote", + ENOLINK => "Link has been severed", + EADV => "Advertise error", + ESRMNT => "Srmount error", + ECOMM => "Communication error on send", + EPROTO => "Protocol error", + EMULTIHOP => "Multihop attempted", + EDOTDOT => "RFS specific error", + EBADMSG => "Bad message", + EOVERFLOW => "Value too large for defined data type", + ENOTUNIQ => "Name not unique on network", + EBADFD => "File descriptor in bad state", + EREMCHG => "Remote address changed", + ELIBACC => "Can not access a needed shared library", + ELIBBAD => "Accessing a corrupted shared library", + ELIBSCN => ".lib section in a.out corrupted", + ELIBMAX => "Attempting to link in too many shared libraries", + ELIBEXEC => "Cannot exec a shared library directly", + EILSEQ => "Invalid or incomplete multibyte or wide character", + ERESTART => "Interrupted system call should be restarted", + ESTRPIPE => "Streams pipe error", + EUSERS => "Too many users", + ENOTSOCK => "Socket operation on non-socket", + EDESTADDRREQ => "Destination address required", + EMSGSIZE => "Message too long", + EPROTOTYPE => "Protocol wrong type for socket", + ENOPROTOOPT => "Protocol not available", + EPROTONOSUPPORT => "Protocol not supported", + ESOCKTNOSUPPORT => "Socket type not supported", + EOPNOTSUPP => "Operation not supported", + EPFNOSUPPORT => "Protocol family not supported", + EAFNOSUPPORT => "Address family not supported by protocol", + EADDRINUSE => 
"Address already in use", + EADDRNOTAVAIL => "Cannot assign requested address", + ENETDOWN => "Network is down", + ENETUNREACH => "Network is unreachable", + ENETRESET => "Network dropped connection on reset", + ECONNABORTED => "Software caused connection abort", + ECONNRESET => "Connection reset by peer", + ENOBUFS => "No buffer space available", + EISCONN => "Transport endpoint is already connected", + ENOTCONN => "Transport endpoint is not connected", + ESHUTDOWN => "Cannot send after transport endpoint shutdown", + ETOOMANYREFS => "Too many references: cannot splice", + ETIMEDOUT => "Connection timed out", + ECONNREFUSED => "Connection refused", + EHOSTDOWN => "Host is down", + EHOSTUNREACH => "No route to host", + EALREADY => "Operation already in progress", + EINPROGRESS => "Operation now in progress", + ESTALE => "Stale file handle", + EUCLEAN => "Structure needs cleaning", + ENOTNAM => "Not a XENIX named type file", + ENAVAIL => "No XENIX semaphores available", + EISNAM => "Is a named type file", + EREMOTEIO => "Remote I/O error", + EDQUOT => "Disk quota exceeded", + ENOMEDIUM => "No medium found", + EMEDIUMTYPE => "Wrong medium type", + ECANCELED => "Operation canceled", + ENOKEY => "Required key not available", + EKEYEXPIRED => "Key has expired", + EKEYREVOKED => "Key has been revoked", + EKEYREJECTED => "Key was rejected by service", + EOWNERDEAD => "Owner died", + ENOTRECOVERABLE => "State not recoverable", + ERFKILL => "Operation not possible due to RF-kill", + EHWPOISON => "Memory page has hardware error", + * => "Unknown Linux error code", // TODO: snprintf to add errno? + }; +}; + +// Gets the programmer-friendly name for an [errno] (e.g. EPERM). 
+export fn errname(err: errno) str = { + return switch (err: int) { + EPERM => "EPERM", + ENOENT => "ENOENT", + ESRCH => "ESRCH", + EINTR => "EINTR", + EIO => "EIO", + ENXIO => "ENXIO", + E2BIG => "E2BIG", + ENOEXEC => "ENOEXEC", + EBADF => "EBADF", + ECHILD => "ECHILD", + EAGAIN => "EAGAIN", + ENOMEM => "ENOMEM", + EACCES => "EACCES", + EFAULT => "EFAULT", + ENOTBLK => "ENOTBLK", + EBUSY => "EBUSY", + EEXIST => "EEXIST", + EXDEV => "EXDEV", + ENODEV => "ENODEV", + ENOTDIR => "ENOTDIR", + EISDIR => "EISDIR", + EINVAL => "EINVAL", + ENFILE => "ENFILE", + EMFILE => "EMFILE", + ENOTTY => "ENOTTY", + ETXTBSY => "ETXTBSY", + EFBIG => "EFBIG", + ENOSPC => "ENOSPC", + ESPIPE => "ESPIPE", + EROFS => "EROFS", + EMLINK => "EMLINK", + EPIPE => "EPIPE", + EDOM => "EDOM", + ERANGE => "ERANGE", + EDEADLK => "EDEADLK", + ENAMETOOLONG => "ENAMETOOLONG", + ENOLCK => "ENOLCK", + ENOSYS => "ENOSYS", + ENOTEMPTY => "ENOTEMPTY", + ELOOP => "ELOOP", + ENOMSG => "ENOMSG", + EIDRM => "EIDRM", + ECHRNG => "ECHRNG", + EL2NSYNC => "EL2NSYNC", + EL3HLT => "EL3HLT", + EL3RST => "EL3RST", + ELNRNG => "ELNRNG", + EUNATCH => "EUNATCH", + ENOCSI => "ENOCSI", + EL2HLT => "EL2HLT", + EBADE => "EBADE", + EBADR => "EBADR", + EXFULL => "EXFULL", + ENOANO => "ENOANO", + EBADRQC => "EBADRQC", + EBADSLT => "EBADSLT", + EBFONT => "EBFONT", + ENOSTR => "ENOSTR", + ENODATA => "ENODATA", + ETIME => "ETIME", + ENOSR => "ENOSR", + ENONET => "ENONET", + ENOPKG => "ENOPKG", + EREMOTE => "EREMOTE", + ENOLINK => "ENOLINK", + EADV => "EADV", + ESRMNT => "ESRMNT", + ECOMM => "ECOMM", + EPROTO => "EPROTO", + EMULTIHOP => "EMULTIHOP", + EDOTDOT => "EDOTDOT", + EBADMSG => "EBADMSG", + EOVERFLOW => "EOVERFLOW", + ENOTUNIQ => "ENOTUNIQ", + EBADFD => "EBADFD", + EREMCHG => "EREMCHG", + ELIBACC => "ELIBACC", + ELIBBAD => "ELIBBAD", + ELIBSCN => "ELIBSCN", + ELIBMAX => "ELIBMAX", + ELIBEXEC => "ELIBEXEC", + EILSEQ => "EILSEQ", + ERESTART => "ERESTART", + ESTRPIPE => "ESTRPIPE", + EUSERS => "EUSERS", + ENOTSOCK => "ENOTSOCK", 
+ EDESTADDRREQ => "EDESTADDRREQ", + EMSGSIZE => "EMSGSIZE", + EPROTOTYPE => "EPROTOTYPE", + ENOPROTOOPT => "ENOPROTOOPT", + EPROTONOSUPPORT => "EPROTONOSUPPORT", + ESOCKTNOSUPPORT => "ESOCKTNOSUPPORT", + EOPNOTSUPP => "EOPNOTSUPP", + EPFNOSUPPORT => "EPFNOSUPPORT", + EAFNOSUPPORT => "EAFNOSUPPORT", + EADDRINUSE => "EADDRINUSE", + EADDRNOTAVAIL => "EADDRNOTAVAIL", + ENETDOWN => "ENETDOWN", + ENETUNREACH => "ENETUNREACH", + ENETRESET => "ENETRESET", + ECONNABORTED => "ECONNABORTED", + ECONNRESET => "ECONNRESET", + ENOBUFS => "ENOBUFS", + EISCONN => "EISCONN", + ENOTCONN => "ENOTCONN", + ESHUTDOWN => "ESHUTDOWN", + ETOOMANYREFS => "ETOOMANYREFS", + ETIMEDOUT => "ETIMEDOUT", + ECONNREFUSED => "ECONNREFUSED", + EHOSTDOWN => "EHOSTDOWN", + EHOSTUNREACH => "EHOSTUNREACH", + EALREADY => "EALREADY", + EINPROGRESS => "EINPROGRESS", + ESTALE => "ESTALE", + EUCLEAN => "EUCLEAN", + ENOTNAM => "ENOTNAM", + ENAVAIL => "ENAVAIL", + EISNAM => "EISNAM", + EREMOTEIO => "EREMOTEIO", + EDQUOT => "EDQUOT", + ENOMEDIUM => "ENOMEDIUM", + EMEDIUMTYPE => "EMEDIUMTYPE", + ECANCELED => "ECANCELED", + ENOKEY => "ENOKEY", + EKEYEXPIRED => "EKEYEXPIRED", + EKEYREVOKED => "EKEYREVOKED", + EKEYREJECTED => "EKEYREJECTED", + EOWNERDEAD => "EOWNERDEAD", + ENOTRECOVERABLE => "ENOTRECOVERABLE", + ERFKILL => "ERFKILL", + EHWPOISON => "EHWPOISON", + * => "[unknown errno]", // TODO: snprintf to add errno? 
+ }; +}; + +export def EPERM: int = 1; +export def ENOENT: int = 2; +export def ESRCH: int = 3; +export def EINTR: int = 4; +export def EIO: int = 5; +export def ENXIO: int = 6; +export def E2BIG: int = 7; +export def ENOEXEC: int = 8; +export def EBADF: int = 9; +export def ECHILD: int = 10; +export def EAGAIN: int = 11; +export def ENOMEM: int = 12; +export def EACCES: int = 13; +export def EFAULT: int = 14; +export def ENOTBLK: int = 15; +export def EBUSY: int = 16; +export def EEXIST: int = 17; +export def EXDEV: int = 18; +export def ENODEV: int = 19; +export def ENOTDIR: int = 20; +export def EISDIR: int = 21; +export def EINVAL: int = 22; +export def ENFILE: int = 23; +export def EMFILE: int = 24; +export def ENOTTY: int = 25; +export def ETXTBSY: int = 26; +export def EFBIG: int = 27; +export def ENOSPC: int = 28; +export def ESPIPE: int = 29; +export def EROFS: int = 30; +export def EMLINK: int = 31; +export def EPIPE: int = 32; +export def EDOM: int = 33; +export def ERANGE: int = 34; +export def EDEADLK: int = 35; +export def ENAMETOOLONG: int = 36; +export def ENOLCK: int = 37; +export def ENOSYS: int = 38; +export def ENOTEMPTY: int = 39; +export def ELOOP: int = 40; +export def ENOMSG: int = 42; +export def EIDRM: int = 43; +export def ECHRNG: int = 44; +export def EL2NSYNC: int = 45; +export def EL3HLT: int = 46; +export def EL3RST: int = 47; +export def ELNRNG: int = 48; +export def EUNATCH: int = 49; +export def ENOCSI: int = 50; +export def EL2HLT: int = 51; +export def EBADE: int = 52; +export def EBADR: int = 53; +export def EXFULL: int = 54; +export def ENOANO: int = 55; +export def EBADRQC: int = 56; +export def EBADSLT: int = 57; +export def EBFONT: int = 59; +export def ENOSTR: int = 60; +export def ENODATA: int = 61; +export def ETIME: int = 62; +export def ENOSR: int = 63; +export def ENONET: int = 64; +export def ENOPKG: int = 65; +export def EREMOTE: int = 66; +export def ENOLINK: int = 67; +export def EADV: int = 68; +export def ESRMNT: 
int = 69; +export def ECOMM: int = 70; +export def EPROTO: int = 71; +export def EMULTIHOP: int = 72; +export def EDOTDOT: int = 73; +export def EBADMSG: int = 74; +export def EOVERFLOW: int = 75; +export def ENOTUNIQ: int = 76; +export def EBADFD: int = 77; +export def EREMCHG: int = 78; +export def ELIBACC: int = 79; +export def ELIBBAD: int = 80; +export def ELIBSCN: int = 81; +export def ELIBMAX: int = 82; +export def ELIBEXEC: int = 83; +export def EILSEQ: int = 84; +export def ERESTART: int = 85; +export def ESTRPIPE: int = 86; +export def EUSERS: int = 87; +export def ENOTSOCK: int = 88; +export def EDESTADDRREQ: int = 89; +export def EMSGSIZE: int = 90; +export def EPROTOTYPE: int = 91; +export def ENOPROTOOPT: int = 92; +export def EPROTONOSUPPORT: int = 93; +export def ESOCKTNOSUPPORT: int = 94; +export def EOPNOTSUPP: int = 95; +export def EPFNOSUPPORT: int = 96; +export def EAFNOSUPPORT: int = 97; +export def EADDRINUSE: int = 98; +export def EADDRNOTAVAIL: int = 99; +export def ENETDOWN: int = 100; +export def ENETUNREACH: int = 101; +export def ENETRESET: int = 102; +export def ECONNABORTED: int = 103; +export def ECONNRESET: int = 104; +export def ENOBUFS: int = 105; +export def EISCONN: int = 106; +export def ENOTCONN: int = 107; +export def ESHUTDOWN: int = 108; +export def ETOOMANYREFS: int = 109; +export def ETIMEDOUT: int = 110; +export def ECONNREFUSED: int = 111; +export def EHOSTDOWN: int = 112; +export def EHOSTUNREACH: int = 113; +export def EALREADY: int = 114; +export def EINPROGRESS: int = 115; +export def ESTALE: int = 116; +export def EUCLEAN: int = 117; +export def ENOTNAM: int = 118; +export def ENAVAIL: int = 119; +export def EISNAM: int = 120; +export def EREMOTEIO: int = 121; +export def EDQUOT: int = 122; +export def ENOMEDIUM: int = 123; +export def EMEDIUMTYPE: int = 124; +export def ECANCELED: int = 125; +export def ENOKEY: int = 126; +export def EKEYEXPIRED: int = 127; +export def EKEYREVOKED: int = 128; +export def 
EKEYREJECTED: int = 129; +export def EOWNERDEAD: int = 130; +export def ENOTRECOVERABLE: int = 131; +export def ERFKILL: int = 132; +export def EHWPOISON: int = 133; diff --git a/rt/+linux/segmalloc.ha b/rt/+linux/segmalloc.ha @@ -0,0 +1,26 @@ +// Allocates a segment. Returns null if mmap fails; the only failure that is +// tolerated is ENOMEM, anything else trips the assertion. +fn segmalloc(n: size) nullable *void = { + return match(mmap(null, n, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0)) { + err: errno => { + assert(err == ENOMEM: errno); + null; + }, + p: *void => p, + }; +}; + +// Frees a segment allocated with segmalloc. Aborts if munmap fails. +fn segfree(p: *void, s: size) void = { + match (munmap(p, s)) { + err: errno => abort("munmap failed"), + void => void, + }; +}; + +// Marks a segment as writable and drops the execute bit. +fn segwrite(seg: *void, n: size) void = mprotect(seg, n, PROT_READ | PROT_WRITE); + +// Marks a segment as executable and drops the write bit. +fn segexec(seg: *void, n: size) void = mprotect(seg, n, PROT_READ | PROT_EXEC); diff --git a/rt/+linux/signal.ha b/rt/+linux/signal.ha @@ -0,0 +1,44 @@ +// TODO: work when _NSIG != 64 + +// Clears the first word of the set. Returns 1 if that word was already zero +// and 0 if it had to be cleared. +export fn sigemptyset(set: *sigset) int = { + if (set.__val[0] == 0) { + return 1; + }; + set.__val[0] = 0; + return 0; +}; + +// Adds signum to the set and returns 0. signum must lie in [1, NSIG], +// otherwise EINVAL is returned as an errno. +export fn sigaddset(set: *sigset, signum: int) (int | errno) = { + if (signum < 1 || signum > NSIG) { + // EINVAL is declared as a plain int; cast it so that the errno + // member of the result is selected rather than the int one. + return EINVAL: errno; + }; + // Signal n is stored in bit n - 1. + signum -= 1; + // Shift a 64-bit one so that bits 31 and above are reachable. + set.__val[0] |= (1u64 << signum: u64); + return 0; +}; + +// Removes signum from the set and returns 0. signum must lie in [1, NSIG], +// otherwise EINVAL is returned as an errno. +export fn sigdelset(set: *sigset, signum: int) (int | errno) = { + if (signum < 1 || signum > NSIG) { + return EINVAL: errno; + }; + signum -= 1; + set.__val[0] &= ~(1u64 << signum: u64); + return 0; +}; + +// Returns 1 if signum is in the set and 0 if it is not. signum must lie in +// [1, NSIG], otherwise EINVAL is returned as an errno. +export fn sigismember(set: *sigset, signum: int) (int | errno) = { + if (signum < 1 || signum > NSIG) { + return EINVAL: errno; + }; + signum -= 1; + if ((set.__val[0] & (1u64 << signum: u64)) != 0) { + return 1; + } else { + return 0; + }; +}; + +// Sets every bit of the first word of the set and returns 0. +export fn sigfillset(set: *sigset) (int | errno) = { + set.__val[0] = 0xffffffffffffffff: u64; + return 0; +}; diff --git a/rt/+linux/start+aarch64.s b/rt/+linux/start+aarch64.s @@ -0,0 +1,8 @@ +.text +.global _start 
+_start: + mov x29, #0 + mov x30, #0 + mov x0, sp + add sp, x0, #-16 + b rt.start_linux diff --git a/rt/+linux/start+x86_64.s b/rt/+linux/start+x86_64.s @@ -0,0 +1,6 @@ +.text +.global _start +_start: + xor %rbp, %rbp + movq %rsp, %rdi + call rt.start_linux diff --git a/rt/+linux/start.ha b/rt/+linux/start.ha @@ -0,0 +1,7 @@ +// Entry point reached from _start, which passes the initial stack pointer as +// iv. iv[0] is read as argc, argv begins at iv[1], and envp begins one slot +// past the argc argv entries (presumably skipping argv's terminator; confirm). +export @noreturn fn start_linux(iv: *[*]uintptr) void = { + // TODO: Find & parse auxv + argc = iv[0]: size; + argv = &iv[1]: *[*]*char; + envp = &argv[argc + 1]: *[*]nullable *char; + start_ha(); +}; diff --git a/rt/+linux/stat.ha b/rt/+linux/stat.ha @@ -0,0 +1,55 @@ +// Packs a major/minor pair into a dev_t: minor bits 0-7 land in bits 0-7, +// major bits 0-11 in bits 8-19, minor bits 8-31 in bits 20-43 and major bits +// 12-31 in bits 44-63. +fn mkdev(major: u32, minor: u32) dev_t = + ((major: u64 & 0xFFFFF000) << 32) | + ((major: u64 & 0x00000FFF) << 8) | + ((minor: u64 & 0xFFFFFF00) << 12) | + (minor: u64 & 0x000000FF); + +// Thin wrapper around the statx syscall; the result is written to statbuf. +fn fstatat_statx( + dirfd: int, + path: path, + flags: int, + mask: uint, + statbuf: *stx, +) (void | errno) = { + let path = kpath(path)?; + wrap_return(syscall5(SYS_statx, + dirfd: u64, path: uintptr: u64, flags: u64, + mask: u64, statbuf: uintptr: u64))?; + return; +}; + +// Fills statbuf for path, resolved relative to dirfd, by issuing statx with +// STATX_BASIC_STATS and copying the fields over. Returns an errno if the path +// cannot be converted or the syscall fails. +export fn fstatat( + dirfd: int, + path: path, + statbuf: *st, + flags: int, +) (errno | void) = { + let path = kpath(path)?; + let statxbuf = stx { ... 
}; + fstatat_statx(dirfd, path, flags, STATX_BASIC_STATS, &statxbuf)?; + // dev identifies the device holding the file ... + statbuf.dev = mkdev(statxbuf.dev_major, statxbuf.dev_minor); + statbuf.ino = statxbuf.ino; + statbuf.mode = statxbuf.mode; + statbuf.nlink = statxbuf.nlink; + statbuf.uid = statxbuf.uid; + statbuf.gid = statxbuf.gid; + // ... while rdev is the device a special file stands for, which statx + // reports separately in rdev_major/rdev_minor. + statbuf.rdev = mkdev(statxbuf.rdev_major, statxbuf.rdev_minor); + statbuf.sz = statxbuf.sz; + statbuf.blksz = statxbuf.blksize; + statbuf.blocks = statxbuf.blocks; + statbuf.atime.tv_sec = statxbuf.atime.tv_sec; + statbuf.atime.tv_nsec = statxbuf.atime.tv_nsec: i64; + statbuf.mtime.tv_sec = statxbuf.mtime.tv_sec; + statbuf.mtime.tv_nsec = statxbuf.mtime.tv_nsec: i64; + statbuf.ctime.tv_sec = statxbuf.ctime.tv_sec; + statbuf.ctime.tv_nsec = statxbuf.ctime.tv_nsec: i64; +}; + +// fstatat relative to AT_FDCWD with no flags. +export fn stat(path: path, statbuf: *st) (errno | void) = + fstatat(AT_FDCWD, path, statbuf, 0); + +// fstatat on fd itself: empty path plus AT_EMPTY_PATH. +export fn fstat(fd: int, statbuf: *st) (errno | void) = + fstatat(fd, "", statbuf, AT_EMPTY_PATH); + +// fstatat relative to AT_FDCWD with AT_SYMLINK_NOFOLLOW. +export fn lstat(path: path, statbuf: *st) (errno | void) = + fstatat(AT_FDCWD, path, statbuf, AT_SYMLINK_NOFOLLOW); diff --git a/rt/+linux/syscall+aarch64.s b/rt/+linux/syscall+aarch64.s @@ -0,0 +1,111 @@ +.section .text.rt.syscall0 +.global rt.syscall0 +rt.syscall0: + sub sp, sp, #16 + str x0, [sp, 8] + ldr x8, [sp, 8] + svc 0 + add sp, sp, 16 + ret + +.section .text.rt.syscall1 +.global rt.syscall1 +rt.syscall1: + sub sp, sp, #16 + str x0, [sp, 8] + str x1, [sp] + ldr x8, [sp, 8] + ldr x0, [sp] + svc 0 + add sp, sp, 16 + ret + +.section .text.rt.syscall2 +.global rt.syscall2 +rt.syscall2: + sub sp, sp, #32 + str x0, [sp, 24] + str x1, [sp, 16] + str x2, [sp, 8] + ldr x8, [sp, 24] + ldr x0, [sp, 16] + ldr x1, [sp, 8] + svc 0 + add sp, sp, 32 + ret + +.section .text.rt.syscall3 +.global rt.syscall3 +rt.syscall3: + sub sp, sp, #32 + str x0, [sp, 24] + str x1, [sp, 16] + str x2, [sp, 8] + str x3, [sp] + ldr x8, [sp, 24] + ldr x0, [sp, 16] + ldr x1, [sp, 8] + ldr x2, [sp] + svc 0 + add sp, sp, 32 + ret + +.section 
.text.rt.syscall4 +.global rt.syscall4 +rt.syscall4: + // Spill the arguments to the stack, then reload them one register lower + // with the first argument (the syscall number) in x8. + sub sp, sp, #48 + str x0, [sp, 40] + str x1, [sp, 32] + str x2, [sp, 24] + str x3, [sp, 16] + str x4, [sp, 8] + ldr x8, [sp, 40] + ldr x0, [sp, 32] + ldr x1, [sp, 24] + ldr x2, [sp, 16] + ldr x3, [sp, 8] + svc 0 + add sp, sp, 48 + ret + +.section .text.rt.syscall5 +.global rt.syscall5 +rt.syscall5: + sub sp, sp, #48 + str x0, [sp, 40] + str x1, [sp, 32] + str x2, [sp, 24] + str x3, [sp, 16] + str x4, [sp, 8] + str x5, [sp] + ldr x8, [sp, 40] + ldr x0, [sp, 32] + ldr x1, [sp, 24] + ldr x2, [sp, 16] + ldr x3, [sp, 8] + ldr x4, [sp] + svc 0 + add sp, sp, 48 + ret + +.section .text.rt.syscall6 +.global rt.syscall6 +rt.syscall6: + sub sp, sp, #64 + str x0, [sp, 56] + str x1, [sp, 48] + str x2, [sp, 40] + str x3, [sp, 32] + str x4, [sp, 24] + str x5, [sp, 16] + str x6, [sp, 8] + ldr x8, [sp, 56] + ldr x0, [sp, 48] + ldr x1, [sp, 40] + ldr x2, [sp, 32] + ldr x3, [sp, 24] + ldr x4, [sp, 16] + ldr x5, [sp, 8] + svc 0 + add sp, sp, 64 + ret diff --git a/rt/+linux/syscall+x86_64.s b/rt/+linux/syscall+x86_64.s @@ -0,0 +1,69 @@ +.section .text.rt.syscall0 +.global rt.syscall0 +rt.syscall0: + movq %rdi, %rax + syscall + ret + +.section .text.rt.syscall1 +.global rt.syscall1 +rt.syscall1: + movq %rdi, %rax + movq %rsi, %rdi + syscall + ret + +.section .text.rt.syscall2 +.global rt.syscall2 +rt.syscall2: + movq %rdi, %rax + movq %rsi, %rdi + movq %rdx, %rsi + syscall + ret + +.section .text.rt.syscall3 +.global rt.syscall3 +rt.syscall3: + movq %rdi, %rax + movq %rsi, %rdi + movq %rdx, %rsi + movq %rcx, %rdx + syscall + ret + +.section .text.rt.syscall4 +.global rt.syscall4 +rt.syscall4: + # The value arriving in %r8 is handed to the kernel in %r10; every other + # argument moves to the register one position earlier. + movq %rdi, %rax + movq %r8, %r10 + movq %rsi, %rdi + movq %rdx, %rsi + movq %rcx, %rdx + syscall + ret + +.section .text.rt.syscall5 +.global rt.syscall5 +rt.syscall5: + # Order matters: %r8 is copied to %r10 before %r9 overwrites %r8, and + # each source register is read before it is overwritten. + movq %rdi, %rax + movq %r8, %r10 + movq %rsi, %rdi + movq %r9, %r8 + movq %rdx, %rsi + movq %rcx, %rdx + syscall + ret + +.section .text.rt.syscall6 +.global rt.syscall6 
+rt.syscall6: + # Same shuffle as rt.syscall5, plus the last argument, which is read from + # the stack at 8(%rsp) into %r9 after the old %r9 has been moved to %r8. + movq %rdi, %rax + movq %r8, %r10 + movq %rsi, %rdi + movq %r9, %r8 + movq %rdx, %rsi + movq 8(%rsp), %r9 + movq %rcx, %rdx + syscall + ret diff --git a/rt/+linux/syscallno+aarch64.ha b/rt/+linux/syscallno+aarch64.ha @@ -0,0 +1,293 @@ +export def SYS_io_setup: u64 = 0; +export def SYS_io_destroy: u64 = 1; +export def SYS_io_submit: u64 = 2; +export def SYS_io_cancel: u64 = 3; +export def SYS_io_getevents: u64 = 4; +export def SYS_setxattr: u64 = 5; +export def SYS_lsetxattr: u64 = 6; +export def SYS_fsetxattr: u64 = 7; +export def SYS_getxattr: u64 = 8; +export def SYS_lgetxattr: u64 = 9; +export def SYS_fgetxattr: u64 = 10; +export def SYS_listxattr: u64 = 11; +export def SYS_llistxattr: u64 = 12; +export def SYS_flistxattr: u64 = 13; +export def SYS_removexattr: u64 = 14; +export def SYS_lremovexattr: u64 = 15; +export def SYS_fremovexattr: u64 = 16; +export def SYS_getcwd: u64 = 17; +export def SYS_lookup_dcookie: u64 = 18; +export def SYS_eventfd2: u64 = 19; +export def SYS_epoll_create1: u64 = 20; +export def SYS_epoll_ctl: u64 = 21; +export def SYS_epoll_pwait: u64 = 22; +export def SYS_dup: u64 = 23; +export def SYS_dup3: u64 = 24; +export def SYS_fcntl: u64 = 25; +export def SYS_inotify_init1: u64 = 26; +export def SYS_inotify_add_watch: u64 = 27; +export def SYS_inotify_rm_watch: u64 = 28; +export def SYS_ioctl: u64 = 29; +export def SYS_ioprio_set: u64 = 30; +export def SYS_ioprio_get: u64 = 31; +export def SYS_flock: u64 = 32; +export def SYS_mknodat: u64 = 33; +export def SYS_mkdirat: u64 = 34; +export def SYS_unlinkat: u64 = 35; +export def SYS_symlinkat: u64 = 36; +export def SYS_linkat: u64 = 37; +export def SYS_renameat: u64 = 38; +export def SYS_umount2: u64 = 39; +export def SYS_mount: u64 = 40; +export def SYS_pivot_root: u64 = 41; +export def SYS_nfsservctl: u64 = 42; +export def SYS_statfs: u64 = 43; +export def SYS_fstatfs: u64 = 44; +export def SYS_truncate: u64 = 45; +export def SYS_ftruncate: u64 = 46; +export def 
SYS_fallocate: u64 = 47; +export def SYS_faccessat: u64 = 48; +export def SYS_chdir: u64 = 49; +export def SYS_fchdir: u64 = 50; +export def SYS_chroot: u64 = 51; +export def SYS_fchmod: u64 = 52; +export def SYS_fchmodat: u64 = 53; +export def SYS_fchownat: u64 = 54; +export def SYS_fchown: u64 = 55; +export def SYS_openat: u64 = 56; +export def SYS_close: u64 = 57; +export def SYS_vhangup: u64 = 58; +export def SYS_pipe2: u64 = 59; +export def SYS_quotactl: u64 = 60; +export def SYS_getdents64: u64 = 61; +export def SYS_lseek: u64 = 62; +export def SYS_read: u64 = 63; +export def SYS_write: u64 = 64; +export def SYS_readv: u64 = 65; +export def SYS_writev: u64 = 66; +export def SYS_pread64: u64 = 67; +export def SYS_pwrite64: u64 = 68; +export def SYS_preadv: u64 = 69; +export def SYS_pwritev: u64 = 70; +export def SYS_sendfile: u64 = 71; +export def SYS_pselect6: u64 = 72; +export def SYS_ppoll: u64 = 73; +export def SYS_signalfd4: u64 = 74; +export def SYS_vmsplice: u64 = 75; +export def SYS_splice: u64 = 76; +export def SYS_tee: u64 = 77; +export def SYS_readlinkat: u64 = 78; +export def SYS_newfstatat: u64 = 79; +export def SYS_fstat: u64 = 80; +export def SYS_sync: u64 = 81; +export def SYS_fsync: u64 = 82; +export def SYS_fdatasync: u64 = 83; +export def SYS_sync_file_range: u64 = 84; +export def SYS_timerfd_create: u64 = 85; +export def SYS_timerfd_settime: u64 = 86; +export def SYS_timerfd_gettime: u64 = 87; +export def SYS_utimensat: u64 = 88; +export def SYS_acct: u64 = 89; +export def SYS_capget: u64 = 90; +export def SYS_capset: u64 = 91; +export def SYS_personality: u64 = 92; +export def SYS_exit: u64 = 93; +export def SYS_exit_group: u64 = 94; +export def SYS_waitid: u64 = 95; +export def SYS_set_tid_address: u64 = 96; +export def SYS_unshare: u64 = 97; +export def SYS_futex: u64 = 98; +export def SYS_set_robust_list: u64 = 99; +export def SYS_get_robust_list: u64 = 100; +export def SYS_nanosleep: u64 = 101; +export def SYS_getitimer: u64 = 102; 
+export def SYS_setitimer: u64 = 103;
+export def SYS_kexec_load: u64 = 104;
+export def SYS_init_module: u64 = 105;
+export def SYS_delete_module: u64 = 106;
+export def SYS_timer_create: u64 = 107;
+export def SYS_timer_gettime: u64 = 108;
+export def SYS_timer_getoverrun: u64 = 109;
+export def SYS_timer_settime: u64 = 110;
+export def SYS_timer_delete: u64 = 111;
+export def SYS_clock_settime: u64 = 112;
+export def SYS_clock_gettime: u64 = 113;
+export def SYS_clock_getres: u64 = 114;
+export def SYS_clock_nanosleep: u64 = 115;
+export def SYS_syslog: u64 = 116;
+export def SYS_ptrace: u64 = 117;
+export def SYS_sched_setparam: u64 = 118;
+export def SYS_sched_setscheduler: u64 = 119;
+export def SYS_sched_getscheduler: u64 = 120;
+export def SYS_sched_getparam: u64 = 121;
+export def SYS_sched_setaffinity: u64 = 122;
+export def SYS_sched_getaffinity: u64 = 123;
+export def SYS_sched_yield: u64 = 124;
+export def SYS_sched_get_priority_max: u64 = 125;
+export def SYS_sched_get_priority_min: u64 = 126;
+export def SYS_sched_rr_get_interval: u64 = 127;
+export def SYS_restart_syscall: u64 = 128;
+export def SYS_kill: u64 = 129;
+export def SYS_tkill: u64 = 130;
+export def SYS_tgkill: u64 = 131;
+export def SYS_sigaltstack: u64 = 132;
+export def SYS_rt_sigsuspend: u64 = 133;
+export def SYS_rt_sigaction: u64 = 134;
+export def SYS_rt_sigprocmask: u64 = 135;
+export def SYS_rt_sigpending: u64 = 136;
+export def SYS_rt_sigtimedwait: u64 = 137;
+export def SYS_rt_sigqueueinfo: u64 = 138;
+export def SYS_rt_sigreturn: u64 = 139;
+export def SYS_setpriority: u64 = 140;
+export def SYS_getpriority: u64 = 141;
+export def SYS_reboot: u64 = 142;
+export def SYS_setregid: u64 = 143;
+export def SYS_setgid: u64 = 144;
+export def SYS_setreuid: u64 = 145;
+export def SYS_setuid: u64 = 146;
+export def SYS_setresuid: u64 = 147;
+export def SYS_getresuid: u64 = 148;
+export def SYS_setresgid: u64 = 149;
+export def SYS_getresgid: u64 = 150;
+export def SYS_setfsuid: u64 = 151;
+export def SYS_setfsgid: u64 = 152;
+export def SYS_times: u64 = 153;
+export def SYS_setpgid: u64 = 154;
+export def SYS_getpgid: u64 = 155;
+export def SYS_getsid: u64 = 156;
+export def SYS_setsid: u64 = 157;
+export def SYS_getgroups: u64 = 158;
+export def SYS_setgroups: u64 = 159;
+export def SYS_uname: u64 = 160;
+export def SYS_sethostname: u64 = 161;
+export def SYS_setdomainname: u64 = 162;
+export def SYS_getrlimit: u64 = 163;
+export def SYS_setrlimit: u64 = 164;
+export def SYS_getrusage: u64 = 165;
+export def SYS_umask: u64 = 166;
+export def SYS_prctl: u64 = 167;
+export def SYS_getcpu: u64 = 168;
+export def SYS_gettimeofday: u64 = 169;
+export def SYS_settimeofday: u64 = 170;
+export def SYS_adjtimex: u64 = 171;
+export def SYS_getpid: u64 = 172;
+export def SYS_getppid: u64 = 173;
+export def SYS_getuid: u64 = 174;
+export def SYS_geteuid: u64 = 175;
+export def SYS_getgid: u64 = 176;
+export def SYS_getegid: u64 = 177;
+export def SYS_gettid: u64 = 178;
+export def SYS_sysinfo: u64 = 179;
+export def SYS_mq_open: u64 = 180;
+export def SYS_mq_unlink: u64 = 181;
+export def SYS_mq_timedsend: u64 = 182;
+export def SYS_mq_timedreceive: u64 = 183;
+export def SYS_mq_notify: u64 = 184;
+export def SYS_mq_getsetattr: u64 = 185;
+export def SYS_msgget: u64 = 186;
+export def SYS_msgctl: u64 = 187;
+export def SYS_msgrcv: u64 = 188;
+export def SYS_msgsnd: u64 = 189;
+export def SYS_semget: u64 = 190;
+export def SYS_semctl: u64 = 191;
+export def SYS_semtimedop: u64 = 192;
+export def SYS_semop: u64 = 193;
+export def SYS_shmget: u64 = 194;
+export def SYS_shmctl: u64 = 195;
+export def SYS_shmat: u64 = 196;
+export def SYS_shmdt: u64 = 197;
+export def SYS_socket: u64 = 198;
+export def SYS_socketpair: u64 = 199;
+export def SYS_bind: u64 = 200;
+export def SYS_listen: u64 = 201;
+export def SYS_accept: u64 = 202;
+export def SYS_connect: u64 = 203;
+export def SYS_getsockname: u64 = 204;
+export def SYS_getpeername: u64 = 205;
+export def SYS_sendto: u64 = 206;
+export def SYS_recvfrom: u64 = 207;
+export def SYS_setsockopt: u64 = 208;
+export def SYS_getsockopt: u64 = 209;
+export def SYS_shutdown: u64 = 210;
+export def SYS_sendmsg: u64 = 211;
+export def SYS_recvmsg: u64 = 212;
+export def SYS_readahead: u64 = 213;
+export def SYS_brk: u64 = 214;
+export def SYS_munmap: u64 = 215;
+export def SYS_mremap: u64 = 216;
+export def SYS_add_key: u64 = 217;
+export def SYS_request_key: u64 = 218;
+export def SYS_keyctl: u64 = 219;
+export def SYS_clone: u64 = 220;
+export def SYS_execve: u64 = 221;
+export def SYS_mmap: u64 = 222;
+export def SYS_fadvise64: u64 = 223;
+export def SYS_swapon: u64 = 224;
+export def SYS_swapoff: u64 = 225;
+export def SYS_mprotect: u64 = 226;
+export def SYS_msync: u64 = 227;
+export def SYS_mlock: u64 = 228;
+export def SYS_munlock: u64 = 229;
+export def SYS_mlockall: u64 = 230;
+export def SYS_munlockall: u64 = 231;
+export def SYS_mincore: u64 = 232;
+export def SYS_madvise: u64 = 233;
+export def SYS_remap_file_pages: u64 = 234;
+export def SYS_mbind: u64 = 235;
+export def SYS_get_mempolicy: u64 = 236;
+export def SYS_set_mempolicy: u64 = 237;
+export def SYS_migrate_pages: u64 = 238;
+export def SYS_move_pages: u64 = 239;
+export def SYS_rt_tgsigqueueinfo: u64 = 240;
+export def SYS_perf_event_open: u64 = 241;
+export def SYS_accept4: u64 = 242;
+export def SYS_recvmmsg: u64 = 243;
+export def SYS_wait4: u64 = 260; // numbering resumes at 260; 244-259 are not defined in this table
+export def SYS_prlimit64: u64 = 261;
+export def SYS_fanotify_init: u64 = 262;
+export def SYS_fanotify_mark: u64 = 263;
+export def SYS_name_to_handle_at: u64 = 264;
+export def SYS_open_by_handle_at: u64 = 265;
+export def SYS_clock_adjtime: u64 = 266;
+export def SYS_syncfs: u64 = 267;
+export def SYS_setns: u64 = 268;
+export def SYS_sendmmsg: u64 = 269;
+export def SYS_process_vm_readv: u64 = 270;
+export def SYS_process_vm_writev: u64 = 271;
+export def SYS_kcmp: u64 = 272;
+export def SYS_finit_module: u64 = 273;
+export def SYS_sched_setattr: u64 = 274;
+export def SYS_sched_getattr: u64 = 275;
+export def SYS_renameat2: u64 = 276;
+export def SYS_seccomp: u64 = 277;
+export def SYS_getrandom: u64 = 278;
+export def SYS_memfd_create: u64 = 279;
+export def SYS_bpf: u64 = 280;
+export def SYS_execveat: u64 = 281;
+export def SYS_userfaultfd: u64 = 282;
+export def SYS_membarrier: u64 = 283;
+export def SYS_mlock2: u64 = 284;
+export def SYS_copy_file_range: u64 = 285;
+export def SYS_preadv2: u64 = 286;
+export def SYS_pwritev2: u64 = 287;
+export def SYS_pkey_mprotect: u64 = 288;
+export def SYS_pkey_alloc: u64 = 289;
+export def SYS_pkey_free: u64 = 290;
+export def SYS_statx: u64 = 291;
+export def SYS_io_pgetevents: u64 = 292;
+export def SYS_rseq: u64 = 293;
+export def SYS_kexec_file_load: u64 = 294;
+export def SYS_pidfd_send_signal: u64 = 424; // numbering jumps from 294 to 424
+export def SYS_io_uring_setup: u64 = 425;
+export def SYS_io_uring_enter: u64 = 426;
+export def SYS_io_uring_register: u64 = 427;
+export def SYS_open_tree: u64 = 428;
+export def SYS_move_mount: u64 = 429;
+export def SYS_fsopen: u64 = 430;
+export def SYS_fsconfig: u64 = 431;
+export def SYS_fsmount: u64 = 432;
+export def SYS_fspick: u64 = 433;
+export def SYS_pidfd_open: u64 = 434;
+export def SYS_clone3: u64 = 435;
+export def SYS_openat2: u64 = 437; // 436 is not defined in this table
+export def SYS_faccessat2: u64 = 439; // 438 is not defined in this table
diff --git a/rt/+linux/syscallno+x86_64.ha b/rt/+linux/syscallno+x86_64.ha
@@ -0,0 +1,347 @@
+export def SYS_read: u64 = 0; // first entry of the x86_64 syscall number table
+export def SYS_write: u64 = 1;
+export def SYS_open: u64 = 2;
+export def SYS_close: u64 = 3;
+export def SYS_stat: u64 = 4;
+export def SYS_fstat: u64 = 5;
+export def SYS_lstat: u64 = 6;
+export def SYS_poll: u64 = 7;
+export def SYS_lseek: u64 = 8;
+export def SYS_mmap: u64 = 9;
+export def SYS_mprotect: u64 = 10;
+export def SYS_munmap: u64 = 11;
+export def SYS_brk: u64 = 12;
+export def SYS_rt_sigaction: u64 = 13;
+export def SYS_rt_sigprocmask: u64 = 14;
+export def SYS_rt_sigreturn: u64 = 15;
+export def SYS_ioctl: u64 = 16;
+export def SYS_pread64: u64 = 17;
+export def SYS_pwrite64: u64 = 18;
+export def SYS_readv: u64 = 19;
+export def SYS_writev: u64 = 20;
+export def SYS_access: u64 = 21;
+export def SYS_pipe: u64 = 22;
+export def SYS_select: u64 = 23;
+export def SYS_sched_yield: u64 = 24;
+export def SYS_mremap: u64 = 25;
+export def SYS_msync: u64 = 26;
+export def SYS_mincore: u64 = 27;
+export def SYS_madvise: u64 = 28;
+export def SYS_shmget: u64 = 29;
+export def SYS_shmat: u64 = 30;
+export def SYS_shmctl: u64 = 31;
+export def SYS_dup: u64 = 32;
+export def SYS_dup2: u64 = 33;
+export def SYS_pause: u64 = 34;
+export def SYS_nanosleep: u64 = 35;
+export def SYS_getitimer: u64 = 36;
+export def SYS_alarm: u64 = 37;
+export def SYS_setitimer: u64 = 38; // the same name is 103 in the table above: numbers are per-table
+export def SYS_getpid: u64 = 39;
+export def SYS_sendfile: u64 = 40;
+export def SYS_socket: u64 = 41;
+export def SYS_connect: u64 = 42;
+export def SYS_accept: u64 = 43;
+export def SYS_sendto: u64 = 44;
+export def SYS_recvfrom: u64 = 45;
+export def SYS_sendmsg: u64 = 46;
+export def SYS_recvmsg: u64 = 47;
+export def SYS_shutdown: u64 = 48;
+export def SYS_bind: u64 = 49;
+export def SYS_listen: u64 = 50;
+export def SYS_getsockname: u64 = 51;
+export def SYS_getpeername: u64 = 52;
+export def SYS_socketpair: u64 = 53;
+export def SYS_setsockopt: u64 = 54;
+export def SYS_getsockopt: u64 = 55;
+export def SYS_clone: u64 = 56;
+export def SYS_fork: u64 = 57;
+export def SYS_vfork: u64 = 58;
+export def SYS_execve: u64 = 59;
+export def SYS_exit: u64 = 60;
+export def SYS_wait4: u64 = 61;
+export def SYS_kill: u64 = 62;
+export def SYS_uname: u64 = 63;
+export def SYS_semget: u64 = 64;
+export def SYS_semop: u64 = 65;
+export def SYS_semctl: u64 = 66;
+export def SYS_shmdt: u64 = 67;
+export def SYS_msgget: u64 = 68;
+export def SYS_msgsnd: u64 = 69;
+export def SYS_msgrcv: u64 = 70;
+export def SYS_msgctl: u64 = 71;
+export def SYS_fcntl: u64 = 72;
+export def SYS_flock: u64 = 73;
+export def SYS_fsync: u64 = 74;
+export def SYS_fdatasync: u64 = 75;
+export def SYS_truncate: u64 = 76;
+export def SYS_ftruncate: u64 = 77;
+export def SYS_getdents: u64 = 78;
+export def SYS_getcwd: u64 = 79;
+export def SYS_chdir: u64 = 80;
+export def SYS_fchdir: u64 = 81;
+export def SYS_rename: u64 = 82;
+export def SYS_mkdir: u64 = 83;
+export def SYS_rmdir: u64 = 84;
+export def SYS_creat: u64 = 85;
+export def SYS_link: u64 = 86;
+export def SYS_unlink: u64 = 87;
+export def SYS_symlink: u64 = 88;
+export def SYS_readlink: u64 = 89;
+export def SYS_chmod: u64 = 90;
+export def SYS_fchmod: u64 = 91;
+export def SYS_chown: u64 = 92;
+export def SYS_fchown: u64 = 93;
+export def SYS_lchown: u64 = 94;
+export def SYS_umask: u64 = 95;
+export def SYS_gettimeofday: u64 = 96;
+export def SYS_getrlimit: u64 = 97;
+export def SYS_getrusage: u64 = 98;
+export def SYS_sysinfo: u64 = 99;
+export def SYS_times: u64 = 100;
+export def SYS_ptrace: u64 = 101;
+export def SYS_getuid: u64 = 102;
+export def SYS_syslog: u64 = 103;
+export def SYS_getgid: u64 = 104;
+export def SYS_setuid: u64 = 105;
+export def SYS_setgid: u64 = 106;
+export def SYS_geteuid: u64 = 107;
+export def SYS_getegid: u64 = 108;
+export def SYS_setpgid: u64 = 109;
+export def SYS_getppid: u64 = 110;
+export def SYS_getpgrp: u64 = 111;
+export def SYS_setsid: u64 = 112;
+export def SYS_setreuid: u64 = 113;
+export def SYS_setregid: u64 = 114;
+export def SYS_getgroups: u64 = 115;
+export def SYS_setgroups: u64 = 116;
+export def SYS_setresuid: u64 = 117;
+export def SYS_getresuid: u64 = 118;
+export def SYS_setresgid: u64 = 119;
+export def SYS_getresgid: u64 = 120;
+export def SYS_getpgid: u64 = 121;
+export def SYS_setfsuid: u64 = 122;
+export def SYS_setfsgid: u64 = 123;
+export def SYS_getsid: u64 = 124;
+export def SYS_capget: u64 = 125;
+export def SYS_capset: u64 = 126;
+export def SYS_rt_sigpending: u64 = 127;
+export def SYS_rt_sigtimedwait: u64 = 128;
+export def SYS_rt_sigqueueinfo: u64 = 129;
+export def SYS_rt_sigsuspend: u64 = 130;
+export def SYS_sigaltstack: u64 = 131;
+export def SYS_utime: u64 = 132;
+export def SYS_mknod: u64 = 133;
+export def SYS_uselib: u64 = 134;
+export def SYS_personality: u64 = 135;
+export def SYS_ustat: u64 = 136;
+export def SYS_statfs: u64 = 137;
+export def SYS_fstatfs: u64 = 138;
+export def SYS_sysfs: u64 = 139;
+export def SYS_getpriority: u64 = 140;
+export def SYS_setpriority: u64 = 141;
+export def SYS_sched_setparam: u64 = 142;
+export def SYS_sched_getparam: u64 = 143;
+export def SYS_sched_setscheduler: u64 = 144;
+export def SYS_sched_getscheduler: u64 = 145;
+export def SYS_sched_get_priority_max: u64 = 146;
+export def SYS_sched_get_priority_min: u64 = 147;
+export def SYS_sched_rr_get_interval: u64 = 148;
+export def SYS_mlock: u64 = 149;
+export def SYS_munlock: u64 = 150;
+export def SYS_mlockall: u64 = 151;
+export def SYS_munlockall: u64 = 152;
+export def SYS_vhangup: u64 = 153;
+export def SYS_modify_ldt: u64 = 154;
+export def SYS_pivot_root: u64 = 155;
+export def SYS__sysctl: u64 = 156;
+export def SYS_prctl: u64 = 157;
+export def SYS_arch_prctl: u64 = 158;
+export def SYS_adjtimex: u64 = 159;
+export def SYS_setrlimit: u64 = 160;
+export def SYS_chroot: u64 = 161;
+export def SYS_sync: u64 = 162;
+export def SYS_acct: u64 = 163;
+export def SYS_settimeofday: u64 = 164;
+export def SYS_mount: u64 = 165;
+export def SYS_umount2: u64 = 166;
+export def SYS_swapon: u64 = 167;
+export def SYS_swapoff: u64 = 168;
+export def SYS_reboot: u64 = 169;
+export def SYS_sethostname: u64 = 170;
+export def SYS_setdomainname: u64 = 171;
+export def SYS_iopl: u64 = 172;
+export def SYS_ioperm: u64 = 173;
+export def SYS_create_module: u64 = 174;
+export def SYS_init_module: u64 = 175;
+export def SYS_delete_module: u64 = 176;
+export def SYS_get_kernel_syms: u64 = 177;
+export def SYS_query_module: u64 = 178;
+export def SYS_quotactl: u64 = 179;
+export def SYS_nfsservctl: u64 = 180;
+export def SYS_getpmsg: u64 = 181;
+export def SYS_putpmsg: u64 = 182;
+export def SYS_afs_syscall: u64 = 183;
+export def SYS_tuxcall: u64 = 184;
+export def SYS_security: u64 = 185;
+export def SYS_gettid: u64 = 186;
+export def SYS_readahead: u64 = 187;
+export def SYS_setxattr: u64 = 188;
+export def SYS_lsetxattr: u64 = 189;
+export def SYS_fsetxattr: u64 = 190;
+export def SYS_getxattr: u64 = 191;
+export def SYS_lgetxattr: u64 = 192;
+export def SYS_fgetxattr: u64 = 193;
+export def SYS_listxattr: u64 = 194;
+export def SYS_llistxattr: u64 = 195;
+export def SYS_flistxattr: u64 = 196;
+export def SYS_removexattr: u64 = 197;
+export def SYS_lremovexattr: u64 = 198;
+export def SYS_fremovexattr: u64 = 199;
+export def SYS_tkill: u64 = 200;
+export def SYS_time: u64 = 201;
+export def SYS_futex: u64 = 202;
+export def SYS_sched_setaffinity: u64 = 203;
+export def SYS_sched_getaffinity: u64 = 204;
+export def SYS_set_thread_area: u64 = 205;
+export def SYS_io_setup: u64 = 206;
+export def SYS_io_destroy: u64 = 207;
+export def SYS_io_getevents: u64 = 208;
+export def SYS_io_submit: u64 = 209;
+export def SYS_io_cancel: u64 = 210;
+export def SYS_get_thread_area: u64 = 211;
+export def SYS_lookup_dcookie: u64 = 212;
+export def SYS_epoll_create: u64 = 213;
+export def SYS_epoll_ctl_old: u64 = 214;
+export def SYS_epoll_wait_old: u64 = 215;
+export def SYS_remap_file_pages: u64 = 216;
+export def SYS_getdents64: u64 = 217;
+export def SYS_set_tid_address: u64 = 218;
+export def SYS_restart_syscall: u64 = 219;
+export def SYS_semtimedop: u64 = 220;
+export def SYS_fadvise64: u64 = 221;
+export def SYS_timer_create: u64 = 222;
+export def SYS_timer_settime: u64 = 223;
+export def SYS_timer_gettime: u64 = 224;
+export def SYS_timer_getoverrun: u64 = 225;
+export def SYS_timer_delete: u64 = 226;
+export def SYS_clock_settime: u64 = 227;
+export def SYS_clock_gettime: u64 = 228;
+export def SYS_clock_getres: u64 = 229;
+export def SYS_clock_nanosleep: u64 = 230;
+export def SYS_exit_group: u64 = 231;
+export def SYS_epoll_wait: u64 = 232;
+export def SYS_epoll_ctl: u64 = 233;
+export def SYS_tgkill: u64 = 234;
+export def SYS_utimes: u64 = 235;
+export def SYS_vserver: u64 = 236;
+export def SYS_mbind: u64 = 237;
+export def SYS_set_mempolicy: u64 = 238;
+export def SYS_get_mempolicy: u64 = 239;
+export def SYS_mq_open: u64 = 240;
+export def SYS_mq_unlink: u64 = 241;
+export def SYS_mq_timedsend: u64 = 242;
+export def SYS_mq_timedreceive: u64 = 243;
+export def SYS_mq_notify: u64 = 244;
+export def SYS_mq_getsetattr: u64 = 245;
+export def SYS_kexec_load: u64 = 246;
+export def SYS_waitid: u64 = 247;
+export def SYS_add_key: u64 = 248;
+export def SYS_request_key: u64 = 249;
+export def SYS_keyctl: u64 = 250;
+export def SYS_ioprio_set: u64 = 251;
+export def SYS_ioprio_get: u64 = 252;
+export def SYS_inotify_init: u64 = 253;
+export def SYS_inotify_add_watch: u64 = 254;
+export def SYS_inotify_rm_watch: u64 = 255;
+export def SYS_migrate_pages: u64 = 256;
+export def SYS_openat: u64 = 257;
+export def SYS_mkdirat: u64 = 258;
+export def SYS_mknodat: u64 = 259;
+export def SYS_fchownat: u64 = 260;
+export def SYS_futimesat: u64 = 261;
+export def SYS_newfstatat: u64 = 262;
+export def SYS_unlinkat: u64 = 263;
+export def SYS_renameat: u64 = 264;
+export def SYS_linkat: u64 = 265;
+export def SYS_symlinkat: u64 = 266;
+export def SYS_readlinkat: u64 = 267;
+export def SYS_fchmodat: u64 = 268;
+export def SYS_faccessat: u64 = 269;
+export def SYS_pselect6: u64 = 270;
+export def SYS_ppoll: u64 = 271;
+export def SYS_unshare: u64 = 272;
+export def SYS_set_robust_list: u64 = 273;
+export def SYS_get_robust_list: u64 = 274;
+export def SYS_splice: u64 = 275;
+export def SYS_tee: u64 = 276;
+export def SYS_sync_file_range: u64 = 277;
+export def SYS_vmsplice: u64 = 278;
+export def SYS_move_pages: u64 = 279;
+export def SYS_utimensat: u64 = 280;
+export def SYS_epoll_pwait: u64 = 281;
+export def SYS_signalfd: u64 = 282;
+export def SYS_timerfd_create: u64 = 283;
+export def SYS_eventfd: u64 = 284;
+export def SYS_fallocate: u64 = 285;
+export def SYS_timerfd_settime: u64 = 286;
+export def SYS_timerfd_gettime: u64 = 287;
+export def SYS_accept4: u64 = 288;
+export def SYS_signalfd4: u64 = 289;
+export def SYS_eventfd2: u64 = 290;
+export def SYS_epoll_create1: u64 = 291;
+export def SYS_dup3: u64 = 292;
+export def SYS_pipe2: u64 = 293;
+export def SYS_inotify_init1: u64 = 294;
+export def SYS_preadv: u64 = 295;
+export def SYS_pwritev: u64 = 296;
+export def SYS_rt_tgsigqueueinfo: u64 = 297;
+export def SYS_perf_event_open: u64 = 298;
+export def SYS_recvmmsg: u64 = 299;
+export def SYS_fanotify_init: u64 = 300;
+export def SYS_fanotify_mark: u64 = 301;
+export def SYS_prlimit64: u64 = 302;
+export def SYS_name_to_handle_at: u64 = 303;
+export def SYS_open_by_handle_at: u64 = 304;
+export def SYS_clock_adjtime: u64 = 305;
+export def SYS_syncfs: u64 = 306;
+export def SYS_sendmmsg: u64 = 307;
+export def SYS_setns: u64 = 308;
+export def SYS_getcpu: u64 = 309;
+export def SYS_process_vm_readv: u64 = 310;
+export def SYS_process_vm_writev: u64 = 311;
+export def SYS_kcmp: u64 = 312;
+export def SYS_finit_module: u64 = 313;
+export def SYS_sched_setattr: u64 = 314;
+export def SYS_sched_getattr: u64 = 315;
+export def SYS_renameat2: u64 = 316;
+export def SYS_seccomp: u64 = 317;
+export def SYS_getrandom: u64 = 318;
+export def SYS_memfd_create: u64 = 319;
+export def SYS_kexec_file_load: u64 = 320;
+export def SYS_bpf: u64 = 321;
+export def SYS_execveat: u64 = 322;
+export def SYS_userfaultfd: u64 = 323;
+export def SYS_membarrier: u64 = 324;
+export def SYS_mlock2: u64 = 325;
+export def SYS_copy_file_range: u64 = 326;
+export def SYS_preadv2: u64 = 327;
+export def SYS_pwritev2: u64 = 328;
+export def SYS_pkey_mprotect: u64 = 329;
+export def SYS_pkey_alloc: u64 = 330;
+export def SYS_pkey_free: u64 = 331;
+export def SYS_statx: u64 = 332;
+export def SYS_io_pgetevents: u64 = 333;
+export def SYS_rseq: u64 = 334;
+export def SYS_pidfd_send_signal:
u64 = 424;
+export def SYS_io_uring_setup: u64 = 425;
+export def SYS_io_uring_enter: u64 = 426;
+export def SYS_io_uring_register: u64 = 427;
+export def SYS_open_tree: u64 = 428;
+export def SYS_move_mount: u64 = 429;
+export def SYS_fsopen: u64 = 430;
+export def SYS_fsconfig: u64 = 431;
+export def SYS_fsmount: u64 = 432;
+export def SYS_fspick: u64 = 433;
+export def SYS_pidfd_open: u64 = 434;
+export def SYS_clone3: u64 = 435;
+export def SYS_openat2: u64 = 437;
+export def SYS_faccessat2: u64 = 439;
diff --git a/rt/+linux/syscalls.ha b/rt/+linux/syscalls.ha
@@ -0,0 +1,363 @@
+// Raw syscall entry points, declared here without a body. The first argument
+// is the syscall number (one of the SYS_* constants) and the remaining
+// arguments are passed to that syscall.
+fn syscall0(u64) u64;
+fn syscall1(u64, u64) u64;
+fn syscall2(u64, u64, u64) u64;
+fn syscall3(u64, u64, u64, u64) u64;
+fn syscall4(u64, u64, u64, u64, u64) u64;
+fn syscall5(u64, u64, u64, u64, u64, u64) u64;
+fn syscall6(u64, u64, u64, u64, u64, u64, u64) u64;
+
+def PATH_MAX: size = 4096z;
+
+// A path accepted by the wrappers below: a string, a byte slice, or an
+// already prepared *const char.
+export type path = (str | []u8 | *const char);
+
+// Shared scratch buffer which kpath copies paths into.
+let pathbuf: [PATH_MAX + 1]u8 = [0...];
+
+// NUL terminates a path and stores it in a static buffer of PATH_MAX+1 bytes
+// in length. A *const char is returned as-is without copying. Because the
+// buffer is shared, the returned pointer is only valid until the next call
+// which copies into it. Returns ENAMETOOLONG if len(path) + 1 >= len(pathbuf).
+fn kpath(path: path) (*const char | errno) = {
+	let path = match (path) {
+		c: *const char => return c,
+		s: str => {
+			// View the str through a (buf, length, capacity)
+			// struct to get at its bytes.
+			let ptr = &s: *struct {
+				buf: *[*]u8,
+				length: size,
+				capacity: size,
+			};
+			ptr.buf[..ptr.length];
+		},
+		b: []u8 => b,
+	};
+	if (len(path) + 1 >= len(pathbuf)) {
+		return ENAMETOOLONG;
+	};
+	memcpy(&pathbuf, path: *[*]u8, len(path));
+	pathbuf[len(path)] = 0;
+	return &pathbuf: *const char;
+};
+
+// Invokes the read syscall with fd, buf and count, returning its result as a
+// size or the errno produced by wrap_return.
+export fn read(fd: int, buf: *void, count: size) (size | errno) = {
+	return wrap_return(syscall3(SYS_read,
+		fd: u64, buf: uintptr: u64, count: u64))?: size;
+};
+
+// Invokes the write syscall with fd, buf and count, returning its result as a
+// size or the errno produced by wrap_return.
+export fn write(fd: int, buf: *const void, count: size) (size | errno) = {
+	return wrap_return(syscall3(SYS_write,
+		fd: u64, buf: uintptr: u64, count: u64))?: size;
+};
+
+// Opens a path. Implemented with the openat syscall, passing AT_FDCWD as the
+// directory file descriptor.
+export fn open(path: path, flags: int, mode: uint) (int | errno) = {
+	let path = kpath(path)?;
+	return wrap_return(syscall4(SYS_openat, AT_FDCWD: u64,
+		path: uintptr: u64, flags: u64, mode: u64))?: int;
+};
+
+// Invokes the openat syscall. The path must already be a *const char (see
+// kpath); no conversion is done here.
+fn openat(
+	dirfd: int,
+	path: *const char,
+	flags: int,
+	mode: uint,
+) (int | errno) = wrap_return(syscall4(SYS_openat, dirfd: u64,
+	path: uintptr: u64, flags: u64, mode: u64))?: int;
+
+// Opens a path relative to dirfd as described by how.
+//
+// NOTE(review): this currently forwards only how.flags and how.mode to
+// openat; how_sz is unused and SYS_openat2 is not invoked. Presumably a
+// deliberate fallback - confirm before relying on other open_how fields.
+export fn openat2(
+	dirfd: int,
+	path: path,
+	how: *open_how,
+	how_sz: size,
+) (int | errno) = {
+	let path = kpath(path)?;
+	return openat(dirfd, path, how.flags: int, how.mode: uint);
+};
+
+// Removes a path. Implemented with the unlinkat syscall, passing AT_FDCWD as
+// the directory file descriptor and no flags.
+export fn unlink(path: path) (void | errno) = {
+	let path = kpath(path)?;
+	// Propagate failures to the caller instead of discarding them.
+	wrap_return(syscall3(SYS_unlinkat,
+		AT_FDCWD: u64, path: uintptr: u64, 0u64))?;
+	return;
+};
+
+// Invokes the unlinkat syscall with dirfd, path and flags.
+export fn unlinkat(dirfd: int, path: path, flags: int) (void | errno) = {
+	let path = kpath(path)?;
+	// Propagate failures to the caller instead of discarding them.
+	wrap_return(syscall3(SYS_unlinkat,
+		dirfd: u64, path: uintptr: u64, flags: u64))?;
+	return;
+};
+
+// Invokes the dup syscall and returns its result as an int.
+export fn dup(fd: int) (int | errno) = {
+	return wrap_return(syscall1(SYS_dup, fd: u64))?: int;
+};
+
+// Invokes the close syscall on fd.
+export fn close(fd: int) (void | errno) = {
+	wrap_return(syscall1(SYS_close, fd: u64))?;
+	return;
+};
+
+// Invokes the chdir syscall with the given path.
+export fn chdir(path: path) (void | errno) = {
+	let path = kpath(path)?;
+	wrap_return(syscall1(SYS_chdir, path: uintptr: u64))?;
+	return;
+};
+
+// Invokes the fchdir syscall on fd.
+export fn fchdir(fd: int) (void | errno) = {
+	wrap_return(syscall1(SYS_fchdir, fd: u64))?;
+	return;
+};
+
+// Invokes the chroot syscall with the given path.
+export fn chroot(path: path) (void | errno) = {
+	let path = kpath(path)?;
+	wrap_return(syscall1(SYS_chroot, path: uintptr: u64))?;
+	return;
+};
+
+// Creates a directory. Implemented with the mkdirat syscall and AT_FDCWD, in
+// the same way open and unlink use their *at variants, so that it does not
+// depend on a separate SYS_mkdir number being defined for the target.
+export fn mkdir(path: path, mode: uint) (void | errno) = {
+	let path = kpath(path)?;
+	wrap_return(syscall3(SYS_mkdirat,
+		AT_FDCWD: u64, path: uintptr: u64, mode: u64))?;
+	return;
+};
+
+// Invokes the mkdirat syscall with dirfd, path and mode.
+export fn mkdirat(dirfd: int, path: path, mode: uint) (void | errno) = {
+	let path = kpath(path)?;
+	wrap_return(syscall3(SYS_mkdirat,
+		dirfd: u64, path: uintptr: u64, mode: u64))?;
+	return;
+};
+
+// Invokes the execveat syscall. Only ever returns an errno: a non-error
+// result from the syscall is treated as unreachable and aborts.
+export fn execveat(dirfd: int, path: path, argv: *[*]nullable *const char,
+		envp: *[*]nullable *const char, flags: int) errno = {
+	let path = kpath(path)?;
+	return match (wrap_return(syscall5(SYS_execveat, dirfd: u64,
+		path: uintptr: u64, argv: uintptr: u64,
+		envp: uintptr: u64, flags: u64))) {
+		err: errno => err,
+		u64 => abort("unreachable"),
+	};
+};
+
+// Returns the new PID to the parent, void to the child, or errno if something
+// goes wrong.
+export fn fork() (int | void | errno) = clone(null, SIGCHLD, null, null, 0);
+
+// Invokes the getpid syscall and returns its result as an int.
+export fn getpid() int = syscall0(SYS_getpid): int;
+
+// Invokes the wait4 syscall with pid, wstatus, options and rusage.
+export fn wait4(
+	pid: int,
+	wstatus: *int,
+	options: int,
+	rusage: *rusage,
+) (int | errno) = {
+	return wrap_return(syscall4(SYS_wait4,
+		pid: u64, wstatus: uintptr: u64,
+		options: u64, rusage: uintptr: u64))?: int;
+};
+
+// Invokes the sendfile syscall with out, in, offs and count; offs may be null.
+export fn sendfile(
+	out: int,
+	in: int,
+	offs: nullable *size,
+	count: size,
+) (size | errno) = wrap_return(syscall4(SYS_sendfile,
+	out: u64, in: u64, offs: uintptr: u64, count: u64))?: size;
+
+// Invokes the exit syscall with the given status. Declared @noreturn.
+export @noreturn fn exit(status: int) void = syscall1(SYS_exit, status: u64);
+
+// Invokes the kill syscall with pid and signal.
+export fn kill(pid: int, signal: int) (void | errno) = {
+	wrap_return(syscall2(SYS_kill, pid: u64, signal: u64))?;
+	return;
+};
+
+// Invokes the pipe2 syscall; pipefd points at the two-element array the
+// kernel is given.
+export fn pipe2(pipefd: *[2]int, flags: int) (void | errno) = {
+	wrap_return(syscall2(SYS_pipe2, pipefd: uintptr: u64, flags: u64))?;
+	return;
+};
+
+// Invokes the mmap syscall and returns the mapping as a *void. An EPERM
+// result for an anonymous, non-fixed mapping with a null addr is reported as
+// ENOMEM instead (see the comment in the body).
+export fn mmap(
+	addr: nullable *void,
+	length: size,
+	prot: uint,
+	flags: uint,
+	fd: int,
+	offs: size
+) (*void | errno) = {
+	let r = syscall6(SYS_mmap, addr: uintptr: u64,
+		length: u64, prot: u64, flags: u64, fd: u64, offs: u64);
+	match (wrap_return(r)) {
+		err: errno => {
+			// XXX: Type promotion would simplify this
+			return if (r: int == -EPERM
+					&& addr: uintptr == null: uintptr
+					&& (flags & MAP_ANON) > 0
+					&& (flags & MAP_FIXED) == 0) {
+				// Fix up incorrect EPERM from kernel:
+				wrap_errno(ENOMEM);
+			} else err;
+		},
+		n: u64 => n: uintptr: *void,
+	};
+};
+
+// Invokes the munmap syscall with addr and length.
+export fn munmap(addr: *void, length: size) (void | errno) = {
+	wrap_return(syscall2(SYS_munmap,
+		addr: uintptr: u64, length: u64))?;
+	return;
+};
+
+// Invokes the mprotect syscall with addr, length and prot.
+export fn mprotect(addr: *void, length: size, prot: uint) (void | errno) = {
+	wrap_return(syscall3(SYS_mprotect,
+		addr: uintptr: u64, length: u64, prot: u64))?;
+	return;
+};
+
+// Invokes the lseek syscall and returns its result as an i64.
+export fn lseek(fd: int, off: i64, whence: uint) (i64 | errno) = {
+	return wrap_return(syscall3(SYS_lseek,
+		fd: u64, off: u64, whence: u64))?: i64;
+};
+
+// Invokes the faccessat syscall, which takes no flags argument. EACCES is
+// reported as false rather than as an error. Used by faccessat as a fallback.
+fn faccessat1(dirfd: int, path: *const char, mode: int) (bool | errno) = {
+	return match (wrap_return(syscall3(SYS_faccessat, dirfd: u64,
+		path: uintptr: u64, mode: u64))) {
+		err: errno => switch (err) {
+			EACCES => false,
+			* => err,
+		},
+		n: u64 => {
+			assert(n == 0);
+			true;
+		},
+	};
+};
+
+// Checks access to a path using the faccessat2 syscall; EACCES is reported as
+// false rather than as an error.
+//
+// The use of this function is discouraged, as it can create time-of-check to
+// time-of-use (TOCTOU) race conditions. Prefer to simply attempt to use the
+// resource you need and handle any access errors which occur.
+export fn faccessat(
+	dirfd: int,
+	path: path,
+	mode: int,
+	flags: int,
+) (bool | errno) = {
+	let path = kpath(path)?;
+	return match (wrap_return(syscall4(SYS_faccessat2, dirfd: u64,
+		path: uintptr: u64, mode: u64, flags: u64))) {
+		err: errno => switch (err) {
+			EACCES => false,
+			// faccessat2 is unavailable: fall back to faccessat,
+			// which is only possible when no flags were requested.
+			ENOSYS =>
+				if (flags == 0) faccessat1(dirfd, path, mode)
+				else err,
+			* => err,
+		},
+		n: u64 => {
+			assert(n == 0);
+			true;
+		},
+	};
+};
+
+// Invokes the getdents64 syscall with dirfd, dirp and count and returns its
+// result as a size.
+export fn getdents64(dirfd: int, dirp: *void, count: size) (size | errno) = {
+	return wrap_return(syscall3(SYS_getdents64, dirfd: u64,
+		dirp: uintptr: u64, count: u64))?: size;
+};
+
+// The use of this function is discouraged, as it can create time-of-check to
+// time-of-use (TOCTOU) race conditions. Prefer to simply attempt to use the
+// resource you need and handle any access errors which occur.
+export fn access(path: path, mode: int) (bool | errno) = {
+	// Same check as faccessat, relative to AT_FDCWD and with no flags.
+	return faccessat(AT_FDCWD, path, mode, 0);
+};
+
+// The argument accepted by fcntl after the command: nothing, an int, or a
+// pointer to one of the listed types.
+export type fcntl_arg = (void | int | *st_flock | *f_owner_ex | *u64);
+
+// Invokes the fcntl syscall. A void arg issues the two-argument form; any
+// other arg is passed as the third syscall argument (pointers by address).
+export fn fcntl(fd: int, cmd: int, arg: fcntl_arg) (int | errno) = {
+	let fdno = fd: u64;
+	let cmdno = cmd: u64;
+	let r = match (arg) {
+		void => syscall2(SYS_fcntl, fdno, cmdno),
+		n: int => syscall3(SYS_fcntl, fdno, cmdno, n: u64),
+		lk: *st_flock => syscall3(SYS_fcntl, fdno, cmdno, lk: uintptr: u64),
+		own: *f_owner_ex => syscall3(SYS_fcntl, fdno, cmdno, own: uintptr: u64),
+		p: *u64 => syscall3(SYS_fcntl, fdno, cmdno, p: uintptr: u64),
+	};
+	return wrap_return(r)?: int;
+};
+
+// Invokes the getrandom syscall with buf, bufln and flags and returns its
+// result as a size.
+export fn getrandom(buf: *void, bufln: size, flags: uint) (size | errno) = {
+	let r = syscall3(SYS_getrandom,
+		buf: uintptr: u64, bufln: u64, flags: u64);
+	return wrap_return(r)?: size;
+};
+
+// Invokes the clock_gettime syscall with clock_id and tp.
+// TODO: Implement me with VDSO
+export fn clock_gettime(clock_id: int, tp: *timespec) (void | errno) = {
+	let r = syscall2(SYS_clock_gettime,
+		clock_id: u64, tp: uintptr: u64);
+	wrap_return(r)?;
+	return;
+};
+
+// Invokes the uname syscall with a pointer to uts.
+export fn uname(uts: *utsname) (void | errno) = {
+	let r = syscall1(SYS_uname, uts: uintptr: u64);
+	wrap_return(r)?;
+	return;
+};
+
+// The return value is statically allocated and must be duplicated before
+// calling getcwd again.
+export fn getcwd() (*const char | errno) = {
+	// Function-local static buffer. It shadows the file-level pathbuf
+	// which kpath copies into, so the two do not overwrite each other.
+	static let pathbuf: [PATH_MAX + 1]u8 = [0...];
+	wrap_return(syscall2(SYS_getcwd,
+		&pathbuf: *[*]u8: uintptr: u64,
+		PATH_MAX + 1))?;
+	return &pathbuf: *const char;
+};
+
+// Invokes the poll syscall with fds, nfds and timeout and returns its result
+// as an int.
+//
+// NOTE(review): SYS_poll appears in the x86_64 syscall table but not in the
+// visible part of the other table in this commit - confirm it is defined for
+// every supported architecture.
+export fn poll(fds: *pollfd, nfds: nfds_t, timeout: int) (int | errno) = {
+	return wrap_return(syscall3(SYS_poll,
+		fds: uintptr: u64, nfds: u64, timeout: u64))?: int;
+};
+
+// Invokes the timerfd_create syscall and returns its result as an int.
+export fn timerfd_create(clock_id: int, flags: int) (int | errno) = {
+	return wrap_return(syscall2(SYS_timerfd_create,
+		clock_id: u64, flags: u64))?: int;
+};
+
+// Invokes the timerfd_settime syscall. old_value may be null.
+export fn timerfd_settime(
+	fd: int,
+	flags: int,
+	new_value: *const itimerspec,
+	old_value: nullable *itimerspec
+) (int | errno) = {
+	return wrap_return(syscall4(SYS_timerfd_settime,
+		fd: u64, flags: u64,
+		new_value: uintptr: u64, old_value: uintptr: u64))?: int;
+};
+
+// Invokes the timerfd_gettime syscall with fd and a pointer to curr_value.
+export fn timerfd_gettime(fd: int, curr_value: *itimerspec) (int | errno) = {
+	return wrap_return(syscall2(SYS_timerfd_gettime,
+		fd: u64, curr_value: uintptr: u64))?: int;
+};
+
+// Implemented with the signalfd4 syscall; size(sigset) is passed as the mask
+// size.
+export fn signalfd(fd: int, mask: *const sigset, flags: int) (int | errno) = {
+	return wrap_return(syscall4(SYS_signalfd4,
+		fd: u64, mask: uintptr: u64, size(sigset): u64,
+		flags: u64))?: int;
+};
+
+// Implemented with the rt_sigprocmask syscall; size(sigset) is passed as the
+// set size. old may be null.
+export fn sigprocmask(
+	how: int,
+	set: *const sigset,
+	old: nullable *sigset
+) (int | errno) = {
+	return wrap_return(syscall4(SYS_rt_sigprocmask,
+		how: u64, set: uintptr: u64, old: uintptr: u64,
+		size(sigset): u64))?: int;
+};
+
+// Signal return trampolines, declared here without a body. sigaction installs
+// one of them as sa_restorer.
+fn restore() void;
+fn restore_si() void;
+
+// Implemented with the rt_sigaction syscall. The caller's sigact is not
+// modified: a copy is made, SA_RESTORER is set on it, and sa_restorer is set
+// to restore_si if the caller requested SA_SIGINFO, or restore otherwise.
+// old may be null.
+export fn sigaction(
+	signum: int,
+	act: *const sigact,
+	old: nullable *sigact
+) (int | errno) = {
+	let real_act = *act;
+	real_act.sa_flags |= SA_RESTORER;
+	let restore_fn = if ((act.sa_flags & SA_SIGINFO) != 0) &restore_si else &restore;
+	// Install the trampoline selected above rather than always restore.
+	real_act.sa_restorer = restore_fn;
+	return wrap_return(syscall4(SYS_rt_sigaction,
+		signum: u64, &real_act: uintptr: u64, old: uintptr: u64,
+		size(sigset): u64))?: int;
+};
diff --git a/rt/+linux/types.ha b/rt/+linux/types.ha
@@ -0,0 +1,485
@@ +export type off_t = u64; +export type dev_t = u64; +export type ino_t = u64; +export type nlink_t = u64; +export type mode_t = uint; +export type uid_t = uint; +export type gid_t = uint; +export type time_t = i64; +export type suseconds_t = i64; +export type nfds_t = u64; +export type pid_t = int; +export type timer_t = int; +export type clock_t = i64; +export type si_band_t = i64; + +export def NSIG: int = 64; + +export type sigset = struct { + __val: [1]u64, +}; + +export type timeval = struct { + tv_sec: time_t, + tv_usec: suseconds_t, +}; + +export type timespec = struct { + tv_sec: time_t, + tv_nsec: i64, +}; + +export type itimerspec = struct { + it_interval: timespec, + it_value: timespec, +}; + +export def AT_FDCWD: int = -100; +export def AT_SYMLINK_NOFOLLOW: int = 0x100; +export def AT_REMOVEDIR: int = 0x200; +export def AT_SYMLINK_FOLLOW: int = 0x400; +export def AT_EACCESS: int = 0x200; +export def AT_NO_AUTOMOUNT: int = 0x800; +export def AT_EMPTY_PATH: int = 0x1000; +export def AT_STATX_SYNC_TYPE: int = 0x6000; +export def AT_STATX_SYNC_AS_STAT: int = 0x0000; +export def AT_STATX_FORCE_SYNC: int = 0x2000; +export def AT_STATX_DONT_SYNC: int = 0x4000; +export def AT_RECURSIVE: int = 0x8000; + +export def O_RDONLY: int = 0o0; +export def O_WRONLY: int = 0o1; +export def O_RDWR: int = 0o2; +export def O_CREATE: int = 0o100; +export def O_EXCLUSIVE: int = 0o200; +export def O_NOCTTY: int = 0o400; +export def O_TRUNC: int = 0o1000; +export def O_APPEND: int = 0o2000; +export def O_NONBLOCK: int = 0o4000; +export def O_DSYNC: int = 0o10000; +export def O_SYNC: int = 0o4010000; +export def O_RSYNC: int = 0o4010000; +export def O_DIRECTORY: int = 0o200000; +export def O_NOFOLLOW: int = 0o400000; +export def O_NOATIME: int = 0o1000000; +export def O_CLOEXEC: int = 0o2000000; +export def O_PATH: int = 0o10000000; +export def O_TMPFILE: int = 0o20000000; + +type statx_timestamp = struct { + tv_sec: i64, + tv_nsec: u32, +}; + +type stx = struct { + mask: u32, 
+ blksize: u32, + attributes: u64, + nlink: u32, + uid: u32, + gid: u32, + mode: u16, + ino: u64, + sz: u64, + blocks: u64, + attr_mask: u64, + atime: statx_timestamp, + btime: statx_timestamp, + ctime: statx_timestamp, + mtime: statx_timestamp, + rdev_major: u32, + rdev_minor: u32, + dev_major: u32, + dev_minor: u32, + __reserved: [14]u64, +}; + +// Note: the st type does not match the kernel API. The kernel API has a stat +// buffer which varies from arch to arch, but because we always use statx(2) and +// copy the data from the stx type, we don't have to deal with that nonsense. +export type st = struct { + dev: dev_t, + ino: ino_t, + mode: mode_t, + nlink: nlink_t, + uid: uid_t, + gid: gid_t, + rdev: dev_t, + sz: u64, + blksz: u64, + blocks: u64, + atime: timespec, + mtime: timespec, + ctime: timespec, +}; + +def STATX_TYPE: uint = 0x00000001; +def STATX_MODE: uint = 0x00000002; +def STATX_NLINK: uint = 0x00000004; +def STATX_UID: uint = 0x00000008; +def STATX_GID: uint = 0x00000010; +def STATX_ATIME: uint = 0x00000020; +def STATX_MTIME: uint = 0x00000040; +def STATX_CTIME: uint = 0x00000080; +def STATX_INO: uint = 0x00000100; +def STATX_SIZE: uint = 0x00000200; +def STATX_BLOCKS: uint = 0x00000400; +def STATX_BASIC_STATS: uint = 0x000007FF; +def STATX_BTIME: uint = 0x00000800; +def STATX_MNT_ID: uint = 0x00001000; + +export def SIGHUP: int = 1; +export def SIGINT: int = 2; +export def SIGQUIT: int = 3; +export def SIGILL: int = 4; +export def SIGTRAP: int = 5; +export def SIGABRT: int = 6; +export def SIGBUS: int = 7; +export def SIGFPE: int = 8; +export def SIGKILL: int = 9; +export def SIGUSR1: int = 10; +export def SIGSEGV: int = 11; +export def SIGUSR2: int = 12; +export def SIGPIPE: int = 13; +export def SIGALRM: int = 14; +export def SIGTERM: int = 15; +export def SIGSTKFLT: int = 16; +export def SIGCHLD: int = 17; +export def SIGCONT: int = 18; +export def SIGSTOP: int = 19; +export def SIGTSTP: int = 20; +export def SIGTTIN: int = 21; +export def 
SIGTTOU: int = 22; +export def SIGURG: int = 23; +export def SIGXCPU: int = 24; +export def SIGXFSZ: int = 25; +export def SIGVTALRM: int = 26; +export def SIGPROF: int = 27; +export def SIGWINCH: int = 28; +export def SIGIO: int = 29; +export def SIGPOLL: int = 29; +export def SIGPWR: int = 30; +export def SIGSYS: int = 31; + +export def MAP_SHARED: uint = 0x01; +export def MAP_PRIVATE: uint = 0x02; +export def MAP_SHARED_VALIDATE: uint = 0x03; +export def MAP_FIXED: uint = 0x10; +export def MAP_ANON: uint = 0x20; +export def MAP_NORESERVE: uint = 0x4000; +export def MAP_GROWSDOWN: uint = 0x0100; +export def MAP_DENYWRITE: uint = 0x0800; +export def MAP_EXECUTABLE: uint = 0x1000; +export def MAP_LOCKED: uint = 0x2000; +export def MAP_POPULATE: uint = 0x8000; +export def MAP_NONBLOCK: uint = 0x10000; +export def MAP_STACK: uint = 0x20000; +export def MAP_HUGETLB: uint = 0x40000; +export def MAP_SYNC: uint = 0x80000; +export def MAP_FIXED_NOREPLACE: uint = 0x100000; +export def MAP_FILE: uint = 0; +export def MAP_HUGE_SHIFT: uint = 26; +export def MAP_HUGE_MASK: uint = 0x3F; +export def MAP_HUGE_64KB: uint = 16 << 26; +export def MAP_HUGE_512KB: uint = 19 << 26; +export def MAP_HUGE_1MB: uint = 20 << 26; +export def MAP_HUGE_2MB: uint = 21 << 26; +export def MAP_HUGE_8MB: uint = 23 << 26; +export def MAP_HUGE_16MB: uint = 24 << 26; +export def MAP_HUGE_32MB: uint = 25 << 26; +export def MAP_HUGE_256MB: uint = 28 << 26; +export def MAP_HUGE_512MB: uint = 29 << 26; +export def MAP_HUGE_1GB: uint = 30 << 26; +export def MAP_HUGE_2GB: uint = 31 << 26; +export def MAP_HUGE_16GB: uint = 34 << 26; + +export def PROT_NONE: uint = 0; +export def PROT_READ: uint = 1; +export def PROT_WRITE: uint = 2; +export def PROT_EXEC: uint = 4; +export def PROT_GROWSDOWN: uint = 0x01000000; +export def PROT_GROWSUP: uint = 0x02000000; + +export def F_OK: int = 0; +export def R_OK: int = 4; +export def W_OK: int = 2; +export def X_OK: int = 1; + +export def F_DUPFD: int = 0; +export def 
F_GETFD: int = 1; +export def F_SETFD: int = 2; +export def F_GETFL: int = 3; +export def F_SETFL: int = 4; +export def F_SETOWN: int = 8; +export def F_GETOWN: int = 9; +export def F_SETSIG: int = 10; +export def F_GETSIG: int = 11; +export def F_GETLK: int = 12; +export def F_SETLK: int = 13; +export def F_SETLKW: int = 14; +export def F_SETOWN_EX: int = 15; +export def F_GETOWN_EX: int = 16; +export def F_GETOWNER_UIDS: int = 17; + +export type st_flock = struct { + l_type: i16, + l_whence: i16, + l_start: i64, + l_len: i64, + pid: int, +}; + +export type f_owner_ex = struct { + _type: int, + pid: int, +}; + +export def CLOCK_REALTIME: int = 0; +export def CLOCK_MONOTONIC: int = 1; +export def CLOCK_PROCESS_CPUTIME_ID: int = 2; +export def CLOCK_THREAD_CPUTIME_ID: int = 3; +export def CLOCK_MONOTONIC_RAW: int = 4; +export def CLOCK_REALTIME_COARSE: int = 5; +export def CLOCK_MONOTONIC_COARSE: int = 6; +export def CLOCK_BOOTTIME: int = 7; +export def CLOCK_REALTIME_ALARM: int = 8; +export def CLOCK_BOOTTIME_ALARM: int = 9; +export def CLOCK_SGI_CYCLE: int = 10; +export def CLOCK_TAI: int = 11; + +export type open_how = struct { + flags: u64, + mode: u64, + resolve: u64, +}; + +export def RESOLVE_NO_XDEV: u64 = 0x01; +export def RESOLVE_NO_MAGICLINKS: u64 = 0x02; +export def RESOLVE_NO_SYMLINKS: u64 = 0x04; +export def RESOLVE_BENEATH: u64 = 0x08; +export def RESOLVE_IN_ROOT: u64 = 0x10; + +export def DT_UNKNOWN: u8 = 0; +export def DT_FIFO: u8 = 1; +export def DT_CHR: u8 = 2; +export def DT_DIR: u8 = 4; +export def DT_BLK: u8 = 6; +export def DT_REG: u8 = 8; +export def DT_LNK: u8 = 10; +export def DT_SOCK: u8 = 12; + +export type dirent64 = struct { + d_ino: ino_t, + d_off: off_t, + d_reclen: u16, + d_type: u8, + d_name: [*]char, +}; + +export def WNOHANG: int = 1; +export def WUNTRACED: int = 2; +export def WSTOPPED: int = 2; +export def WEXITED: int = 4; +export def WCONTINUED: int = 8; +export def WNOWAIT: int = 0x1000000; + +export fn wexitstatus(s: int) int 
= (s & 0xff00) >> 8; +export fn wtermsig(s: int) int = s & 0x7f; +export fn wstopsig(s: int) int = wexitstatus(s); +export fn wcoredump(s: int) int = s & 0x80; +export fn wifexited(s: int) bool = wtermsig(s) <= 0; +export fn wifstopped(s: int) bool = (s & 0xFF) == 0x7F && (s & 0xFF00) != 0; // low byte 0x7f and a nonzero stop signal +export fn wifsignaled(s: int) bool = (s & 0xFFFF) >= 1 && (s & 0xFFFF) <= 0xFF; // low 16 bits within 1..0xFF +export fn wifcontinued(s: int) bool = s == 0xFFFF; + +export type rusage = struct { + ru_utime: timeval, + ru_stime: timeval, + ru_maxrss: u64, + ru_ixrss: u64, + ru_idrss: u64, + ru_isrss: u64, + ru_minflt: u64, + ru_majflt: u64, + ru_nswap: u64, + ru_inblock: u64, + ru_oublock: u64, + ru_msgsnd: u64, + ru_msgrcv: u64, + ru_nsignals: u64, + ru_nvcsw: u64, + ru_nivcsw: u64, + __reserved: [16]u64, +}; + +export type utsname = struct { + sysname: [65]char, + nodename: [65]char, + release: [65]char, + version: [65]char, + machine: [65]char, + domainname: [65]char, +}; + +export def POLLIN: i16 = 0x001; +export def POLLPRI: i16 = 0x002; +export def POLLOUT: i16 = 0x004; +export def POLLERR: i16 = 0x008; +export def POLLHUP: i16 = 0x010; +export def POLLVAL: i16 = 0x020; + +export type pollfd = struct { + fd: int, + events: i16, + revents: i16, +}; + +export def TFD_TIMER_ABSTIME: int = 1; +export def TFD_TIMER_CANCEL_ON_SET: int = 2; + +export def SIG_BLOCK: int = 0; +export def SIG_UNBLOCK: int = 1; +export def SIG_SETMASK: int = 2; + +def SI_MAX_SIZE: size = 128 / size(u8); + +export type sigval_t = union { + sival_t: int, + sival_ptr: *void, +}; + + +//#define __ADDR_BND_PKEY_PAD (__alignof__(void *) < sizeof(short) ? 
sizeof(short) : __alignof__(void *)) +def __ADDR_BND_PKEY_PAD: size = 8; + +export type siginfo = union { + struct { + si_signo: int, + si_errno: int, + si_code: int, + + union { + // kill() + struct { + si_pid: pid_t, + si_uid: u32, + }, + + // POSIX.1b timers + struct { + si_tid: timer_t, + si_overrun: int, + _sigval: sigval_t, // @ signals + si_sys_private: int, + }, + + // POSIX.1b signals + struct { + _sig_pid: pid_t, // @kill + _sig_uid: u32, // @ kill + union { + si_value: sigval_t, + si_int: int, + si_ptr: *void, + } + }, + + // SIGCHLD + struct { + _chld_pid: pid_t, // @ kill + _chld_uid: u32, // @ kill + si_status: int, + si_utime: clock_t, + si_stime: clock_t, + }, + + // SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT + struct { + si_addr: *void, + + union { + + // used when si_code=BUS_MCEERR_AR or + // used when si_code=BUS_MCEERR_AO + si_addr_lsb: i16, + + struct { + _dummy_bnd: [__ADDR_BND_PKEY_PAD]u8, + si_lower: *void, + si_upper: *void, + }, + + struct { + _dummy_pkey: [__ADDR_BND_PKEY_PAD]u8, + si_pkey: u32, + }, + }, + }, + + // SIGPOLL + struct { + si_band: si_band_t, + si_fd: int, + }, + + // SIGSYS + struct { + si_call_addr: *void, + si_syscall: int, + si_arch: uint, + }, + + }, + }, + _si_pad: [SI_MAX_SIZE - 3 * size(int)]u8, +}; + +export def SA_NOCLDSTOP: u64 = 0x00000001; +export def SA_NOCLDWAIT: u64 = 0x00000002; +export def SA_SIGINFO: u64 = 0x00000004; +export def SA_ONSTACK: u64 = 0x08000000; +export def SA_RESTART: u64 = 0x10000000; +export def SA_NODEFER: u64 = 0x40000000; +export def SA_RESETHAND: u64 = 0x80000000; +export def SA_NOMASK: u64 = SA_NODEFER; +export def SA_ONESHOT: u64 = SA_RESETHAND; +export def SA_RESTORER: u64 = 0x04000000; + +export type sigact = struct { + union { + sa_handler: *fn (int) void, + sa_sigaction: *fn (int, *siginfo, *void) void, + }, + sa_flags: u64, + sa_restorer: *fn () void, + sa_mask: sigset, +}; + +export def SFD_NONBLOCK: int = O_NONBLOCK; +export def SFD_CLOEXEC: int = O_CLOEXEC; + +export 
type signalfd_siginfo = struct { + ssi_signo: u32, + ssi_errno: i32, + ssi_code: i32, + ssi_pid: u32, + ssi_uid: u32, + ssi_fd: i32 , + ssi_tid: u32, + ssi_band: u32, + ssi_overrun: u32, + ssi_trapno: u32, + ssi_status: i32, + ssi_int: i32, + ssi_ptr: u64, + ssi_utime: u64, + ssi_stime: u64, + ssi_addr: u64, + ssi_addr_lsb: u16, + __pad2: u16, + ssi_syscall: i32, + ssi_call_addr: u64, + ssi_arch: u32, + + __pad: [28]u8, // pad to 128 bytes +}; diff --git a/rt/+test/+linux.ha b/rt/+test/+linux.ha @@ -0,0 +1,18 @@ +let start: timespec = timespec { ... }; + +fn time_start() void = { + clock_gettime(CLOCK_MONOTONIC, &start) as void; +}; + +// Returns the time elapsed since time_start as (seconds, milliseconds) +fn time_stop() (size, size) = { + let end: timespec = timespec { ... }; + clock_gettime(CLOCK_MONOTONIC, &end) as void; + let sec = end.tv_sec - start.tv_sec, nsec = end.tv_nsec - start.tv_nsec; + if (nsec < 0) { + // Borrow one second so the nanosecond part is never negative. + sec -= 1; + nsec += 1000000000; + }; + return (sec: size, nsec: size / 1000000z); // 1000000 ns per ms +}; diff --git a/rt/+test/abort.ha b/rt/+test/abort.ha @@ -0,0 +1,16 @@ +export @noreturn @symbol("rt.abort") fn _abort(msg: str) void = { + reason = abort_reason { loc = "", msg = msg }; + longjmp(&jmp, 1); +}; + +// See harec:include/gen.h +const reasons: [_]str = [ + "slice or array access out of bounds", // 0 + "type assertion failed", // 1 + "out of memory", // 2 +]; + +export @noreturn fn abort_fixed(loc: str, i: int) void = { + reason = abort_reason { loc = loc, msg = reasons[i] }; + longjmp(&jmp, 1); +}; diff --git a/rt/+test/cstring.ha b/rt/+test/cstring.ha @@ -0,0 +1,16 @@ +fn c_strlen(cstr: *const char) size = { + const ptr = cstr: *[*]u8; + let ln = 0z; + for (ptr[ln] != 0; ln += 1) void; + return ln; +}; + +fn from_c_unsafe(cstr: *const char) const str = { + const l = c_strlen(cstr); + const s = struct { + data: *[*]u8 = cstr: *[*]u8, + length: size = l, + capacity: size = l, + }; + return *(&s: *const str); +}; diff --git a/rt/+test/start.ha b/rt/+test/start.ha @@ -0,0 +1,124 @@ +type test = struct { + name: str, + func: *fn() void, +}; + +type abort_reason = 
struct { + loc: str, + msg: str, +}; + +const @symbol("__init_array_start") init_start: [*]*fn() void; +const @symbol("__init_array_end") init_end: [*]*fn() void; +const @symbol("__fini_array_start") fini_start: [*]*fn() void; +const @symbol("__fini_array_end") fini_end: [*]*fn() void; +const @symbol("__test_array_start") test_start: [*]test; +const @symbol("__test_array_end") test_end: [*]test; + +let jmp: jmpbuf = jmpbuf { ... }; +let reason: abort_reason = abort_reason { ... }; + +export @noreturn fn start_ha() void = { + const ninit = (&init_end: uintptr - &init_start: uintptr): size + / size(*fn() void); + for (let i = 0z; i < ninit; i += 1) { + init_start[i](); + }; + + const ntest = (&test_end: uintptr - &test_start: uintptr): size / size(test); + let maxname = 0z; + for (let i = 0z; i < ntest; i += 1) { + if (len(test_start[i].name) > maxname) { + maxname = len(test_start[i].name); + }; + }; + + let failures: [](str, abort_reason) = []; + let npass = 0z, nfail = 0z; + print("Running "); + print(ztos(ntest)); + print(" tests:\n\n"); + time_start(); + for (let i = 0z; i < ntest; i += 1) { + if (!should_test(test_start[i].name)) { + continue; + }; + print(test_start[i].name); + dots(maxname - len(test_start[i].name) + 3); + print(" "); + + if (setjmp(&jmp) != 0) { + nfail += 1; + append(failures, (test_start[i].name, reason)); + print("FAIL\n"); + continue; + }; + test_start[i].func(); + + npass += 1; + print("OK\n"); + }; + let end = time_stop(); + + if (nfail != 0) { + print("\n"); + print(ztos(nfail)); + if (nfail == 1) { + print(" test failed:\n"); + } else { + print(" tests failed:\n"); + }; + for (let i = 0z; i < nfail; i += 1) { + print(failures[i].0); + print(": "); + if (len(failures[i].1.loc) != 0) { + print(failures[i].1.loc); + print(": "); + }; + print(failures[i].1.msg); + print("\n"); + }; + }; + + print("\n"); + print(ztos(npass)); + print(" passed; "); + print(ztos(nfail)); + print(" failed; "); + print(ztos(ntest)); + print(" tests completed 
in "); + print(ztos(end.0)); + print("."); + print(ztos(end.1)); + print("s\n"); + + const nfini = (&fini_end: uintptr - &fini_start: uintptr): size + / size(*fn() void); + for (let i = 0z; i < nfini; i += 1) { + fini_start[i](); + }; + + exit(if (nfail > 0) 1 else 0); +}; + +fn print(msg: str) void = write(1, msg: *const char, len(msg)); + +fn dots(n: size) void = { + // XXX: this is slow, I guess + for (let i = 0z; i < n; i += 1) { + print("."); + }; +}; + +fn should_test(name: str) bool = { + if (argc == 1) { + return true; + }; + for (let i = 1z; i < argc; i += 1) { + let s = from_c_unsafe(argv[i]); + if (name == s) { + return true; + }; + }; + return false; +}; diff --git a/rt/+test/ztos.ha b/rt/+test/ztos.ha @@ -0,0 +1,38 @@ +fn bytes_reverse(b: []u8) void = { + if (len(b) == 0) { + return; + }; + for (let s = 0z, e = len(b) - 1; s < e) { + let x = b[s]; + b[s] = b[e]; + b[e] = x; + s += 1; + e -= 1; + }; +}; + +fn ztos(u: size) const str = { + static let buf: [21]u8 = [0...]; // 20 digits plus NUL + buf = [0...]; + + let s = struct { + b: *[*]u8 = &buf, + l: size = 0, + c: size = 0, + }; + + if (u == 0) { + s.b[s.l] = '0': u32: u8; + s.l += 1; + }; + + for (u > 0) { + s.b[s.l] = '0': u32: u8 + (u % 10): u8; + s.l += 1; + u /= 10; + }; + + bytes_reverse(s.b[..s.l]); + s.b[s.l] = 0; + return *(&s: *str); +}; diff --git a/rt/+x86_64/jmp.ha b/rt/+x86_64/jmp.ha @@ -0,0 +1 @@ +type arch_jmpbuf = [8]u64; diff --git a/rt/+x86_64/longjmp.s b/rt/+x86_64/longjmp.s @@ -0,0 +1,15 @@ +/* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */ +.global rt.longjmp +.type rt.longjmp,@function +rt.longjmp: + xor %eax,%eax + cmp $1,%esi /* CF = val ? 
0 : 1 */ + adc %esi,%eax /* eax = val + !val */ + mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */ + mov 8(%rdi),%rbp + mov 16(%rdi),%r12 + mov 24(%rdi),%r13 + mov 32(%rdi),%r14 + mov 40(%rdi),%r15 + mov 48(%rdi),%rsp + jmp *56(%rdi) /* goto saved address without altering rsp */ diff --git a/rt/+x86_64/restore.s b/rt/+x86_64/restore.s @@ -0,0 +1,11 @@ +// Stolen from MUSL + +.global rt.restore +.global rt.restore_si +.type rt.restore,@function +.type rt.restore_si,@function +rt.restore: +rt.restore_si: + movl $15, %eax + syscall + diff --git a/rt/+x86_64/setjmp.s b/rt/+x86_64/setjmp.s @@ -0,0 +1,16 @@ +/* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */ +.global rt.setjmp +.type rt.setjmp,@function +rt.setjmp: + mov %rbx,(%rdi) /* rdi is jmp_buf, move registers onto it */ + mov %rbp,8(%rdi) + mov %r12,16(%rdi) + mov %r13,24(%rdi) + mov %r14,32(%rdi) + mov %r15,40(%rdi) + lea 8(%rsp),%rdx /* this is our rsp WITHOUT current ret addr */ + mov %rdx,48(%rdi) + mov (%rsp),%rdx /* save return addr ptr for new rip */ + mov %rdx,56(%rdi) + xor %eax,%eax /* always return 0 */ + ret diff --git a/rt/ensure.ha b/rt/ensure.ha @@ -0,0 +1,36 @@ +export type slice = struct { + data: *void, + length: size, + capacity: size, +}; + +export fn ensure(s: *slice, membsz: size) void = { + let cap = s.capacity; + if (cap >= s.length) { + return; + }; + for (cap < s.length) { + assert(cap >= s.capacity, "slice out of memory (overflow)"); + if (cap == 0) { + cap = s.length; + } else { + cap *= 2; + }; + }; + s.capacity = cap; + const data = realloc(s.data, s.capacity * membsz); + assert(data != null || s.capacity * membsz == 0); + s.data = data; +}; + +export fn unensure(s: *slice, membsz: size) void = { + let cap = s.capacity; + for (cap > s.length) { + cap /= 2; + }; + cap *= 2; + s.capacity = cap; + const data = realloc(s.data, s.capacity * membsz); + assert(data != null || s.capacity * membsz == 0); + s.data = data; +}; diff --git a/rt/hare.sc 
b/rt/hare.sc @@ -0,0 +1,36 @@ +ENTRY(_start); +SECTIONS { + . = 0x10000; + .text : { + KEEP (*(.text)) + *(.text.*) + } + . = 0x8000000; + .data : { + KEEP (*(.data)) + *(.data.*) + } + + .init_array : { + PROVIDE_HIDDEN (__init_array_start = .); + KEEP (*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + } + + .fini_array : { + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP (*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + } + + .test_array : { + PROVIDE_HIDDEN (__test_array_start = .); + KEEP (*(.test_array)) + PROVIDE_HIDDEN (__test_array_end = .); + } + + .bss : { + KEEP (*(.bss)) + *(.bss.*) + } +} diff --git a/rt/jmp.ha b/rt/jmp.ha @@ -0,0 +1,9 @@ +type jmpbuf = struct { + __jb: arch_jmpbuf, + __fl: size, + __ss: [128 / size(size)]size, +}; + +fn setjmp(buf: *jmpbuf) int; + +@noreturn fn longjmp(buf: *jmpbuf, n: int) void; diff --git a/rt/malloc.ha b/rt/malloc.ha @@ -0,0 +1,163 @@ +// This is a simple memory allocator, based on Appel, Andrew W., and David A. +// Naumann. "Verified sequential malloc/free." It is not thread-safe. +// +// Large allocations are handled with mmap. +// +// For small allocations, we set up 50 bins, where each bin is responsible for +// 16 different allocation sizes (e.g. bin 1 handles allocations from 10 thru 26 +// bytes); except for the first and last bin, which are responsible for fewer +// than 16 allocation sizes. +// +// Each bin is 1MiB (BIGBLOCK) in size. We ceil the allocation size to the +// largest size supported for this bin, then break the bin up into smaller +// blocks. Each block is structured as [{sz: size, data..., link: *void}...]; +// where sz is the size of this (small) block, data is is set aside for the +// user's actual allocation, and link is a pointer to the next bin's data field. +// +// In short, a bin for a particular size is pre-filled with all allocations of +// that size, and the first word of each allocation is set to a pointer to the +// next allocation. 
As such, malloc becomes: +// +// 1. Look up bin; pre-fill if not already allocated +// 2. Let p = bin; bin = *bin; return p +// +// Then, free is simply: +// 1. Look up bin +// 2. *p = bin; +// 3. bin = p; +// +// Note that over time this can cause the ordering of the allocations in each +// bin to become non-continuous. This has no consequences for performance or +// correctness. + +def ALIGN: size = 2; +def WORD: size = size(size); +def WASTE: size = WORD * ALIGN - WORD; +def BIGBLOCK: size = (2 << 16) * WORD; + +let bins: [50]nullable *void = [null...]; + +fn bin2size(b: size) size = ((b + 1) * ALIGN - 1) * WORD; + +fn size2bin(s: size) size = { + assert(s <= bin2size(len(bins) - 1), "Size exceeds maximum for bin"); + return (s + (WORD * (ALIGN - 1) - 1)) / (WORD * ALIGN); +}; + +// Allocates n bytes of memory and returns a pointer to them, or null if there +// is insufficient memory. +export fn malloc(n: size) nullable *void = { + return if (n == 0) null + else if (n > bin2size(len(bins) - 1)) malloc_large(n) + else malloc_small(n); +}; + +fn malloc_large(n: size) nullable *void = { + let p = segmalloc(n + WASTE + WORD); + if (p == null) { + return null; + }; + let bsize = (p: uintptr + WASTE: uintptr): *[1]size; + bsize[0] = n; + return (p: uintptr + WASTE: uintptr + WORD: uintptr): nullable *void; +}; + +fn malloc_small(n: size) nullable *void = { + const b = size2bin(n); + let p = bins[b]; + if (p == null) { + p = fill_bin(b); + if (p != null) { + bins[b] = p; + }; + }; + return if (p != null) { + let q = *(p: **void); + bins[b] = q; + p; + } else null; +}; + +fn fill_bin(b: size) nullable *void = { + const s = bin2size(b); + let p = segmalloc(BIGBLOCK); + return if (p == null) null else list_from_block(s, p: uintptr); +}; + +fn list_from_block(s: size, p: uintptr) nullable *void = { + const nblocks = (BIGBLOCK - WASTE) / (s + WORD); + + let q = p + WASTE: uintptr; // align q+WORD + for (let j = 0z; j != nblocks - 1; j += 1) { + let sz = q: *size; + let 
useralloc = q + WORD: uintptr; // aligned + let next = (useralloc + s: uintptr + WORD: uintptr): *void; + *sz = s; + *(useralloc: **void) = next; + q += s: uintptr + WORD: uintptr; + }; + + // Terminate last block: size header at q, null link in the word after it + (q: *[1]size)[0] = s; + *((q + WORD: uintptr): *nullable *void) = null; + + // Return first block: + return (p + WASTE: uintptr + WORD: uintptr): *void; +}; + +// Frees a pointer previously allocated with [malloc]. +export @symbol("rt.free") fn free_(_p: nullable *void) void = { + if (_p != null) { + let p = _p: *void; + let bsize = (p: uintptr - size(size): uintptr): *[1]size; + let s = bsize[0]; + if (s <= bin2size(len(bins) - 1)) free_small(p, s) + else free_large(p, s); + }; +}; + +fn free_large(_p: *void, s: size) void = { + let p = (_p: uintptr - (WASTE: uintptr + WORD: uintptr)): *void; + segfree(p, s + WASTE + WORD); +}; + +fn free_small(p: *void, s: size) void = { + let b = size2bin(s); + let q = bins[b]; + *(p: **void) = q; + bins[b] = p: nullable *void; +}; + +// Changes the allocation size of a pointer to n bytes. If n is smaller than +// the prior allocation, it is truncated; otherwise the allocation is expanded +// and the values of the new bytes are undefined. May return a different pointer +// than the one given if there is insufficient space to expand the pointer +// in-place. Returns null if there is insufficient memory to support the +// request. 
+export fn realloc(_p: nullable *void, n: size) nullable *void = { + if (n == 0) { + free_(_p); + return null; + } else if (_p == null) { + return malloc(n); + }; + + let p = _p: *void; + let bsize = (p: uintptr - size(size): uintptr): *size; + let s = *bsize; + if (s >= n) { + return p; + }; + + if (n < bin2size(len(bins) - 1) && size2bin(n) == size2bin(s)) { + return p; + }; + + let new = malloc(n); + if (new != null) { + memcpy(new: *void, p, s); + free(p); + }; + + return new; +}; diff --git a/rt/memcpy.ha b/rt/memcpy.ha @@ -0,0 +1,6 @@ +export fn memcpy(dest: *void, src: *void, amt: size) void = { + let a = dest: *[*]u8, b = src: *[*]u8; + for (let i = 0z; i < amt; i += 1) { + a[i] = b[i]; + }; +}; diff --git a/rt/memset.ha b/rt/memset.ha @@ -0,0 +1,6 @@ +export fn memset(dest: *void, val: u8, amt: size) void = { + let a = dest: *[*]u8; + for (let i = 0z; i < amt; i += 1) { + a[i] = val; + }; +}; diff --git a/rt/start.ha b/rt/start.ha @@ -0,0 +1,24 @@ +@symbol("main") fn main() void; + +const @symbol("__init_array_start") init_start: [*]*fn() void; +const @symbol("__init_array_end") init_end: [*]*fn() void; +const @symbol("__fini_array_start") fini_start: [*]*fn() void; +const @symbol("__fini_array_end") fini_end: [*]*fn() void; + +export @noreturn fn start_ha() void = { + const ninit = (&init_end: uintptr - &init_start: uintptr): size + / size(*fn() void); + for (let i = 0z; i < ninit; i += 1) { + init_start[i](); + }; + + main(); + + const nfini = (&fini_end: uintptr - &fini_start: uintptr): size + / size(*fn() void); + for (let i = 0z; i < nfini; i += 1) { + fini_start[i](); + }; + + exit(0); +}; diff --git a/rt/strcmp.ha b/rt/strcmp.ha @@ -0,0 +1,18 @@ +type string = struct { + data: *[*]u8, + length: size, + capacity: size, +}; + +export fn strcmp(_a: str, _b: str) bool = { + if (len(_a) != len(_b)) { + return false; + }; + let a = (&_a: *string).data, b = (&_b: *string).data; + for (let i = 0z; i < len(_a); i += 1) { + if (a[i] != b[i]) { + return false; 
+ }; + }; + return true; +}; diff --git a/slice/reverse.ha b/slice/reverse.ha @@ -0,0 +1,22 @@ +// Reverses a slice. +export fn reverse(b: []void, membsz: size) void = { + if (len(b) == 0) { + return; + }; + let a = b: *[*]u8; + for (let s = 0z, e = len(b) - 1; s < e) { + for (let i = 0z; i < membsz; i += 1z) { + let z = a[s * membsz + i]; + a[s * membsz + i] = a[e * membsz + i]; + a[e * membsz + i] = z; + }; + s += 1; + e -= 1; + }; +}; + +@test fn reverse() void = { + let a: []int = [1, 2, 3, 4]; + reverse(a, size(int)); + assert(a[0] == 4 && a[1] == 3 && a[2] == 2 && a[3] == 1); +}; diff --git a/sort/+test.ha b/sort/+test.ha @@ -0,0 +1,17 @@ +fn ncmp(a: const *void, b: const *void) int = { + let a = a: const *int, b = b: const *int; + return *a - *b; +}; + +@test fn search() void = { + const nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + for (let i = 0z; i < len(nums); i += 1) { + const key = nums[i]; + match (search(nums[..], size(int), &key, &ncmp): nullable *int) { + null => abort(), + p: *int => assert(p == &nums[i] && *p == nums[i]), + }; + }; + const key = 1337; + assert(search(nums[..], size(int), &key, &ncmp) == null); +}; diff --git a/sort/search.ha b/sort/search.ha @@ -0,0 +1,26 @@ +// Performs a binary search over a sorted slice. 'in' shall be the sorted slice, +// and 'sz' shall be the size of each array member. The 'cmp' function will be +// called with the key value and an array member, and shall return a integer +// less than, equal to, or greater than zero if the key is, respectively, less +// than, equal to, or greater than the array member. 
+export fn search( + in: []void, + sz: size, + key: const *void, + cmp: *fn(a: const *void, b: const *void) int, +) nullable *void = { + let ba = in: *[*]u8; + for (let nmemb = len(in); nmemb > 0) { + let v = &ba[nmemb / 2 * sz]; + let r = cmp(key, v); + if (r < 0) { + nmemb /= 2; + } else if (r > 0) { + ba = (v: uintptr + sz: uintptr): *[*]u8; + nmemb -= nmemb / 2 + 1; + } else { + return v; + }; + }; + return null; +}; diff --git a/strconv/+test/stoi.ha b/strconv/+test/stoi.ha @@ -0,0 +1,55 @@ +fn is_invalid_i64(value: (i64 | invalid | overflow)) bool = { + return match (value) { + invalid => true, + * => false, + }; +}; + +fn is_overflow_i64(value: (i64 | invalid | overflow)) bool = { + return match (value) { + overflow => true, + * => false, + }; +}; + +fn is_number_i64(n: i64, value: (i64 | invalid | overflow)) bool = { + return match (value) { + v: i64 => v == n, + * => false, + }; +}; + +fn is_overflow_i32(value: (i32 | invalid | overflow)) bool = { + return match (value) { + overflow => true, + * => false, + }; +}; + +fn is_number_i32(n: i32, value: (i32 | invalid | overflow)) bool = { + return match (value) { + v: i32 => v == n, + * => false, + }; +}; + +@test fn stoi() void = { + assert(is_invalid_i64(stoi64(""))); + assert(is_invalid_i64(stoi64("abc"))); + assert(is_invalid_i64(stoi64("1a"))); + + assert(is_overflow_i64(stoi64("9223372036854775808"))); + assert(is_overflow_i64(stoi64("-9223372036854775809"))); + + assert(is_number_i64(0i64, stoi64("0"))); + assert(is_number_i64(1i64, stoi64("1"))); + assert(is_number_i64(-1i64, stoi64("-1"))); + assert(is_number_i64(9223372036854775807i64, stoi64("9223372036854775807"))); + assert(is_number_i64(-9223372036854775808i64, stoi64("-9223372036854775808"))); + + assert(is_overflow_i32(stoi32("2147483648"))); + assert(is_overflow_i32(stoi32("-2147483649"))); + + assert(is_number_i32(2147483647i32, stoi32("2147483647"))); + assert(is_number_i32(-2147483648i32, stoi32("-2147483648"))); +}; diff --git 
a/strconv/+test/stou.ha b/strconv/+test/stou.ha @@ -0,0 +1,41 @@ +fn is_invalid64(value: (u64 | invalid | overflow)) bool = { + return match (value) { + invalid => true, + * => false, + }; +}; + +fn is_overflow64(value: (u64 | invalid | overflow)) bool = { + return match (value) { + overflow => true, + * => false, + }; +}; + +fn is_number64(n: u64, value: (u64 | invalid | overflow)) bool = { + return match (value) { + v: u64 => v == n, + * => false, + }; +}; + +@test fn stou() void = { + assert(is_invalid64(stou64(""))); + assert(is_invalid64(stou64("abc"))); + assert(is_invalid64(stou64("1a"))); + assert(is_invalid64(stou64("-1"))); + + assert(is_overflow64(stou64("18446744073709551616"))); + assert(is_overflow64(stou64("184467440737095516150"))); + + assert(is_number64(0u64, stou64("0"))); + assert(is_number64(1u64, stou64("1"))); + assert(is_number64(18446744073709551615u64, stou64("18446744073709551615"))); +}; + +@test fn stoub() void = { + assert(is_number64(0x7fu64, stou64b("7f", 16u))); + assert(is_number64(0x7fu64, stou64b("7F", 16u))); + assert(is_number64(0o37u64, stou64b("37", 8u))); + assert(is_number64(0b110101u64, stou64b("110101", 2u))); +}; diff --git a/strconv/itos.ha b/strconv/itos.ha @@ -0,0 +1,107 @@ +use bytes; +use types; +use strings; + +// Converts an i64 to a string in the given base. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn i64tosb(i: i64, b: base) const str = { + static assert(types::I64_MAX == 9223372036854775807); + if (i >= 0) return u64tosb(i: u64, b); + + static let buf: [66]u8 = [0...]; // 64 binary digits plus NUL and - + buf = [0...]; + + let s = types::string { data = &buf, ... 
}; + + buf[0] = '-': u32: u8; + s.length = 1; + + let u = strings::to_utf8(u64tosb((-i): u64, b)); + assert(len(u) + 1 < len(buf)); + + bytes::copy(buf[1..len(u) + 1], u); + s.length += len(u); + + return *(&s: *str); +}; + +// Converts a i32 to a string in the given base. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn i32tosb(i: i32, b: base) const str = i64tosb(i, b); + +// Converts a i16 to a string in the given base. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn i16tosb(i: i16, b: base) const str = i64tosb(i, b); + +// Converts a i8 to a string in the given base. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn i8tosb(i: i8, b: base) const str = i64tosb(i, b); + +// Converts an int to a string in the given base. The return value is +// statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn itosb(i: int, b: base) const str = i64tosb(i, b); + +// Converts a i64 to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn i64tos(i: i64) const str = i64tosb(i, base::DEC); + +// Converts a i32 to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn i32tos(i: i32) const str = i64tos(i); + +// Converts a i16 to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn i16tos(i: i16) const str = i64tos(i); + +// Converts a i8 to a string in base 10. 
The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn i8tos(i: i8) const str = i64tos(i); + +// Converts a int to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn itos(i: int) const str = i64tos(i); + +@test fn itosb() void = { + assert("11010" == i64tosb(0b11010, base::BIN)); + assert("1234567" == i64tosb(0o1234567, base::OCT)); + assert("123456789" == i64tosb(123456789, base::DEC)); + assert("123456789ABCDEF" == i64tosb(0x123456789ABCDEF, base::HEX)); + assert("123456789ABCDEF" == i64tosb(0x123456789ABCDEF, base::HEX_UPPER)); + assert("123456789abcdef" == i64tosb(0x123456789ABCDEF, base::HEX_LOWER)); + assert("-1000000000000000000000000000000000000000000000000000000000000000" + == i64tosb(types::I64_MIN, base::BIN)); +}; + +@test fn itos() void = { + const samples: [_]i64 = [ + 1234, + 4321, + -1337, + 0, + types::I64_MAX, + types::I64_MIN, + ]; + const expected = [ + "1234", + "4321", + "-1337", + "0", + "9223372036854775807", + "-9223372036854775808", + ]; + + for (let i = 0z; i < len(samples); i += 1) { + const s = i64tos(samples[i]); + assert(s == expected[i]); + }; +}; diff --git a/strconv/numeric.ha b/strconv/numeric.ha @@ -0,0 +1,96 @@ +use types; + +// Converts any [types::signed] to a string in a given base. The return value is +// statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn signedtosb(n: types::signed, b: base) const str = { + return match (n) { + i: int => itosb(i, b), + i: i8 => i8tosb(i, b), + i: i16 => i16tosb(i, b), + i: i32 => i32tosb(i, b), + i: i64 => i64tosb(i, b), + }; +}; + +// Converts any [types::signed] to a string in base 10. 
The return value is +// statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn signedtos(n: types::signed) const str = signedtosb(n, base::DEC); + +// Converts any [types::unsigned] to a string in a given base. The return value +// is statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn unsignedtosb(n: types::unsigned, b: base) const str = { + return match (n) { + u: size => ztosb(u, b), + u: uint => utosb(u, b), + u: u8 => u8tosb(u, b), + u: u16 => u16tosb(u, b), + u: u32 => u32tosb(u, b), + u: u64 => u64tosb(u, b), + }; +}; + +// Converts any [types::unsigned] to a string in base 10. The return value is +// statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn unsignedtos(n: types::unsigned) const str = unsignedtosb(n, base::DEC); + +// Converts any [types::integer] to a string in a given base, which must be 2, +// 8, 10, or 16. The return value is statically allocated and will be +// overwritten on subsequent calls; see [strings::dup] to duplicate the result. +export fn integertosb(n: types::integer, b: base) const str = { + return match (n) { + s: types::signed => signedtosb(s, b), + u: types::unsigned => unsignedtosb(u, b), + }; +}; + +// Converts any [types::integer] to a string in base 10. The return value is +// statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn integertos(n: types::integer) const str = integertosb(n, base::DEC); + +// Converts any [types::floating] to a string in a given base. The return value +// is statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. 
+export fn floatingtosb(n: types::floating, b: base) const str = { + abort(); // TODO +}; + +// Converts any [types::floating] to a string in base 10. The return value is +// statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn floatingtos(n: types::floating) const str = floatingtosb(n, base::DEC); + +// Converts any [types::numeric] to a string in a given base. The return value +// is statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn numerictosb(n: types::numeric, b: base) const str = { + return match (n) { + i: types::integer => integertosb(i, b), + f: types::floating => floatingtosb(f, b), + }; +}; + +// Converts any [types::numeric] to a string in base 10. The return value is +// statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn numerictos(n: types::numeric) const str = numerictosb(n, base::DEC); + +@test fn numeric() void = { + const cases: [_]types::numeric = [ + 42u8, 1337u16, 1337u32, 1337u64, 42i8, -42i8, 1337i16, -1337i16, + 1337i32, -1337i32, 1337i64, -1337i64, 1337i, -1337i, 1337u, + -1337i, + ]; + const expected = [ + "42", "1337", "1337", "1337", "42", "-42", "1337", "-1337", + "1337", "-1337", "1337", "-1337", "1337", "-1337", "1337", + "-1337", + ]; + for (let i = 0z; i < len(cases); i += 1) { + assert(numerictos(cases[i]) == expected[i]); + }; +}; diff --git a/strconv/stoi.ha b/strconv/stoi.ha @@ -0,0 +1,71 @@ +use types; +use strings; + +// Converts a string to an i64 in base 10. If the string contains any +// non-numeric characters, except '-' at the start, or if it's empty, +// [strconv::invalid] is returned. If the number is too large to be represented +// by an i64, [strconv::overflow] is returned. 
+export fn stoi64(s: str) (i64 | invalid | overflow) = { + if (len(s) == 0) return invalid; + let b = strings::to_utf8(s); + let sign = 1i64; + let max = types::I64_MAX: u64; + if (b[0] == '-': u32: u8) { + sign = -1; + max += 1; + }; + let u = if (sign < 0) stou64(strings::from_utf8_unsafe(b[1..])) + else stou64(s); + let n = u?; + if (n > max) { + return overflow; + }; + return n: i64 * sign; +}; + +// Converts a string to an i32 in base 10. If the string contains any +// non-numeric characters, except '-' at the start, or if it's empty, +// [strconv::invalid] is returned. If the number is too large to be represented +// by an i32, [strconv::overflow] is returned. +export fn stoi32(s: str) (i32 | invalid | overflow) = { + let n = stoi64(s)?; + if (n >= types::I32_MIN: i64 && n <= types::I32_MAX: i64) { + return n: i32; + }; + return overflow; +}; + +// Converts a string to an i16 in base 10. If the string contains any +// non-numeric characters, except '-' at the start, or if it's empty, +// [strconv::invalid] is returned. If the number is too large to be represented +// by an i16, [strconv::overflow] is returned. +export fn stoi16(s: str) (i16 | invalid | overflow) = { + let n = stoi64(s)?; + if (n >= types::I16_MIN: i64 && n <= types::I16_MAX: i64) { + return n: i16; + }; + return overflow; +}; + +// Converts a string to an i8 in base 10. If the string contains any +// non-numeric characters, except '-' at the start, or if it's empty, +// [strconv::invalid] is returned. If the number is too large to be represented +// by an i8, [strconv::overflow] is returned. +export fn stoi8(s: str) (i8 | invalid | overflow) = { + let n= stoi64(s)?; + if (n >= types::I8_MIN: i64 && n <= types::I8_MAX: i64) { + return n: i8; + }; + return overflow; +}; + +// Converts a string to an int in base 10. If the string contains any +// non-numeric characters, except '-' at the start, or if it's empty, +// [strconv::invalid] is returned. 
If the number is too large to be represented +// by an int, [strconv::overflow] is returned. +export fn stoi(s: str) (int | invalid | overflow) = { + static assert(size(int) == size(i32) || size(int) == size(i64)); + return + if (size(int) == size(i32)) stoi32(s)?: int + else stoi64(s)?: int; +}; diff --git a/strconv/stou.ha b/strconv/stou.ha @@ -0,0 +1,149 @@ +use strings; +use types; +use ascii; +use encoding::utf8; + +fn rune_to_integer(r: rune) (u64 | void) = { + if (ascii::isdigit(r)) + return (r: u32 - '0': u32): u64 + else if (ascii::isalpha(r) && ascii::islower(r)) + return (r: u32 - 'a': u32): u64 + 10 + else if (ascii::isalpha(r) && ascii::isupper(r)) + return (r: u32 - 'A': u32): u64 + 10; +}; + +// Converts a string to a u64 in the given base, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a u64, [strconv::overflow] is +// returned. Supported bases are 2, 8, 10 and 16. +export fn stou64b(s: str, base: uint) (u64 | invalid | overflow) = { + assert(base == 2 || base == 8 || base == 10 || base == 16); + + if (len(s) == 0) { + return invalid; + }; + + let n = 0z; + let iter = strings::iter(s); + for (true) { + let r: rune = match (strings::next(&iter)) { + void => break, + r: rune => r, + }; + + let digit = match (rune_to_integer(r)) { + void => return invalid, + d: u64 => d, + }; + + if (digit >= base: u64) return invalid; + + let old = n; + + n *= base; + n += digit; + + if (n < old) { + return overflow; + }; + }; + return n; +}; + +// Converts a string to a u32 in the given base, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a u32, [strconv::overflow] is +// returned. Supported bases are 2, 8, 10 and 16. 
+export fn stou32b(s: str, base: uint) (u32 | invalid | overflow) = { + let n = stou64b(s, base)?; + if (n <= types::U32_MAX: u64) { + return n: u32; + }; + return overflow; +}; + +// Converts a string to a u16 in the given base, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a u16, [strconv::overflow] is +// returned. Supported bases are 2, 8, 10 and 16. +export fn stou16b(s: str, base: uint) (u16 | invalid | overflow) = { + let n = stou64b(s, base)?; + if (n <= types::U16_MAX: u64) { + return n: u16; + }; + return overflow; +}; + +// Converts a string to a u8 in the given base, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a u8, [strconv::overflow] is +// returned. Supported bases are 2, 8, 10 and 16. +export fn stou8b(s: str, base: uint) (u8 | invalid | overflow) = { + let n = stou64b(s, base)?; + if (n <= types::U8_MAX: u64) { + return n: u8; + }; + return overflow; +}; + +// Converts a string to a uint in the given base, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a uint, [strconv::overflow] is +// returned. Supported bases are 2, 8, 10 and 16. +export fn stoub(s: str, base: uint) (uint | invalid | overflow) = { + static assert(size(uint) == size(u32) || size(uint) == size(u64)); + return + if (size(uint) == size(u32)) stou32b(s, base)?: uint + else stou64b(s, base)?: uint; +}; + +// Converts a string to a size in the given base, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a size, [strconv::overflow] is +// returned. Supported bases are 2, 8, 10 and 16. 
+export fn stozb(s: str, base: uint) (size | invalid | overflow) = { + static assert(size(size) == size(u32) || size(size) == size(u64)); + return if (size(size) == size(u32)) match (stou32b(s, base)) { + v: (invalid | overflow) => v, + n: u32 => n: size, + } else match (stou64b(s, base)) { + v: (invalid | overflow) => v, + n: u64 => n: size, + }; +}; + +// Converts a string to a u64 in base 10, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a u64, [strconv::overflow] is +// returned. +export fn stou64(s: str) (u64 | invalid | overflow) = stou64b(s, 10); + +// Converts a string to a u32 in base 10, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a u32, [strconv::overflow] is +// returned. +export fn stou32(s: str) (u32 | invalid | overflow) = stou32b(s, 10); + +// Converts a string to a u16 in base 10, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a u16, [strconv::overflow] is +// returned. +export fn stou16(s: str) (u16 | invalid | overflow) = stou16b(s, 10); + +// Converts a string to a u8 in base 10, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a u8, [strconv::overflow] is +// returned. +export fn stou8(s: str) (u8 | invalid | overflow) = stou8b(s, 10); + +// Converts a string to a uint in base 10, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a uint, [strconv::overflow] is +// returned. 
+export fn stou(s: str) (uint | invalid | overflow) = stoub(s, 10); + +// Converts a string to a u64 in base 10, If the string contains any +// non-numeric characters, or if it's empty, [strconv::invalid] is returned. If +// the number is too large to be represented by a u64, [strconv::overflow] is +// returned. +export fn stoz(s: str) (size | invalid | overflow) = stozb(s, 10); diff --git a/strconv/types.ha b/strconv/types.ha @@ -0,0 +1,22 @@ +// Indicates that the input string is not an integer +export type invalid = void!; + +// Indicates that the input number is too large to be represented by the +// requested data type +export type overflow = void!; + +// The valid numeric bases for numeric conversions. +export type base = enum uint { + // Base 2, binary + BIN = 2, + // Base 8, octal + OCT = 8, + // Base 10, decimal + DEC = 10, + // Base 16, UPPERCASE hexadecimal + HEX_UPPER = 16, + // Alias for HEX_UPPER + HEX = 16, + // Base 16, lowercase hexadecimal + HEX_LOWER = 17, +}; diff --git a/strconv/utos.ha b/strconv/utos.ha @@ -0,0 +1,135 @@ +use bytes; +use types; + +// Converts a u64 to a string in the given base. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn u64tosb(u: u64, b: base) const str = { + static assert(types::U64_MAX == 18446744073709551615); + static let buf: [65]u8 = [0...]; // 64 binary digits plus NUL + buf = [0...]; + + static const lut_upper = [ + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', + ], lut_lower = [ + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', + ]; + const lut = if (b != base::HEX_LOWER) &lut_upper else { + b = base::HEX_UPPER; + &lut_lower; + }; + + let s = types::string { data = &buf, ... 
}; + if (u == 0) { + buf[s.length] = '0': u32: u8; + s.length += 1z; + }; + + for (u > 0u64) { + buf[s.length] = lut[u % b: u64]: u32: u8; + s.length += 1; + u /= b; + }; + + bytes::reverse(buf[..s.length]); + buf[s.length] = 0; + return *(&s: *str); +}; + +// Converts a u32 to a string in the given base. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn u32tosb(u: u32, b: base) const str = u64tosb(u, b); + +// Converts a u16 to a string in the given base. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn u16tosb(u: u16, b: base) const str = u64tosb(u, b); + +// Converts a u8 to a string in the given base. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn u8tosb(u: u8, b: base) const str = u64tosb(u, b); + +// Converts a uint to a string in the given base. The return value is +// statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn utosb(u: uint, b: base) const str = u64tosb(u, b); + +// Converts a size to a string in the given base. The return value is +// statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn ztosb(u: size, b: base) const str = u64tosb(u, b); + +// Converts a size to a string in the given base. The return value is +// statically allocated and will be overwritten on subsequent calls; see +// [strings::dup] to duplicate the result. +export fn uptrtosb(uptr: uintptr, b: base) const str = u64tosb(uptr: u64, b); + +// Converts a u64 to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. 
+export fn u64tos(u: u64) const str = u64tosb(u, base::DEC); + +// Converts a u32 to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn u32tos(u: u32) const str = u64tos(u); + +// Converts a u16 to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn u16tos(u: u16) const str = u64tos(u); + +// Converts a u8 to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn u8tos(u: u8) const str = u64tos(u); + +// Converts a uint to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. +export fn utos(u: uint) const str = u64tos(u); + +// Converts a size to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result, or [strconv::itosb] to pass your own string buffer. +export fn ztos(z: size) const str = u64tos(z); + +// Converts a uintptr to a string in base 10. The return value is statically +// allocated and will be overwritten on subsequent calls; see [strings::dup] to +// duplicate the result. 
+export fn uptrtos(uptr: uintptr) const str = u64tos(uptr: u64); + +@test fn utosb() void = { + assert("11010" == u64tosb(0b11010, base::BIN)); + assert("1234567" == u64tosb(0o1234567, base::OCT)); + assert("123456789" == u64tosb(123456789, base::DEC)); + assert("123456789ABCDEF" == u64tosb(0x123456789ABCDEF, base::HEX)); + assert("123456789ABCDEF" == u64tosb(0x123456789ABCDEF, base::HEX_UPPER)); + assert("123456789abcdef" == u64tosb(0x123456789ABCDEF, base::HEX_LOWER)); + assert("1111111111111111111111111111111111111111111111111111111111111111" + == u64tosb(types::U64_MAX, base::BIN)); +}; + +@test fn utos() void = { + const samples: [_]u64 = [ + 1234, + 4321, + types::U64_MIN, + types::U64_MAX, + ]; + const expected = [ + "1234", + "4321", + "0", + "18446744073709551615", + ]; + + for (let i = 0z; i < len(samples); i += 1) { + const s = u64tos(samples[i]); + assert(s == expected[i]); + }; +}; diff --git a/strings/concat.ha b/strings/concat.ha @@ -0,0 +1,36 @@ +// Concatenates two or more strings. The caller must free the return value. +export fn concat(strs: str...) str = { + let z = 0z; + for (let i = 0z; i < len(strs); i += 1) { + z += len(strs[i]); + }; + let new: []u8 = alloc([], z + 1); + for (let i = 0z; i < len(strs); i += 1) { + append(new, ...to_utf8(strs[i])); + }; + append(new, 0); + return from_utf8_unsafe(new[..z]); +}; + +@test fn concat() void = { + let s = concat("hello ", "world"); + assert(s == "hello world"); + assert((s: *const char: *[*]u8)[len(s)] == 0); + free(s); + + s = concat("hello", " ", "world"); + assert(s == "hello world"); + free(s); + + s = concat("hello", "", "world"); + assert(s == "helloworld"); + free(s); + + s = concat("", ""); + assert(s == ""); + free(s); + + s = concat(); + assert(s == ""); + free(s); +}; diff --git a/strings/contains.ha b/strings/contains.ha @@ -0,0 +1,15 @@ +use bytes; +use encoding::utf8; + +// Returns true if a string contains a rune or a sub-string. 
+export fn contains(haystack: str, needle: (str | rune)) bool = match (needle) { + s: str => bytes::contains(to_utf8(haystack), to_utf8(s)), + r: rune => bytes::contains(to_utf8(haystack), utf8::encode_rune(r)), +}; + +@test fn contains() void = { + assert(contains("hello world", "hello")); + assert(contains("hello world", "world")); + assert(contains("hello world", "")); + assert(!contains("hello world", "foobar")); +}; diff --git a/strings/cstrings.ha b/strings/cstrings.ha @@ -0,0 +1,51 @@ +use encoding::utf8; +use types; +use rt; + +let emptybuf: [1]u8 = [0]; + +// A C-compatible empty string. Empty Hare strings have a null pointer instead +// of containing only '\0', so a special string is needed for this case. +export let c_empty: *const char = &emptybuf: *[*]u8: *const char; + +// Computes the length of a NUL-terminated C string, in octets, in O(n). The +// computed length does not include the NUL terminator. +export fn c_strlen(cstr: *const char) size = { + const ptr = cstr: *[*]u8; + let ln = 0z; + for (ptr[ln] != 0; ln += 1) void; + return ln; +}; + +// Converts a C string to a Hare string in O(n), and does not check if it's +// valid UTF-8. +export fn from_c_unsafe(cstr: *const char) const str = { + const l = c_strlen(cstr); + const s = types::string { + data = cstr: *[*]u8, + length = l, + capacity = l, + }; + return *(&s: *const str); +}; + +// Converts a C string to a Hare string in O(n). If the string is not valid +// UTF-8, abort. +export fn from_c(cstr: *const char) const str = { + let s = from_c_unsafe(cstr); + assert(utf8::valid(s)); + return s; +}; + +// Converts a Hare string to a C string. The result is allocated, the caller +// must free it when they're done. 
+export fn to_c(s: const str) *char = { + let ptr = rt::malloc(len(s) + 1): nullable *[*]u8; + let ptr = match (ptr) { + null => abort("Out of memory"), + p: *[*]u8 => p, + }; + rt::memcpy(ptr, (&s: *types::string).data, len(s)); + ptr[len(s)] = 0; + return ptr: *char; +}; diff --git a/strings/dup.ha b/strings/dup.ha @@ -0,0 +1,41 @@ +use bytes; +use rt; +use types; + +// Duplicates a string. Aborts on allocation failure. +export fn dup(s: const str) str = { + const in = &s: *types::string; + const id = match (in.data) { + null => return "", // Empty string + b: *[*]u8 => b, + }; + let buf: *[*]u8 = match (rt::malloc(in.length + 1)) { + null => abort("Out of memory"), + v: *void => v, + }; + bytes::copy(buf[..in.length + 1z], id[..in.length + 1]); + let out = types::string { + data = buf, + length = in.length, + capacity = in.length, + }; + return *(&out: *str); +}; + +// Duplicates every string of a slice in place, returning the same slice with +// new strings. +export fn dup_all(s: []str) void = { + for (let i = 0z; i < len(s); i += 1) { + s[i] = strings::dup(s[i]); + }; +}; + +@test fn dup() void = { + let s = dup(""); + assert(s == ""); + free(s); + + s = dup("hello"); + assert(s == "hello"); + free(s); +}; diff --git a/strings/index.ha b/strings/index.ha @@ -0,0 +1,25 @@ +use bytes; + +// Returns the index of the first occurance of 'needle' in the 'haystack', or +// void if not present. The index returned is the rune-wise index, not the +// byte-wise index. 
+export fn index(haystack: str, needle: (str | rune)) (size | void) = { + return match (needle) { + r: rune => index_rune(haystack, r), + s: str => abort(), // TODO + }; +}; + +fn index_rune(s: str, r: rune) (size | void) = { + let iter = iter(s); + for (let i = 0z; true; i += 1) match (next(&iter)) { + n: rune => if (r == n) return i, + void => break, + }; +}; + +@test fn index() void = { + assert(index("hello world", 'w') as size == 6); + assert(index("こんにちは", 'ち') as size == 3); + assert(index("こんにちは", 'q') is void); +}; diff --git a/strings/iter.ha b/strings/iter.ha @@ -0,0 +1,105 @@ +use encoding::utf8; + +// An iterator which yields each rune from a string. +export type iterator = struct { + dec: utf8::decoder, + push: (rune | void), +}; + +// Initializes a string iterator, starting at the beginning of the string. +export fn iter(src: str) iterator = iterator { + dec = utf8::decode(src), + push = void, +}; + +// Initializes a string iterator, starting at the end of the string. +export fn riter(src: str) iterator = { + let ret = iterator { + dec = utf8::decode(src), + push = void, + }; + ret.dec.offs = len(src); + return ret; +}; + +// Get the next rune from an iterator, or void if there are none left. +export fn next(iter: *iterator) (rune | void) = { + match (iter.push) { + r: rune => { + iter.push = void; + return r; + }, + void => void, + }; + return match (utf8::next(&iter.dec)) { + r: rune => r, + void => void, + (utf8::more | utf8::invalid) => + abort("Invalid UTF-8 string (this should not happen)"), + }; +}; + +// Get the previous rune from an iterator, or void when at the start of the +// string. +export fn prev(iter: *iterator) (rune | void) = { + assert(iter.push is void); + return match (utf8::prev(&iter.dec)) { + r: rune => r, + void => void, + (utf8::more | utf8::invalid) => + abort("Invalid UTF-8 string (this should not happen)"), + }; +}; + +// Causes the next call to [next] to return the provided rune, effectively +// un-reading it. 
The next call using this iterator *must* be [next]; all other +// functions will cause the program to abort until the pushed rune is consumed. +// This does not modify the underlying string, and as such, subsequent calls to +// functions like [prev] or [iter_str] will behave as if push were never called. +export fn push(iter: *iterator, r: rune) void = { + assert(iter.push is void); + iter.push = r; +}; + +// Return a substring from the next rune to the end of the string. +export fn iter_str(iter: *iterator) str = { + assert(iter.push is void); + return from_utf8(iter.dec.src[iter.dec.offs..]); +}; + +@test fn iter() void = { + let s = iter("こんにちは"); + assert(prev(&s) is void); + const expected1 = ['こ', 'ん']; + for (let i = 0z; i < len(expected1); i += 1) { + match (next(&s)) { + r: rune => assert(r == expected1[i]), + void => abort(), + }; + }; + assert(iter_str(&s) == "にちは"); + assert(prev(&s) as rune == 'ん'); + const expected2 = ['ん', 'に', 'ち', 'は']; + for (let i = 0z; i < len(expected2); i += 1) { + match (next(&s)) { + r: rune => assert(r == expected2[i]), + void => abort(), + }; + }; + assert(next(&s) is void); + assert(next(&s) is void); + push(&s, 'q'); + assert(next(&s) as rune == 'q'); + assert(prev(&s) as rune == 'は'); + + s = riter("にちは"); + const expected3 = ['は', 'ち', 'に']; + for (let i = 0z; i< len(expected3); i += 1) { + match (prev(&s)) { + r: rune => assert(r == expected3[i]), + void => abort(), + }; + }; + assert(prev(&s) is void); + assert(next(&s) as rune == 'に'); +}; diff --git a/strings/sub.ha b/strings/sub.ha @@ -0,0 +1,51 @@ +use encoding::utf8; + +export type end = void; + +fn utf8_byte_len_bounded(iter: *iterator, end: size) size = { + let pos = 0z; + for (let i = 0z; i < end; i += 1) { + let r: rune = match (strings::next(iter)) { + void => break, + r: rune => r, + }; + + pos += utf8::runesz(r); + }; + return pos; +}; + +fn utf8_byte_len_unbounded(iter: *iterator) size = { + let pos = 0z; + for (true) { + let r: rune = match 
(strings::next(iter)) { + void => break, + r: rune => r, + }; + + pos += utf8::runesz(r); + }; + return pos; +}; + +// Returns a substring in the range [start, end - 1], where each argument is the +// index of the Nth rune. If the end argument is given as [strings::end], the +// end of the substring is the end of the original string. The lifetime of the +// substring is the same as that of the original string. +export fn sub(s: str, start: size, end: (size | end)) str = { + let iter = iter(s); + let starti = utf8_byte_len_bounded(&iter, start); + let endi = match (end) { + sz: size => starti + utf8_byte_len_bounded(&iter, sz - start), + end => starti + utf8_byte_len_unbounded(&iter), + }; + let bytes = to_utf8(s); + return from_utf8_unsafe(bytes[starti..endi]); +}; + +@test fn sub() void = { + assert(sub("a string", 2, end) == "string"); + assert(sub("a string", 0, 1) == "a"); + assert(sub("a string", 0, 3) == "a s"); + assert(sub("a string", 2, 8) == "string"); +}; diff --git a/strings/suffix.ha b/strings/suffix.ha @@ -0,0 +1,48 @@ +// Returns true if 'in' has the given prefix. +export fn has_prefix(in: str, prefix: str) bool = { + let a = to_utf8(in), b = to_utf8(prefix); + if (len(a) < len(b)) { + return false; + }; + for (let i = 0z; i < len(b); i += 1) { + if (a[i] != b[i]) { + return false; + }; + }; + return true; +}; + +@test fn prefix() void = { + assert(has_prefix("abcde", "abc")); + assert(has_prefix("abcde", "abcde")); + assert(has_prefix("abcde", "")); + assert(has_prefix("", "")); + assert(!has_prefix("abcde", "cde")); + assert(!has_prefix("abcde", "abcdefg")); + assert(!has_prefix("", "abc")); +}; + +// Returns true if 'in' has the given prefix. 
+export fn has_suffix(in: str, suff: str) bool = { + let a = to_utf8(in), b = to_utf8(suff); + if (len(a) < len(b)) { + return false; + }; + for (let i = 0z; i < len(b); i += 1) { + if (a[len(a) - len(b) + i] != b[i]) { + return false; + }; + }; + return true; +}; + +@test fn suffix() void = { + assert(has_suffix("abcde", "cde")); + assert(has_suffix("abcde", "abcde")); + assert(has_suffix("abcde", "")); + assert(has_suffix("", "")); + assert(has_suffix("abcde", "")); + assert(!has_suffix("abcde", "abc")); + assert(!has_suffix("abcde", "fabcde")); + assert(!has_suffix("", "abc")); +}; diff --git a/strings/tokenize.ha b/strings/tokenize.ha @@ -0,0 +1,120 @@ +use bytes; +use types; + +// The state for a tokenizer. +export type tokenizer = bytes::tokenizer; + +// Returns a tokenizer which yields sub-strings tokenized by a delimiter. +// +// let tok = strings::tokenize("hello, my name is drew", " "); +// assert(strings::token(tok) == "hello,"); +// assert(strings::token(tok) == "my"); +// assert(strings::token(tok) == "name"); +// assert(strings::remaining_tokens(tok) == "is drew"); +export fn tokenize(s: str, delim: str) tokenizer = + bytes::tokenize(to_utf8(s), to_utf8(delim)); + +// Returns the next string from a tokenizer, and advances the cursor. Returns +// void if there are no tokens left. +export fn next_token(s: *tokenizer) (str | void) = { + return match (bytes::next_token(s)) { + b: []u8 => from_utf8(b), + void => void, + }; +}; + +// Same as next_token(), but does not advance the cursor +export fn peek_token(s: *tokenizer) (str | void) = { + return match (bytes::peek_token(s)) { + b: []u8 => from_utf8(b), + void => void, + }; +}; + +// Returns the remainder of the string associated with a tokenizer, without doing +// any further tokenization. 
+export fn remaining_tokens(s: *tokenizer) str = { + return from_utf8(bytes::remaining_tokens(s)); +}; + +@test fn tokenize() void = { + let tok = tokenize("Hello, my name is drew", " "); + match (next_token(&tok)) { + s: str => assert(s == "Hello,"), + void => abort(), + }; + + match (next_token(&tok)) { + s: str => assert(s == "my"), + void => abort(), + }; + + match (peek_token(&tok)) { + s: str => assert(s == "name"), + void => abort(), + }; + + + match (next_token(&tok)) { + s: str => assert(s == "name"), + void => abort(), + }; + + assert(remaining_tokens(&tok) == "is drew"); + assert(peek_token(&tok) as str == "is"); + assert(remaining_tokens(&tok) == "is drew"); + + tok = tokenize("foo", "foo"); + + assert(peek_token(&tok) as str == ""); + assert(next_token(&tok) as str == ""); + + assert(peek_token(&tok) as str == ""); + assert(next_token(&tok) as str == ""); + + assert(peek_token(&tok) is void); + assert(next_token(&tok) is void); + + tok = tokenize("", "foo"); + assert(peek_token(&tok) is void); + assert(next_token(&tok) is void); +}; + +// Splits a string into tokens delimited by 'delim', returning a slice of up to +// N tokens. The caller must free this slice. The strings within the slice are +// borrowed from 'in', and needn't be freed - but should be [strings::dup_all]'d +// if they should outlive 'in'. +export fn splitN(in: str, delim: str, n: size) []str = { + let toks: []str = alloc([]); + let tok = tokenize(in, delim); + for (let i = 0z; i < n - 1z; i += 1) { + match (next_token(&tok)) { + s: str => append(toks, s), + void => return toks, + }; + }; + append(toks, remaining_tokens(&tok)); + return toks; +}; + +// Splits a string into tokens delimited by 'delim'. The caller must free the +// returned slice. The strings within the slice are borrowed from 'in', and +// needn't be freed - but must be [strings::dup_all]'d if they should outlive +// 'in'. 
+export fn split(in: str, delim: str) []str = splitN(in, delim, types::SIZE_MAX);
+
+@test fn split() void = {
+	const expected = ["Hello,", "my", "name", "is Drew"];
+	const actual = splitN("Hello, my name is Drew", " ", 4z);
+	assert(len(expected) == len(actual));
+	for (let i = 0z; i < len(expected); i += 1) {
+		assert(expected[i] == actual[i]);
+	};
+
+	const expected2 = ["Hello,", "my", "name", "is", "Drew"];
+	const actual2 = split("Hello, my name is Drew", " ");
+	assert(len(expected2) == len(actual2));
+	for (let i = 0z; i < len(expected2); i += 1) {
+		assert(expected2[i] == actual2[i]);
+	};
+};
diff --git a/strings/utf8.ha b/strings/utf8.ha
@@ -0,0 +1,42 @@
+use encoding::utf8;
+use types;
+
+// Converts a byte slice into a string WITHOUT checking that the byte slice is a
+// valid UTF-8 string. The result borrows the memory of 'in'; nothing is copied.
+export fn from_utf8_unsafe(in: []u8) str = {
+	const s = types::string {
+		data = in: *[*]u8,
+		length = len(in),
+		capacity = len(in),
+	};
+	return *(&s: *const str);
+};
+
+// Converts a byte slice into a string. Aborts if the bytes contain invalid
+// UTF-8. To handle such an error without aborting, see [try_from_utf8] or
+// [encoding::utf8::decode] instead.
+export fn from_utf8(in: []u8) str = {
+	let s = from_utf8_unsafe(in);
+	assert(utf8::valid(s), "attempted to load invalid UTF-8 string");
+	return s;
+};
+
+// Converts a byte slice into a string. If the slice contains invalid UTF-8
+// sequences, [encoding::utf8::invalid] is returned instead.
+export fn try_from_utf8(in: []u8) (str | utf8::invalid) = {
+	let s = from_utf8_unsafe(in);
+	if (!utf8::valid(s)) {
+		return utf8::invalid;
+	};
+	return s;
+};
+
+// Converts a string to a UTF-8 slice.
+export fn to_utf8(in: str) []u8 = *(&in: *[]u8);
+
+@test fn utf8() void = {
+	assert(from_utf8([
+		0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64,
+	]) == "hello world");
+	assert(from_utf8([]) == "");
+};
diff --git a/strio/dynamic.ha b/strio/dynamic.ha
@@ -0,0 +1,79 @@
+use io;
+use strings;
+
+type dynamic_stream = struct {
+	stream: io::stream,
+	buf: []u8,
+};
+
+// Creates a write-only string stream using an allocated buffer for storage, for
+// efficiently building strings.
+//
+// Calling [io::close] on this stream will free the buffer. Call [strio::finish]
+// instead to free up resources associated with the stream, but transfer
+// ownership of the buffer to the caller.
+export fn dynamic() *io::stream = {
+	let s = alloc(dynamic_stream {
+		stream = io::stream {
+			name = "<strio::dynamic>",
+			writer = &dynamic_write,
+			closer = &dynamic_close,
+			...
+		},
+		buf = [],
+	});
+	return &s.stream;
+};
+
+// Closes a [dynamic] stream without freeing its buffer, instead transferring
+// ownership of it to the caller as a string. Aborts on any other stream.
+export fn finish(s: *io::stream) str = {
+	assert(s.writer == &dynamic_write,
+		"strio::finish called on non-strio stream");
+	let s = s: *dynamic_stream;
+	let buf = s.buf;
+	free(s); // only the stream object is freed; buf now belongs to the caller
+	return strings::from_utf8(buf);
+};
+
+// Resets the buffer's length to zero, keeping the allocated memory for future
+// writes. Returns [io::unsupported] if the stream isn't a [dynamic] stream.
+export fn reset(s: *io::stream) (void | io::unsupported) = {
+	if (s.writer != &dynamic_write || s.closer != &dynamic_close) {
+		return io::unsupported;
+	};
+	const s = s: *dynamic_stream;
+	s.buf = s.buf[..0];
+};
+
+// Truncates the buffer, freeing memory associated with it and setting its
+// length to zero.
+export fn truncate(s: *io::stream) (void | io::unsupported) = {
+	if (s.writer != &dynamic_write || s.closer != &dynamic_close) {
+		return io::unsupported; // not a stream created by dynamic()
+	};
+	let s = s: *dynamic_stream;
+	delete(s.buf[..]);
+};
+
+fn dynamic_write(s: *io::stream, buf: const []u8) (size | io::error) = {
+	let s = s: *dynamic_stream;
+	append(s.buf, ...buf);
+	return len(buf); // the whole input is always reported as written
+};
+
+fn dynamic_close(s: *io::stream) void = {
+	const s = s: *dynamic_stream;
+	free(s.buf);
+	free(s);
+};
+
+@test fn dynamic() void = {
+	let stream = dynamic();
+	io::write(stream, strings::to_utf8("hello ")) as size;
+	io::write(stream, strings::to_utf8("world")) as size;
+	assert(string(stream) == "hello world");
+	let s = finish(stream);
+	assert(s == "hello world");
+	free(s);
+};
diff --git a/strio/fixed.ha b/strio/fixed.ha
@@ -0,0 +1,63 @@
+use io;
+use strings;
+
+type fixed_stream = struct {
+	stream: io::stream,
+	buf: []u8,
+	cur: []u8,
+};
+
+// Creates a write-only string stream using the provided buffer for storage.
+// Writes are cut short to fit; writing to an already-full buffer aborts.
+export fn fixed(in: []u8) *io::stream = {
+	let s = alloc(fixed_stream {
+		stream = io::stream {
+			name = "<strio::fixed>",
+			writer = &fixed_write,
+			closer = &fixed_close,
+			...
+		},
+		buf = in,
+		cur = in, // cur is the unwritten tail of buf; initially all of it
+	});
+	return &s.stream;
+};
+
+// Returns the current contents of the buffer as a string. Aborts the program if
+// invalid UTF-8 has been written to the buffer.
+export fn string(s: *io::stream) str = {
+	if (s.writer == &fixed_write) {
+		let stream = s: *fixed_stream;
+		const n = len(stream.buf) - len(stream.cur); // bytes written so far
+		return strings::from_utf8(stream.buf[..n]);
+	} else if (s.writer == &dynamic_write) {
+		let s = s: *dynamic_stream;
+		let buf = s.buf;
+		return strings::from_utf8(buf);
+	} else {
+		abort("strio::string called on non-strio stream");
+	};
+};
+
+fn fixed_write(s: *io::stream, buf: const []u8) (size | io::error) = {
+	let stream = s: *fixed_stream;
+	if (len(stream.cur) == 0) {
+		abort("strio::fixed buffer exceeded");
+	};
+	const n = if (len(buf) > len(stream.cur)) len(stream.cur) else len(buf);
+	stream.cur[..n] = buf[..n];
+	stream.cur = stream.cur[n..];
+	return n; // may be a short write when buf does not fit entirely
+};
+
+fn fixed_close(s: *io::stream) void = {
+	free(s); // the storage buffer belongs to the caller and is not freed
+};
+
+@test fn fixed() void = {
+	static let buf: [1024]u8 = [0...];
+	let stream = fixed(buf);
+	io::write(stream, strings::to_utf8("hello ")) as size;
+	io::write(stream, strings::to_utf8("world")) as size;
+	assert(string(stream) == "hello world");
+};
diff --git a/strio/ops.ha b/strio/ops.ha
@@ -0,0 +1,110 @@
+use encoding::utf8;
+use io;
+use strings;
+
+// Appends zero or more strings to an [io::stream]. The stream needn't be a
+// strio stream, but it's often more efficient if it is. Returns the number of
+// bytes written, or an error.
+export fn concat(st: *io::stream, strs: str...) (size | io::error) = {
+	let n = 0z;
+	for (let i = 0z; i < len(strs); i += 1) {
+		let q = 0z;
+		let buf = strings::to_utf8(strs[i]);
+		for (q < len(buf)) { // retry until short writes have drained buf
+			let w = io::write(st, buf[q..])?;
+			n += w;
+			q += w; // advance past the bytes just written
+		};
+	};
+	return n;
+};
+
+@test fn concat() void = {
+	let st = dynamic();
+	defer io::close(st);
+	concat(st, "hello") as size;
+	concat(st, " ", "world") as size;
+	assert(string(st) == "hello world");
+};
+
+// Joins several strings together by a delimiter and writes them to a stream.
+// The stream needn't be a strio stream, but it's often more efficient if it is.
+// Returns the number of bytes written, or an error.
+export fn join(st: *io::stream, delim: str, strs: str...) (size | io::error) = {
+	let n = 0z;
+	let delim = strings::to_utf8(delim);
+	for (let i = 0z; i < len(strs); i += 1) {
+		let q = 0z;
+		let buf = strings::to_utf8(strs[i]);
+		for (q < len(buf)) { // retry until short writes have drained buf
+			let w = io::write(st, buf[q..])?;
+			n += w;
+			q += w; // advance past the bytes just written
+		};
+		if (i + 1 < len(strs)) { // no delimiter after the last string
+			let q = 0z;
+			for (q < len(delim)) {
+				let w = io::write(st, delim[q..])?;
+				n += w;
+				q += w;
+			};
+		};
+	};
+	return n;
+};
+
+@test fn join() void = {
+	let st = dynamic();
+	defer io::close(st);
+	join(st, "::", "hello", "world") as size;
+	assert(string(st) == "hello::world");
+	truncate(st);
+	join(st, "::") as size;
+	assert(string(st) == "");
+	truncate(st);
+	join(st, "::", "foo") as size;
+	assert(string(st) == "foo");
+};
+
+// Joins several strings together by a delimiter and writes them to a stream, in
+// reverse order. The stream needn't be a strio stream, but it's often more
+// efficient if it is. Returns the number of bytes written, or an error.
+export fn rjoin(st: *io::stream, delim: str, strs: str...) (size | io::error) = {
+	let n = 0z;
+	let delim = strings::to_utf8(delim);
+	for (let i = len(strs); i > 0; i -= 1) { // i is one past the current index
+		let q = 0z;
+		let buf = strings::to_utf8(strs[i - 1]);
+		for (q < len(buf)) { // retry until short writes have drained buf
+			let w = io::write(st, buf[q..])?;
+			n += w;
+			q += w; // advance past the bytes just written
+		};
+		if (i - 1 > 0) { // no delimiter after the final (first) string
+			let q = 0z;
+			for (q < len(delim)) {
+				let w = io::write(st, delim[q..])?;
+				n += w;
+				q += w;
+			};
+		};
+	};
+	return n;
+};
+
+@test fn rjoin() void = {
+	let st = dynamic();
+	defer io::close(st);
+	rjoin(st, "::", "hello", "world") as size;
+	assert(string(st) == "world::hello");
+	truncate(st);
+	rjoin(st, "::") as size;
+	assert(string(st) == "");
+	truncate(st);
+	rjoin(st, "::", "foo") as size;
+	assert(string(st) == "foo");
+};
+
+// Appends a rune to a stream.
+export fn append_rune(st: *io::stream, r: rune) (size | io::error) =
+	io::write(st, utf8::encode_rune(r));
diff --git a/temp/+linux.ha b/temp/+linux.ha
@@ -0,0 +1,68 @@
+use crypto::random;
+use encoding::hex;
+use fs;
+use io;
+use os;
+use path;
+
+fn get_tmpdir() str = os::tryenv("TMPDIR", "/tmp");
+
+// Creates an unnamed temporary file. The file may or may not have a name; not
+// all systems support the creation of temporary inodes which are not linked to
+// any directory. Falling back to a real, self-removing file is not implemented
+// yet: the program currently aborts if the unnamed file cannot be created.
+//
+// The I/O mode must be either [io::mode::WRITE] or [io::mode::RDWR].
+//
+// Only one variadic argument may be provided, if at all, to specify the mode of
+// the new file. The default is 0o644.
+export fn file(
+	iomode: io::mode,
+	mode: fs::mode...
+) (*io::stream | fs::error) = {
+	assert(iomode == io::mode::WRITE || iomode == io::mode::RDWR);
+	assert(len(mode) == 0 || len(mode) == 1);
+	let fmode = if (len(mode) != 0) mode[0] else 0o644: fs::mode;
+	let oflags = fs::flags::TMPFILE | fs::flags::EXCL | fs::flags::CLOEXEC;
+	if (iomode == io::mode::RDWR) {
+		oflags |= fs::flags::RDWR;
+	} else {
+		oflags |= fs::flags::WRONLY;
+	};
+	return match (os::create(get_tmpdir(), fmode, oflags)) {
+		err: fs::error => abort(), // TODO: Fall back to named file
+		s: *io::stream => s,
+	};
+};
+
+// Creates a named temporary file. Not yet implemented: always aborts.
+//
+// The I/O mode must be either [io::mode::WRITE] or [io::mode::RDWR].
+//
+// Only one variadic argument may be provided, if at all, to specify the mode of
+// the new file. The default is 0o644.
+export fn named(
+	iomode: io::mode,
+	mode: fs::mode...
+) (*io::stream | fs::error) = {
+	abort(); // TODO
+};
+
+// Creates a temporary directory. 
This function only guarantees that the
+// directory will have a unique name and be placed in the system temp directory,
+// but not that it will be removed automatically; the caller must remove it when
+// they're done using it via [os::rmdir] or [os::rmdirall].
+//
+// The caller must free the return value. Aborts if the directory can't be made.
+export fn dir() str = {
+	let buf: [8]u8 = [0...];
+	random::buffer(buf[..]);
+	let name = hex::encode(buf); // directory name: 8 random bytes, hex-encoded
+	defer free(name);
+	let path = path::join(get_tmpdir(), name);
+	match (os::mkdir(path)) {
+		err: fs::error => abort("Could not create temp directory"),
+		void => void,
+	};
+	return path;
+};
diff --git a/types/arch+aarch64.ha b/types/arch+aarch64.ha
@@ -0,0 +1,23 @@
+// Minimum value which can be stored in an int type.
+export def INT_MIN: int = I32_MIN;
+
+// Maximum value which can be stored in an int type.
+export def INT_MAX: int = I32_MAX;
+
+// Minimum value which can be stored in a uint type.
+export def UINT_MIN: uint = U32_MIN;
+
+// Maximum value which can be stored in a uint type.
+export def UINT_MAX: uint = U32_MAX;
+
+// Minimum value which can be stored in a size type.
+export def SIZE_MIN: size = U64_MIN;
+
+// Maximum value which can be stored in a size type.
+export def SIZE_MAX: size = U64_MAX;
+
+// Minimum value which can be stored in a uintptr type.
+export def UINTPTR_MIN: uintptr = U64_MIN: uintptr;
+
+// Maximum value which can be stored in a uintptr type.
+export def UINTPTR_MAX: uintptr = U64_MAX: uintptr;
diff --git a/types/arch+x86_64.ha b/types/arch+x86_64.ha
@@ -0,0 +1,23 @@
+// Minimum value which can be stored in an int type.
+export def INT_MIN: int = I32_MIN;
+
+// Maximum value which can be stored in an int type.
+export def INT_MAX: int = I32_MAX;
+
+// Minimum value which can be stored in a uint type.
+export def UINT_MIN: uint = U32_MIN;
+
+// Maximum value which can be stored in a uint type.
+export def UINT_MAX: uint = U32_MAX;
+
+// Minimum value which can be stored in a size type.
+export def SIZE_MIN: size = U64_MIN;
+
+// Maximum value which can be stored in a size type.
+export def SIZE_MAX: size = U64_MAX;
+
+// Minimum value which can be stored in a uintptr type.
+export def UINTPTR_MIN: uintptr = U64_MIN: uintptr;
+
+// Maximum value which can be stored in a uintptr type.
+export def UINTPTR_MAX: uintptr = U64_MAX: uintptr;
diff --git a/types/classes.ha b/types/classes.ha
@@ -0,0 +1,42 @@
+// A tagged union of all signed integer types.
+export type signed = (i8 | i16 | i32 | i64 | int);
+
+// A tagged union of all unsigned integer types, excluding uintptr.
+export type unsigned = (u8 | u16 | u32 | u64 | uint | size);
+
+// A tagged union of all integer types.
+export type integer = (...signed | ...unsigned);
+
+// A tagged union of all floating point numeric types.
+export type floating = (f32 | f64);
+
+// A tagged union of all numeric types.
+export type numeric = (...integer | ...floating);
+
+// A type representing the internal structure of strings, useful for low-level
+// string manipulation.
+export type string = struct {
+	// UTF-8 encoded octets, plus a NUL terminator.
+	data: nullable *[*]u8,
+
+	// The length, in octets of UTF-8 data, not including the NUL
+	// terminator.
+	length: size,
+
+	// The allocated capacity, in octets of UTF-8 data, not including the
+	// NUL terminator.
+	capacity: size,
+};
+
+// A type representing the internal structure of slices, useful for low-level
+// slice manipulation.
+export type slice = struct {
+	// The slice contents.
+	data: nullable *void,
+
+	// The number of members of the slice.
+	length: size,
+
+	// The allocated capacity (in members) of data.
+	capacity: size,
+};
diff --git a/types/limits.ha b/types/limits.ha
@@ -0,0 +1,54 @@
+// Minimum value which can be stored in an i8 type.
+export def I8_MIN: i8 = -128;
+
+// Maximum value which can be stored in an i8 type.
+export def I8_MAX: i8 = 127;
+
+// Minimum value which can be stored in an i16 type.
+export def I16_MIN: i16 = -32768;
+
+// Maximum value which can be stored in an i16 type.
+export def I16_MAX: i16 = 32767;
+
+// Minimum value which can be stored in an i32 type.
+export def I32_MIN: i32 = -2147483648;
+
+// Maximum value which can be stored in an i32 type.
+export def I32_MAX: i32 = 2147483647;
+
+// Minimum value which can be stored in an i64 type.
+export def I64_MIN: i64 = -9223372036854775808;
+
+// Maximum value which can be stored in an i64 type.
+export def I64_MAX: i64 = 9223372036854775807;
+
+
+// Minimum value which can be stored in a u8 type.
+export def U8_MIN: u8 = 0;
+
+// Maximum value which can be stored in a u8 type.
+export def U8_MAX: u8 = 255;
+
+// Minimum value which can be stored in a u16 type.
+export def U16_MIN: u16 = 0;
+
+// Maximum value which can be stored in a u16 type.
+export def U16_MAX: u16 = 65535;
+
+// Minimum value which can be stored in a u32 type.
+export def U32_MIN: u32 = 0;
+
+// Maximum value which can be stored in a u32 type.
+export def U32_MAX: u32 = 4294967295;
+
+// Minimum value which can be stored in a u64 type.
+export def U64_MIN: u64 = 0;
+
+// Maximum value which can be stored in a u64 type.
+export def U64_MAX: u64 = 18446744073709551615;
+
+// Minimum value which can be stored in a rune.
+export def RUNE_MIN: rune = U32_MIN: rune;
+
+// Maximum value which can be stored in a rune.
+export def RUNE_MAX: rune = U32_MAX: rune;