hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 3ab2d595610b8c4d6a43423fb2566e272ae534ec
parent ff7b6c1a8eee0760b4dbbe94b5d04ca85d49f943
Author: Ember Sawady <ecs@d2evs.net>
Date:   Wed,  7 Jun 2023 05:54:39 +0000

Improve +libc argv/argc/envp initialization

Previously, we had libc run @init, which meant that we couldn't make use
of the argc and argv it passed into rt::start_ha, and we needed to do
terrible hacks in rt::start_linux to reconstruct them. Now rt::start_ha
stores the argc and argv it receives and runs @init itself. While we're
here, delete the now-unnecessary os/+linux/environ+libc.ha, which was
out of sync with os/+linux/environ.ha anyway.

Signed-off-by: Ember Sawady <ecs@d2evs.net>

Diffstat:
M linux/start+libc.ha | 2 --
D os/+linux/environ+libc.ha | 126 -------------------------------------------------------------------------------
D rt/+linux/platformstart+libc.ha | 23 -----------------------
R rt/+linux/platformstart.ha -> rt/+linux/platformstart-libc.ha | 0
M rt/hare+libc.sc | 6 +++---
M rt/start+libc.ha | 23 ++++++++++++++++++++---
M rt/start+test+libc.ha | 24 +++++++++++++++++++++---
M scripts/gen-stdlib | 2 +-
M stdlib.mk | 4 ++--
9 files changed, 47 insertions(+), 163 deletions(-)

diff --git a/linux/start+libc.ha b/linux/start+libc.ha @@ -4,8 +4,6 @@ use rt; use format::elf; @init fn init_linux() void = { - rt::start_linux(); - let i = 0; for (rt::envp[i] != null) { i += 1; diff --git a/os/+linux/environ+libc.ha b/os/+linux/environ+libc.ha @@ -1,126 +0,0 @@ -// License: MPL-2.0 -// (c) 2022 Alexey Yerin <yyp@disroot.org> -use bytes; -use rt; -use strings; -use types::c; - -// The command line arguments provided to the program. By convention, the first -// member is usually the name of the program. -export let args: []str = []; - -// Statically allocate arg strings if there are few enough arguments, saves a -// syscall if we don't need it. -let args_static: [32]str = [""...]; - -@init fn init_environ() void = { - rt::start_linux(); - if (rt::argc < len(args_static)) { - args = args_static[..rt::argc]; - for (let i = 0z; i < rt::argc; i += 1) { - args[i] = c::tostr(rt::argv[i]: *const c::char)!; - }; - } else { - args = alloc([], rt::argc); - for (let i = 0z; i < rt::argc; i += 1) { - append(args, c::tostr(rt::argv[i]: *const c::char)!); - }; - }; - -}; - -@fini fn fini_environ() void = { - if (rt::argc >= len(args_static)) { - free(args); - }; - free(envp); -}; - -// Looks up an environment variable and returns its value, or void if unset. -export fn getenv(name: const str) (str | void) = { - const name_b = strings::toutf8(name); - for (let i = 0z; rt::envp[i] != null; i += 1) { - const item = rt::envp[i]: *[*]u8; - const ln = c::strlen(item: *c::char); - const eq: size = match (bytes::index(item[..ln], '=')) { - case void => - abort("Environment violates System-V invariants"); - case let i: size => - yield i; - }; - if (bytes::equal(name_b, item[..eq])) { - const ln = c::strlen(item: *const c::char); - return strings::fromutf8(item[eq+1..ln])!; - }; - }; -}; - -// Looks up an environment variable and returns its value, or a default value if -// unset. 
-export fn tryenv(name: const str, default: str) str = match (getenv(name)) { -case let s: str => - yield s; -case void => - yield default; -}; - -let envp: []str = []; - -// Returns a slice of the environment strings in the form KEY=VALUE. -export fn getenvs() []str = { - if (len(envp) != 0) { - return envp; - }; - for (let i = 0z; rt::envp[i] != null; i += 1) { - append(envp, c::tostr(rt::envp[i]: *const c::char)!); - }; - return envp; -}; - -let uts: rt::utsname = rt::utsname { ... }; -let uts_valid: bool = false; - -// Returns the host kernel name -export fn sysname() const str = { - if (!uts_valid) { - rt::uname(&uts) as void; - uts_valid = true; - }; - return c::tostr(&uts.sysname: *const c::char)!; -}; - -// Returns the host system hostname -export fn hostname() const str = { - if (!uts_valid) { - rt::uname(&uts) as void; - uts_valid = true; - }; - return c::tostr(&uts.nodename: *const c::char)!; -}; - -// Returns the host kernel version -export fn release() const str = { - if (!uts_valid) { - rt::uname(&uts) as void; - uts_valid = true; - }; - return c::tostr(&uts.release: *const c::char)!; -}; - -// Returns the host operating system version -export fn version() const str = { - if (!uts_valid) { - rt::uname(&uts) as void; - uts_valid = true; - }; - return c::tostr(&uts.version: *const c::char)!; -}; - -// Returns the host CPU architecture -export fn machine() const str = { - if (!uts_valid) { - rt::uname(&uts) as void; - uts_valid = true; - }; - return c::tostr(&uts.machine: *const c::char)!; -}; diff --git a/rt/+linux/platformstart+libc.ha b/rt/+linux/platformstart+libc.ha @@ -1,23 +0,0 @@ -// License: MPL-2.0 -// (c) 2021-2022 Alexey Yerin <yyp@disroot.org> - -export fn start_linux() void = { - // Here we use a cool strategy of re-constructing argv and argc without - // knowing their original values. 
Since environ is placed just after - // them, it's possible to traverse backwards calculating how many - // entries were processed and comparing that value to the one at - // current position. - let argv_ptr = c_environ: uintptr - size(*u8): uintptr * 2; - let i = 0z; - for (*(argv_ptr: **u8): uintptr: size != i; i += 1) { - argv_ptr -= size(*u8): uintptr; - }; - - argc = i; - argv = (argv_ptr + size(*u8): uintptr): *[*]*u8; - envp = c_environ; -}; - -@init fn start_linux() void = start_linux(); - -let @symbol("environ") c_environ: *[*]nullable *u8; diff --git a/rt/+linux/platformstart.ha b/rt/+linux/platformstart-libc.ha diff --git a/rt/hare+libc.sc b/rt/hare+libc.sc @@ -14,10 +14,10 @@ SECTIONS { *(.rela.plt) } - .init_array : { - PROVIDE_HIDDEN (__init_array_start = .); + .libc_init_array : { + PROVIDE_HIDDEN (__libc_init_array_start = .); KEEP (*(.init_array)) - PROVIDE_HIDDEN (__init_array_end = .); + PROVIDE_HIDDEN (__libc_init_array_end = .); } .fini_array : { diff --git a/rt/start+libc.ha b/rt/start+libc.ha @@ -4,10 +4,19 @@ @symbol(".main") fn main() void; @symbol("exit") fn c_exit(status: int) void; +const @symbol("__libc_init_array_start") init_start: [*]*fn() void; +const @symbol("__libc_init_array_end") init_end: [*]*fn() void; const @symbol("__fini_array_start") fini_start: [*]*fn() void; const @symbol("__fini_array_end") fini_end: [*]*fn() void; -export fn init() void = void; +// Run all global initialization functions. +export fn init() void = { + const ninit = (&init_end: uintptr - &init_start: uintptr): size + / size(*fn() void); + for (let i = 0z; i < ninit; i += 1) { + init_start[i](); + }; +}; // Run all global finalization functions. 
export fn fini() void = { @@ -18,8 +27,16 @@ export fn fini() void = { }; }; -export @symbol("main") @noreturn fn start_ha() void = { - // libc runs @init and @fini for us (unless rt::exit() is called) +export @symbol("main") @noreturn fn start_ha(c_argc: int, c_argv: *[*]*u8) void = { + argc = c_argc: size; + argv = c_argv; + envp = c_envp; + // we deliberately prevent libc from running @init for us, in order to + // be able to initialize argc/argv/envp beforehand. we can still get + // away with just using libc for @fini though + init(); main(); c_exit(0); }; + +let @symbol("environ") c_envp: *[*]nullable *u8; diff --git a/rt/start+test+libc.ha b/rt/start+test+libc.ha @@ -3,11 +3,20 @@ @symbol("__test_main") fn test_main() size; -export fn init() void = void; - +const @symbol("__libc_init_array_start") init_start: [*]*fn() void; +const @symbol("__libc_init_array_end") init_end: [*]*fn() void; const @symbol("__fini_array_start") fini_start: [*]*fn() void; const @symbol("__fini_array_end") fini_end: [*]*fn() void; +// Run all global initialization functions. +export fn init() void = { + const ninit = (&init_end: uintptr - &init_start: uintptr): size + / size(*fn() void); + for (let i = 0z; i < ninit; i += 1) { + init_start[i](); + }; +}; + // Run all global finalization functions. export fn fini() void = { const nfini = (&fini_end: uintptr - &fini_start: uintptr): size @@ -17,7 +26,16 @@ export fn fini() void = { }; }; -export @symbol("main") fn main() int = { +export @symbol("main") fn start_ha(c_argc: int, c_argv: *[*]*u8) int = { + argc = c_argc: size; + argv = c_argv; + envp = c_envp; + // we deliberately prevent libc from running @init for us, in order to + // be able to initialize argc/argv/envp beforehand. 
we can still get + // away with just using libc for @fini though + init(); const nfail = test_main(); return if (nfail > 0) 1 else 0; }; + +let @symbol("environ") c_envp: *[*]nullable *u8; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -24,7 +24,7 @@ gensrcs_rt() { +linux/errno.ha \ +linux/types.ha \ +linux/segmalloc.ha \ - +linux/platformstart.ha \ + +linux/platformstart-libc.ha \ +linux/prctl.ha \ +linux/'+$(ARCH)'.ha \ +linux/syscallno+'$(ARCH)'.ha \ diff --git a/stdlib.mk b/stdlib.mk @@ -8,7 +8,7 @@ stdlib_rt_linux_srcs = \ $(STDLIB)/rt/+linux/errno.ha \ $(STDLIB)/rt/+linux/types.ha \ $(STDLIB)/rt/+linux/segmalloc.ha \ - $(STDLIB)/rt/+linux/platformstart.ha \ + $(STDLIB)/rt/+linux/platformstart-libc.ha \ $(STDLIB)/rt/+linux/prctl.ha \ $(STDLIB)/rt/+linux/+$(ARCH).ha \ $(STDLIB)/rt/+linux/syscallno+$(ARCH).ha \ @@ -2401,7 +2401,7 @@ testlib_rt_linux_srcs = \ $(STDLIB)/rt/+linux/errno.ha \ $(STDLIB)/rt/+linux/types.ha \ $(STDLIB)/rt/+linux/segmalloc.ha \ - $(STDLIB)/rt/+linux/platformstart.ha \ + $(STDLIB)/rt/+linux/platformstart-libc.ha \ $(STDLIB)/rt/+linux/prctl.ha \ $(STDLIB)/rt/+linux/+$(ARCH).ha \ $(STDLIB)/rt/+linux/syscallno+$(ARCH).ha \