docstr.ha (4805B)
// License: GPL-3.0
// (c) 2022 Alexey Yerin <yyp@disroot.org>
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2021 Ember Sawady <ecs@d2evs.net>
// (c) 2021 Thomas Bracht Laumann Jespersen <t@laumann.xyz>
// (c) 2022 Umar Getagazov <umar@handlerug.me>
use ascii;
use bufio;
use encoding::utf8;
use fmt;
use hare::ast;
use hare::parse;
use io;
use memio;
use strings;

// Marker token: emitted by [[scantext]] when text begins while the parser is
// in the PARAGRAPH state.
type paragraph = void;
// A run of plain text.
type text = str;
// An identifier parsed from between "[[" and "]]".
type reference = ast::ident;
// The contents of an indented block (more than one column of leading
// whitespace at the start of a paragraph).
type sample = str;
// Marker token: emitted when "- " is scanned.
type listitem = void;
// Any token produced by [[scandoc]].
type token = (paragraph | text | reference | sample | listitem);

// Scanner state; selects which constructs [[scandoc]] recognizes next.
type docstate = enum {
	// Initial state, and the state entered after a blank line in text.
	PARAGRAPH,
	// Entered once a paragraph token has been emitted.
	TEXT,
	// Entered once a list item marker has been scanned.
	LIST,
};

type parser = struct {
	src: bufio::stream,
	state: docstate,
};

// Creates a docstring parser which reads from the given handle, starting in
// the PARAGRAPH state.
//
// The read buffer is a single static array, so parsers returned by separate
// calls share it.
fn parsedoc(in: io::handle) parser = {
	static let buf: [4096]u8 = [0...];
	return parser {
		src = bufio::init(in, buf[..], []),
		state = docstate::PARAGRAPH,
	};
};

// Returns the next token from the docstring, or void once the input is
// exhausted.
//
// The first rune is peeked (scanned, then unread) and, together with the
// current state, decides which scanner handles the input:
// - TEXT: '[' starts a reference, anything else is text.
// - LIST: as TEXT, but '-' additionally starts a list item.
// - PARAGRAPH: a space or tab may start a sample, '-' may start a list item,
//   anything else is text.
fn scandoc(par: *parser) (token | void) = {
	const rn = match (bufio::scanrune(&par.src)!) {
	case let rn: rune =>
		yield rn;
	case io::EOF =>
		return;
	};

	bufio::unreadrune(&par.src, rn);
	switch (par.state) {
	case docstate::TEXT =>
		switch (rn) {
		case '[' =>
			return scanref(par);
		case =>
			return scantext(par);
		};
	case docstate::LIST =>
		switch (rn) {
		case '[' =>
			return scanref(par);
		case '-' =>
			return scanlist(par);
		case =>
			return scantext(par);
		};
	case docstate::PARAGRAPH =>
		switch (rn) {
		case ' ', '\t' =>
			return scansample(par);
		case '-' =>
			return scanlist(par);
		case =>
			return scantext(par);
		};
	};
};

// Scans a run of text.
//
// In the PARAGRAPH state nothing is consumed: the state becomes TEXT and a
// paragraph token is returned. Otherwise runes are collected until one of:
// - end of input;
// - a '[' (left unread for [[scanref]]);
// - a blank line, i.e. two consecutive newlines (the state becomes
//   PARAGRAPH; the first newline is kept in the text, the second is dropped);
// - in the LIST state, a newline followed by '-' (the '-' is left unread for
//   [[scanlist]]).
//
// Returns void if no runes were collected. The returned text is taken from a
// dynamic memio buffer which this function does not close; presumably the
// caller is expected to free it (TODO confirm against callers).
fn scantext(par: *parser) (token | void) = {
	if (par.state == docstate::PARAGRAPH) {
		par.state = docstate::TEXT;
		return paragraph;
	};
	// TODO: Collapse whitespace
	const buf = memio::dynamic();
	for (true) {
		const rn = match (bufio::scanrune(&par.src)!) {
		case io::EOF => break;
		case let rn: rune =>
			yield rn;
		};
		switch (rn) {
		case '[' =>
			bufio::unreadrune(&par.src, rn);
			break;
		case '\n' =>
			memio::appendrune(&buf, rn)!;
			// Look one rune past the newline to detect paragraph
			// breaks and list items.
			const rn = match (bufio::scanrune(&par.src)!) {
			case io::EOF => break;
			case let rn: rune =>
				yield rn;
			};
			if (rn == '\n') {
				par.state = docstate::PARAGRAPH;
				break;
			};
			bufio::unreadrune(&par.src, rn);
			if (rn == '-' && par.state == docstate::LIST) {
				break;
			};
		case =>
			memio::appendrune(&buf, rn)!;
		};
	};
	let result = memio::string(&buf)!;
	if (len(result) == 0) {
		// Nothing is handed back to the caller, so release the buffer
		// here rather than abandoning it.
		io::close(&buf)!;
		return;
	};
	return result: text;
};

// Scans a "[[identifier]]" reference. The next rune in the input must be '[';
// anything else is a programming error and aborts.
//
// - A '[' which is not followed by a second '[' (including a '[' at the very
//   end of the input) is returned as the literal text "[".
// - Otherwise runes are collected up to the first ']'. The rune after it is
//   consumed only if it is the second ']'; any other rune is left in the
//   stream, and end of input simply terminates the reference.
//
// The collected string is parsed with parse::identstr. An invalid identifier
// still trips the type assertion below and aborts.
fn scanref(par: *parser) (token | void) = {
	match (bufio::scanrune(&par.src)!) {
	case io::EOF =>
		return;
	case let rn: rune =>
		if (rn != '[') {
			abort();
		};
	};
	match (bufio::scanrune(&par.src)!) {
	case io::EOF =>
		// A lone '[' at the end of the input is ordinary text; do not
		// drop it.
		return strings::dup("["): text;
	case let rn: rune =>
		if (rn != '[') {
			bufio::unreadrune(&par.src, rn);
			return strings::dup("["): text;
		};
	};

	const buf = memio::dynamic();
	defer io::close(&buf)!;
	for (true) {
		const rn = match (bufio::scanrune(&par.src)!) {
		case io::EOF => break;
		case let rn: rune =>
			yield rn;
		};
		if (rn != ']') {
			memio::appendrune(&buf, rn)!;
			continue;
		};
		// First ']' seen. Swallow the second one if it is there, but
		// never eat an unrelated rune and never assume there is more
		// input.
		match (bufio::scanrune(&par.src)!) {
		case io::EOF => break;
		case let next: rune =>
			if (next != ']') {
				bufio::unreadrune(&par.src, next);
			};
		};
		break;
	};
	// TODO: Handle invalid identifiers here instead of aborting
	let id = parse::identstr(memio::string(&buf)!) as ast::ident;
	return id: reference;
};

// Scans an indented sample block.
//
// The leading whitespace of the first line is measured in columns (a space
// counts as 1, a tab as 8). At end of input during this step, void is
// returned. If the indent is at most one column, the input is handed to
// [[scantext]] instead.
//
// Otherwise lines are collected, with up to that many columns of leading
// whitespace stripped from each following line. Newlines met while stripping
// are kept and restart the column count. The sample ends at end of input or
// at the first rune which is not whitespace and appears before the indent has
// been fully consumed; that rune is left unread. Trailing newlines are
// trimmed from the result.
fn scansample(par: *parser) (token | void) = {
	let nws = 0z;
	for (true) {
		match (bufio::scanrune(&par.src)!) {
		case io::EOF =>
			return;
		case let rn: rune =>
			switch (rn) {
			case ' ' =>
				nws += 1;
			case '\t' =>
				nws += 8;
			case =>
				bufio::unreadrune(&par.src, rn);
				break;
			};
		};
	};
	if (nws <= 1) {
		return scantext(par);
	};

	let cont = true;
	let buf = memio::dynamic();
	for (cont) {
		const rn = match (bufio::scanrune(&par.src)!) {
		case io::EOF => break;
		case let rn: rune =>
			yield rn;
		};
		switch (rn) {
		case '\n' =>
			memio::appendrune(&buf, rn)!;
		case =>
			memio::appendrune(&buf, rn)!;
			continue;
		};

		// Consume whitespace
		for (let i = 0z; i < nws) {
			match (bufio::scanrune(&par.src)!) {
			case io::EOF => break;
			case let rn: rune =>
				switch (rn) {
				case ' ' =>
					i += 1;
				case '\t' =>
					i += 8;
				case '\n' =>
					// Blank line inside the sample: keep
					// it and start counting again.
					memio::appendrune(&buf, rn)!;
					i = 0;
				case =>
					// Under-indented line: the sample is
					// over.
					bufio::unreadrune(&par.src, rn);
					cont = false;
					break;
				};
			};
		};
	};

	let buf = memio::string(&buf)!;
	// Trim trailing newlines
	buf = strings::rtrim(buf, '\n');
	return buf: sample;
};

// Scans a list item marker. The next rune in the input must be '-'; anything
// else is a programming error and aborts.
//
// If the '-' is followed by a space, the state becomes LIST and a listitem
// token is returned. Otherwise (including a '-' at the very end of the input)
// the following rune, if any, is left unread and the literal text "-" is
// returned.
fn scanlist(par: *parser) (token | void) = {
	match (bufio::scanrune(&par.src)!) {
	case io::EOF => return void;
	case let rn: rune =>
		if (rn != '-') {
			abort();
		};
	};
	const rn = match (bufio::scanrune(&par.src)!) {
	case io::EOF =>
		// A lone '-' at the end of the input is ordinary text; do not
		// drop it.
		return strings::dup("-"): text;
	case let rn: rune =>
		yield rn;
	};
	if (rn != ' ') {
		bufio::unreadrune(&par.src, rn);
		return strings::dup("-"): text;
	};
	par.state = docstate::LIST;
	return listitem;
};