go-nestedtext

NestedText in Go (alpha)
Log | Files | Refs | README | LICENSE

commit 0a5905e8e6dda95a77cb337806aad4cdbc8116cc
parent a039c7123d3b752e5736add267890a3a64e09997
Author: Byron Torres <b@torresjrjr.com>
Date:   Sat,  1 May 2021 20:17:58 +0100

Improve error handling

Diffstat:
M nestedtext.go | 181 +++++++++++++++++++++++++++++++++----------------------------------------------
1 file changed, 75 insertions(+), 106 deletions(-)

diff --git a/nestedtext.go b/nestedtext.go @@ -11,6 +11,8 @@ import ( "strings" ) +// TODO: Impl negative lookahead of ': ' for reKey{Value,Empty} + var ( reLeadingWs = regexp.MustCompile(`^\s*`) reComment = regexp.MustCompile(`^\s*(#.*)?$`) @@ -18,7 +20,7 @@ var ( reKeyEmpty = regexp.MustCompile(`^\s*[^:]*: ?$`) reListValue = regexp.MustCompile(`^\s*- .*$`) reListEmpty = regexp.MustCompile(`^\s*- ?$`) - reMultiline = regexp.MustCompile(`^\s*> ?.*$`) + reMultiline = regexp.MustCompile(`^\s*>( .*)?$`) ) type BlockType int @@ -31,16 +33,23 @@ const ( ) func main() { - DebugDumpJSON(UnmarshelNestedText(os.Stdin)) + doc, err := UnmarshalNestedText(os.Stdin) + must(err) + jsonRepr, err := json.MarshalIndent(doc, "", "\t") + must(err) + fmt.Println(string(jsonRepr)) } -func UnmarshelNestedText(r io.Reader) interface{} { +func UnmarshalNestedText(r io.Reader) (interface{}, error) { scanner := bufio.NewScanner(r) - return parseNestedText(scanner, "", true) + lineNumber := 0 + return parseNestedText(scanner, "", &lineNumber, true) } -func parseNestedText(scanner *bufio.Scanner, parentIndent string, root bool) interface{} { - blockType := Unknown +func parseNestedText( + scanner *bufio.Scanner, parentIndent string, lineNum *int, root bool, +) (interface{}, error) { + var blockType BlockType = Unknown var dict map[string]interface{} var list []interface{} var multiline string @@ -54,16 +63,14 @@ func parseNestedText(scanner *bufio.Scanner, parentIndent string, root bool) int if !scanner.Scan() { break } + *lineNum++ } else { skip = false } - // fmt.Println() - rawLine := scanner.Text() if reComment.MatchString(rawLine) { - // fmt.Println("COMMENT, SKIPPING") continue } @@ -71,43 +78,46 @@ func parseNestedText(scanner *bufio.Scanner, parentIndent string, root bool) int if !localIndentDefined { localIndent = currentIndent localIndentDefined = true - } - - // fmt.Printf("parentIndent: |%s|\n", parentIndent) - // fmt.Printf("localIndent: |%s|\n", localIndent) - // 
fmt.Printf("localIndent: |%s|\n", currentIndent) - - if (currentIndent != localIndent) || + } else if (currentIndent != localIndent) || !strings.HasPrefix(localIndent, parentIndent) { - // indented leftwards, node has finished. - // fmt.Println("RETURNING") - switch blockType { - case Dict: - return dict - case List: - return list - case Multiline: - return multiline - default: - return nil - } + // indented leftwards, leaf has finished. + break } - // match for line types + // the effective current line line := strings.TrimPrefix(rawLine, localIndent) - // fmt.Println("DEBUG: rawLine |" + rawLine) - // fmt.Println("DEBUG: line |" + line) - - if reKeyValue.MatchString(line) { + // determine the blocktype from the current linetype + errmsg := "ERROR: inconsistent linetypes - line %d: \"%s\"" + switch { + case reKeyValue.MatchString(line) || reKeyEmpty.MatchString(line): if blockType == Unknown { blockType = Dict dict = make(map[string]interface{}) } else if blockType != Dict { - // fmt.Println("ERROR: mixed line types in block") - return dict + err := fmt.Errorf(errmsg, *lineNum, line) + return nil, err + } + case reListValue.MatchString(line) || reListEmpty.MatchString(line): + if blockType == Unknown { + blockType = List + list = []interface{}{} + } else if blockType != List { + err := fmt.Errorf(errmsg, *lineNum, line) + return nil, err + } + case reMultiline.MatchString(line): + if blockType == Unknown { + blockType = Multiline + } else if blockType != Multiline { + err := fmt.Errorf(errmsg, *lineNum, line) + return nil, err } + } + // parse line according to linetype, then continue to next cycle + switch { + case reKeyValue.MatchString(line): // TODO optimise with regex? 
ls := strings.Split(line, ": ") key, v := ls[0], ls[1:] @@ -115,108 +125,67 @@ func parseNestedText(scanner *bufio.Scanner, parentIndent string, root bool) int value := strings.Join(v, ": ") dict[key] = value - // DebugDumpJSON(dict) - continue - } - if reKeyEmpty.MatchString(line) { - if blockType == Unknown { - blockType = Dict - dict = make(map[string]interface{}) - } else if blockType != Dict { - // fmt.Println("ERROR: mixed line types in block") - return dict + case reKeyEmpty.MatchString(line): + var key string + switch { + case strings.HasSuffix(line, ":"): + key = strings.TrimSuffix(line, ":") + case strings.HasSuffix(line, ": "): + key = strings.TrimSuffix(line, ": ") } - - // fmt.Println("NESTING") - // TODO: Check if correct. - key := strings.TrimSuffix(line, ":") - key = strings.TrimSuffix(key, ": ") key = strings.TrimSpace(key) - child := parseNestedText(scanner, localIndent, false) + child, err := parseNestedText(scanner, localIndent, lineNum, false) + if err != nil { + return nil, err + } dict[key] = child - - // when parseNestedText returns, scanner will be - // at the first line of the nest block. 
skip = true - // DebugDumpJSON(dict) - continue - } - if reListValue.MatchString(line) { - if blockType == Unknown { - blockType = List - list = []interface{}{} - } else if blockType != List { - // fmt.Println("ERROR: mixed line types in block") - return list - } - + case reListValue.MatchString(line): value := strings.TrimPrefix(line, "- ") list = append(list, value) - // DebugDumpJSON(list) - continue - } - if reListEmpty.MatchString(line) { - if blockType == Unknown { - blockType = List - list = []interface{}{} - } else if blockType != List { - // fmt.Println("ERROR: mixed line types in block") - return list + case reListEmpty.MatchString(line): + child, err := parseNestedText(scanner, localIndent, lineNum, false) + if err != nil { + return nil, err } - - // fmt.Println("NESTING") - - child := parseNestedText(scanner, localIndent, false) list = append(list, child) - - // when parseNestedText returns, scanner will be - // at the first line of the nest block. skip = true - // DebugDumpJSON(dict) - continue - } - if reMultiline.MatchString(line) { - if blockType == Unknown { - blockType = Multiline - } else if blockType != Multiline { - // fmt.Println("ERROR: mixed line types in block") - return multiline - } - - oneline := "" + case reMultiline.MatchString(line): + var oneline string if line != ">" { oneline = strings.TrimPrefix(line, "> ") } multiline += oneline + "\n" - // DebugDumpJSON(multiline) - continue + default: + errmsg := "Internal error - unknown linetype of line %d" + return nil, fmt.Errorf(errmsg, &lineNum) } } - // fmt.Println("SCAN FINISHED") - if err := scanner.Err(); err != nil { log.Println(err) } switch blockType { case Dict: - return dict + return dict, nil case List: - return list + return list, nil case Multiline: - return multiline + return multiline, nil default: - return nil + return nil, fmt.Errorf("Internal error - line %d", &lineNum) } } -func DebugDumpJSON(doc interface{}) { - jsonRepr, _ := json.MarshalIndent(doc, "", " ") - 
fmt.Println(string(jsonRepr)) +func must(err error) { + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } }