go-nestedtext

NestedText in Go (alpha)
Log | Files | Refs | README | LICENSE

commit 5cbed313b5f17f9d42b92bc56b165d5e10ad0e9b
parent 6a6c0ccbc966bcf6f16322d55018f762a38d13c5
Author: Byron Torres <b@torresjrjr.com>
Date:   Fri, 30 Apr 2021 02:23:41 +0100

Add NestedText implementation and project files

Diffstat:
A .gitignore    |   2 ++
A go.mod        |   3 +++
A nestedtext.go | 222 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 227 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,2 @@ +tmp/ +go-nestedtext diff --git a/go.mod b/go.mod @@ -0,0 +1,3 @@ +module git.sr.ht/~torresjrjr/go-nestedtext + +go 1.16 diff --git a/nestedtext.go b/nestedtext.go @@ -0,0 +1,222 @@ +package main + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "log" + "os" + "regexp" + "strings" +) + +var ( + reLeadingWs = regexp.MustCompile(`^\s*`) + reComment = regexp.MustCompile(`^\s*(#.*)?$`) + reKeyValue = regexp.MustCompile(`^\s*[^:]*: .*$`) + reKeyEmpty = regexp.MustCompile(`^\s*[^:]*: ?$`) + reListValue = regexp.MustCompile(`^\s*- .*$`) + reListEmpty = regexp.MustCompile(`^\s*- ?$`) + reMultiline = regexp.MustCompile(`^\s*> ?.*$`) +) + +type BlockType int + +const ( + Unknown BlockType = iota + Dict + List + Multiline +) + +func main() { + DebugDumpJSON(UnmarshelNestedText(os.Stdin)) +} + +func UnmarshelNestedText(r io.Reader) interface{} { + scanner := bufio.NewScanner(r) + return parseNestedText(scanner, "", true) +} + +func parseNestedText(scanner *bufio.Scanner, parentIndent string, root bool) interface{} { + blockType := Unknown + var dict map[string]interface{} + var list []interface{} + var multiline string + + var localIndentDefined bool + var localIndent string + var skip bool + + for { + if !skip { + if !scanner.Scan() { + break + } + } else { + skip = false + } + + // fmt.Println() + + rawLine := scanner.Text() + + if reComment.MatchString(rawLine) { + // fmt.Println("COMMENT, SKIPPING") + continue + } + + currentIndent := reLeadingWs.FindString(rawLine) + if !localIndentDefined { + localIndent = currentIndent + localIndentDefined = true + } + + // fmt.Printf("parentIndent: |%s|\n", parentIndent) + // fmt.Printf("localIndent: |%s|\n", localIndent) + // fmt.Printf("localIndent: |%s|\n", currentIndent) + + if (currentIndent != localIndent) || + !strings.HasPrefix(localIndent, parentIndent) { + // indented leftwards, node has finished. 
+ // fmt.Println("RETURNING") + switch blockType { + case Dict: + return dict + case List: + return list + case Multiline: + return multiline + default: + return nil + } + } + + // match for line types + line := strings.TrimPrefix(rawLine, localIndent) + + // fmt.Println("DEBUG: rawLine |" + rawLine) + // fmt.Println("DEBUG: line |" + line) + + if reKeyValue.MatchString(line) { + if blockType == Unknown { + blockType = Dict + dict = make(map[string]interface{}) + } else if blockType != Dict { + // fmt.Println("ERROR: mixed line types in block") + return dict + } + + // TODO optimise with regex? + ls := strings.Split(line, ": ") + key, v := ls[0], ls[1:] + key = strings.TrimSpace(key) + value := strings.Join(v, ": ") + dict[key] = value + + // DebugDumpJSON(dict) + continue + } + if reKeyEmpty.MatchString(line) { + if blockType == Unknown { + blockType = Dict + dict = make(map[string]interface{}) + } else if blockType != Dict { + // fmt.Println("ERROR: mixed line types in block") + return dict + } + + // fmt.Println("NESTING") + // TODO: Check if correct. + key := strings.TrimSuffix(line, ":") + key = strings.TrimSuffix(line, ": ") + key = strings.TrimSpace(key) + + child := parseNestedText(scanner, localIndent, false) + dict[key] = child + + // when parseNestedText returns, scanner will be + // at the first line of the nest block. 
+ skip = true + + // DebugDumpJSON(dict) + continue + } + if reListValue.MatchString(line) { + if blockType == Unknown { + blockType = List + list = []interface{}{} + } else if blockType != List { + // fmt.Println("ERROR: mixed line types in block") + return list + } + + value := strings.TrimPrefix(line, "- ") + list = append(list, value) + + // DebugDumpJSON(list) + continue + } + if reListEmpty.MatchString(line) { + if blockType == Unknown { + blockType = List + list = []interface{}{} + } else if blockType != List { + // fmt.Println("ERROR: mixed line types in block") + return list + } + + // fmt.Println("NESTING") + + child := parseNestedText(scanner, localIndent, false) + list = append(list, child) + + // when parseNestedText returns, scanner will be + // at the first line of the nest block. + skip = true + + // DebugDumpJSON(dict) + continue + } + if reMultiline.MatchString(line) { + if blockType == Unknown { + blockType = Multiline + } else if blockType != Multiline { + // fmt.Println("ERROR: mixed line types in block") + return multiline + } + + oneline := "" + if line != ">" { + oneline = strings.TrimPrefix(line, "> ") + } + multiline += oneline + "\n" + + // DebugDumpJSON(multiline) + continue + } + } + + // fmt.Println("SCAN FINISHED") + + if err := scanner.Err(); err != nil { + log.Println(err) + } + + switch blockType { + case Dict: + return dict + case List: + return list + case Multiline: + return multiline + default: + return nil + } +} + +func DebugDumpJSON(doc interface{}) { + jsonRepr, _ := json.MarshalIndent(doc, "", " ") + fmt.Println(string(jsonRepr)) +}