commit 5cbed313b5f17f9d42b92bc56b165d5e10ad0e9b
parent 6a6c0ccbc966bcf6f16322d55018f762a38d13c5
Author: Byron Torres <b@torresjrjr.com>
Date: Fri, 30 Apr 2021 02:23:41 +0100
Add NestedText implementation and project files
Diffstat:
3 files changed, 227 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+tmp/
+go-nestedtext
diff --git a/go.mod b/go.mod
@@ -0,0 +1,3 @@
+module git.sr.ht/~torresjrjr/go-nestedtext
+
+go 1.16
diff --git a/nestedtext.go b/nestedtext.go
@@ -0,0 +1,222 @@
+package main
+
+import (
+ "bufio"
+ "encoding/json"
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "regexp"
+ "strings"
+)
+
+var ( // Patterns used to classify a single line of input.
+ reLeadingWs = regexp.MustCompile(`^\s*`) // the line's leading whitespace, i.e. its indentation
+ reComment = regexp.MustCompile(`^\s*(#.*)?$`) // blank line, or a line whose first non-space character is '#'
+ reKeyValue = regexp.MustCompile(`^\s*[^:]*: .*$`) // "key: value"; the text before the first ':' is the key
+ reKeyEmpty = regexp.MustCompile(`^\s*[^:]*: ?$`) // "key:" with at most one space after the colon
+ reListValue = regexp.MustCompile(`^\s*- .*$`) // "- value"
+ reListEmpty = regexp.MustCompile(`^\s*- ?$`) // "-" with at most one space after the dash
+ reMultiline = regexp.MustCompile(`^\s*> ?.*$`) // any line whose first non-space character is '>'
+)
+
+type BlockType int // kind of value a block of lines is being parsed into
+
+const (
+ Unknown BlockType = iota // zero value: no typed line has been seen in the block yet
+ Dict // block of "key: value" / "key:" lines, parsed into a map
+ List // block of "- value" / "-" lines, parsed into a slice
+ Multiline // block of ">" lines, parsed into a single string
+)
+
+// main parses the document supplied on standard input and prints the
+// resulting structure as JSON.
+func main() {
+	doc := UnmarshelNestedText(os.Stdin)
+	DebugDumpJSON(doc)
+}
+
+// UnmarshelNestedText reads r line by line and returns the top-level block
+// produced by parseNestedText, called with an empty parent indentation and
+// root set to true.
+func UnmarshelNestedText(r io.Reader) interface{} {
+	return parseNestedText(bufio.NewScanner(r), "", true)
+}
+
+// parseNestedText parses one block of a NestedText document from scanner and
+// returns it as a map[string]interface{} (dict lines), a []interface{} (list
+// lines), a string (">" lines, each followed by "\n"), or nil when the block
+// has no content.
+//
+// A block is a run of lines sharing the indentation of its first content
+// line; blank lines and "#" comments are skipped. parentIndent is the
+// indentation of the line that introduced this block, and root is true only
+// for the outermost call. A nested (non-root) block must be indented strictly
+// deeper than parentIndent; otherwise its first line belongs to an enclosing
+// block and nil is returned for the empty child.
+//
+// The block ends at end of input, at the first line with a different
+// indentation, or at the first line whose type does not fit the block. In the
+// latter two cases that line stays available through scanner.Text() so the
+// caller can process it. Lines matching none of the patterns are ignored. A
+// scanner error is logged, not returned.
+func parseNestedText(scanner *bufio.Scanner, parentIndent string, root bool) interface{} {
+	var (
+		blockType = Unknown
+		dict      map[string]interface{}
+		list      []interface{}
+		multiline strings.Builder
+
+		localIndent        string
+		localIndentDefined bool
+
+		// skip is set after a nested block returns: the scanner is then
+		// already positioned on the line that ended the child, so that line
+		// must be examined here without advancing the scanner again.
+		skip bool
+	)
+
+	// result returns whatever this block has accumulated so far.
+	result := func() interface{} {
+		switch blockType {
+		case Dict:
+			return dict
+		case List:
+			return list
+		case Multiline:
+			return multiline.String()
+		default:
+			return nil
+		}
+	}
+
+	// claim fixes the block's type on its first typed line and reports
+	// whether a line of type t fits the block.
+	claim := func(t BlockType) bool {
+		if blockType == Unknown {
+			blockType = t
+			switch t {
+			case Dict:
+				dict = make(map[string]interface{})
+			case List:
+				// Non-nil so that a list encodes as [] rather than null.
+				list = []interface{}{}
+			}
+		}
+		return blockType == t
+	}
+
+	for {
+		if skip {
+			skip = false
+		} else if !scanner.Scan() {
+			break
+		}
+
+		rawLine := scanner.Text()
+		if reComment.MatchString(rawLine) {
+			continue
+		}
+
+		currentIndent := reLeadingWs.FindString(rawLine)
+		if !localIndentDefined {
+			localIndent = currentIndent
+			localIndentDefined = true
+
+			// A child whose first line is not indented deeper than its
+			// parent is empty; the line belongs to an enclosing block.
+			nested := len(localIndent) > len(parentIndent) &&
+				strings.HasPrefix(localIndent, parentIndent)
+			if !root && !nested {
+				return nil
+			}
+		}
+		if currentIndent != localIndent {
+			// Indentation changed: this block has finished.
+			return result()
+		}
+
+		line := strings.TrimPrefix(rawLine, localIndent)
+
+		// List and multiline markers are tested before the dict patterns so
+		// that "- a: b" and "> a: b" are not mistaken for dict entries.
+		switch {
+		case reListValue.MatchString(line):
+			if !claim(List) {
+				return result()
+			}
+			list = append(list, strings.TrimPrefix(line, "- "))
+
+		case reListEmpty.MatchString(line):
+			if !claim(List) {
+				return result()
+			}
+			list = append(list, parseNestedText(scanner, localIndent, false))
+			skip = true
+
+		case reMultiline.MatchString(line):
+			if !claim(Multiline) {
+				return result()
+			}
+			if line != ">" {
+				multiline.WriteString(strings.TrimPrefix(line, "> "))
+			}
+			multiline.WriteString("\n")
+
+		case reKeyValue.MatchString(line):
+			if !claim(Dict) {
+				return result()
+			}
+			// reKeyValue guarantees ": " is present, so there are two parts;
+			// the value keeps any further ": " it contains.
+			parts := strings.SplitN(line, ": ", 2)
+			dict[strings.TrimSpace(parts[0])] = parts[1]
+
+		case reKeyEmpty.MatchString(line):
+			if !claim(Dict) {
+				return result()
+			}
+			// Strip the optional trailing space, then the colon itself.
+			key := strings.TrimSuffix(strings.TrimSuffix(line, " "), ":")
+			key = strings.TrimSpace(key)
+			dict[key] = parseNestedText(scanner, localIndent, false)
+			skip = true
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		log.Println(err)
+	}
+
+	return result()
+}
+
+// DebugDumpJSON prints doc to standard output as indented JSON. If doc
+// cannot be encoded, the error is logged and nothing is printed.
+func DebugDumpJSON(doc interface{}) {
+	jsonRepr, err := json.MarshalIndent(doc, "", " ")
+	if err != nil {
+		log.Println(err)
+		return
+	}
+	fmt.Println(string(jsonRepr))
+}