go-nestedtext

[go] NestedText in Go (alpha)
git clone https://git.torresjrjr.com/go-nestedtext.git
Log | Files | Refs | README | LICENSE

nestedtext.go (4650B)


      1 package main
      2 
      3 import (
      4 	"bufio"
      5 	"encoding/json"
      6 	"fmt"
      7 	"io"
      8 	"log"
      9 	"os"
     10 	"regexp"
     11 	"strings"
     12 )
     13 
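// TODO: Implement the equivalent of a negative lookahead for ': ' in
// reKeyValue and reKeyEmpty, so that keys may contain a ':' that is not
// followed by a space (Go's regexp syntax has no lookahead).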
     15 
     16 var (
     17 	reLeadingWs = regexp.MustCompile(`^\s*`)
     18 	reComment   = regexp.MustCompile(`^\s*(#.*)?$`)
     19 	reKeyValue  = regexp.MustCompile(`^\s*[^:]*: .*$`)
     20 	reKeyEmpty  = regexp.MustCompile(`^\s*[^:]*:$`)
     21 	reListValue = regexp.MustCompile(`^\s*- .*$`)
     22 	reListEmpty = regexp.MustCompile(`^\s*- ?$`)
     23 	reMultiline = regexp.MustCompile(`^\s*>( .*)?$`)
     24 )
     25 
     26 type BlockType int
     27 
     28 const (
     29 	Unknown BlockType = iota
     30 	Dict
     31 	List
     32 	Multiline
     33 )
     34 
     35 func main() {
     36 	doc, err := UnmarshalNestedText(os.Stdin)
     37 	must(err)
     38 	jsonRepr, err := json.MarshalIndent(doc, "", "\t")
     39 	must(err)
     40 	fmt.Println(string(jsonRepr))
     41 }
     42 
     43 func UnmarshalNestedText(r io.Reader) (interface{}, error) {
     44 	scanner := bufio.NewScanner(r)
     45 	lineNumber := 0
     46 	return parseNestedText(scanner, &lineNumber, "", Unknown)
     47 }
     48 
     49 func parseNestedText(
     50 	scanner *bufio.Scanner, lineNum *int, parentIndent string, parentType BlockType,
     51 ) (interface{}, error) {
     52 	var blockType BlockType = Unknown
     53 	var dict map[string]interface{}
     54 	var list []interface{}
     55 	var multiline string
     56 
     57 	var localIndentDefined bool
     58 	var localIndent string
     59 	var skipNextScan bool
     60 
     61 	for {
     62 		if skipNextScan {
     63 			skipNextScan = false
     64 		} else {
     65 			ok := scanner.Scan()
     66 			if !ok {
     67 				break
     68 			}
     69 			*lineNum++
     70 		}
     71 
     72 		rawLine := scanner.Text()
     73 
     74 		if reComment.MatchString(rawLine) {
     75 			continue
     76 		}
     77 
     78 		currentIndent := reLeadingWs.FindString(rawLine)
     79 		if !localIndentDefined {
     80 			localIndent = currentIndent
     81 			localIndentDefined = true
     82 		} else if currentIndent != localIndent {
     83 			// indented leftwards, leaf has finished.
     84 			break
     85 		}
     86 
     87 		if parentType != Unknown && currentIndent == parentIndent {
     88 			// previous line was a list '-\n' or key ':\n' marker
     89 			// but this line is not the value of the previous line
     90 			// as it is on the same indentation level.
     91 			// As such, that list element or key's value should be
     92 			// interpreted as "".
     93 			return "", nil
     94 		}
     95 
     96 		// the effective current line
     97 		line := strings.TrimPrefix(rawLine, localIndent)
     98 
     99 		// determine the blocktype from the current linetype
    100 		errmsg := "ERROR: inconsistent linetypes - line %d: \"%s\""
    101 		switch {
    102 		case reKeyValue.MatchString(line) || reKeyEmpty.MatchString(line):
    103 			if blockType == Unknown {
    104 				blockType = Dict
    105 				dict = make(map[string]interface{})
    106 			} else if blockType != Dict {
    107 				err := fmt.Errorf(errmsg, *lineNum, line)
    108 				return nil, err
    109 			}
    110 		case reListValue.MatchString(line) || reListEmpty.MatchString(line):
    111 			if blockType == Unknown {
    112 				blockType = List
    113 				list = []interface{}{}
    114 			} else if blockType != List {
    115 				err := fmt.Errorf(errmsg, *lineNum, line)
    116 				return nil, err
    117 			}
    118 		case reMultiline.MatchString(line):
    119 			if blockType == Unknown {
    120 				blockType = Multiline
    121 			} else if blockType != Multiline {
    122 				err := fmt.Errorf(errmsg, *lineNum, line)
    123 				return nil, err
    124 			}
    125 		}
    126 
    127 		// parse line according to linetype, then continue to next cycle
    128 		switch {
    129 		case reKeyValue.MatchString(line):
    130 			// TODO optimise with regex?
    131 			ls := strings.Split(line, ": ")
    132 			key, v := ls[0], ls[1:]
    133 			key = strings.TrimSpace(key)
    134 			value := strings.Join(v, ": ")
    135 			dict[key] = value
    136 
    137 		case reKeyEmpty.MatchString(line):
    138 			var key string
    139 			switch {
    140 			case strings.HasSuffix(line, ":"):
    141 				key = strings.TrimSuffix(line, ":")
    142 			case strings.HasSuffix(line, ": "):
    143 				key = strings.TrimSuffix(line, ": ")
    144 			}
    145 			key = strings.TrimSpace(key)
    146 
    147 			child, err := parseNestedText(scanner, lineNum, localIndent, blockType)
    148 			if err != nil {
    149 				return nil, err
    150 			}
    151 			dict[key] = child
    152 			skipNextScan = true
    153 
    154 		case reListValue.MatchString(line):
    155 			value := strings.TrimPrefix(line, "- ")
    156 			list = append(list, value)
    157 
    158 		case reListEmpty.MatchString(line):
    159 			child, err := parseNestedText(scanner, lineNum, localIndent, blockType)
    160 			if err != nil {
    161 				return nil, err
    162 			}
    163 			list = append(list, child)
    164 			skipNextScan = true
    165 
    166 		case reMultiline.MatchString(line):
    167 			var oneline string
    168 			if line != ">" {
    169 				oneline = strings.TrimPrefix(line, "> ")
    170 			}
    171 			multiline += oneline + "\n"
    172 
    173 		default:
    174 			errmsg := "Internal error - unknown linetype of line %d"
    175 			return nil, fmt.Errorf(errmsg, *lineNum)
    176 		}
    177 	}
    178 
    179 	if err := scanner.Err(); err != nil {
    180 		log.Println(err)
    181 	}
    182 
    183 	switch blockType {
    184 	case Dict:
    185 		return dict, nil
    186 	case List:
    187 		return list, nil
    188 	case Multiline:
    189 		return multiline, nil
    190 	default:
    191 		return nil, fmt.Errorf("Internal error - line %d", *lineNum)
    192 	}
    193 }
    194 
    195 func must(err error) {
    196 	if err != nil {
    197 		fmt.Fprintln(os.Stderr, err)
    198 		os.Exit(1)
    199 	}
    200 }