nestedtext.go (4650B)
// Command nestedtext reads a NestedText document from standard input and
// prints its JSON representation to standard output.
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"regexp"
	"strings"
)

var (
	// reLeadingWs captures the (possibly empty) run of leading whitespace
	// that forms a line's indentation.
	reLeadingWs = regexp.MustCompile(`^\s*`)

	// reComment matches lines that carry no data: blank lines and lines
	// whose first non-blank character is '#'.
	reComment = regexp.MustCompile(`^\s*(#.*)?$`)
)

// BlockType identifies the kind of value that a run of equally indented
// lines builds up.
type BlockType int

const (
	// Unknown means no data line has been seen yet. It is also passed as
	// the parent type of the top-level block, which has no parent.
	Unknown BlockType = iota
	// Dict is a block of "key: value" and "key:" lines.
	Dict
	// List is a block of "- value" and "-" lines.
	List
	// Multiline is a block of "> text" lines that form a single string.
	Multiline
)

func main() {
	doc, err := UnmarshalNestedText(os.Stdin)
	must(err)

	out, err := json.MarshalIndent(doc, "", "\t")
	must(err)
	fmt.Println(string(out))
}

// UnmarshalNestedText parses the NestedText document read from r.
//
// The result is a map[string]interface{} for a dictionary, a []interface{}
// for a list, or a string for a multiline string; nested values use the same
// three types. An error is returned when the document is empty, when a line
// cannot be classified, when line types or indentation are inconsistent, or
// when reading from r fails.
func UnmarshalNestedText(r io.Reader) (interface{}, error) {
	scanner := bufio.NewScanner(r)
	lineNumber := 0
	return parseNestedText(scanner, &lineNumber, "", Unknown)
}

// classifyLine determines what kind of block a line belongs to. The line must
// already have its indentation removed.
//
// It returns the block type (Unknown if the line matches no known form), the
// trimmed key for dictionary lines, the inline value, and whether the value
// is absent from the line and must instead be read from the following, more
// deeply indented lines.
//
// The list and multiline tags are tested before the dictionary form so that
// items such as "- a: b" or "> see: below" are not mistaken for keys.
func classifyLine(line string) (block BlockType, key, value string, nested bool) {
	switch {
	case line == "-":
		return List, "", "", true
	case strings.HasPrefix(line, "- "):
		return List, "", line[len("- "):], false
	case line == ">":
		return Multiline, "", "", false
	case strings.HasPrefix(line, "> "):
		return Multiline, "", line[len("> "):], false
	}

	// A key ends at the first ": "; the key itself may therefore contain
	// colons that are not followed by a space.
	if i := strings.Index(line, ": "); i >= 0 {
		return Dict, strings.TrimSpace(line[:i]), line[i+len(": "):], false
	}
	if strings.HasSuffix(line, ":") {
		return Dict, strings.TrimSpace(strings.TrimSuffix(line, ":")), "", true
	}
	return Unknown, "", "", false
}

// parseNestedText parses one block: the run of lines that share the
// indentation of the first data line it encounters.
//
// scanner supplies the lines and lineNum is the shared counter of lines read
// so far (used in error messages). parentIndent and parentType describe the
// block whose "key:" or "-" line triggered this call; the top-level call
// passes parentType Unknown.
//
// When a line that does not belong to this block is met, it is left as the
// scanner's current token and the function returns, so that the caller can
// process that same line again.
func parseNestedText(
	scanner *bufio.Scanner, lineNum *int, parentIndent string, parentType BlockType,
) (interface{}, error) {
	var (
		blockType     = Unknown
		dict          map[string]interface{}
		list          []interface{}
		textLines     []string
		localIndent   string
		indentDefined bool
		reuseLine     bool
	)

	for {
		if reuseLine {
			// A nested call stopped at a line it did not consume; handle
			// that line here instead of reading a new one. If the nested
			// call stopped because the input ran out, the scanner holds no
			// token, Text() is empty, the line is skipped as blank below
			// and the next Scan ends this loop too.
			reuseLine = false
		} else {
			if !scanner.Scan() {
				break
			}
			*lineNum++
		}

		rawLine := scanner.Text()
		if reComment.MatchString(rawLine) {
			continue
		}

		currentIndent := reLeadingWs.FindString(rawLine)
		if !indentDefined {
			if parentType != Unknown && len(currentIndent) <= len(parentIndent) {
				// The parent's "key:" or "-" line is not followed by a more
				// deeply indented line, so its value is the empty string.
				return "", nil
			}
			localIndent = currentIndent
			indentDefined = true
		} else if currentIndent != localIndent {
			if parentType == Unknown || len(currentIndent) >= len(localIndent) {
				// Nothing can follow a complete item at a deeper level, and
				// the top-level block has no enclosing block to return to.
				// Fail instead of silently dropping the rest of the input.
				return nil, fmt.Errorf("unexpected indentation - line %d: %q", *lineNum, rawLine)
			}
			// Indented leftwards: this block has finished.
			break
		}

		// The effective current line.
		line := strings.TrimPrefix(rawLine, localIndent)

		lineType, key, value, nested := classifyLine(line)
		if lineType == Unknown {
			return nil, fmt.Errorf("Internal error - unknown linetype of line %d", *lineNum)
		}

		// The first data line fixes the block type; every later line at
		// this indentation must be of the same type.
		if blockType == Unknown {
			blockType = lineType
			if blockType == Dict {
				dict = make(map[string]interface{})
			}
		} else if blockType != lineType {
			return nil, fmt.Errorf("ERROR: inconsistent linetypes - line %d: \"%s\"", *lineNum, line)
		}

		var item interface{} = value
		if nested {
			child, err := parseNestedText(scanner, lineNum, localIndent, blockType)
			if err != nil {
				return nil, err
			}
			item = child
			reuseLine = true
		}

		switch blockType {
		case Dict:
			dict[key] = item
		case List:
			list = append(list, item)
		case Multiline:
			textLines = append(textLines, value)
		}
	}

	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("reading input after line %d: %w", *lineNum, err)
	}

	switch blockType {
	case Dict:
		return dict, nil
	case List:
		return list, nil
	case Multiline:
		// Lines are separated by newlines; no newline follows the last one.
		return strings.Join(textLines, "\n"), nil
	}

	if parentType != Unknown {
		// The input ended right after the parent's "key:" or "-" line.
		return "", nil
	}
	return nil, fmt.Errorf("Internal error - line %d", *lineNum)
}

// must prints err to standard error and exits with status 1 if err is not
// nil; otherwise it does nothing.
func must(err error) {
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}