Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Formal ABNF (Augmented Backus-Naur Form) Grammar

; Core terminals shared by the rest of the grammar.
ALPHA  = %x41-5A / %x61-7A   ; A-Z / a-z
DIGIT  = %x30-39             ; 0-9
DQUOTE = %x22                ; " (double quote)
SP     = %x20                ; space
TAB    = %x09                ; horizontal tab
CR     = %x0D                ; carriage return
LF     = %x0A                ; line feed
CRLF   = CR LF               ; two-character line ending, referenced by line-end

; Whitespace and comments
;   WSP      - a single horizontal whitespace character
;   line-end - a CRLF or a bare LF line terminator
;   blank    - any single whitespace character or line terminator
;   comment  - "#" followed by tabs, spaces and printable ASCII; it must
;              be terminated by a line-end
;   ws       - optional run of blanks and comments
;   mws      - mandatory run (at least one blank or comment)
WSP = SP / TAB
line-end = CRLF / LF
blank = WSP / line-end
comment = "#" *(WSP / %x21-7E) line-end
ws = *(blank / comment)
mws = 1*(blank / comment)

; The document: a single top-level map, optionally surrounded by
; whitespace and comments.
document = ws map ws

; Identifiers: the first character is a letter or an underscore; every
; following character may additionally be a digit, a hyphen or an
; apostrophe.
identifier          = identifier-begin *identifier-contents
identifier-begin    = ALPHA / "_"
identifier-contents = identifier-begin / DIGIT / "-" / "'"

; Field: an identifier, the assignment sign "=", an element and the
;        terminating ";", in that order. Optional whitespace and
;        comments (ws) may appear between the tokens and after the ";".
field = identifier ws "=" ws element ws ";" ws

; Element: exactly one of string, multi-line string, list, map,
;          number, boolean or null.
element = string / multiline-string / list / map / number / boolean / null

; Map: zero or more fields enclosed in braces.
;      Every field already ends with optional whitespace (ws), so the
;      ws before "}" is redundant but harmless.
map = "{" ws *field ws "}"

; List: zero or more elements enclosed in square brackets.
;       Consecutive elements are separated by mandatory whitespace or
;       comments (mws); there is no punctuation separator.
list = "[" ws [list-items] ws "]"
list-items = element *(mws element)

; String: UTF-8 text enclosed within double quotes. It can span
;         multiple lines but, unlike multi-line strings, it is not
;         indentation aware. A double quote or a backslash inside
;         the string must be escaped with a preceding backslash.
string = DQUOTE *chars DQUOTE
chars = string-escape / string-literal
string-escape = "\" (DQUOTE / "\")
; Literal characters: tab, LF, CR, printable ASCII except DQUOTE (%x22)
; and "\" (%x5C), and every non-ASCII character. Surrogates
; (%xD800-DFFF) are left out because they are not valid UTF-8.
string-literal = %x09 / %x0A / %x0D / %x20-21 / %x23-5B / %x5D-7E / %x80-D7FF / %xE000-10FFFF

;; NOTES {multi-line string}:
;;   An escape sequence is introduced by `''\` and is followed by
;;   exactly one valid UTF-8 character. Only the following sequences
;;   are transformed:
;; ______________________________
;; | sequence | result          |
;; |__________|_________________|
;; | `''\n`   | line feed       |
;; | `''\r`   | carriage return |
;; | `''\t`   | tab character   |
;; |____________________________|
;;
;; Any escaped character other than those listed in this table is
;; treated generically as the raw character alone.
;;
;; NOTE(review): multiline-literal also matches "'" (%x27), so the
;; grammar alone does not say which "''" closes the string; presumably
;; the first "''" that is not followed by "\" does - confirm.

multiline-string  = "''" *multiline-chars "''"
multiline-chars   = multiline-escape / multiline-literal
multiline-escape  = "''" "\" UTF8-char
; Surrogates (%xD800-DFFF) are left out: they are not valid UTF-8 and
; UTF8-char does not admit them either.
multiline-literal = %x09 / %x0A / %x0D / %x20-7E / %x80-D7FF / %xE000-10FFFF

; UTF8-char: one character, expressed as a Unicode code point like the
; other character rules of this grammar (string-literal,
; multiline-literal) rather than as its UTF-8 byte sequence, so that
; the whole grammar uses a single terminal alphabet. The byte-level
; encoding of these code points is the one defined by RFC 3629.
UTF8-char = %x09 / %x0A / %x0D / %x20-7E    ; ASCII printable + whitespace
          / %x80-D7FF                       ; U+0080 up to the surrogate block
          / %xE000-10FFFF                   ; past the surrogates, up to U+10FFFF

; Numbers:
;; integers: optionally negated digit sequence; "0" on its own is
;;           allowed, but a leading zero before other digits is not
;; decimals: optionally negated digit sequence with one decimal point;
;;           the integer part may be omitted (".5"), but at least one
;;           digit must follow the point ("5." does not match)
number = ["-"] (decimal / integer)
integer = "0" / (non-zero *DIGIT)
decimal = [integer] "." 1*DIGIT
non-zero = %x31-39

;; NOTES {Boolean/Null}:
;;   These words are also valid when used as identifiers, which cannot
;;   be clearly expressed in a context-free grammar. Using them as
;;   identifiers is highly discouraged in practice but should be noted.
;;
;; NOTE(review): quoted literals are case-insensitive in RFC 5234 ABNF,
;; so as written these rules also match e.g. "TRUE" or "Null". If the
;; keywords are meant to be lowercase only, RFC 7405 %s"..." literals
;; would express that - confirm the intended behaviour.
boolean = "true" / "false"
null = "null"