Transform a JSON grammar into a CodeMirror syntax-highlight parser
A simple and light-weight ( ~ 17kB minified) CodeMirror add-on
to generate syntax-highlight parsers (codemirror modes) from a grammar specification in JSON format.
See also: ace-grammar , prism-grammar
###Contents
###Todo
Code Indentation is Codemirror default, looking for ways to add more elaborate indentation and code folding rules to the grammar specification. (maybe add "actions" to the grammar syntax part ?? )
###Features
- A grammar can extend other grammars (so arbitrary variations and dialects can be parsed more easily)
- Grammar includes: Style Model , Lex Model and Syntax Model (optional), plus a couple of settings (see examples)
- Grammar specification can be minimal (defaults will be used) (see example grammars)
- Grammar Syntax Model can enable highlight in a more context-specific way, plus detect possible syntax errors
- Grammar Syntax Model can contain recursive references (see /test/grammar-js-recursion.html)
- Generated highlight modes can support toggle comments and keyword autocompletion functionality if defined in the grammar
- Generated highlight modes can support lint-like syntax-annotation functionality generated from the grammar
- Generated parsers are optimized for speed and size
- Can generate a syntax-highlight parser from a grammar interactively and on-the-fly ( see example, http://foo123.github.io/examples/codemirror-grammar )
###How to use:
See working examples under /test folder.
An example for XML:
// 1. a partial xml grammar in simple JSON format
var xml_grammar = {
// prefix ID for regular expressions used in the grammar
"RegExpID" : "RegExp::",
"Extra" : {
"fold" : "xml"
},
//
// Style model
"Style" : {
// lang token type -> CodeMirror (style) tag
"commentBlock": "comment",
"metaBlock": "meta",
"atom": "atom",
"cdataBlock": "atom",
"startTag": "tag",
"endTag": "tag",
"autocloseTag": "tag",
"closeTag": "tag",
"attribute": "attribute",
"assignment": "operator",
"number": "number",
"number2": "number",
"string": "string"
},
//
// Lexical model
"Lex" : {
"commentBlock" : {
"type" : "comment",
"tokens" : [
// block comments
// start, end delims
[ "<!--", "-->" ]
]
},
"cdataBlock" : {
"type" : "block",
"tokens" : [
// cdata block
// start, end delims
[ "<![CDATA[", "]]>" ]
]
},
"metaBlock" : {
"type" : "block",
"tokens" : [
// meta block
// start, end delims
[ "RegExp::/<\\?[_a-zA-Z][\\w\\._\\-]*/", "?>" ]
]
},
// numbers, in order of matching
"number" : [
// floats
"RegExp::/\\d+\\.\\d*/",
"RegExp::/\\.\\d+/",
// integers
// decimal
"RegExp::/[1-9]\\d*(e[\\+\\-]?\\d+)?/",
// just zero
"RegExp::/0(?![\\dx])/"
],
// hex colors
"hexnumber" : "RegExp::/#[0-9a-fA-F]+/",
// strings
"string" : {
"type" : "block",
"multiline" : false,
"tokens" : [
// if no end given, end is same as start
[ "\"" ], [ "'" ]
]
},
// atoms
"atom" : [
"RegExp::/&[a-zA-Z][a-zA-Z0-9]*;/",
"RegExp::/&#[\\d]+;/",
"RegExp::/&#x[a-fA-F\\d]+;/"
],
// tag attributes
"attribute" : "RegExp::/[_a-zA-Z][_a-zA-Z0-9\\-]*/",
// tags
"closeTag" : ">",
"openTag" : {
// allow to match start/end tags
"push" : "TAG<$1>",
"tokens" : "RegExp::/<([_a-zA-Z][_a-zA-Z0-9\\-]*)/"
},
"autoCloseTag" : {
// allow to match start/end tags
"pop" : null,
"tokens" : "/>"
},
"endTag" : {
// allow to match start/end tags
"pop" : "TAG<$1>",
"tokens" : "RegExp::#</([_a-zA-Z][_a-zA-Z0-9\\-]*)>#"
}
},
//
// Syntax model (optional)
"Syntax" : {
"stringOrNumber" : {
"type" : "group",
"match" : "either",
"tokens" : [ "string", "number", "hexnumber" ]
},
"tagAttribute" : {
"type" : "group",
"match" : "all",
"tokens" : [ "attribute", "=", "stringOrNumber" ]
},
"tagAttributes" : {
"type" : "group",
"match" : "zeroOrMore",
"tokens" : [ "tagAttribute" ]
},
"closeOpenTag" : {
"type" : "group",
"match" : "either",
"tokens" : [ "closeTag", "autoCloseTag"]
},
// n-grams define syntax sequences
"tags" : {
"type" : "n-gram",
"tokens" :[
[ "openTag", "tagAttributes", "closeOpenTag" ],
[ "endTag" ]
]
},
},
// what to parse and in what order
"Parser" : [
"commentBlock",
"cdataBlock",
"metaBlock",
"tags",
"atom"
]
};
// 2. parse the grammar into a Codemirror syntax-highlight mode
var xml_mode = CodeMirrorGrammar.getMode(xml_grammar);
// 3. use it with Codemirror
CodeMirror.defineMode("xml", xml_mode);
var editor = CodeMirror.fromTextArea(document.getElementById("code"), {
mode: "xml",
lineNumbers: true,
indentUnit: 4,
indentWithTabs: false
});
Result:
###Other Examples:
####grammar annotations