Comments (5)
Here is an initial attempt to create an LL(1)
parser for Cone
(following the code flow), along with its EBNF, using a variant of https://github.com/SSW-CocoR/CocoR-CPP .
COMPILER Cone
TERMINALS
TOKEN_REF
CHARACTERS
// Character classes referenced by the TOKENS section.
letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".
lowline = "_".
digit = "0123456789".
nzdigit = '1'..'9'.
oct = '0'..'7'.
noquote1 = ANY - "'".
noquote2 = ANY - '"'.
norbrack = ANY - ']'.
cr = '\r'.
lf = '\n'.
tab = '\t'.
// Any character except an apostrophe or a line break (body of charcon).
notApo = ANY - '\'' - "\r\n".
// Ordinary (unescaped) characters inside string / character literals.
stringCh = ANY - '"' - '\\' - cr - lf.
charCh = ANY - '\'' - '\\' - cr - lf.
// Printable ASCII; this is what may follow a backslash in a string literal.
printable = '\u0020' .. '\u007e'.
// Hex digits in both cases. IntLitToken accepts the "0x" and "0X" prefixes,
// so literals such as 0xFF must lex as well as 0xff.
hex = "0123456789abcdefABCDEF".
TOKENS
// Double-quoted string: ordinary characters, or a backslash followed by any printable character.
StringLitToken = '"' { stringCh | '\\' printable } '"' .
// Same body as StringLitToken, but ended by a line break instead of the closing quote
// (i.e. an unterminated string).
badString = '"' { stringCh | '\\' printable } (cr | lf).
// Identifier: starts with a letter; may continue with letters, '_' or digits.
IdentToken = letter {letter | lowline | digit} .
//LEXER_CHAR_SET = '[' { norbrack | '\\' printable } ']' . //(~ [\]\\] | EscAny)+ -> more
// Floating-point literal, three shapes:
//   .digits [exponent]   |   digits . [digits] [exponent]   |   digits exponent
// followed by an optional f/l/F/L suffix.
FloatLitToken = ( '.' digit {digit} [('e'|'E') ['+'|'-'] digit {digit}]
| digit {digit} '.' {digit} [('e'|'E') ['+'|'-'] digit {digit}]
| digit {digit} ('e'|'E') ['+'|'-'] digit {digit}
)
['f'|'l'|'F'|'L'].
// Integer literal: decimal with a non-zero leading digit, '0' followed by octal digits,
// or a 0x/0X-prefixed hex number; any number of u/U/l/L suffix characters may follow.
IntLitToken = ( nzdigit {digit}
| '0' {oct}
| ("0x"|"0X") hex {hex}
)
{'u'|'U'|'l'|'L'}.
// Character constant: one or more non-apostrophe, non-line-break characters between apostrophes.
charcon = '\'' notApo {notApo} '\''. // no check for valid escape sequences
// Punctuation tokens
// Delimiters and separators.
SemiToken = ";" .
ColonToken = ":" .
DblColonToken = "::" .
LCurlyToken = "{" .
RCurlyToken = "}" .
LBracketToken = "[" .
RBracketToken = "]" .
LParenToken = "(" .
RParenToken = ")" .
CommaToken = "," .
DotToken = "." .
QuesDotToken = "?." .
// Arithmetic and reference operators. "+[]", "+<", "&[]" and "&<" are each a single token;
// they are the alternatives accepted at the start of parsePlus and parseAmper.
PlusToken = "+" .
PlusArrayRefToken = "+[]" .
PlusVirtRefToken = "+<" .
DashToken = "-" .
StarToken = "*" .
PercentToken = "%" .
SlashToken = "/" .
AmperToken = "&" .
ArrayRefToken = "&[]" .
VirtRefToken = "&<" .
// Logical and bitwise operators ("and" / "or" are spelled as words).
AndToken = "and" .
BarToken = "|" .
OrToken = "or" .
CaretToken = "^" .
NotToken = "!" .
QuesToken = "?" .
TildeToken = "~" .
// Append, assignment and swap.
LessDashToken = "<-" .
AssgnToken = "=" .
LAssgnToken = ":=" .
SwapToken = "<=>" .
// Comparison operators.
IsToken = "is" .
EqToken = "==" .
NeToken = "!=" .
LtToken = "<" .
LeToken = "<=" .
GtToken = ">" .
GeToken = ">=" .
// Shifts.
ShlToken = "<<" .
ShrToken = ">>" .
// Compound assignment operators (all accepted by parseAppend).
PlusEqToken = "+=" .
MinusEqToken = "-=" .
MultEqToken = "*=" .
DivEqToken = "/=" .
RemEqToken = "%=" .
OrEqToken = "|=" .
AndEqToken = "&=" .
XorEqToken = "^=" .
ShlEqToken = "<<=" .
ShrEqToken = ">>=" .
// Increment / decrement (used as prefix operators in parsePrefix).
IncrToken = "++" .
DecrToken = "--" .
// Keywords
// NOTE(review): within the PRODUCTIONS section of this grammar, MixinToken, EnumToken,
// RegionToken, ElifToken, ElseToken, CaseToken, InToken, ByToken and UndefToken are
// declared but not referenced by any production.
// Global-level declarations.
IncludeToken = "include" .
ImportToken = "import" .
ExternToken = "extern" .
MacroToken = "macro" .
FnToken = "fn" .
ConstToken = "const" .
TypedefToken = "typedef" .
StructToken = "struct" .
TraitToken = "trait" .
UnionToken = "union" .
// Attribute-style keywords accepted at the start of parseStructBody.
MoveToken = "@move" .
OpaqueToken = "@opaque" .
ExtendsToken = "extends" .
MixinToken = "mixin" .
EnumToken = "enum" .
RegionToken = "region" .
// Statements and control flow.
RetToken = "return" .
WithToken = "with" .
IfToken = "if" .
ElifToken = "elif" .
ElseToken = "else" .
CaseToken = "case" .
MatchToken = "match" .
WhileToken = "while" .
EachToken = "each" .
InToken = "in" .
// NOTE(review): the token is named ByToken but its spelling is "step".
ByToken = "step" .
BreakToken = "break" .
ContinueToken = "continue" .
// Casts (used by parseCast).
AsToken = "as" .
IntoToken = "into" .
InlineToken = "inline" .
// Literal keywords.
VoidToken = "void" .
nilToken = "nil" .
trueToken = "true" .
falseToken = "false" .
UndefToken = "undef" .
// A lone apostrophe; it is the whole of parseLifetime and the optional label after break/continue.
LifetimeToken = "'" .
// Block comments may nest; line comments run from "//" to the line feed.
COMMENTS FROM "/*" TO "*/" NESTED
COMMENTS FROM "//" TO lf
// Carriage return, line feed and tab are skipped between tokens.
IGNORE cr + lf + tab
PRODUCTIONS
// Start symbol: a Cone source file is zero or more global statements followed by end of file.
Cone =
{parseGlobalStmts} EOF
.
// One global (top-level) statement.
// Every alternative starts with a different token, so their order is not significant.
parseGlobalStmts =
    parseInclude
  | parseImport
  | parseTypedef
  | parseStruct | parseTrait | parseUnion | parseMacro
  | parseConstDcl
  | parseFnOrVar
    // `extern`, optionally marked "system", applies to one declaration or to a block of them.
  | ExternToken ["system"] ( parseFnOrVar | parseBlock )
.
// include <file> ;
parseInclude = IncludeToken parseFile parseEndOfStatement .

// import <file> [ ::* ] ;
parseImport = ImportToken parseFile [ DblColonToken StarToken ] parseEndOfStatement .

// typedef <name> <type> ;
parseTypedef = TypedefToken IdentToken parseVtype parseEndOfStatement .
// struct / trait / union / macro all share the same header syntax (parseStructBody);
// only the introducing keyword differs.
parseStruct = StructToken parseStructBody .
parseTrait  = TraitToken  parseStructBody .
parseUnion  = UnionToken  parseStructBody .
parseMacro  = MacroToken  parseStructBody .
// Header of a struct-like declaration:
//   optional @move / @opaque markers (either order, each at most once),
//   the type name, optional generic parameters, optional `extends <type>`.
parseStructBody =
    [ MoveToken [OpaqueToken] | OpaqueToken [MoveToken] ]
    IdentToken [parseGenericParms]
    [ ExtendsToken parseTypeName ]
.

// A (possibly qualified) name, optionally followed by a bracketed, comma-separated type list.
parseTypeName =
    parseNameUse
    [ LBracketToken parseVtype { CommaToken parseVtype } RBracketToken ]
.
// A function or variable declaration at global scope or inside an extern block.
// The `fn` keyword must be followed by the rest of the function declaration
// (name, optional generics, signature, body or ';' -- see parseFn). Matching the bare
// keyword alone left no way to write a global function.
parseFnOrVar =
    FnToken parseFn
  | parseVarDcl
.
// Block of declarations: opened by '{' or ':', closed by '}'.
parseBlock = ( LCurlyToken | ColonToken ) { parseFnOrVar } RCurlyToken .

// const <name> <type> [ = <expr> ]
parseConstDcl = ConstToken IdentToken parseVtype [ AssgnToken parseAnyExpr ] .

// A file reference is either a bare identifier or a string literal.
parseFile = ( StringLitToken | IdentToken ) .

// A type is parsed with the same rule as a prefix expression.
parseVtype = parsePrefix .
// Prefix expression: a prefix operator applied to another prefix expression,
// a reference form (parseAmper / parsePlus), a lone '.', or a term.
parsePrefix =
parseDotCall
| StarToken parsePrefix
| parseAmper
| parsePlus
| QuesToken parsePrefix
| DashToken parsePrefix
| TildeToken parsePrefix
| IncrToken parsePrefix
| DecrToken parsePrefix
// NOTE(review): both alternatives below begin with parseTerm (parseSuffixTerm is
// "parseTerm parsePrefix"), so an LL(1) parser cannot choose between them on one
// token of lookahead. This likely needs left-factoring -- confirm the intended suffix syntax.
| (parseTerm | parseSuffixTerm)
.
// Currently matches only the '.' token.
parseDotCall =
DotToken
.
// Reference: '&', '&[]' or '&<', then a permission, then optionally a function declaration.
parseAmper =
(AmperToken | ArrayRefToken | VirtRefToken) parsePerm [parseFn]
.
// '+', '+[]' or '+<', then a name, optionally '-' and a permission or a second name,
// then the prefix expression it applies to.
parsePlus =
(PlusToken | PlusArrayRefToken | PlusVirtRefToken) parseNameUse [DashToken (PermToken | parseNameUse)] parsePrefix
.
// Possibly qualified name: optional leading '::', then identifiers separated by '::'.
parseNameUse =
[DblColonToken] IdentToken {DblColonToken IdentToken}
.
// A term immediately followed by a prefix expression.
parseSuffixTerm =
parseTerm parsePrefix
.
// Primary term: a literal, a name, a parenthesized expression, an array literal,
// a control-flow expression, a lifetime, or a block.
parseTerm =
nilToken
| trueToken
| falseToken
| VoidToken
| IntLitToken
| FloatLitToken
| StringLitToken
| parseNameUse
| LParenToken parseAnyExpr RParenToken
| parseArrayLit
| parseIf
| parseMatch
| parseWhile
| parseLifetime
| parseExprBlock
.
// Any expression; delegates to the assignment level.
parseAnyExpr =
parseAssign
.
// Array literal: '[' expr {',' expr} [ ';' expr {',' expr} ] ']'.
parseArrayLit =
LBracketToken parseSimpleExpr {CommaToken parseSimpleExpr} [SemiToken parseSimpleExpr {CommaToken parseSimpleExpr}] RBracketToken
.
// The four productions below are placeholders: each matches only its introducing
// token and does not yet describe the rest of the construct.
parseIf =
IfToken
.
parseMatch =
MatchToken
.
parseWhile =
WhileToken
.
parseLifetime =
LifetimeToken
.
// Expression block: opened by '{' or ':', zero or more statements, closed by '}'.
parseExprBlock =
(LCurlyToken | ColonToken) {parseExprBlockBody} RCurlyToken
.
// One statement inside an expression block.
parseExprBlockBody =
// Empty statement.
SemiToken
| parseReturn
| parseWith
| parseIf
| parseMatch
| parseWhile
| parseEach
| parseLifetime
// break: optional lifetime token, optional value expression, then ';'.
| BreakToken [LifetimeToken] [parseAnyExpr] parseEndOfStatement
// continue: optional lifetime token, then ';'.
| ContinueToken [LifetimeToken] parseEndOfStatement
| parseExprBlock
| parseVarDcl parseEndOfStatement
| parseExpStmt
.
// return [ <expr> ] followed by the return-specific terminator rule.
parseReturn = RetToken [ parseAnyExpr ] parseIsEndOfStatement .

// Placeholders: only the introducing keyword is matched so far.
parseWith = WithToken .
parseEach = EachToken .
// End of a `return` statement.
// Only an optional ';' is consumed. A closing '}' also ends the statement, but it must be
// left in the input for the enclosing parseExprBlock, which requires its own RCurlyToken.
// Consuming the '}' here made a block such as `{ return x }` impossible to parse.
// Trade-off: a pure grammar rule cannot peek without consuming, so the ';' is simply
// optional; a stricter "';' or '}' must follow" check would need a semantic action.
parseIsEndOfStatement =
    [SemiToken]
.
// Expression statement: <expr> ;
parseExpStmt = parseAnyExpr parseEndOfStatement .

// Assignment level; delegates to parseAppend.
parseAssign = parseAppend .
// Assignment / append level of the expression grammar.
//   - a lone '<-' token, or
//   - a tuple, optionally followed by an assignment operator and its right-hand side,
//     or by '<-' and a further append expression.
// The operator part is optional: when it was mandatory, a plain expression such as `x`
// or `1 + 2` could never be a parseAnyExpr, which made `(x)`, `return x;` and
// `x = 1` (whose right-hand side is itself a parseAnyExpr) unparseable.
parseAppend =
    LessDashToken
  | parseTuple
    [ ( AssgnToken
      | LAssgnToken
      | SwapToken
      | PlusEqToken
      | MinusEqToken
      | MultEqToken
      | DivEqToken
      | RemEqToken
      | OrEqToken
      | AndEqToken
      | XorEqToken
      | ShlEqToken
      | ShrEqToken
      ) parseAnyExpr
    | LessDashToken parseAppend
    ]
.
// Function declaration: name, optional generic parameters, signature, optional `inline`,
// then either a body block or a terminating ';'.
parseFn =
    IdentToken [parseGenericParms] parseFnSig [InlineToken]
    ( parseExprBlock | parseEndOfStatement )
.

// Generic parameter list: '[' name {',' name} ']'.
parseGenericParms = LBracketToken IdentToken { CommaToken IdentToken } RBracketToken .

// Parameter list: '(' [ varDcl {',' varDcl} ] ')'.
parseFnSig = LParenToken [ parseVarDcl { CommaToken parseVarDcl } ] RParenToken .

// Variable declaration: permission, name, type, optional '=' initializer.
parseVarDcl = parsePerm IdentToken parseVtype [ AssgnToken parseAnyExpr ] .

// A permission is one of the PermToken keywords.
parsePerm = PermToken .

// Comma-separated list of one or more simple expressions.
parseTuple = parseSimpleExpr { CommaToken parseSimpleExpr } .

// Simple expression; delegates to the logical-or level.
parseSimpleExpr = parseOrExpr .
// Operator levels, loosest binding first. Each level is a chain of the next-tighter level.

// Logical or:  a or b or ...
parseOrExpr = parseAndLogic { OrToken parseAndLogic } .

// Logical and:  a and b and ...
parseAndLogic = parseNotLogic { AndToken parseNotLogic } .

// Logical not: any number of leading '!' before a comparison.
parseNotLogic =
    NotToken parseNotLogic
  | parseCmp
.

// Comparison: at most one comparison operator (comparisons do not chain).
parseCmp =
    parseOr
    [ ( EqToken | NeToken | LtToken | LeToken | GtToken | GeToken ) parseOr ]
.

// Bitwise or:  a | b | ...
parseOr = parseXor { BarToken parseXor } .
// Bitwise xor:  a ^ b ^ ...
// This level previously folded on '&', so '^' (CaretToken) was declared but could never
// appear in an expression. Xor now sits between bitwise-or and a new bitwise-and level.
parseXor =
    parseAnd { CaretToken parseAnd }
.

// Bitwise and:  a & b & ...  (the operator the old parseXor rule matched).
parseAnd =
    parseShift { AmperToken parseShift }
.
// Shift:  a << b, a >> b
parseShift = parseAdd { ( ShlToken | ShrToken ) parseAdd } .

// Additive:  a + b, a - b
parseAdd = parseMult { ( PlusToken | DashToken ) parseMult } .

// Multiplicative:  a * b, a / b, a % b
parseMult = parseCast { ( StarToken | SlashToken | PercentToken ) parseCast } .

// Cast:  <prefix-expr> { as <type> | into <type> }
parseCast = parsePrefix { ( AsToken | IntoToken ) parseVtype } .

// A statement is terminated by ';'.
parseEndOfStatement = SemiToken .

// Permission keywords (a production over literals, despite the Token suffix in its name).
PermToken = ( "uni" | "imm" | "mut" | "ro" | "opaq" ) .
END Cone.
EBNF:
//
// EBNF generated by CocoR parser generator to be viewed with https://www.bottlecaps.de/rr/ui
//
//
// productions
//
Cone ::= parseGlobalStmts* EOF
parseGlobalStmts ::= parseInclude | parseImport | parseTypedef | parseStruct | parseTrait | parseUnion | parseMacro | ExternToken "system"? ( parseBlock | parseFnOrVar ) | parseFnOrVar | parseConstDcl
parseInclude ::= IncludeToken parseFile parseEndOfStatement
parseImport ::= ImportToken parseFile ( DblColonToken StarToken )? parseEndOfStatement
parseTypedef ::= TypedefToken IdentToken parseVtype parseEndOfStatement
parseStruct ::= StructToken parseStructBody
parseTrait ::= TraitToken parseStructBody
parseUnion ::= UnionToken parseStructBody
parseMacro ::= MacroToken parseStructBody
parseBlock ::= ( ColonToken | LCurlyToken ) parseFnOrVar* RCurlyToken
parseFnOrVar ::= FnToken | parseVarDcl
parseConstDcl ::= ConstToken IdentToken parseVtype ( AssgnToken parseAnyExpr )?
parseFile ::= IdentToken | StringLitToken
parseEndOfStatement ::= SemiToken
parseVtype ::= parsePrefix
parseStructBody ::= ( MoveToken OpaqueToken? | OpaqueToken MoveToken? )? IdentToken parseGenericParms? ( ExtendsToken parseTypeName )?
parseGenericParms ::= LBracketToken IdentToken ( CommaToken IdentToken )* RBracketToken
parseTypeName ::= parseNameUse ( LBracketToken parseVtype ( CommaToken parseVtype )* RBracketToken )?
parseNameUse ::= DblColonToken? IdentToken ( DblColonToken IdentToken )*
parseVarDcl ::= parsePerm IdentToken parseVtype ( AssgnToken parseAnyExpr )?
parseAnyExpr ::= parseAssign
parsePrefix ::= parseDotCall | StarToken parsePrefix | parseAmper | parsePlus | QuesToken parsePrefix | DashToken parsePrefix | TildeToken parsePrefix | IncrToken parsePrefix | DecrToken parsePrefix | ( parseTerm | parseSuffixTerm )
parseDotCall ::= DotToken
parseAmper ::= ( AmperToken | ArrayRefToken | VirtRefToken ) parsePerm parseFn?
parsePlus ::= ( PlusToken | PlusArrayRefToken | PlusVirtRefToken ) parseNameUse ( DashToken ( PermToken | parseNameUse ) )? parsePrefix
parseTerm ::= nilToken | trueToken | falseToken | VoidToken | IntLitToken | FloatLitToken | StringLitToken | parseNameUse | LParenToken parseAnyExpr RParenToken | parseArrayLit | parseIf | parseMatch | parseWhile | parseLifetime | parseExprBlock
parseSuffixTerm ::= parseTerm parsePrefix
parsePerm ::= PermToken
parseFn ::= IdentToken parseGenericParms? parseFnSig InlineToken? ( parseExprBlock | parseEndOfStatement )
PermToken ::= "uni" | "imm" | "mut" | "ro" | "opaq"
parseArrayLit ::= LBracketToken parseSimpleExpr ( CommaToken parseSimpleExpr )* ( SemiToken parseSimpleExpr ( CommaToken parseSimpleExpr )* )? RBracketToken
parseIf ::= IfToken
parseMatch ::= MatchToken
parseWhile ::= WhileToken
parseLifetime ::= LifetimeToken
parseExprBlock ::= ( LCurlyToken | ColonToken ) parseExprBlockBody* RCurlyToken
parseAssign ::= parseAppend
parseSimpleExpr ::= parseOrExpr
parseExprBlockBody ::= SemiToken | parseReturn | parseWith | parseIf | parseMatch | parseWhile | parseEach | parseLifetime | BreakToken LifetimeToken? parseAnyExpr? parseEndOfStatement | ContinueToken LifetimeToken? parseEndOfStatement | parseExprBlock | parseVarDcl parseEndOfStatement | parseExpStmt
parseReturn ::= RetToken parseAnyExpr? parseIsEndOfStatement
parseWith ::= WithToken
parseEach ::= EachToken
parseExpStmt ::= parseAnyExpr parseEndOfStatement
parseIsEndOfStatement ::= SemiToken | RCurlyToken
parseAppend ::= LessDashToken | parseTuple ( ( AssgnToken | LAssgnToken | SwapToken | PlusEqToken | MinusEqToken | MultEqToken | DivEqToken | RemEqToken | OrEqToken | AndEqToken | XorEqToken | ShlEqToken | ShrEqToken ) parseAnyExpr | LessDashToken parseAppend )
parseTuple ::= parseSimpleExpr ( CommaToken parseSimpleExpr )*
parseFnSig ::= LParenToken ( parseVarDcl ( CommaToken parseVarDcl )* )? RParenToken
parseOrExpr ::= parseAndLogic ( OrToken parseAndLogic )*
parseAndLogic ::= parseNotLogic ( AndToken parseNotLogic )*
parseNotLogic ::= NotToken parseNotLogic | parseCmp
parseCmp ::= parseOr ( ( EqToken | NeToken | LtToken | LeToken | GtToken | GeToken ) parseOr )?
parseOr ::= parseXor ( BarToken parseXor )*
parseXor ::= parseShift ( AmperToken parseShift )*
parseShift ::= parseAdd ( ( ShlToken | ShrToken ) parseAdd )*
parseAdd ::= parseMult ( ( PlusToken | DashToken ) parseMult )*
parseMult ::= parseCast ( ( StarToken | SlashToken | PercentToken ) parseCast )*
parseCast ::= parsePrefix ( ( AsToken | IntoToken ) parseVtype )*
//
// tokens
//
SemiToken ::= ";"
ColonToken ::= ":"
DblColonToken ::= "::"
LCurlyToken ::= "{"
RCurlyToken ::= "}"
LBracketToken ::= "["
RBracketToken ::= "]"
LParenToken ::= "("
RParenToken ::= ")"
CommaToken ::= ","
DotToken ::= "."
QuesDotToken ::= "?."
PlusToken ::= "+"
PlusArrayRefToken ::= "+[]"
PlusVirtRefToken ::= "+<"
DashToken ::= "-"
StarToken ::= "*"
PercentToken ::= "%"
SlashToken ::= "/"
AmperToken ::= "&"
ArrayRefToken ::= "&[]"
VirtRefToken ::= "&<"
AndToken ::= "and"
BarToken ::= "|"
OrToken ::= "or"
CaretToken ::= "^"
NotToken ::= "!"
QuesToken ::= "?"
TildeToken ::= "~"
LessDashToken ::= "<-"
AssgnToken ::= "="
LAssgnToken ::= ":="
SwapToken ::= "<=>"
IsToken ::= "is"
EqToken ::= "=="
NeToken ::= "!="
LtToken ::= "<"
LeToken ::= "<="
GtToken ::= ">"
GeToken ::= ">="
ShlToken ::= "<<"
ShrToken ::= ">>"
PlusEqToken ::= "+="
MinusEqToken ::= "-="
MultEqToken ::= "*="
DivEqToken ::= "/="
RemEqToken ::= "%="
OrEqToken ::= "|="
AndEqToken ::= "&="
XorEqToken ::= "^="
ShlEqToken ::= "<<="
ShrEqToken ::= ">>="
IncrToken ::= "++"
DecrToken ::= "--"
IncludeToken ::= "include"
ImportToken ::= "import"
ExternToken ::= "extern"
MacroToken ::= "macro"
FnToken ::= "fn"
ConstToken ::= "const"
TypedefToken ::= "typedef"
StructToken ::= "struct"
TraitToken ::= "trait"
UnionToken ::= "union"
MoveToken ::= "@move"
OpaqueToken ::= "@opaque"
ExtendsToken ::= "extends"
MixinToken ::= "mixin"
EnumToken ::= "enum"
RegionToken ::= "region"
RetToken ::= "return"
WithToken ::= "with"
IfToken ::= "if"
ElifToken ::= "elif"
ElseToken ::= "else"
CaseToken ::= "case"
MatchToken ::= "match"
WhileToken ::= "while"
EachToken ::= "each"
InToken ::= "in"
ByToken ::= "step"
BreakToken ::= "break"
ContinueToken ::= "continue"
AsToken ::= "as"
IntoToken ::= "into"
InlineToken ::= "inline"
VoidToken ::= "void"
nilToken ::= "nil"
trueToken ::= "true"
falseToken ::= "false"
UndefToken ::= "undef"
LifetimeToken ::= "'"
from cone.
Wow! Thanks for doing all this. Will study it more closely when I have some free time.
from cone.
I have looked it through, and you have captured quite a bit of the syntax accurately from the code. It appears there are some productions that are currently incomplete - e.g., the control flow (parseIf, etc.).
What future plans do you have for this work?
from cone.
For now I am done with it. I believe that a railroad diagram is a nice way to communicate the global view of a programming language and to help people work with it.
I hope it will be useful; I use it (or any other programming language) as an excuse to exercise my parsing skills.
from cone.
Good on you, and thanks for sharing it for our benefit. All the best to you!
from cone.
Related Issues (20)
- Compiler reporting of time is sometimes obviously overestimated HOT 2
- Anonymous function output passed to print HOT 1
- Array declaration fails in global scope HOT 1
- Can't access global variable, as if not seen, but in fact it is seen HOT 2
- Doc for `extern` misses (curly braces) xor (colons) HOT 1
- Fails to print u32 HOT 4
- `typedef` fails HOT 6
- Add support for typed array size HOT 1
- Automatic cast of index when using each HOT 1
- Multidim array type compiles but not usable HOT 2
- Problem with multiple return values HOT 4
- Global arrays are not accessible if not initialized HOT 1
- Literals with underscores not recognized by print and returns HOT 3
- + operator called as a method in backticks does not work or is not implemented HOT 1
- Adding elements to a collection with <- doesn't work HOT 1
- Gist creation broken on the playground
- SSL certificate on website invalid since Mar 15, 2022 HOT 2
- Unaccounted for line counting when tokenizing strings HOT 1
- Using C from Cone: calling fopen() HOT 1
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from cone.