grammar - Is there a better way to specify optional elements in rules of a CFG? -
Consider a language and the compiler I am designing and developing for it. In the language there is a particular statement that is part of the grammar: (=<identifier>).
This piece can be recognized by the compiler. Spaces are allowed between the brackets, the equal sign, and the identifier, so I have these possibilities:
(=<identifier>) ( = <identifier> ) (=identifier ) ( =identifier ) ...
Without considering the whole grammar, the rules to handle this language feature are (in Bison-like syntax):
statement: obrckt eq id cbrckt | obrckt s eq s id s cbrckt | obrckt s eq id s cbrckt | obrckt s eq s id cbrckt | obrckt s eq id cbrckt | obrckt eq s id s cbrckt | obrckt eq id s cbrckt | obrckt eq s id cbrckt | ...
The space terminal s
can appear or not. The way the rules are written, I need to specify all possible combinations... Is there a better way to achieve this result?
As Jim commented, use a lexical tool to handle these cases instead of writing them as productions of the grammar.
For example, I commonly use Flex for lexical analysis and Bison to define the grammar (as you have probably done).
You can achieve the result you want with the following (this is just an example; it is pretty simple and cannot do much):
lexicalanalyzer.l
/*
 * lexicalanalyzer.l
 *
 * Specifications of tokens for the language.
 *
 * Every recognized token is echoed to stdout together with its lexeme and
 * then returned to the parser. The token codes (t_eq, t_obrckt, ...) are
 * not defined here; they must be supplied by the parser that uses this
 * scanner.
 */

%{
%}

/*
 * Definitions of regular expressions.
 * Note: whitespace is captured here, so we take care of spaces in the
 * scanner and the grammar never has to mention them.
 * (Kept as a block comment on its own lines: text following a definition
 * on the same line is not treated as a comment by flex.)
 */
wspace      [ \t\r]+

/*
 * Tokens
 */
%%

"="             { printf("token: eq lexeme: %s\n", yytext);      return t_eq; }
"("             { printf("token: obrckt lexeme: %s\n", yytext);  return t_obrckt; }
")"             { printf("token: cbrckt lexeme: %s\n", yytext);  return t_cbrckt; }
"<"             { printf("token: lt lexeme: %s\n", yytext);      return t_lt; }
">"             { printf("token: gt lexeme: %s\n", yytext);      return t_gt; }
"identifier"    { printf("token: ident lexeme: %s\n", yytext);   return t_ident; }

{wspace}        { /* discard whitespace: no token is returned */ }

.               { printf("token: unknown lexeme: %s\n", yytext); return t_unknown; }

%%
syntaxanalyzer.y
/*
 * syntaxanalyzer.y
 *
 * To create the syntax analyzer:
 *
 *     flex file.l
 *     bison file.y
 *     g++ file.tab.c -o file_parser
 *     file_parser < inputfilename
 */

/*
 * Declaration section.
 */
%{
#include <stdio.h>

void printrule(const char *lhs, const char *rhs);

/* Called by the generated parser on a syntax error; the message text in
 * `s` is not used, a fixed marker is printed instead. */
int yyerror(const char *s) {
    printf("error!");
    return 0;   /* declared int, so a value must be returned */
}

/* The build line above compiles with g++, so the parser/scanner entry
 * points are given C linkage. The linkage string must be upper-case "C". */
extern "C" {
    int yyparse(void);
    int yylex(void);
    int yywrap() { return 1; }   /* 1 = no further input after EOF */
}
%}

/*
 * Token declarations
 */
%token t_obrckt t_cbrckt
%token t_lt t_gt t_eq
%token t_ident t_unknown

/*
 * Starting point.
 */
%start n_start

/*
 * Translation rules.
 * Whitespace never appears here: the scanner discards it, so a single
 * production covers every spacing variant of the statement.
 */
%%

n_start     : n_statement
                {
                    printrule("start", "statement");
                    printf("\n---- completed parsing ----\n\n");
                    return 0;
                }
            ;

n_statement : t_obrckt t_eq t_lt t_ident t_gt t_cbrckt
                {
                    printrule("expr", "t_obrckt t_eq t_lt t_ident t_gt t_cbrckt");
                }
            ;

%%

/* The scanner is textually included so that it sees the token codes
 * generated from the %token declarations above. */
#include "lex.yy.c"
extern FILE *yyin;

/* Print one applied production in the form "lhs -> rhs". */
void printrule(const char *lhs, const char *rhs) {
    printf("%s -> %s\n", lhs, rhs);
    return;
}

int main() {
    /* do/while rather than while: yyparse() runs before yyin is first
     * tested for end-of-file. */
    do {
        yyparse();
    } while (!feof(yyin));
    return 0;
}
Comments
Post a Comment