py: Simplify "and" action within parser by making ident-rules explicit.
Most grammar rules can optimise to the identity if they only have a single argument, saving a lot of RAM building the parse tree. Previous to this patch, whether a given grammar rule could be optimised was defined (mostly implicitly) by a complicated set of logic rules. With this patch the definition is always specified explicitly by using "and_ident" in the rule definition in the grammar. This simplifies the logic of the parser, making it a bit smaller and faster. RAM usage in unaffected.
This commit is contained in:
81
py/parse.c
81
py/parse.c
@@ -56,8 +56,6 @@
|
||||
#define RULE_ARG_RULE (0x2000)
|
||||
#define RULE_ARG_OPT_RULE (0x3000)
|
||||
|
||||
#define ADD_BLANK_NODE(rule) ((rule->act & RULE_ACT_ADD_BLANK) != 0)
|
||||
|
||||
// (un)comment to use rule names; for debugging
|
||||
//#define USE_RULE_NAME (1)
|
||||
|
||||
@@ -80,10 +78,10 @@ enum {
|
||||
RULE_const_object, // special node for a constant, generic Python object
|
||||
};
|
||||
|
||||
#define ident (RULE_ACT_ALLOW_IDENT)
|
||||
#define blank (RULE_ACT_ADD_BLANK)
|
||||
#define or(n) (RULE_ACT_OR | n)
|
||||
#define and(n) (RULE_ACT_AND | n)
|
||||
#define and_ident(n) (RULE_ACT_AND | n | RULE_ACT_ALLOW_IDENT)
|
||||
#define and_blank(n) (RULE_ACT_AND | n | RULE_ACT_ADD_BLANK)
|
||||
#define one_or_more (RULE_ACT_LIST | 2)
|
||||
#define list (RULE_ACT_LIST | 1)
|
||||
#define list_with_end (RULE_ACT_LIST | 3)
|
||||
@@ -830,25 +828,6 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
|
||||
|
||||
// matched the rule, so now build the corresponding parse_node
|
||||
|
||||
// count number of arguments for the parse_node
|
||||
i = 0;
|
||||
bool emit_rule = false;
|
||||
for (size_t x = 0; x < n; ++x) {
|
||||
if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
|
||||
mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
|
||||
if (tok_kind >= MP_TOKEN_NAME) {
|
||||
emit_rule = true;
|
||||
}
|
||||
if (tok_kind == MP_TOKEN_NAME) {
|
||||
// only tokens which were names are pushed to stack
|
||||
i += 1;
|
||||
}
|
||||
} else {
|
||||
// rules are always pushed
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#if !MICROPY_ENABLE_DOC_STRING
|
||||
// this code discards lonely statements, such as doc strings
|
||||
if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
|
||||
@@ -868,35 +847,29 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// always emit these rules, even if they have only 1 argument
|
||||
if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
|
||||
emit_rule = true;
|
||||
}
|
||||
|
||||
// if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
|
||||
// rule should not be emitted if it has only 1 argument
|
||||
if (rule->act & RULE_ACT_ALLOW_IDENT) {
|
||||
emit_rule = false;
|
||||
}
|
||||
|
||||
// always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
|
||||
if (ADD_BLANK_NODE(rule)) {
|
||||
emit_rule = true;
|
||||
push_result_node(&parser, MP_PARSE_NODE_NULL);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// count number of arguments for the parse node
|
||||
i = 0;
|
||||
size_t num_not_nil = 0;
|
||||
for (size_t x = 0; x < i; ++x) {
|
||||
if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
|
||||
num_not_nil += 1;
|
||||
for (size_t x = n; x > 0;) {
|
||||
--x;
|
||||
if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
|
||||
mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
|
||||
if (tok_kind == MP_TOKEN_NAME) {
|
||||
// only tokens which were names are pushed to stack
|
||||
i += 1;
|
||||
num_not_nil += 1;
|
||||
}
|
||||
} else {
|
||||
// rules are always pushed
|
||||
if (peek_result(&parser, i) != MP_PARSE_NODE_NULL) {
|
||||
num_not_nil += 1;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
if (emit_rule || num_not_nil != 1) {
|
||||
// need to add rule when num_not_nil==0 for, eg, atom_paren, testlist_comp_3b
|
||||
push_result_rule(&parser, rule_src_line, rule, i);
|
||||
} else {
|
||||
// single result, leave it on stack
|
||||
|
||||
if (num_not_nil == 1 && (rule->act & RULE_ACT_ALLOW_IDENT)) {
|
||||
// this rule has only 1 argument and should not be emitted
|
||||
mp_parse_node_t pn = MP_PARSE_NODE_NULL;
|
||||
for (size_t x = 0; x < i; ++x) {
|
||||
mp_parse_node_t pn2 = pop_result(&parser);
|
||||
@@ -905,6 +878,16 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
|
||||
}
|
||||
}
|
||||
push_result_node(&parser, pn);
|
||||
} else {
|
||||
// this rule must be emitted
|
||||
|
||||
if (rule->act & RULE_ACT_ADD_BLANK) {
|
||||
// and add an extra blank node at the end (used by the compiler to store data)
|
||||
push_result_node(&parser, MP_PARSE_NODE_NULL);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
push_result_rule(&parser, rule_src_line, rule, i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user