py: Simplify "and" action within parser by making ident-rules explicit.

Most grammar rules can optimise to the identity if they only have a single
argument, saving a lot of RAM building the parse tree.  Previous to this
patch, whether a given grammar rule could be optimised was defined (mostly
implicitly) by a complicated set of logic rules.  With this patch the
definition is always specified explicitly by using "and_ident" in the rule
definition in the grammar.  This simplifies the logic of the parser,
making it a bit smaller and faster.  RAM usage in unaffected.
This commit is contained in:
Damien George
2016-04-14 13:23:50 +01:00
parent 0a400a6333
commit 0c1de1cdee
2 changed files with 78 additions and 95 deletions

View File

@@ -56,8 +56,6 @@
#define RULE_ARG_RULE (0x2000)
#define RULE_ARG_OPT_RULE (0x3000)
#define ADD_BLANK_NODE(rule) ((rule->act & RULE_ACT_ADD_BLANK) != 0)
// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)
@@ -80,10 +78,10 @@ enum {
RULE_const_object, // special node for a constant, generic Python object
};
#define ident (RULE_ACT_ALLOW_IDENT)
#define blank (RULE_ACT_ADD_BLANK)
#define or(n) (RULE_ACT_OR | n)
#define and(n) (RULE_ACT_AND | n)
#define and_ident(n) (RULE_ACT_AND | n | RULE_ACT_ALLOW_IDENT)
#define and_blank(n) (RULE_ACT_AND | n | RULE_ACT_ADD_BLANK)
#define one_or_more (RULE_ACT_LIST | 2)
#define list (RULE_ACT_LIST | 1)
#define list_with_end (RULE_ACT_LIST | 3)
@@ -830,25 +828,6 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
// matched the rule, so now build the corresponding parse_node
// count number of arguments for the parse_node
i = 0;
bool emit_rule = false;
for (size_t x = 0; x < n; ++x) {
if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
if (tok_kind >= MP_TOKEN_NAME) {
emit_rule = true;
}
if (tok_kind == MP_TOKEN_NAME) {
// only tokens which were names are pushed to stack
i += 1;
}
} else {
// rules are always pushed
i += 1;
}
}
#if !MICROPY_ENABLE_DOC_STRING
// this code discards lonely statements, such as doc strings
if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
@@ -868,35 +847,29 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
}
#endif
// always emit these rules, even if they have only 1 argument
if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
emit_rule = true;
}
// if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
// rule should not be emitted if it has only 1 argument
if (rule->act & RULE_ACT_ALLOW_IDENT) {
emit_rule = false;
}
// always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
if (ADD_BLANK_NODE(rule)) {
emit_rule = true;
push_result_node(&parser, MP_PARSE_NODE_NULL);
i += 1;
}
// count number of arguments for the parse node
i = 0;
size_t num_not_nil = 0;
for (size_t x = 0; x < i; ++x) {
if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
num_not_nil += 1;
for (size_t x = n; x > 0;) {
--x;
if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
if (tok_kind == MP_TOKEN_NAME) {
// only tokens which were names are pushed to stack
i += 1;
num_not_nil += 1;
}
} else {
// rules are always pushed
if (peek_result(&parser, i) != MP_PARSE_NODE_NULL) {
num_not_nil += 1;
}
i += 1;
}
}
if (emit_rule || num_not_nil != 1) {
// need to add rule when num_not_nil==0 for, eg, atom_paren, testlist_comp_3b
push_result_rule(&parser, rule_src_line, rule, i);
} else {
// single result, leave it on stack
if (num_not_nil == 1 && (rule->act & RULE_ACT_ALLOW_IDENT)) {
// this rule has only 1 argument and should not be emitted
mp_parse_node_t pn = MP_PARSE_NODE_NULL;
for (size_t x = 0; x < i; ++x) {
mp_parse_node_t pn2 = pop_result(&parser);
@@ -905,6 +878,16 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
}
}
push_result_node(&parser, pn);
} else {
// this rule must be emitted
if (rule->act & RULE_ACT_ADD_BLANK) {
// and add an extra blank node at the end (used by the compiler to store data)
push_result_node(&parser, MP_PARSE_NODE_NULL);
i += 1;
}
push_result_rule(&parser, rule_src_line, rule, i);
}
break;
}