py: Simplify "and" action within parser by making ident-rules explicit.

Most grammar rules can optimise to the identity if they only have a single argument, saving a lot of RAM building the parse tree. Previous to this patch, whether a given grammar rule could be optimised was defined (mostly implicitly) by a complicated set of logic rules. With this patch the definition is always specified explicitly by using "and_ident" in the rule definition in the grammar. This simplifies the logic of the parser, making it a bit smaller and faster. RAM usage in unaffected.
2016-04-14 13:23:50 +01:00
parent 0a400a6333
commit 0c1de1cdee
2 changed files with 78 additions and 95 deletions
--- a/py/parse.c
+++ b/py/parse.c
@@ -56,8 +56,6 @@
 #define RULE_ARG_RULE           (0x2000)
 #define RULE_ARG_OPT_RULE       (0x3000)

-#define ADD_BLANK_NODE(rule) ((rule->act & RULE_ACT_ADD_BLANK) != 0)
-
 // (un)comment to use rule names; for debugging
 //#define USE_RULE_NAME (1)

@@ -80,10 +78,10 @@ enum {
    RULE_const_object, // special node for a constant, generic Python object
 };

-#define ident                   (RULE_ACT_ALLOW_IDENT)
-#define blank                   (RULE_ACT_ADD_BLANK)
 #define or(n)                   (RULE_ACT_OR | n)
 #define and(n)                  (RULE_ACT_AND | n)
+#define and_ident(n)            (RULE_ACT_AND | n | RULE_ACT_ALLOW_IDENT)
+#define and_blank(n)            (RULE_ACT_AND | n | RULE_ACT_ADD_BLANK)
 #define one_or_more             (RULE_ACT_LIST | 2)
 #define list                    (RULE_ACT_LIST | 1)
 #define list_with_end           (RULE_ACT_LIST | 3)
@@ -830,25 +828,6 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {

                // matched the rule, so now build the corresponding parse_node

-                // count number of arguments for the parse_node
-                i = 0;
-                bool emit_rule = false;
-                for (size_t x = 0; x < n; ++x) {
-                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
-                        mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
-                        if (tok_kind >= MP_TOKEN_NAME) {
-                            emit_rule = true;
-                        }
-                        if (tok_kind == MP_TOKEN_NAME) {
-                            // only tokens which were names are pushed to stack
-                            i += 1;
-                        }
-                    } else {
-                        // rules are always pushed
-                        i += 1;
-                    }
-                }
-
                #if !MICROPY_ENABLE_DOC_STRING
                // this code discards lonely statements, such as doc strings
                if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
@@ -868,35 +847,29 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                }
                #endif

-                // always emit these rules, even if they have only 1 argument
-                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
-                    emit_rule = true;
-                }
-
-                // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
-                // rule should not be emitted if it has only 1 argument
-                if (rule->act & RULE_ACT_ALLOW_IDENT) {
-                    emit_rule = false;
-                }
-
-                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
-                if (ADD_BLANK_NODE(rule)) {
-                    emit_rule = true;
-                    push_result_node(&parser, MP_PARSE_NODE_NULL);
-                    i += 1;
-                }
-
+                // count number of arguments for the parse node
+                i = 0;
                size_t num_not_nil = 0;
-                for (size_t x = 0; x < i; ++x) {
-                    if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
-                        num_not_nil += 1;
+                for (size_t x = n; x > 0;) {
+                    --x;
+                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
+                        mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
+                        if (tok_kind == MP_TOKEN_NAME) {
+                            // only tokens which were names are pushed to stack
+                            i += 1;
+                            num_not_nil += 1;
+                        }
+                    } else {
+                        // rules are always pushed
+                        if (peek_result(&parser, i) != MP_PARSE_NODE_NULL) {
+                            num_not_nil += 1;
+                        }
+                        i += 1;
                    }
                }
-                if (emit_rule || num_not_nil != 1) {
-                    // need to add rule when num_not_nil==0 for, eg, atom_paren, testlist_comp_3b
-                    push_result_rule(&parser, rule_src_line, rule, i);
-                } else {
-                    // single result, leave it on stack
+
+                if (num_not_nil == 1 && (rule->act & RULE_ACT_ALLOW_IDENT)) {
+                    // this rule has only 1 argument and should not be emitted
                    mp_parse_node_t pn = MP_PARSE_NODE_NULL;
                    for (size_t x = 0; x < i; ++x) {
                        mp_parse_node_t pn2 = pop_result(&parser);
@@ -905,6 +878,16 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                        }
                    }
                    push_result_node(&parser, pn);
+                } else {
+                    // this rule must be emitted
+
+                    if (rule->act & RULE_ACT_ADD_BLANK) {
+                        // and add an extra blank node at the end (used by the compiler to store data)
+                        push_result_node(&parser, MP_PARSE_NODE_NULL);
+                        i += 1;
+                    }
+
+                    push_result_rule(&parser, rule_src_line, rule, i);
                }
                break;
            }