diff --git a/py/bc.c b/py/bc.c index c32d5c415..7544ffc5f 100644 --- a/py/bc.c +++ b/py/bc.c @@ -124,13 +124,14 @@ void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw code_state->frame = NULL; #endif - // get params - size_t n_state = mp_decode_uint(&code_state->ip); - code_state->ip = mp_decode_uint_skip(code_state->ip); // skip n_exc_stack - size_t scope_flags = *code_state->ip++; - size_t n_pos_args = *code_state->ip++; - size_t n_kwonly_args = *code_state->ip++; - size_t n_def_pos_args = *code_state->ip++; + // Get cached n_state (rather than decode it again) + size_t n_state = code_state->n_state; + + // Decode prelude + size_t n_state_unused, n_exc_stack_unused, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args; + MP_BC_PRELUDE_SIG_DECODE_INTO(code_state->ip, n_state_unused, n_exc_stack_unused, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args); + (void)n_state_unused; + (void)n_exc_stack_unused; code_state->sp = &code_state->state[0] - 1; code_state->exc_sp_idx = 0; diff --git a/py/bc.h b/py/bc.h index 69bce8902..1ae8c9925 100644 --- a/py/bc.h +++ b/py/bc.h @@ -32,12 +32,15 @@ // bytecode layout: // -// n_state : var uint -// n_exc_stack : var uint -// scope_flags : byte -// n_pos_args : byte number of arguments this function takes -// n_kwonly_args : byte number of keyword-only arguments this function takes -// n_def_pos_args : byte number of default positional arguments +// func signature : var uint +// contains six values interleaved bit-wise as: xSSSSEAA [xFSSKAED repeated] +// x = extension another byte follows +// S = n_state - 1 number of entries in Python value stack +// E = n_exc_stack number of entries in exception stack +// F = scope_flags four bits of flags, MP_SCOPE_FLAG_xxx +// A = n_pos_args number of arguments this function takes +// K = n_kwonly_args number of keyword-only arguments this function takes +// D = n_def_pos_args number of default positional arguments // // code_info_size : var uint | code_info_size counts bytes in this chunk // simple_name : var qstr | @@ -60,6 +63,65 @@ // const0 : obj // constN : obj +#define MP_BC_PRELUDE_SIG_ENCODE(S, E, scope, out_byte, out_env) \ +do { \ + /*// Get values to store in prelude */ \ + size_t F = scope->scope_flags & 0x0f; /* only need to store lower 4 flag bits */ \ + size_t A = scope->num_pos_args; \ + size_t K = scope->num_kwonly_args; \ + size_t D = scope->num_def_pos_args; \ + \ + /* Adjust S to shrink range, to compress better */ \ + S -= 1; \ + \ + /* Encode prelude */ \ + /* xSSSSEAA */ \ + uint8_t z = (S & 0xf) << 3 | (E & 1) << 2 | (A & 3); \ + S >>= 4; \ + E >>= 1; \ + A >>= 2; \ + while (S | E | F | A | K | D) { \ + out_byte(out_env, 0x80 | z); \ + /* xFSSKAED */ \ + z = (F & 1) << 6 | (S & 3) << 4 | (K & 1) << 3 \ + | (A & 1) << 2 | (E & 1) << 1 | (D & 1); \ + S >>= 2; \ + E >>= 1; \ + F >>= 1; \ + A >>= 1; \ + K >>= 1; \ + D >>= 1; \ + } \ + out_byte(out_env, z); \ +} while (0) + +#define MP_BC_PRELUDE_SIG_DECODE_INTO(ip, S, E, F, A, K, D) \ +do { \ + uint8_t z = *(ip)++; \ + /* xSSSSEAA */ \ + S = (z >> 3) & 0xf; \ + E = (z >> 2) & 0x1; \ + F = 0; \ + A = z & 0x3; \ + K = 0; \ + D = 0; \ + for (unsigned n = 0; z & 0x80; ++n) { \ + z = *(ip)++; \ + /* xFSSKAED */ \ + S |= (z & 0x30) << (2 * n); \ + E |= (z & 0x02) << n; \ + F |= ((z & 0x40) >> 6) << n; \ + A |= (z & 0x4) << n; \ + K |= ((z & 0x08) >> 3) << n; \ + D |= (z & 0x1) << n; \ + } \ + S += 1; \ +} while (0) + +#define MP_BC_PRELUDE_SIG_DECODE(ip) \ + size_t n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args; \ + MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args) + // Sentinel value for mp_code_state_t.exc_sp_idx #define MP_CODE_STATE_EXC_SP_IDX_SENTINEL ((uint16_t)-1) diff --git a/py/emitbc.c b/py/emitbc.c index a8d57d27b..1aa219ca8 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -328,7 +328,7 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { emit->bytecode_offset = 0; emit->code_info_offset = 0; - // Write local state size and exception stack size. + // Write local state size, exception stack size, scope flags and number of arguments { mp_uint_t n_state = scope->num_locals + scope->stack_size; if (n_state == 0) { @@ -341,16 +341,10 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { // An extra slot in the stack is needed to detect VM stack overflow n_state += 1; #endif - emit_write_code_info_uint(emit, n_state); - emit_write_code_info_uint(emit, scope->exc_stack_size); - } - // Write scope flags and number of arguments. - // TODO check that num args all fit in a byte - emit_write_code_info_byte(emit, emit->scope->scope_flags); - emit_write_code_info_byte(emit, emit->scope->num_pos_args); - emit_write_code_info_byte(emit, emit->scope->num_kwonly_args); - emit_write_code_info_byte(emit, emit->scope->num_def_pos_args); + size_t n_exc_stack = scope->exc_stack_size; + MP_BC_PRELUDE_SIG_ENCODE(n_state, n_exc_stack, scope, emit_write_code_info_byte, emit); + } // Write size of the rest of the code info. We don't know how big this // variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes diff --git a/py/emitnative.c b/py/emitnative.c index f5a18c987..d0e758f56 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -573,18 +573,19 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop } +static inline void emit_native_write_code_info_byte(emit_t *emit, byte val) { + mp_asm_base_data(&emit->as->base, 1, val); +} + STATIC void emit_native_end_pass(emit_t *emit) { emit_native_global_exc_exit(emit); if (!emit->do_viper_types) { emit->prelude_offset = mp_asm_base_get_code_pos(&emit->as->base); - mp_asm_base_data(&emit->as->base, 1, 0x80 | ((emit->n_state >> 7) & 0x7f)); - mp_asm_base_data(&emit->as->base, 1, emit->n_state & 0x7f); - mp_asm_base_data(&emit->as->base, 1, 0); // n_exc_stack - mp_asm_base_data(&emit->as->base, 1, emit->scope->scope_flags); - mp_asm_base_data(&emit->as->base, 1, emit->scope->num_pos_args); - mp_asm_base_data(&emit->as->base, 1, emit->scope->num_kwonly_args); - mp_asm_base_data(&emit->as->base, 1, emit->scope->num_def_pos_args); + + size_t n_state = emit->n_state; + size_t n_exc_stack = 0; // exc-stack not needed for native code + MP_BC_PRELUDE_SIG_ENCODE(n_state, n_exc_stack, emit->scope, emit_native_write_code_info_byte, emit); // write code info #if MICROPY_PERSISTENT_CODE diff --git a/py/objfun.c b/py/objfun.c index 114367b4e..053fe46b7 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -161,12 +161,7 @@ qstr mp_obj_fun_get_name(mp_const_obj_t fun_in) { #endif const byte *bc = fun->bytecode; - bc = mp_decode_uint_skip(bc); // skip n_state - bc = mp_decode_uint_skip(bc); // skip n_exc_stack - bc++; // skip scope_params - bc++; // skip n_pos_args - bc++; // skip n_kwonly_args - bc++; // skip n_def_pos_args + MP_BC_PRELUDE_SIG_DECODE(bc); return mp_obj_code_get_name(bc); } @@ -197,10 +192,10 @@ STATIC void dump_args(const mp_obj_t *a, size_t sz) { #define DECODE_CODESTATE_SIZE(bytecode, n_state_out_var, state_size_out_var) \ { \ - /* bytecode prelude: state size and exception stack size */ \ - n_state_out_var = mp_decode_uint_value(bytecode); \ - size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(bytecode)); \ - \ + const uint8_t *ip = bytecode; \ + size_t n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_args; \ + MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state_out_var, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_args); \ + \ /* state size in bytes */ \ state_size_out_var = n_state_out_var * sizeof(mp_obj_t) \ + n_exc_stack * sizeof(mp_exc_stack_t); \ @@ -295,9 +290,11 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const assert(0); } } - const byte *bytecode_ptr = mp_decode_uint_skip(mp_decode_uint_skip(self->bytecode)); - size_t n_pos_args = bytecode_ptr[1]; - size_t n_kwonly_args = bytecode_ptr[2]; + const byte *bytecode_ptr = self->bytecode; + size_t n_state_unused, n_exc_stack_unused, scope_flags_unused; + size_t n_pos_args, n_kwonly_args, n_def_args_unused; + MP_BC_PRELUDE_SIG_DECODE_INTO(bytecode_ptr, n_state_unused, n_exc_stack_unused, + scope_flags_unused, n_pos_args, n_kwonly_args, n_def_args_unused); // We can't check the case when an exception is returned in state[0] // and there are no arguments, because in this case our detection slot may have // been overwritten by the returned exception (which is allowed). diff --git a/py/objgenerator.c b/py/objgenerator.c index 39dcbefb9..359dade88 100644 --- a/py/objgenerator.c +++ b/py/objgenerator.c @@ -51,8 +51,8 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons mp_obj_fun_bc_t *self_fun = MP_OBJ_TO_PTR(self_in); // bytecode prelude: get state size and exception stack size - size_t n_state = mp_decode_uint_value(self_fun->bytecode); - size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(self_fun->bytecode)); + const uint8_t *ip = self_fun->bytecode; + MP_BC_PRELUDE_SIG_DECODE(ip); // allocate the generator object, with room for local stack and exception stack mp_obj_gen_instance_t *o = m_new_obj_var(mp_obj_gen_instance_t, byte, @@ -88,7 +88,9 @@ STATIC mp_obj_t native_gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_k // Determine start of prelude, and extract n_state from it uintptr_t prelude_offset = ((uintptr_t*)self_fun->bytecode)[0]; - size_t n_state = mp_decode_uint_value(self_fun->bytecode + prelude_offset); + const uint8_t *ip = self_fun->bytecode + prelude_offset; + size_t n_state, n_exc_stack_unused, scope_flags, n_pos_args, n_kwonly_args, n_def_args; + MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state, n_exc_stack_unused, scope_flags, n_pos_args, n_kwonly_args, n_def_args); size_t n_exc_stack = 0; // Allocate the generator object, with room for local stack and exception stack diff --git a/py/persistentcode.c b/py/persistentcode.c index 3b59746ba..9776acb1e 100644 --- a/py/persistentcode.c +++ b/py/persistentcode.c @@ -157,17 +157,16 @@ typedef struct _bytecode_prelude_t { uint code_info_size; } bytecode_prelude_t; -#if MICROPY_PERSISTENT_CODE_SAVE || MICROPY_EMIT_MACHINE_CODE - // ip will point to start of opcodes // ip2 will point to simple_name, source_file qstrs STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_t *prelude) { - prelude->n_state = mp_decode_uint(ip); - prelude->n_exc_stack = mp_decode_uint(ip); - prelude->scope_flags = *(*ip)++; - prelude->n_pos_args = *(*ip)++; - prelude->n_kwonly_args = *(*ip)++; - prelude->n_def_pos_args = *(*ip)++; + MP_BC_PRELUDE_SIG_DECODE(*ip); + prelude->n_state = n_state; + prelude->n_exc_stack = n_exc_stack; + prelude->scope_flags = scope_flags; + prelude->n_pos_args = n_pos_args; + prelude->n_kwonly_args = n_kwonly_args; + prelude->n_def_pos_args = n_def_pos_args; *ip2 = *ip; prelude->code_info_size = mp_decode_uint(ip2); *ip += prelude->code_info_size; @@ -175,8 +174,6 @@ STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_ } } -#endif - #endif // MICROPY_PERSISTENT_CODE_LOAD || MICROPY_PERSISTENT_CODE_SAVE #if MICROPY_PERSISTENT_CODE_LOAD @@ -285,19 +282,19 @@ STATIC mp_obj_t load_obj(mp_reader_t *reader) { } STATIC void load_prelude(mp_reader_t *reader, byte **ip, byte **ip2, bytecode_prelude_t *prelude) { - prelude->n_state = read_uint(reader, ip); - prelude->n_exc_stack = read_uint(reader, ip); - read_bytes(reader, *ip, 4); - prelude->scope_flags = *(*ip)++; - prelude->n_pos_args = *(*ip)++; - prelude->n_kwonly_args = *(*ip)++; - prelude->n_def_pos_args = *(*ip)++; - *ip2 = *ip; - prelude->code_info_size = read_uint(reader, ip2); - read_bytes(reader, *ip2, prelude->code_info_size - (*ip2 - *ip)); - *ip += prelude->code_info_size; - while ((*(*ip)++ = read_byte(reader)) != 255) { + // Read in the prelude + byte *ip_read = *ip; + read_uint(reader, &ip_read); // read in n_state/etc (is effectively a var-uint) + byte *ip_read_save = ip_read; + size_t code_info_size = read_uint(reader, &ip_read); // read in code_info_size + code_info_size -= ip_read - ip_read_save; // subtract bytes taken by code_info_size itself + read_bytes(reader, ip_read, code_info_size); // read remaining code info + ip_read += code_info_size; + while ((*ip_read++ = read_byte(reader)) != 255) { } + + // Entire prelude has been read into *ip, now decode and extract values from it + extract_prelude((const byte**)ip, (const byte**)ip2, prelude); } STATIC void load_bytecode(mp_reader_t *reader, qstr_window_t *qw, byte *ip, byte *ip_top) { diff --git a/py/profile.c b/py/profile.c index 8c4feaa11..230bb7cc2 100644 --- a/py/profile.c +++ b/py/profile.c @@ -40,12 +40,13 @@ STATIC uint mp_prof_bytecode_lineno(const mp_raw_code_t *rc, size_t bc) { void mp_prof_extract_prelude(const byte *bytecode, mp_bytecode_prelude_t *prelude) { const byte *ip = bytecode; - prelude->n_state = mp_decode_uint(&ip); - prelude->n_exc_stack = mp_decode_uint(&ip); - prelude->scope_flags = *ip++; - prelude->n_pos_args = *ip++; - prelude->n_kwonly_args = *ip++; - prelude->n_def_pos_args = *ip++; + MP_BC_PRELUDE_SIG_DECODE(ip); + prelude->n_state = n_state; + prelude->n_exc_stack = n_exc_stack; + prelude->scope_flags = scope_flags; + prelude->n_pos_args = n_pos_args; + prelude->n_kwonly_args = n_kwonly_args; + prelude->n_def_pos_args = n_def_pos_args; const byte *code_info = ip; size_t code_info_size = mp_decode_uint(&ip); diff --git a/py/runtime0.h b/py/runtime0.h index fd284d47b..1df6d0d58 100644 --- a/py/runtime0.h +++ b/py/runtime0.h @@ -28,9 +28,9 @@ // The first four must fit in 8 bits, see emitbc.c // The remaining must fit in 16 bits, see scope.h -#define MP_SCOPE_FLAG_VARARGS (0x01) +#define MP_SCOPE_FLAG_GENERATOR (0x01) #define MP_SCOPE_FLAG_VARKEYWORDS (0x02) -#define MP_SCOPE_FLAG_GENERATOR (0x04) +#define MP_SCOPE_FLAG_VARARGS (0x04) #define MP_SCOPE_FLAG_DEFKWARGS (0x08) #define MP_SCOPE_FLAG_REFGLOBALS (0x10) // used only if native emitter enabled #define MP_SCOPE_FLAG_HASCONSTS (0x20) // used only if native emitter enabled diff --git a/py/showbc.c b/py/showbc.c index 78b5b0141..216f35266 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -83,13 +83,8 @@ const mp_uint_t *mp_showbc_const_table; void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const mp_uint_t *const_table) { mp_showbc_code_start = ip; - // get bytecode parameters - mp_uint_t n_state = mp_decode_uint(&ip); - mp_uint_t n_exc_stack = mp_decode_uint(&ip); - /*mp_uint_t scope_flags =*/ ip++; - mp_uint_t n_pos_args = *ip++; - mp_uint_t n_kwonly_args = *ip++; - /*mp_uint_t n_def_pos_args =*/ ip++; + // Decode prelude + MP_BC_PRELUDE_SIG_DECODE(ip); const byte *code_info = ip; mp_uint_t code_info_size = mp_decode_uint(&code_info); @@ -123,8 +118,8 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m } printf("\n"); - printf("(N_STATE " UINT_FMT ")\n", n_state); - printf("(N_EXC_STACK " UINT_FMT ")\n", n_exc_stack); + printf("(N_STATE %u)\n", (unsigned)n_state); + printf("(N_EXC_STACK %u)\n", (unsigned)n_exc_stack); // for printing line number info const byte *bytecode_start = ip; diff --git a/py/vm.c b/py/vm.c index d59771b6e..82218fd97 100644 --- a/py/vm.c +++ b/py/vm.c @@ -1440,12 +1440,7 @@ unwind_loop: && *code_state->ip != MP_BC_END_FINALLY && *code_state->ip != MP_BC_RAISE_LAST) { const byte *ip = code_state->fun_bc->bytecode; - ip = mp_decode_uint_skip(ip); // skip n_state - ip = mp_decode_uint_skip(ip); // skip n_exc_stack - ip++; // skip scope_params - ip++; // skip n_pos_args - ip++; // skip n_kwonly_args - ip++; // skip n_def_pos_args + MP_BC_PRELUDE_SIG_DECODE(ip); size_t bc = code_state->ip - ip; size_t code_info_size = mp_decode_uint_value(ip); ip = mp_decode_uint_skip(ip); // skip code_info_size diff --git a/tests/cmdline/cmd_verbose.py.exp b/tests/cmdline/cmd_verbose.py.exp index 371c8c4b5..60b499c26 100644 --- a/tests/cmdline/cmd_verbose.py.exp +++ b/tests/cmdline/cmd_verbose.py.exp @@ -1,6 +1,6 @@ File cmdline/cmd_verbose.py, code block '' (descriptor: \.\+, bytecode \.\+ bytes) Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+): - 02 \.\+ + 08 \.\+ ######## \.\+63 arg names: diff --git a/tests/import/mpy_native.py b/tests/import/mpy_native.py index 25fceb9bb..03abab349 100644 --- a/tests/import/mpy_native.py +++ b/tests/import/mpy_native.py @@ -56,8 +56,8 @@ user_files = { '/mod1.mpy': ( b'M\x05\x0b\x1f\x20' # header - b'\x38' # n bytes, bytecode - b'\x01\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\xff' # prelude + b'\x24' # n bytes, bytecode + b'\x00\x05\x00\x00\x00\x00\xff' # prelude b'\x51' # LOAD_CONST_NONE b'\x63' # RETURN_VALUE diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index 4f8e965d7..8671a2395 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -152,13 +152,38 @@ def decode_uint(bytecode, ip): break return ip, unum +def read_prelude_sig(read_byte): + z = read_byte() + # xSSSSEAA + S = (z >> 3) & 0xf + E = (z >> 2) & 0x1 + F = 0 + A = z & 0x3 + K = 0 + D = 0 + n = 0 + while z & 0x80: + z = read_byte() + # xFSSKAED + S |= (z & 0x30) << (2 * n) + E |= (z & 0x02) << n + F |= ((z & 0x40) >> 6) << n + A |= (z & 0x4) << n + K |= ((z & 0x08) >> 3) << n + D |= (z & 0x1) << n + n += 1 + S += 1 + return S, E, F, A, K, D + def extract_prelude(bytecode, ip): - ip, n_state = decode_uint(bytecode, ip) - ip, n_exc_stack = decode_uint(bytecode, ip) - scope_flags = bytecode[ip]; ip += 1 - n_pos_args = bytecode[ip]; ip += 1 - n_kwonly_args = bytecode[ip]; ip += 1 - n_def_pos_args = bytecode[ip]; ip += 1 + def local_read_byte(): + b = bytecode[ip_ref[0]] + ip_ref[0] += 1 + return b + ip_ref = [ip] # to close over ip in Python 2 and 3 + n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args = read_prelude_sig(local_read_byte) + ip = ip_ref[0] + ip2, code_info_size = decode_uint(bytecode, ip) ip += code_info_size while bytecode[ip] != 0xff: @@ -557,12 +582,7 @@ def read_obj(f): assert 0 def read_prelude(f, bytecode): - n_state = read_uint(f, bytecode) - n_exc_stack = read_uint(f, bytecode) - scope_flags = read_byte(f, bytecode) - n_pos_args = read_byte(f, bytecode) - n_kwonly_args = read_byte(f, bytecode) - n_def_pos_args = read_byte(f, bytecode) + n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args = read_prelude_sig(lambda: read_byte(f, bytecode)) l1 = bytecode.idx code_info_size = read_uint(f, bytecode) l2 = bytecode.idx