Pastie now auto-senses if line-wrap is a bad or good idea. Feedback?
## mark a section (Learn more)
This paste will be private.
diff --git a/shotgun/lib/baker.c b/shotgun/lib/baker.c index 1c16e44..e5781d9 100644 --- a/shotgun/lib/baker.c +++ b/shotgun/lib/baker.c @@ -9,8 +9,13 @@ #include "shotgun/lib/baker.h" #include "shotgun/lib/tuple.h" +/* how many times object should be traced before tenuring */ #define DEFAULT_TENURE_AGE 6 +/* +creates and initializes new copying GC instance +it start with space A as "from" space and B as "to" space +*/ baker_gc baker_gc_new(int size) { baker_gc g; g = (baker_gc)calloc(1, sizeof(struct baker_gc_struct)); @@ -29,27 +34,32 @@ baker_gc baker_gc_new(int size) { return g; } +/* prints various garbage collector information */ void baker_gc_describe(baker_gc g) { - printf("Size: %x (%d)\n", (unsigned int)g->current->size, (int)g->current->size); - printf("Current: %p => %p\n", (void*)g->current->address, (void*)g->current->last); - printf("Next: %p => %p\n", (void*)g->next->address, (void*)g->next->last); - printf("RS Size: %zd\n", ptr_array_length(g->remember_set)); + printf("Size : %x (%d)\n", (unsigned int)g->current->size, (int)g->current->size); + printf("Current (\"from\") heap space address: %p => %p\n", (void*)g->current->address, (void*)g->current->last); + printf("Next (\"to\") heap space address: %p => %p\n", (void*)g->next->address, (void*)g->next->last); + printf("Remember set size: %zd\n", ptr_array_length(g->remember_set)); } +/* baker GC uses two heap spaces of the same size */ int baker_gc_memory_allocated(baker_gc g) { return g->current->size * 2; } +/* returns how much memory is used by "from" heap space */ int baker_gc_memory_in_use(baker_gc g) { return g->current->current - g->current->address; } +/* enlarges "to" space of the heap (where new objects are allocated) by value of sz */ int baker_gc_enlarge_next(baker_gc g, size_t sz) { rheap h; h = heap_new(sz); return baker_gc_set_next(g, h); } +/* sets "to" heap space */ int baker_gc_set_next(baker_gc g, rheap h) { if(g->next == g->space_a) { g->space_a = h; @@ -60,21 +70,24 @@ int 
baker_gc_set_next(baker_gc g, rheap h) { return TRUE; } +/* gets start address of current "from" heap space */ address baker_gc_start_address(baker_gc g) { return g->current->address; } +/* total baker GC memory usage */ size_t baker_gc_used(baker_gc g) { return g->used; } +/* reset memory usage info */ void baker_gc_reset_used(baker_gc g) { g->used = 0; } +/* performs flip operation on heap spaces */ int baker_gc_swap(baker_gc g) { rheap tmp; - tmp = g->current; g->current = g->next; g->next = tmp; @@ -83,8 +96,6 @@ int baker_gc_swap(baker_gc g) { g->last_end = (char*)tmp->current; heap_reset(tmp); - /* Reset used to the what the current has used. */ - /* g->used = (uintptr_t)g->current->current - (uintptr_t)g->current->address; */ return TRUE; } @@ -95,6 +106,7 @@ int baker_gc_destroy(baker_gc g) { return TRUE; } +/* sets forwarding pointer on object */ void baker_gc_set_forwarding_address(OBJECT obj, OBJECT dest) { SET_FORWARDED(obj); obj->klass = dest; @@ -118,25 +130,6 @@ OBJECT baker_gc_forwarded_object(OBJECT obj) { } \ ret; }) -/* -static inline OBJECT baker_gc_maybe_mutate(baker_gc g, OBJECT iobj) { - OBJECT ret; - - if(baker_gc_forwarded_p(iobj)) { - ret = baker_gc_forwarded_object(iobj); - } else if(baker_gc_contains_p(g, iobj)) { - ret = baker_gc_mutate_object(g, iobj); - } else { - ret = iobj; - } - - // assert(baker_gc_contains_spill_p(g, ret)); - - CHECK_PTR(ret); - - return ret; -} -*/ int _object_stores_bytes(OBJECT self); static int depth = 0; @@ -191,11 +184,12 @@ static void _mutate_references(STATE, baker_gc g, OBJECT iobj) { //printf("%d: Mutating references of %p\n", depth, iobj); if(!_object_stores_bytes(iobj)) { + /* follow object field references and mutate them */ fields = NUM_FIELDS(iobj); for(i = 0; i < fields; i++) { tmp = NTH_FIELD(iobj, i); if(!REFERENCE_P(tmp)) continue; - + /* TODO: duplicated in other places: extract to separate function? 
*/ if(FORWARDED_P(tmp)) { mut = tmp->klass; } else if(heap_contains_p(g->current, tmp) || heap_contains_p(state->om->contexts, tmp)) { @@ -203,8 +197,9 @@ static void _mutate_references(STATE, baker_gc g, OBJECT iobj) { } else { mut = tmp; } - + /* update field reference */ SET_FIELD_DIRECT(iobj, i, mut); + /* update remember set, set "remembered" flag on iobj */ RUN_WB2(om, iobj, mut); } } else { @@ -341,8 +336,6 @@ void baker_gc_mutate_context(STATE, baker_gc g, OBJECT iobj, int shifted, int to assert(NIL_P(fc->sender) || fc->sender->obj_type == MContextType || fc->sender->obj_type == BContextType); } - - fc_mutate(method); fc_mutate(block); fc_mutate(literals); @@ -418,6 +411,7 @@ OBJECT baker_gc_mutate_object(STATE, baker_gc g, OBJECT obj) { xassert(obj->klass != Qnil); dest = heap_copy_object(g->next, obj); g->used++; + /* when object is moved to new heap forwarded pointer is left in new generation ("from") heap */ baker_gc_set_forwarding_address(obj, dest); if(!obj->ForeverYoung) INCREMENT_AGE(dest); if(obj->obj_type == WrapsStructType) MARK_WRAPPED_STRUCT(obj); @@ -480,10 +474,11 @@ unsigned int baker_gc_collect(STATE, baker_gc g, ptr_array roots) { size_t i, sz; OBJECT tmp, root; struct method_cache *end, *ent; + /* rs for remember set */ ptr_array rs; saved_contexts = 0; - + /* increase number of GC cycles passed */ g->num_collection++; /* empty it out. */ @@ -491,8 +486,7 @@ unsigned int baker_gc_collect(STATE, baker_gc g, ptr_array roots) { ptr_array_clear(g->tenured_objects); // printf("Running garbage collector...\n"); - - + /* start tracing from root set */ sz = ptr_array_length(roots); for(i = 0; i < sz; i++) { root = (OBJECT)(ptr_array_get_index(roots, i)); @@ -522,7 +516,7 @@ unsigned int baker_gc_collect(STATE, baker_gc g, ptr_array roots) { } - /* Now the stack. */ + /* Now the stack, sp is for stack pointer. 
*/ OBJECT *sp; sp = state->current_stack; diff --git a/shotgun/lib/baker.h b/shotgun/lib/baker.h index 15dfe90..90ae1ea 100644 --- a/shotgun/lib/baker.h +++ b/shotgun/lib/baker.h @@ -3,6 +3,38 @@ #include "shotgun/lib/heap.h" +/* + Rubinius uses Henry Baker's generational GC that divides the heap into spaces: + notably "from" space and "to" space. After tracing, live objects are + moved from "from" space to "to" + space, leaving a forwarding address that points to the new object location in "to" + space at the old object location. When an object is moved (also referred to as + "evacuated") it may point to objects in "from" heap space. So these + referenced objects are copied as well and the pointers are updated. + This is called scavenging. Then "from" space can be reused and + the heap spaces are flipped. + + Objects are tenured to the old generation after surviving a certain number of + GC cycles. + + The advantages of this algorithm are that it does not stop the world for too long + and leaves much, much less heap fragmentation than a naive mark and sweep + algorithm.
+ + Overview of Baker's algorithm is available online at + http://web.media.mit.edu/~lieber/Lieberary/GC/Realtime/Realtime.html + + space_a and space_b are two heap spaces used + current is "from" heap space + next is "to" heap space + used is how much memory overall heap uses + (offset = current heap peak position - heap bottom) + + tenure_age is how many times object should be traced before + tenuring: may vary between GC instances + num_collection is how many GC cycles passed + +*/ struct baker_gc_struct { rheap space_a; rheap space_b; diff --git a/shotgun/lib/cpu.c b/shotgun/lib/cpu.c index 2126070..fcc4e17 100644 --- a/shotgun/lib/cpu.c +++ b/shotgun/lib/cpu.c @@ -66,6 +66,7 @@ OBJECT cpu_scope_pop(STATE, cpu c) { return c->current_scope; } +/* initializes VM core: from global methods, main routine to current scope, thread and so forth */ void cpu_initialize_context(STATE, cpu c) { c->active_context = Qnil; c->depth = 0; diff --git a/shotgun/lib/cpu.h b/shotgun/lib/cpu.h index db833f3..962e53a 100644 --- a/shotgun/lib/cpu.h +++ b/shotgun/lib/cpu.h @@ -46,11 +46,12 @@ unsigned int sp; \ unsigned int fp; +/* just an optimisation: context with direct access to size */ struct fast_context { CPU_REGISTERS unsigned int size; }; - +/* fast context treats OBJECT as series of bytes instead of normal object */ #define FASTCTX(ctx) ((struct fast_context*)BYTES_OF(ctx)) /* 1Meg of stack */ @@ -62,6 +63,27 @@ struct fast_context { #define TASK_SET_FLAG(task, flag) (task->flags |= flag) #define TASK_CLEAR_FLAG(task, flag) (task->flags ^= flag) +/* + * stack_slave: when tasks are duplicated they share the same stack (TODO: clarify) + * cache_index is deprecated after SendSite introduction + * exception + * stack_top is pointer to the stack top + * stack_size is obviously the size of the stack + * enclosing_class + * active_context is context being executed + * home_context + * main + * paths + * depth is stack depth + * current_scope is current visibility scope + * ip_ptr is 
instruction pointer + * sp_ptr is stack pointer + * call_flags + * debug_channel + * flags + * control_channel + * blockargs is list of arguments passed to the block of executed method + */ #define CPU_TASK_REGISTERS long int args; \ unsigned long int stack_slave; \ long int cache_index; \ @@ -85,14 +107,32 @@ struct cpu_task_shared { CPU_TASK_REGISTERS; }; +/* + active: true for active tasks + saved_errno: error code saved when the trouble strikes +*/ struct cpu_task { CPU_TASK_REGISTERS; unsigned int active; int saved_errno; }; +/* + Normal registers are saved and restored per new method call. + Task registers are saved and restored when tasks are switched. + + self is current object which self pseudo variable points to + sender is message sender + locals is list of local variables of the scope + IP is for instruction pointer + SP is for stack pointer + FP is for execution frame pointer + + these point to different locations on the stack + see http://en.wikipedia.org/wiki/Call_stack for more details + + */ struct rubinius_cpu { - /* Normal registers are saved and restored per new method call . */ OBJECT self, sender; OBJECT locals; IP_TYPE *data; @@ -108,7 +148,6 @@ struct rubinius_cpu { OBJECT current_thread, main_thread; int in_primitive; - /* Task registers are saved and restored when tasks are switched. 
*/ CPU_TASK_REGISTERS; }; diff --git a/shotgun/lib/environment.c b/shotgun/lib/environment.c index 64d0942..904bc8b 100644 --- a/shotgun/lib/environment.c +++ b/shotgun/lib/environment.c @@ -182,6 +182,7 @@ int environment_load_machine(environment e, machine m) { return TRUE; } +/* NOTE: this duplicates rubinius_global from environment.h */ struct thread_args { environment e; machine m; diff --git a/shotgun/lib/environment.h b/shotgun/lib/environment.h index ee781d6..685ae80 100644 --- a/shotgun/lib/environment.h +++ b/shotgun/lib/environment.h @@ -4,6 +4,17 @@ #include <ev.h> #include "shotgun/lib/machine.h" +/* + Rubinius environment stores load paths, + platform configuration, list of spawned machines, + event loop + and synchronization mutex. + + One environment is automatically created on + VM start. + + Each environment lives in it's own pthread + */ struct rubinius_environment { pthread_mutex_t mutex; struct hashtable *machines; @@ -12,6 +23,7 @@ struct rubinius_environment { char *platform_path; char *core_path; char *loader_path; + int machine_id; struct hashtable *messages; diff --git a/shotgun/lib/hash.c b/shotgun/lib/hash.c index 4e6d5c6..cca840d 100644 --- a/shotgun/lib/hash.c +++ b/shotgun/lib/hash.c @@ -191,7 +191,7 @@ OBJECT hash_add(STATE, OBJECT h, unsigned int hsh, OBJECT key, OBJECT data) { // printf("hash_add: adding %od\n",hsh); entry = hash_find_entry(state, h, hsh); - + if(RTEST(entry)) { tuple_put(state, entry, 2, data); return data; @@ -207,6 +207,7 @@ OBJECT hash_add(STATE, OBJECT h, unsigned int hsh, OBJECT key, OBJECT data) { return data; } +/* Sets key/value pair to hash */ OBJECT hash_set(STATE, OBJECT hash, OBJECT key, OBJECT val) { return hash_add(state, hash, object_hash_int(state, key), key, val); } diff --git a/shotgun/lib/heap.c b/shotgun/lib/heap.c index 2a2b45a..5768ae1 100644 --- a/shotgun/lib/heap.c +++ b/shotgun/lib/heap.c @@ -34,19 +34,21 @@ int heap_allocate_memory(rheap h) { h->last = (void*)((uintptr_t)h->address + 
h->size - 1); return heap_reset(h); } - +/* make current and scan position point to heap bottom */ int heap_reset(rheap h) { h->current = h->address; h->scan = h->current; return TRUE; } - + +/* heap allocation predicate */ int heap_allocated_p(rheap h) { return h->address > 0; } #ifndef FAST_HEAP +/* whether given address is between heap bottom and heap top */ int heap_contains_p(rheap h, address addr) { if(addr < h->address) return FALSE; @@ -65,6 +67,7 @@ address heap_allocate(rheap h, int size) { return addr; } +/* whether SIZE bytes fit the heap without calling for heap enlarge */ int heap_enough_space_p(rheap h, int size) { if (size < 0) abort(); if(h->current + size > h->last + 1) return FALSE; @@ -73,6 +76,7 @@ int heap_enough_space_p(rheap h, int size) { #endif +/* whether number of fields fit the heap without calling for heap enlarge */ int heap_enough_fields_p(rheap h, int fields) { int size; @@ -85,6 +89,7 @@ int heap_enough_fields_p(rheap h, int fields) { OBJECT heap_copy_object(rheap h, OBJECT obj) { address out; int size; + /* avoid copying what's already on the heap */ if(heap_contains_p(h, obj)) return obj; size = SIZE_IN_BYTES(obj); @@ -94,7 +99,11 @@ OBJECT heap_copy_object(rheap h, OBJECT obj) { return (OBJECT)out; } -/* Functions to support the cheney scan algorithm. */ +/* + * Shotgun uses a very slight variation of Cheney's algorithm for the young generation.
+ * + * See http://en.wikipedia.org/wiki/Cheney_algorithm + */ OBJECT heap_next_object(rheap h) { return (OBJECT)(h->current); @@ -104,6 +113,7 @@ int heap_fully_scanned_p(rheap h) { return h->scan == h->current; } +/* makes scan point to next object location */ OBJECT heap_next_unscanned(rheap h) { OBJECT obj; if(heap_fully_scanned_p(h)) return 0; diff --git a/shotgun/lib/heap.h b/shotgun/lib/heap.h index 08265fb..09a9fe5 100644 --- a/shotgun/lib/heap.h +++ b/shotgun/lib/heap.h @@ -5,9 +5,13 @@ typedef void* address; struct heap { size_t size; + /* lower bound */ address address; + /* current tip of the heap */ address current; + /* upper bound */ address last; + /* GC's scanner position */ address scan; }; @@ -27,6 +31,7 @@ int heap_fully_scanned_p(rheap h); OBJECT heap_next_unscanned(rheap h); int heap_enough_fields_p(rheap h, int fields); +/* controls fast heap allocation using inline functions on and off */ #define FAST_HEAP 1 #ifdef FAST_HEAP @@ -68,4 +73,3 @@ unsigned int heap_enough_space_p(rheap h, unsigned int size); #endif #endif - diff --git a/shotgun/lib/machine.c b/shotgun/lib/machine.c index 1775975..62b795d 100644 --- a/shotgun/lib/machine.c +++ b/shotgun/lib/machine.c @@ -32,32 +32,45 @@ static int _recursive_reporting = 0; +/* use this to convert symbol into Ruby string + * st is for state + */ #define SYM2STR(st, sym) string_byte_address(st, rbs_symbol_to_string(st, sym)) +/* outputs limited number of lines of VM call stack */ void machine_print_callstack_limited(machine m, int maxlev) { + /* context we are in */ OBJECT context, tmp; + /* module name, method name and file name */ const char *modname, *methname, *filename; + /* means of optimisation: fast context treats OBJECT as series of bytes instead of normal object */ struct fast_context *fc; - + /* use current machine if not explicitly given */ if(!m) m = current_machine; - + /* get current context */ context = m->c->active_context; + /* flush stack and instruction pointers */ 
cpu_flush_ip(m->c); cpu_flush_sp(m->c); + /* make VM instruction pointer point to context instruction pointer */ FASTCTX(context)->ip = m->c->ip; - + + /* while there's a context and trace level is not reached */ while(RTEST(context) && maxlev--) { + methctx_reference(m->s, context); fc = FASTCTX(context); - + + /* figure out module name */ if(fc->method_module && RTEST(fc->method_module)) { modname = SYM2STR(m->s, module_get_name(fc->method_module)); } else { modname = "<none>"; } - + + /* figure out method name */ if(fc->type == FASTCTX_BLOCK) { methname = "<block>"; } else if(fc->name && RTEST(fc->name)) { @@ -70,6 +83,7 @@ void machine_print_callstack_limited(machine m, int maxlev) { methname = "<none>"; } + /* figure out filename */ if(fc->method && RTEST(fc->method)) { tmp = cmethod_get_file(fc->method); if(SYMBOL_P(tmp)) { @@ -81,24 +95,31 @@ void machine_print_callstack_limited(machine m, int maxlev) { filename = "<unknown>"; } + /* execution logging */ fprintf(stderr, "%10p %s#%s+%d in %s:%d\n", (void*)context, modname, methname, fc->ip, filename, cpu_ip2line(m->s, fc->method, fc->ip) ); + /* transfer control back to message sender */ context = fc->sender; } } +/* prints the whole VM call stack */ void machine_print_callstack(machine m) { machine_print_callstack_limited(m, -1); } +/* prints of given VM */ void machine_print_stack(machine m) { unsigned int i, start, end; + /* flush stack pointer */ cpu_flush_sp(m->c); + /* get stack pointer */ i = m->c->sp; + /* ASK */ start = (i < 5 ? 0 : i - 5); end = (i + 5 > m->c->stack_size) ? 
m->c->stack_size : i + 5; for(i = start; i < end; i++) { @@ -112,8 +133,11 @@ void machine_print_stack(machine m) { } +/* prints VM registers content */ void machine_print_registers(machine m) { + /* flush stack pointer */ cpu_flush_sp(m->c); + /* flush instruction pointer */ cpu_flush_ip(m->c); printf("IP: %04d\nSP: %04d\n", m->c->ip, m->c->sp); if(NIL_P(m->c->exception)) { @@ -123,18 +147,23 @@ void machine_print_registers(machine m) { } } +/* handles error reporting */ void _machine_error_reporter(int sig, siginfo_t *info, void *ctx) { + /* name of signal VM recieved */ const char *signame; + /* Rubinius native interface context */ rni_context *rni_ctx; OBJECT addr; /* See if the error happened during the running of a C function. If so, we raise an exception about the error. */ + /* Grab Subtend context first */ rni_ctx = subtend_retrieve_context(); if(rni_ctx->nmc && rni_ctx->nmc->system_set) { /* TODO: generate the C backtrace as a string array and pass it via the nmc or global_context so that the exception can include it. 
*/ + /* Set RNI faulting instruction and switch to system context */ rni_ctx->fault_address = info->si_addr; rni_ctx->nmc->jump_val = SEGFAULT_DETECTED; setcontext(&rni_ctx->nmc->system); @@ -192,21 +221,27 @@ void _machine_error_reporter(int sig, siginfo_t *info, void *ctx) { exit(-2); } +/* initialize signals handling and errors reporting */ void machine_setup_signals(machine m) { - m->error_report.sa_sigaction = _machine_error_reporter; - sigemptyset(&m->error_report.sa_mask); - m->error_report.sa_flags = SA_SIGINFO; - sigaction(SIGSEGV, &m->error_report, NULL); - sigaction(SIGBUS, &m->error_report, NULL); - sigaction(SIGABRT, &m->error_report, NULL); + m->error_report.sa_sigaction = _machine_error_reporter; + sigemptyset(&m->error_report.sa_mask); + m->error_report.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &m->error_report, NULL); + sigaction(SIGBUS, &m->error_report, NULL); + sigaction(SIGABRT, &m->error_report, NULL); } +/* initialize event base used by libevent */ static void machine_setup_events(machine m) { /* libev will not "autodetect" kqueue because it is broken on darwin */ m->s->event_base = ev_loop_new(EVFLAG_FORKCHECK); m->s->thread_infos = NULL; } +/* Creates and initializes Rubinius VM. + * Sets up VM CPU, subtend, context and state, universe and everything. + * + */ machine machine_new(environment e) { machine m; int pipes[2]; @@ -220,21 +255,24 @@ machine machine_new(environment e) { /* Setup pipes used for message notification. */ m->message_read_fd = pipes[0]; m->message_write_fd = pipes[1]; - + /* initialize VM state */ m->s = rubinius_state_new(); + /* allocate memory for VM cpu and initialize it's paths list */ m->c = cpu_new(m->s); /* Initialize the instruction addresses. 
*/ cpu_run(m->s, m->c, TRUE); m->c->ip_ptr = &m->s->external_ip; + /* setup signals and events handling */ machine_setup_signals(m); machine_setup_events(m); + cpu_initialize(m->s, m->c); cpu_bootstrap(m->s); subtend_setup(m->s); cpu_setup_top_scope(m->s, m->c); cpu_initialize_context(m->s, m->c); - + /* make MAIN Ruby constant point to main routine of the application */ machine_set_const(m, "MAIN", m->c->main); cpu_task_configure_preemption(m->s); environment_add_machine(e, m); @@ -244,20 +282,27 @@ machine machine_new(environment e) { return m; } +/* destroys VM cpu and state objects, frees memory */ void machine_destroy(machine m) { cpu_destroy(m->c); state_destroy(m->s); free(m); } +/* handles errors gracefully */ void machine_handle_fire(int kind) { + /* store type of violation */ current_machine->g_access_violation = kind; + /* then switch to special context to handle it gracefully */ setcontext(&current_machine->g_firesuit); } +/* handles type errors in the VM */ void machine_handle_type_error(OBJECT obj, const char *message) { + /* store error message */ current_machine->g_firesuit_message = strdup(message); - + + /* let the error handler know the error type */ if(FIXNUM_P(obj)) { current_machine->g_firesuit_arg = FixnumType; } else if(SYMBOL_P(obj)) { @@ -269,21 +314,25 @@ void machine_handle_type_error(OBJECT obj, const char *message) { } else { current_machine->g_firesuit_arg = 0; } - + /* now handle it */ machine_handle_fire(FIRE_TYPE); } +/* handles failed assertions in the VM code */ void machine_handle_assert(const char *reason, const char *file, int line) { fprintf(stderr, "VM Assertion: %s (%s:%d)\n", reason, file, line); + /* print the whole backtrace */ printf("\nRuby backtrace:\n"); machine_print_callstack(current_machine); + /* if firesuit is off exit otherwise store error type and switch to special handling context */ if(!current_machine->g_use_firesuit) abort(); current_machine->g_access_violation = FIRE_ASSERT; setcontext(&current_machine->g_firesuit); } +/*
returns unmarshalled file */ OBJECT machine_load_file(machine m, const char *path) { return cpu_unmarshal_file(m->s, path, 0); } @@ -306,10 +355,12 @@ void machine_show_exception(machine m, OBJECT exc) { puts(""); } +/* initializes VM globals, clear instruction pointer, op, firesuit and so forth */ int machine_run(machine m) { + /* here's where the actual initialization starts */ cpu_run(m->s, m->c, 0); m->c->ip_ptr = &m->s->external_ip; - + /* report if there's an exception */ if(RTEST(m->c->exception)) { printf("Toplevel exception detected.\n"); machine_show_exception(m, m->c->exception); @@ -318,6 +369,7 @@ int machine_run(machine m) { return TRUE; } +/* loads and executes a script */ int machine_run_file(machine m, const char *path) { OBJECT meth; int out; @@ -325,13 +377,14 @@ int machine_run_file(machine m, const char *path) { if(m->s->excessive_tracing) { printf("[ Loading file %s]\n", path); } - + meth = machine_load_file(m, path); if(!RTEST(meth)) { printf("Unable to load '%s'.\n", path); return FALSE; } - + + /* re-init cpu stack */ m->c->depth = 0; cpu_stack_push(m->s, m->c, meth, FALSE); cpu_run_script(m->s, m->c, meth); @@ -342,16 +395,19 @@ int machine_run_file(machine m, const char *path) { return out; } +/* sets contstand under given module or class */ void machine_set_const_under(machine m, const char *str, OBJECT val, OBJECT under) { OBJECT tbl; tbl = module_get_constants(under); lookuptable_store(m->s, tbl, string_new(m->s, str), val); } +/* Sets constant under Object class */ void machine_set_const(machine m, const char *str, OBJECT val) { machine_set_const_under(m, str, val, m->s->global->object); } +/* stores Ruby VM launch arguments */ void machine_save_args(machine m, int argc, char **argv) { char **na; na = calloc(argc, sizeof(char*)); @@ -363,6 +419,7 @@ void machine_save_args(machine m, int argc, char **argv) { machine_setup_argv(m, argc, argv); } +/* Sets standard IO streams (stdin, stdout, stderr) to Ruby constants */ void 
machine_setup_standard_io(machine m) { machine_set_const(m, "STDIN", io_new(m->s, 0, "r")); machine_set_const(m, "STDOUT", io_new(m->s, 1, "w")); @@ -391,6 +448,7 @@ int *machine_setup_piped_io(machine m) { return pipes; } +/* sets up RUBY_BIN_PATH Ruby constant and VM interpreter name */ void machine_setup_ruby(machine m, char *name) { char buf[MAXPATHLEN]; char wd[MAXPATHLEN]; @@ -407,6 +465,7 @@ void machine_setup_ruby(machine m, char *name) { m->interpreter = strdup(name); } +/* sets ARGV and ARG0 Ruby constants */ void machine_setup_argv(machine m, int argc, char **argv) { OBJECT ary; int i; @@ -421,6 +480,7 @@ void machine_setup_argv(machine m, int argc, char **argv) { machine_set_const(m, "ARGV", ary); } +/* utility: checks whether string contains only digits */ int is_number(char *str) { while(*str) { if(!isdigit(*str)) return FALSE; @@ -430,6 +490,7 @@ int is_number(char *str) { return TRUE; } +/* utility: strips trailing non-alnum chars from string */ static char *trim_str(char *str) { int i; while(*str && !isalnum(*str)) str++; @@ -508,20 +569,27 @@ void machine_parse_config_file(machine m, const char *path) { } void machine_migrate_config(machine m) { + /* hash table iterator */ struct hashtable_itr iter; + /* VM state */ rstate state = m->s; - + + /* initialize new hash for environment global configuration */ m->s->global->config = hash_new_sized(m->s, 500); - + + /* unless config is empty */ if(hashtable_count(m->s->config) > 0) { - + /* Iterate through it */ hashtable_iterator_init(&iter, m->s->config); do { + /* Key, value */ OBJECT ok, ov; bstring k = (bstring)hashtable_iterator_key(&iter); bstring v = (bstring)hashtable_iterator_value(&iter); + /* object key: Ruby string created from bstring library C string */ ok = string_newfrombstr(m->s, k); + /* ASK */ if(is_number(bdata(v))) { ov = LL2N(strtoll(bdatae(v,""), NULL, 10)); } else { @@ -532,11 +600,12 @@ void machine_migrate_config(machine m) { } while (hashtable_iterator_advance(&iter)); } - + 
/* Make RUBY_CONFIG point to global configuration */ machine_set_const(m, "RUBY_CONFIG", m->s->global->config); machine_setup_from_config(m); } +/* applies debug configuraiton options to VM state */ void machine_setup_from_config(machine m) { bstring s; @@ -555,6 +624,7 @@ void machine_setup_from_config(machine m) { bdestroy (s); } +/* initializes platform and arc related Ruby constants like RUBY_PLATFORM, OS and L64 */ void machine_setup_config(machine m) { OBJECT mod; STATE; @@ -698,6 +768,7 @@ void machine_setup_config(machine m) { machine_set_const_under(m, "MESSAGE_IO", io_new(m->s, m->message_read_fd, "r"), mod); } +/* sets up debugging flags */ void machine_config_env(machine m) { char *config; if(getenv("RDEBUG")) { @@ -719,6 +790,7 @@ void machine_config_env(machine m) { } } +/* loads files in the directory, respects load order hint file .load_order.txt */ int machine_load_directory(machine m, const char *prefix) { char *path; char *file; @@ -792,6 +864,12 @@ int machine_load_object(machine m, char *name, uint8_t *data, long length) { return TRUE; } +/* + * loads and executes Rubinius bytecode archive (*.rba, similar to *.jar or *.elc) + * + * Rubinius' rba files are essentialy zip archived bytecode. .load_order file + * must be loaded first to respect bytecode dependencies. + */ int machine_load_ar(machine m, const char *path) { int ret = FALSE; @@ -815,6 +893,7 @@ int machine_load_rba(machine m, const char *path) { int machine_load_bundle(machine m, const char *path) { struct stat sb; + /* return false if file does no exist */ if(stat(path, &sb) != 0) return FALSE; if(S_ISDIR(sb.st_mode)) { @@ -824,6 +903,11 @@ int machine_load_bundle(machine m, const char *path) { return machine_load_rba(m, path); } +/* 1. saves the command line arguments used to invoke the VM. + * 2. initializes platform and arc related Ruby constants + * 3. sets up debugging flags from configuration + * 4. 
sets standard IO streams (stdin, stdout, stderr) to Ruby constants STDIN, STDOUT and STDERR + */ void machine_setup_normal(machine m, int argc, char **argv) { machine_save_args(m, argc, argv); machine_setup_config(m); @@ -831,6 +915,13 @@ void machine_setup_normal(machine m, int argc, char **argv) { machine_setup_standard_io(m); } +/* + * 1. sets inferior machine flag to true (inferior machine is one spawned by another, a "child") + * 2. saves the command line arguments used to invoke the VM. + * 3. initializes platform and arc related Ruby constants + * 4. sets up debugging flags from configuration + * 5. sets up piped IO streams (stdin, stdout, stderr) to Ruby constants STDIN, STDOUT and STDERR + */ int *machine_setup_thread(machine m, int argc, char **argv) { m->sub = TRUE; machine_save_args(m, argc, argv); @@ -838,4 +929,3 @@ int *machine_setup_thread(machine m, int argc, char **argv) { machine_config_env(m); return machine_setup_piped_io(m); } - diff --git a/shotgun/lib/machine.h b/shotgun/lib/machine.h index c5e82b4..98f46a6 100644 --- a/shotgun/lib/machine.h +++ b/shotgun/lib/machine.h @@ -9,13 +9,37 @@ typedef struct rubinius_machine *machine; #include "shotgun/lib/shotgun.h" #include "shotgun/lib/environment.h" +/* + Rubinius supports multiple VM (MVM) feature: machines may be spawned off other machines, + every machine has an identifier which is incremented when new machine is added to environment. + + Machines pass messages to each other thus concurrency is cooperative like in Erlang. + Incoming and outcoming messages are passed using stream descriptors. + Incoming messages queue can be accessed in Ruby as MESSAGE_IO constant. + + Each VM operates in a separate pthread. Spawned VMs are known as "inferior VMs", + this is reflected by VM_INFERIOR constant value in Ruby. VMs has name and keep + arguments (notably argc/argv) passed on run. + + To carry VM context around Rubinius uses rstate structure. 
It contains a variety of things + from object memory to stack frames state and so forth. + + If SIGSEGV/SIGBUS/SIGABRT happens during the execution it is handled by a special context + known as the firesuit. Error message and type are accessible and shown in VM backtrace. + + To make the Rubinius VM print additional information, set the show_config flag to 1. + + Rubinius uses a CPU abstraction for its virtual machine. + */ struct rubinius_machine { int id; int parent_id; pthread_t pthread; + /* whether it is an inferior VM: i.e. spawned by another VM (exposed to Ruby as VM_INFERIOR) */ int sub; int message_read_fd; int message_write_fd; + /* VM state: carried around to keep VM context */ rstate s; cpu c; struct sigaction error_report; @@ -23,11 +47,13 @@ struct rubinius_machine { int argc; char **argv; int show_config; - ucontext_t g_firesuit; /* work around a bug in 10.5's libc versus header files */ #if defined(__APPLE__) && defined(HAS_UCONTEXT) /* patch for tiger */ _STRUCT_MCONTEXT __system_mc; #endif + +/* these members relate to segfaults handling so they get reported correctly and VM exit gracefully(ish) */ + ucontext_t g_firesuit; int g_use_firesuit; int g_access_violation; int g_firesuit_arg; diff --git a/shotgun/lib/object.h b/shotgun/lib/object.h index 5af6e35..5b5bca9 100644 --- a/shotgun/lib/object.h +++ b/shotgun/lib/object.h @@ -52,7 +52,7 @@ static inline void object_copy_body(STATE, OBJECT self, OBJECT dest) { memcpy(object_byte_start(state, dest), object_byte_start(state, self), s1); } - +/* Ruby's is_a?
*/ #define ISA(o, c) object_kind_of_p(state, o, c) static inline uintptr_t object_get_id(STATE, OBJECT self) { diff --git a/shotgun/lib/object_memory.h b/shotgun/lib/object_memory.h index b9c7746..fc5afc7 100644 --- a/shotgun/lib/object_memory.h +++ b/shotgun/lib/object_memory.h @@ -15,17 +15,26 @@ #define OMCollectYoung 0x1 #define OMCollectMature 0x2 +/* set of flags */ struct object_memory_struct { + /* */ int collect_now; + /* */ int enlarge_now; + /* */ int tenure_now; + /* */ int new_size; + /* */ int last_object_id; + /* Rubinius uses Baker's generational GC as well asm marking and sweeping */ baker_gc gc; mark_sweep_gc ms; + /* */ int last_tenured; + /* */ int bootstrap_loaded; - + /* */ rheap contexts; /* The first not referenced stack context */ OBJECT context_bottom; diff --git a/shotgun/lib/oop.h b/shotgun/lib/oop.h index 5631dc0..7c6e18c 100644 --- a/shotgun/lib/oop.h +++ b/shotgun/lib/oop.h @@ -20,7 +20,9 @@ typedef intptr_t native_int; #define TAG_REF 0x0 #define TAG_FIXNUM 0x1 +/* literals are numbers, symbols, ranges, regexp */ #define TAG_LITERAL 0x2 + #define TAG_DATA 0x3 #define TAG(v) (((intptr_t)v) & TAG_MASK) @@ -213,16 +215,18 @@ A rubinius object can be followed by: /* Object access, lowest level. These read and set fields of an OBJECT * directly. They're built on to integrate with the GC properly. 
*/ - #define CLASS_OBJECT(obj) (obj->klass) #define SIZE_OF_OBJECT ((unsigned int)(sizeof(OBJECT))) - #define NUM_FIELDS(obj) (obj->field_count) #define SET_NUM_FIELDS(obj, fel) (obj->field_count = fel) +/* size of rubinius_object_t plus size of fields (all in bytes) */ #define SIZE_IN_BYTES_FIELDS(fel) ((unsigned int)(sizeof(struct rubinius_object_t) + \ fel*SIZE_OF_OBJECT)) +/* size of rubinius_object_t and fields in words */ #define SIZE_IN_WORDS_FIELDS(fel) (sizeof(struct rubinius_object_t)/SIZE_OF_OBJECT + fel) +/* size of object in bytes */ #define SIZE_IN_BYTES(obj) SIZE_IN_BYTES_FIELDS(obj->field_count) +/* size of fields only */ #define SIZE_OF_BODY(obj) (obj->field_count * SIZE_OF_OBJECT) #define ADDRESS_OF_FIELD(obj, fel) (&obj->field[fel]) #define NTH_FIELD_DIRECT(obj, fel) (obj->field[fel]) @@ -259,18 +263,24 @@ to be a simple test for that bit pattern. #define Qtrue ((OBJECT)10L) #define Qundef ((OBJECT)18L) +/* true if v is Ruby false */ #define FALSE_P(v) ((OBJECT)(v) == (OBJECT)Qfalse) +/* true if v is Ruby true */ #define TRUE_P(v) ((OBJECT)(v) == (OBJECT)Qtrue) +/* true if v is Ruby nil */ #define NIL_P(v) ((OBJECT)(v) == (OBJECT)Qnil) +/* true if value of v is undefined */ #define UNDEF_P(v) ((OBJECT)(v) == (OBJECT)Qundef) +/* true if v is not nil */ #define RTEST(v) (((uintptr_t)(v) & 0x7) != 0x6) - +/* true if v is a reference */ #define REFERENCE_P(v) (TAG(v) == TAG_REF) - +/* same as REFERENCE_P but checks v value first */ #define REFERENCE2_P(v) (v && REFERENCE_P(v)) #define INDEXED(obj) (REFERENCE_P(obj) && !obj->StoresBytes) +/* copy flags not used by garbage collector */ static inline void object_copy_nongc_flags(OBJECT target, OBJECT source) { target->obj_type = source->obj_type; @@ -282,17 +292,20 @@ static inline void object_copy_nongc_flags(OBJECT target, OBJECT source) } #define CLEAR_FLAGS(obj) (obj)->all_flags = 0 +/* stack context has GC zone unspecified */ #define stack_context_p(obj) ((obj)->gc_zone == UnspecifiedZone) +/* use 
this to check forwarded pointer on object: + * when object is copied from space to space by GC, + * forwarding pointer is left in old location + */ #define SET_FORWARDED(obj) (obj)->Forwarded = TRUE #define FORWARDED_P(obj) ((obj)->Forwarded) - +/* objects are getting old after surviving GC tracing */ #define AGE(obj) (obj->copy_count) #define CLEAR_AGE(obj) (obj->copy_count = 0) #define INCREMENT_AGE(obj) (obj->copy_count++) /* Object access. */ - -/* Setting the class of an object */ #define rbs_set_class(om, obj, cls) ({ \ OBJECT _o = (obj), _c = (cls); \ RUN_WB2(om, _o, _c); _o->klass = _c; }) @@ -331,10 +344,12 @@ static inline void object_copy_nongc_flags(OBJECT target, OBJECT source) RUN_WB2(om, _o, _v); \ SET_FIELD_DIRECT(_o, fel, _v); }) +/* alias macro for obtaining object field directly by position */ #define rbs_get_field(obj, fel) NTH_FIELD_DIRECT(obj, fel) #else /*DISABLE_CHECKS*/ +/* compared to _bad_reference prints more verbose error message */ static void _bad_reference2(OBJECT in, int fel) { printf("Attempted to access field %d in an object with %lu fields.\n", fel, (unsigned long)NUM_FIELDS(in)); @@ -346,8 +361,11 @@ static void _bad_reference2(OBJECT in, int fel) { #if EXTRA_PROTECTION +/* versions that check for reference */ + static void _bad_reference(OBJECT in) { printf("Attempted to access field of non-reference.\n"); + /* handle segfault */ if(current_machine->g_use_firesuit) { machine_handle_fire(FIRE_NULL); } @@ -369,7 +387,7 @@ static void _bad_reference(OBJECT in) { #else /*EXTRA_PROTECTION*/ -/* These are the typically used versions. The don't check for ref, they +/* These are the typically used versions. They don't check for ref, they the segfault handler do that. */ #define rbs_set_field(om, obj, fel, val) ({ \ @@ -400,6 +418,7 @@ static void _bad_reference(OBJECT in) { /* Type tests. */ #define RTYPE(obj,type) (REFERENCE_P(obj) && obj->obj_type == type) +/* shortcut, doing what Ruby's is_a? 
does */ #define RISA(obj,cls) (REFERENCE_P(obj) && ISA(obj,BASIC_CLASS(cls))) #define BIGNUM_P(obj) (RTYPE(obj, BignumType)) diff --git a/shotgun/lib/shotgun.h b/shotgun/lib/shotgun.h index 0f9fb5e..15a2c9b 100644 --- a/shotgun/lib/shotgun.h +++ b/shotgun/lib/shotgun.h @@ -2,7 +2,7 @@ #define RBS_SHOTGUN_H #define INTERNAL_DEBUG 0 - +/* whether to track various VM stats */ #define TRACK_STATS 0 #define DISABLE_CHECKS 1 // #define TIME_LOOKUP 1 diff --git a/shotgun/lib/state.h b/shotgun/lib/state.h index f070a4e..958a4b2 100644 --- a/shotgun/lib/state.h +++ b/shotgun/lib/state.h @@ -137,6 +137,7 @@ struct rubinius_state { rni_handle_table *handle_tbl; + /* pointer to bottom of the stack */ unsigned long *stack_bottom; struct hashtable *cleanup; @@ -146,9 +147,10 @@ struct rubinius_state { void *thread_infos; unsigned int event_id; + /* Stuff the sampling profiler uses, not critical for VM operations */ OBJECT *samples; int max_samples, cur_sample; - + /* again, profiler stats */ int excessive_tracing, gc_stats; int check_events, pending_threads, pending_events; diff --git a/shotgun/lib/string.c b/shotgun/lib/string.c index b50c926..ba8d07b 100644 --- a/shotgun/lib/string.c +++ b/shotgun/lib/string.c @@ -109,6 +109,7 @@ OBJECT string_append(STATE, OBJECT self, OBJECT other) { return self; } +/* returns pointer to the bytearray the string is based on */ char *string_byte_address(STATE, OBJECT self) { OBJECT data; diff --git a/shotgun/lib/subtend/nmc.h b/shotgun/lib/subtend/nmc.h index 8e88102..aaa02b0 100644 --- a/shotgun/lib/subtend/nmc.h +++ b/shotgun/lib/subtend/nmc.h @@ -4,7 +4,7 @@ #include "shotgun/lib/cpu.h" #include "shotgun/lib/subtend/nmethod.h" - +/* Rubinius native interface: native method context */ struct rni_nmc { int num_handles; int used;
From the Design Piracy series on my blog: