Pastie now auto-senses if line-wrap is a bad or good idea. Feedback?
## mark a section (Learn more)
This paste will be private.
diff --git a/shotgun/lib/baker.c b/shotgun/lib/baker.c index 1c16e44..1931bf9 100644 --- a/shotgun/lib/baker.c +++ b/shotgun/lib/baker.c @@ -9,9 +9,12 @@ #include "shotgun/lib/baker.h" #include "shotgun/lib/tuple.h" +/* how many times object should be traced before tenuring */ #define DEFAULT_TENURE_AGE 6 +/* creates and initializes new copying GC instance */ baker_gc baker_gc_new(int size) { + /* holds copying GC state */ baker_gc g; g = (baker_gc)calloc(1, sizeof(struct baker_gc_struct)); g->remember_set = ptr_array_new(8); @@ -20,6 +23,7 @@ baker_gc baker_gc_new(int size) { g->space_a = heap_new(size); g->space_b = heap_new(size); + /* start with space A as "from" space and B as "to" space */ g->current = g->space_a; g->next = g->space_b; g->used = 0; @@ -29,13 +33,15 @@ baker_gc baker_gc_new(int size) { return g; } +/* prints various garbage collector information */ void baker_gc_describe(baker_gc g) { - printf("Size: %x (%d)\n", (unsigned int)g->current->size, (int)g->current->size); - printf("Current: %p => %p\n", (void*)g->current->address, (void*)g->current->last); - printf("Next: %p => %p\n", (void*)g->next->address, (void*)g->next->last); - printf("RS Size: %zd\n", ptr_array_length(g->remember_set)); + printf("Size : %x (%d)\n", (unsigned int)g->current->size, (int)g->current->size); + printf("Current (\"from\") heap space address: %p => %p\n", (void*)g->current->address, (void*)g->current->last); + printf("Next (\"to\") heap space address: %p => %p\n", (void*)g->next->address, (void*)g->next->last); + printf("Remember set size: %zd\n", ptr_array_length(g->remember_set)); } +/* baker GC uses two heap spaces of the same size */ int baker_gc_memory_allocated(baker_gc g) { return g->current->size * 2; } @@ -44,12 +50,14 @@ int baker_gc_memory_in_use(baker_gc g) { return g->current->current - g->current->address; } +/* enlarges "to" space of the heap (where new objects are allocated) by value of sz */ int baker_gc_enlarge_next(baker_gc g, size_t sz) { rheap 
h; h = heap_new(sz); return baker_gc_set_next(g, h); } +/* sets "to" heap space */ int baker_gc_set_next(baker_gc g, rheap h) { if(g->next == g->space_a) { g->space_a = h; @@ -60,21 +68,25 @@ int baker_gc_set_next(baker_gc g, rheap h) { return TRUE; } +/* gets start address of current "from" heap space */ address baker_gc_start_address(baker_gc g) { return g->current->address; } +/* Baker GC memory usage */ size_t baker_gc_used(baker_gc g) { return g->used; } +/* reset memory usage info */ void baker_gc_reset_used(baker_gc g) { g->used = 0; } +/* performs flip operation on heap spaces */ int baker_gc_swap(baker_gc g) { rheap tmp; - + /* flip two heaps */ tmp = g->current; g->current = g->next; g->next = tmp; @@ -88,6 +100,7 @@ int baker_gc_swap(baker_gc g) { return TRUE; } +/* deallocates heap spaces and frees GC memory */ int baker_gc_destroy(baker_gc g) { heap_deallocate(g->space_a); heap_deallocate(g->space_b); @@ -95,6 +108,7 @@ int baker_gc_destroy(baker_gc g) { return TRUE; } +/* sets forwarding pointer on object */ void baker_gc_set_forwarding_address(OBJECT obj, OBJECT dest) { SET_FORWARDED(obj); obj->klass = dest; @@ -118,25 +132,6 @@ OBJECT baker_gc_forwarded_object(OBJECT obj) { } \ ret; }) -/* -static inline OBJECT baker_gc_maybe_mutate(baker_gc g, OBJECT iobj) { - OBJECT ret; - - if(baker_gc_forwarded_p(iobj)) { - ret = baker_gc_forwarded_object(iobj); - } else if(baker_gc_contains_p(g, iobj)) { - ret = baker_gc_mutate_object(g, iobj); - } else { - ret = iobj; - } - - // assert(baker_gc_contains_spill_p(g, ret)); - - CHECK_PTR(ret); - - return ret; -} -*/ int _object_stores_bytes(OBJECT self); static int depth = 0; @@ -191,11 +186,12 @@ static void _mutate_references(STATE, baker_gc g, OBJECT iobj) { //printf("%d: Mutating references of %p\n", depth, iobj); if(!_object_stores_bytes(iobj)) { + /* follow object field references and mutate them */ fields = NUM_FIELDS(iobj); for(i = 0; i < fields; i++) { tmp = NTH_FIELD(iobj, i); if(!REFERENCE_P(tmp)) 
continue; - + /* TODO: duplicated in other places: extract to separate function? */ if(FORWARDED_P(tmp)) { mut = tmp->klass; } else if(heap_contains_p(g->current, tmp) || heap_contains_p(state->om->contexts, tmp)) { @@ -203,8 +199,9 @@ static void _mutate_references(STATE, baker_gc g, OBJECT iobj) { } else { mut = tmp; } - + /* update field reference */ SET_FIELD_DIRECT(iobj, i, mut); + /* update remember set, set "remembered" flag on iobj */ RUN_WB2(om, iobj, mut); } } else { @@ -418,6 +415,7 @@ OBJECT baker_gc_mutate_object(STATE, baker_gc g, OBJECT obj) { xassert(obj->klass != Qnil); dest = heap_copy_object(g->next, obj); g->used++; + /* when object is moved to new heap forwarded pointer is left in new generation ("from") heap */ baker_gc_set_forwarding_address(obj, dest); if(!obj->ForeverYoung) INCREMENT_AGE(dest); if(obj->obj_type == WrapsStructType) MARK_WRAPPED_STRUCT(obj); @@ -480,10 +478,11 @@ unsigned int baker_gc_collect(STATE, baker_gc g, ptr_array roots) { size_t i, sz; OBJECT tmp, root; struct method_cache *end, *ent; + /* rs for remember set */ ptr_array rs; saved_contexts = 0; - + /* increase number of GC cycles passed */ g->num_collection++; /* empty it out. */ @@ -491,8 +490,7 @@ unsigned int baker_gc_collect(STATE, baker_gc g, ptr_array roots) { ptr_array_clear(g->tenured_objects); // printf("Running garbage collector...\n"); - - + /* start tracing from root set */ sz = ptr_array_length(roots); for(i = 0; i < sz; i++) { root = (OBJECT)(ptr_array_get_index(roots, i)); diff --git a/shotgun/lib/baker.h b/shotgun/lib/baker.h index 15dfe90..44df639 100644 --- a/shotgun/lib/baker.h +++ b/shotgun/lib/baker.h @@ -3,22 +3,51 @@ #include "shotgun/lib/heap.h" +/* +* Rubinius uses Henry Baker's generational GC that divides heap into spaces: +* notably "from" space and "to" space. After tracing live objects are +* moved from "from" space to "to" +* space leaving forwarding address that points to new object location in "to" +* space at the old object location. 
When object is moved (also referenced as +* "evacuated") it may point to objects in "from" heap space. So these +* referenced objects are copied as well and pointers are updated. +* This is called scavenging. Then "from" space can be reused and +* heap spaces flipped. +* +* Pros of this algorithm is that it does not stop the world for too long +* and leaves much much less heap fragmentation than naive mark and sweep +* algorithm. +* +* Overview of Baker's algorithm is available online at +* http://web.media.mit.edu/~lieber/Lieberary/GC/Realtime/Realtime.html +*/ struct baker_gc_struct { + /* heap spaces copying GC operates on */ rheap space_a; rheap space_b; + /* "from" heap space */ rheap current; + /* "to" heap space */ rheap next; + /* memory usage: offset = current heap peak position - heap bottom */ size_t used; + /* how many times object should be traced before tenuring: may vary between GC instances */ int tenure_age; + /* pointer array of visited objects */ ptr_array remember_set; + /* tenured data and tenuring function */ void *tenure_data; OBJECT (*tenure)(void*, OBJECT obj); int tenure_now; + /* object memory */ void *om; + /* array of weak references being visited during GC cycle */ ptr_array seen_weak_refs; OBJECT become_from, become_to; char *last_start, *last_end; + /* number of GC cycles passed */ int num_collection; + /* objects tenured to old generation after surviving certain number of GC cycles */ ptr_array tenured_objects; }; diff --git a/shotgun/lib/cpu.c b/shotgun/lib/cpu.c index 2126070..fcc4e17 100644 --- a/shotgun/lib/cpu.c +++ b/shotgun/lib/cpu.c @@ -66,6 +66,7 @@ OBJECT cpu_scope_pop(STATE, cpu c) { return c->current_scope; } +/* initializes VM core: from global methods, main routine to current scope, thread and so forth */ void cpu_initialize_context(STATE, cpu c) { c->active_context = Qnil; c->depth = 0; diff --git a/shotgun/lib/cpu.h b/shotgun/lib/cpu.h index db833f3..ef35034 100644 --- a/shotgun/lib/cpu.h +++ b/shotgun/lib/cpu.h @@ 
-46,11 +46,12 @@ unsigned int sp; \ unsigned int fp; +/* just an optimisation: context with direct access to size */ struct fast_context { CPU_REGISTERS unsigned int size; }; - +/* fast context treats OBJECT as series of bytes instead of normal object */ #define FASTCTX(ctx) ((struct fast_context*)BYTES_OF(ctx)) /* 1Meg of stack */ @@ -62,6 +63,27 @@ struct fast_context { #define TASK_SET_FLAG(task, flag) (task->flags |= flag) #define TASK_CLEAR_FLAG(task, flag) (task->flags ^= flag) +/* ASK + * stack_slave + * cache_index + * exception + * stack_top is pointer to the stack top + * stack_size is obviously the size of the stack + * enclosing_class + * active_context + * home_context + * main + * paths + * depth + * current_scope + * ip_ptr + * sp_ptr + * call_flags + * debug_channel + * flags + * control_channel + * blockargs + */ #define CPU_TASK_REGISTERS long int args; \ unsigned long int stack_slave; \ long int cache_index; \ @@ -87,6 +109,7 @@ struct cpu_task_shared { struct cpu_task { CPU_TASK_REGISTERS; + /* whether the task is active */ unsigned int active; int saved_errno; }; @@ -94,12 +117,16 @@ struct cpu_task { struct rubinius_cpu { /* Normal registers are saved and restored per new method call . 
*/ OBJECT self, sender; + /* Local variables */ OBJECT locals; IP_TYPE *data; unsigned short type; unsigned short argcount; + /* Instruction pointer */ unsigned int ip; + /* Stack pointer */ unsigned int sp; + /* Frame pointer */ unsigned int fp; // CPU_REGISTERS; diff --git a/shotgun/lib/environment.h b/shotgun/lib/environment.h index ee781d6..0c20976 100644 --- a/shotgun/lib/environment.h +++ b/shotgun/lib/environment.h @@ -5,13 +5,24 @@ #include "shotgun/lib/machine.h" struct rubinius_environment { + /* mutex which synchronizes several VMs */ pthread_mutex_t mutex; struct hashtable *machines; + /* platform configuration file path */ char *platform_config; + /* bootstrap file path */ char *bootstrap_path; + /* platform-specific code path */ char *platform_path; + /* core classes file path */ char *core_path; + /* loader file path */ char *loader_path; + + /* machines identified by id + * this id is automatically incremented + * when new machine is spawned + */ int machine_id; struct hashtable *messages; diff --git a/shotgun/lib/hash.c b/shotgun/lib/hash.c index 4e6d5c6..cca840d 100644 --- a/shotgun/lib/hash.c +++ b/shotgun/lib/hash.c @@ -191,7 +191,7 @@ OBJECT hash_add(STATE, OBJECT h, unsigned int hsh, OBJECT key, OBJECT data) { // printf("hash_add: adding %od\n",hsh); entry = hash_find_entry(state, h, hsh); - + if(RTEST(entry)) { tuple_put(state, entry, 2, data); return data; @@ -207,6 +207,7 @@ OBJECT hash_add(STATE, OBJECT h, unsigned int hsh, OBJECT key, OBJECT data) { return data; } +/* Sets key/value pair to hash */ OBJECT hash_set(STATE, OBJECT hash, OBJECT key, OBJECT val) { return hash_add(state, hash, object_hash_int(state, key), key, val); } diff --git a/shotgun/lib/heap.c b/shotgun/lib/heap.c index 2a2b45a..5768ae1 100644 --- a/shotgun/lib/heap.c +++ b/shotgun/lib/heap.c @@ -34,19 +34,21 @@ int heap_allocate_memory(rheap h) { h->last = (void*)((uintptr_t)h->address + h->size - 1); return heap_reset(h); } - +/* make current and scan position point 
to heap bottom */ int heap_reset(rheap h) { h->current = h->address; h->scan = h->current; return TRUE; } - +n +/* heap allocation predicate */ int heap_allocated_p(rheap h) { return h->address > 0; } #ifndef FAST_HEAP +/* whether given address is between heap bottom and heap top */ int heap_contains_p(rheap h, address addr) { if(addr < h->address) return FALSE; @@ -65,6 +67,7 @@ address heap_allocate(rheap h, int size) { return addr; } +/* whether SIZE bytes fit the heap without calling for heap enlarge */ int heap_enough_space_p(rheap h, int size) { if (size < 0) abort(); if(h->current + size > h->last + 1) return FALSE; @@ -73,6 +76,7 @@ int heap_enough_space_p(rheap h, int size) { #endif +/* whether number of fields fit the heap without calling for heap enlarge */ int heap_enough_fields_p(rheap h, int fields) { int size; @@ -85,6 +89,7 @@ int heap_enough_fields_p(rheap h, int fields) { OBJECT heap_copy_object(rheap h, OBJECT obj) { address out; int size; + /* avoid copying what's already on the heap */ if(heap_contains_p(h, obj)) return obj; size = SIZE_IN_BYTES(obj); @@ -94,7 +99,11 @@ OBJECT heap_copy_object(rheap h, OBJECT obj) { return (OBJECT)out; } -/* Functions to support the cheney scan algorithm. */ +/* + * Shotgun uses very slight variation Cheney's algorithm for young generation. 
+ * + * See http://en.wikipedia.org/wiki/Cheney_algorithm + */ OBJECT heap_next_object(rheap h) { return (OBJECT)(h->current); @@ -104,6 +113,7 @@ int heap_fully_scanned_p(rheap h) { return h->scan == h->current; } +/* makes scan point to next object location */ OBJECT heap_next_unscanned(rheap h) { OBJECT obj; if(heap_fully_scanned_p(h)) return 0; diff --git a/shotgun/lib/heap.h b/shotgun/lib/heap.h index 08265fb..09a9fe5 100644 --- a/shotgun/lib/heap.h +++ b/shotgun/lib/heap.h @@ -5,9 +5,13 @@ typedef void* address; struct heap { size_t size; + /* lower bound */ address address; + /* current tip of the heap */ address current; + /* upper bound */ address last; + /* GC's scanner position */ address scan; }; @@ -27,6 +31,7 @@ int heap_fully_scanned_p(rheap h); OBJECT heap_next_unscanned(rheap h); int heap_enough_fields_p(rheap h, int fields); +/* controls fast heap allocation using inline functions on and off */ #define FAST_HEAP 1 #ifdef FAST_HEAP @@ -68,4 +73,3 @@ unsigned int heap_enough_space_p(rheap h, unsigned int size); #endif #endif - diff --git a/shotgun/lib/machine.c b/shotgun/lib/machine.c index 1775975..62b795d 100644 --- a/shotgun/lib/machine.c +++ b/shotgun/lib/machine.c @@ -32,32 +32,45 @@ static int _recursive_reporting = 0; +/* use this to convert symbol into Ruby string + * st is for state + */ #define SYM2STR(st, sym) string_byte_address(st, rbs_symbol_to_string(st, sym)) +/* outputs limited number of lines of VM call stack */ void machine_print_callstack_limited(machine m, int maxlev) { + /* context we are in */ OBJECT context, tmp; + /* module name, method name and file name */ const char *modname, *methname, *filename; + /* means of optimisation: fast context treats OBJECT as series of bytes instead of normal object */ struct fast_context *fc; - + /* use current machine if not explicitly given */ if(!m) m = current_machine; - + /* get current context */ context = m->c->active_context; + /* flush stack and instruction pointers */ 
cpu_flush_ip(m->c); cpu_flush_sp(m->c); + /* make VM instruction pointer point to context instruction pointer */ FASTCTX(context)->ip = m->c->ip; - + + /* while there's a context and trace level is not reached */ while(RTEST(context) && maxlev--) { + methctx_reference(m->s, context); fc = FASTCTX(context); - + + /* figure out module name */ if(fc->method_module && RTEST(fc->method_module)) { modname = SYM2STR(m->s, module_get_name(fc->method_module)); } else { modname = "<none>"; } - + + /* figure out method name */ if(fc->type == FASTCTX_BLOCK) { methname = "<block>"; } else if(fc->name && RTEST(fc->name)) { @@ -70,6 +83,7 @@ void machine_print_callstack_limited(machine m, int maxlev) { methname = "<none>"; } + /* figure out filename */ if(fc->method && RTEST(fc->method)) { tmp = cmethod_get_file(fc->method); if(SYMBOL_P(tmp)) { @@ -81,24 +95,31 @@ void machine_print_callstack_limited(machine m, int maxlev) { filename = "<unknown>"; } + /* execution logging */ fprintf(stderr, "%10p %s#%s+%d in %s:%d\n", (void*)context, modname, methname, fc->ip, filename, cpu_ip2line(m->s, fc->method, fc->ip) ); + /* transfer control back to message sender */ context = fc->sender; } } +/* prints the whole VM call stack */ void machine_print_callstack(machine m) { machine_print_callstack_limited(m, -1); } +/* prints of given VM */ void machine_print_stack(machine m) { unsigned int i, start, end; + /* flush stack pointer */ cpu_flush_sp(m->c); + /* get stack pointer */ i = m->c->sp; + /* ASK */ start = (i < 5 ? 0 : i - 5); end = (i + 5 > m->c->stack_size) ? 
m->c->stack_size : i + 5; for(i = start; i < end; i++) { @@ -112,8 +133,11 @@ void machine_print_stack(machine m) { } +/* prints VM registers content */ void machine_print_registers(machine m) { + /* flush stack pointer */ cpu_flush_sp(m->c); + /* flush instruction pointer */ cpu_flush_ip(m->c); printf("IP: %04d\nSP: %04d\n", m->c->ip, m->c->sp); if(NIL_P(m->c->exception)) { @@ -123,18 +147,23 @@ void machine_print_registers(machine m) { } } +/* handles error reporting */ void _machine_error_reporter(int sig, siginfo_t *info, void *ctx) { + /* name of signal VM received */ const char *signame; + /* Rubinius native interface context */ rni_context *rni_ctx; OBJECT addr; /* See if the error happened during the running of a C function. If so, we raise an exception about the error. */ + /* Grab Subtend context first */ rni_ctx = subtend_retrieve_context(); if(rni_ctx->nmc && rni_ctx->nmc->system_set) { /* TODO: generate the C backtrace as a string array and pass it via the nmc or global_context so that the exception can include it. 
*/ + /* Set RNI faulting instruction and switch to system context */ rni_ctx->fault_address = info->si_addr; rni_ctx->nmc->jump_val = SEGFAULT_DETECTED; setcontext(&rni_ctx->nmc->system); @@ -192,21 +221,27 @@ void _machine_error_reporter(int sig, siginfo_t *info, void *ctx) { exit(-2); } +/* initialize signals handling and errors reporting */ void machine_setup_signals(machine m) { - m->error_report.sa_sigaction = _machine_error_reporter; - sigemptyset(&m->error_report.sa_mask); - m->error_report.sa_flags = SA_SIGINFO; - sigaction(SIGSEGV, &m->error_report, NULL); - sigaction(SIGBUS, &m->error_report, NULL); - sigaction(SIGABRT, &m->error_report, NULL); + m->error_report.sa_sigaction = _machine_error_reporter; + sigemptyset(&m->error_report.sa_mask); + m->error_report.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &m->error_report, NULL); + sigaction(SIGBUS, &m->error_report, NULL); + sigaction(SIGABRT, &m->error_report, NULL); } +/* initialize event base used by libevent */ static void machine_setup_events(machine m) { /* libev will not "autodetect" kqueue because it is broken on darwin */ m->s->event_base = ev_loop_new(EVFLAG_FORKCHECK); m->s->thread_infos = NULL; } +/* Creates and initializes Rubinius VM. + * Sets up VM CPU, subtend, context and state, universe and everything. + * + */ machine machine_new(environment e) { machine m; int pipes[2]; @@ -220,21 +255,24 @@ machine machine_new(environment e) { /* Setup pipes used for message notification. */ m->message_read_fd = pipes[0]; m->message_write_fd = pipes[1]; - + /* initialize VM state */ m->s = rubinius_state_new(); + /* allocate memory for VM cpu and initialize it's paths list */ m->c = cpu_new(m->s); /* Initialize the instruction addresses. 
*/ cpu_run(m->s, m->c, TRUE); m->c->ip_ptr = &m->s->external_ip; + /* setup signals and events handling */ machine_setup_signals(m); machine_setup_events(m); + cpu_initialize(m->s, m->c); cpu_bootstrap(m->s); subtend_setup(m->s); cpu_setup_top_scope(m->s, m->c); cpu_initialize_context(m->s, m->c); - + /* make MAIN Ruby constant point to main routine of the application*/ machine_set_const(m, "MAIN", m->c->main); cpu_task_configure_preemption(m->s); environment_add_machine(e, m); @@ -244,20 +282,27 @@ machine machine_new(environment e) { return m; } +/* destroys VM cpu and state objects, frees memory */ void machine_destroy(machine m) { cpu_destroy(m->c); state_destroy(m->s); free(m); } +/* handles errors gracefully */ void machine_handle_fire(int kind) { + /* store type of violation */ current_machine->g_access_violation = kind; + /* then switch to special context to handle it gracefully */ setcontext(&current_machine->g_firesuit); } +/* handles type errors in the VM */ void machine_handle_type_error(OBJECT obj, const char *message) { + /* store error message */ current_machine->g_firesuit_message = strdup(message); - + + /* make errors handle to know error type */ if(FIXNUM_P(obj)) { current_machine->g_firesuit_arg = FixnumType; } else if(SYMBOL_P(obj)) { @@ -269,21 +314,25 @@ void machine_handle_type_error(OBJECT obj, const char *message) { } else { current_machine->g_firesuit_arg = 0; } - + /* now handle it */ machine_handle_fire(FIRE_TYPE); } +/* handles failed assertions in the VM code */ void machine_handle_assert(const char *reason, const char *file, int line) { fprintf(stderr, "VM Assertion: %s (%s:%d)\n", reason, file, line); + /* print the whole backtrace */ printf("\nRuby backtrace:\n"); machine_print_callstack(current_machine); + /* if firesuit is off exit otherwise store error type and switch to special handling context */ if(!current_machine->g_use_firesuit) abort(); current_machine->g_access_violation = FIRE_ASSERT; setcontext(&current_machine->g_firesuit); } +/* 
returns unmarshalled file */ OBJECT machine_load_file(machine m, const char *path) { return cpu_unmarshal_file(m->s, path, 0); } @@ -306,10 +355,12 @@ void machine_show_exception(machine m, OBJECT exc) { puts(""); } +/* initializes VM globals, clear instruction pointer, op, firesuit and so forth */ int machine_run(machine m) { + /* here's where the actual initialization starts */ cpu_run(m->s, m->c, 0); m->c->ip_ptr = &m->s->external_ip; - + /* report if there's an exception */ if(RTEST(m->c->exception)) { printf("Toplevel exception detected.\n"); machine_show_exception(m, m->c->exception); @@ -318,6 +369,7 @@ int machine_run(machine m) { return TRUE; } +/* loads and executes a script */ int machine_run_file(machine m, const char *path) { OBJECT meth; int out; @@ -325,13 +377,14 @@ int machine_run_file(machine m, const char *path) { if(m->s->excessive_tracing) { printf("[ Loading file %s]\n", path); } - + meth = machine_load_file(m, path); if(!RTEST(meth)) { printf("Unable to load '%s'.\n", path); return FALSE; } - + + /* re-init cpu stack */ m->c->depth = 0; cpu_stack_push(m->s, m->c, meth, FALSE); cpu_run_script(m->s, m->c, meth); @@ -342,16 +395,19 @@ int machine_run_file(machine m, const char *path) { return out; } +/* sets constant under given module or class */ void machine_set_const_under(machine m, const char *str, OBJECT val, OBJECT under) { OBJECT tbl; tbl = module_get_constants(under); lookuptable_store(m->s, tbl, string_new(m->s, str), val); } +/* Sets constant under Object class */ void machine_set_const(machine m, const char *str, OBJECT val) { machine_set_const_under(m, str, val, m->s->global->object); } +/* stores Ruby VM launch arguments */ void machine_save_args(machine m, int argc, char **argv) { char **na; na = calloc(argc, sizeof(char*)); @@ -363,6 +419,7 @@ void machine_save_args(machine m, int argc, char **argv) { machine_setup_argv(m, argc, argv); } +/* Sets standard IO streams (stdin, stdout, stderr) to Ruby constants */ void 
machine_setup_standard_io(machine m) { machine_set_const(m, "STDIN", io_new(m->s, 0, "r")); machine_set_const(m, "STDOUT", io_new(m->s, 1, "w")); @@ -391,6 +448,7 @@ int *machine_setup_piped_io(machine m) { return pipes; } +/* sets up RUBY_BIN_PATH Ruby constant and VM interpreter name */ void machine_setup_ruby(machine m, char *name) { char buf[MAXPATHLEN]; char wd[MAXPATHLEN]; @@ -407,6 +465,7 @@ void machine_setup_ruby(machine m, char *name) { m->interpreter = strdup(name); } +/* sets ARGV and ARG0 Ruby constants */ void machine_setup_argv(machine m, int argc, char **argv) { OBJECT ary; int i; @@ -421,6 +480,7 @@ void machine_setup_argv(machine m, int argc, char **argv) { machine_set_const(m, "ARGV", ary); } +/* utility: checks whether string contains only digits */ int is_number(char *str) { while(*str) { if(!isdigit(*str)) return FALSE; @@ -430,6 +490,7 @@ int is_number(char *str) { return TRUE; } +/* utility: strips trailing non-alnum chars from string */ static char *trim_str(char *str) { int i; while(*str && !isalnum(*str)) str++; @@ -508,20 +569,27 @@ void machine_parse_config_file(machine m, const char *path) { } void machine_migrate_config(machine m) { + /* hash table iterator */ struct hashtable_itr iter; + /* VM state */ rstate state = m->s; - + + /* initialize new hash for environment global configuration */ m->s->global->config = hash_new_sized(m->s, 500); - + + /* unless config is empty */ if(hashtable_count(m->s->config) > 0) { - + /* Iterate through it */ hashtable_iterator_init(&iter, m->s->config); do { + /* Key, value */ OBJECT ok, ov; bstring k = (bstring)hashtable_iterator_key(&iter); bstring v = (bstring)hashtable_iterator_value(&iter); + /* object key: Ruby string created from bstring library C string */ ok = string_newfrombstr(m->s, k); + /* ASK */ if(is_number(bdata(v))) { ov = LL2N(strtoll(bdatae(v,""), NULL, 10)); } else { @@ -532,11 +600,12 @@ void machine_migrate_config(machine m) { } while (hashtable_iterator_advance(&iter)); } - + 
/* Make RUBY_CONFIG point to global configuration */ machine_set_const(m, "RUBY_CONFIG", m->s->global->config); machine_setup_from_config(m); } +/* applies debug configuration options to VM state */ void machine_setup_from_config(machine m) { bstring s; @@ -555,6 +624,7 @@ void machine_setup_from_config(machine m) { bdestroy (s); } +/* initializes platform and arc related Ruby constants like RUBY_PLATFORM, OS and L64 */ void machine_setup_config(machine m) { OBJECT mod; STATE; @@ -698,6 +768,7 @@ void machine_setup_config(machine m) { machine_set_const_under(m, "MESSAGE_IO", io_new(m->s, m->message_read_fd, "r"), mod); } +/* sets up debugging flags */ void machine_config_env(machine m) { char *config; if(getenv("RDEBUG")) { @@ -719,6 +790,7 @@ void machine_config_env(machine m) { } } +/* loads files in the directory, respects load order hint file .load_order.txt */ int machine_load_directory(machine m, const char *prefix) { char *path; char *file; @@ -792,6 +864,12 @@ int machine_load_object(machine m, char *name, uint8_t *data, long length) { return TRUE; } +/* + * loads and executes Rubinius bytecode archive (*.rba, similar to *.jar or *.elc) + * + * Rubinius' rba files are essentially zip archived bytecode. .load_order file + * must be loaded first to respect bytecode dependencies. + */ int machine_load_ar(machine m, const char *path) { int ret = FALSE; @@ -815,6 +893,7 @@ int machine_load_rba(machine m, const char *path) { int machine_load_bundle(machine m, const char *path) { struct stat sb; + /* return false if file does not exist */ if(stat(path, &sb) != 0) return FALSE; if(S_ISDIR(sb.st_mode)) { @@ -824,6 +903,11 @@ int machine_load_bundle(machine m, const char *path) { return machine_load_rba(m, path); } +/* 1. saves the command line arguments used to invoke the VM. + * 2. initializes platform and arc related Ruby constants + * 3. sets up debugging flags from configuration + * 4. 
sets standard IO streams (stdin, stdout, stderr) to Ruby constants STDIN, STDOUT and STDERR + */ void machine_setup_normal(machine m, int argc, char **argv) { machine_save_args(m, argc, argv); machine_setup_config(m); @@ -831,6 +915,13 @@ void machine_setup_normal(machine m, int argc, char **argv) { machine_setup_standard_io(m); } +/* + * 1. sets inferior machine flag to true (inferior machine is one spawned by another, a "child") + * 2. saves the command line arguments used to invoke the VM. + * 3. initializes platform and arc related Ruby constants + * 4. sets up debugging flags from configuration + * 5. sets up piped IO streams (stdin, stdout, stderr) to Ruby constants STDIN, STDOUT and STDERR + */ int *machine_setup_thread(machine m, int argc, char **argv) { m->sub = TRUE; machine_save_args(m, argc, argv); @@ -838,4 +929,3 @@ int *machine_setup_thread(machine m, int argc, char **argv) { machine_config_env(m); return machine_setup_piped_io(m); } - diff --git a/shotgun/lib/machine.h b/shotgun/lib/machine.h index c5e82b4..8e5fdce 100644 --- a/shotgun/lib/machine.h +++ b/shotgun/lib/machine.h @@ -10,24 +10,42 @@ typedef struct rubinius_machine *machine; #include "shotgun/lib/environment.h" struct rubinius_machine { + /* every machine has an identifier which is incremented when new machine is added to environment */ int id; + /* machine may have a parent: parent VM is VM spawned this VM */ int parent_id; + /* thread this machine operates in */ pthread_t pthread; + /* whether it is an inferior VM: i.e. spawn by another VM. 
In Ruby this is reflected by VM_INFERIOR constant value */ int sub; + /* descriptor of incoming messages stream: reflected by MESSAGE_IO constant in Ruby */ int message_read_fd; + /* descriptor of outcoming messages stream */ int message_write_fd; + /* various VM state information: from object memory to stack frames state + * this structure is organized way to carry this global VM stuff around + */ rstate s; + /* CPU abstraction Rubinius uses to run it's VM */ cpu c; + /* SIGSEGV/SIGBUS/SIGABRT error information + */ struct sigaction error_report; + /* Ruby VM name */ char *interpreter; + /* that's argc VM was invoked with */ int argc; + /* that's char **argv VM was invoked with */ char **argv; + /* whether or not output extra debug information */ int show_config; - ucontext_t g_firesuit; /* work around a bug in 10.5's libc versus header files */ #if defined(__APPLE__) && defined(HAS_UCONTEXT) /* patch for tiger */ _STRUCT_MCONTEXT __system_mc; #endif + +/* these members relate to segfaults handling so they get reported correctly and VM exit gracefully(ish) */ + ucontext_t g_firesuit; int g_use_firesuit; int g_access_violation; int g_firesuit_arg; diff --git a/shotgun/lib/object.h b/shotgun/lib/object.h index 5af6e35..5b5bca9 100644 --- a/shotgun/lib/object.h +++ b/shotgun/lib/object.h @@ -52,7 +52,7 @@ static inline void object_copy_body(STATE, OBJECT self, OBJECT dest) { memcpy(object_byte_start(state, dest), object_byte_start(state, self), s1); } - +/* Ruby's is_a? 
*/ #define ISA(o, c) object_kind_of_p(state, o, c) static inline uintptr_t object_get_id(STATE, OBJECT self) { diff --git a/shotgun/lib/object_memory.h b/shotgun/lib/object_memory.h index b9c7746..fc5afc7 100644 --- a/shotgun/lib/object_memory.h +++ b/shotgun/lib/object_memory.h @@ -15,17 +15,26 @@ #define OMCollectYoung 0x1 #define OMCollectMature 0x2 +/* set of flags */ struct object_memory_struct { + /* */ int collect_now; + /* */ int enlarge_now; + /* */ int tenure_now; + /* */ int new_size; + /* */ int last_object_id; + /* Rubinius uses Baker's generational GC as well as marking and sweeping */ baker_gc gc; mark_sweep_gc ms; + /* */ int last_tenured; + /* */ int bootstrap_loaded; - + /* */ rheap contexts; /* The first not referenced stack context */ OBJECT context_bottom; diff --git a/shotgun/lib/oop.h b/shotgun/lib/oop.h index 5631dc0..18e6a99 100644 --- a/shotgun/lib/oop.h +++ b/shotgun/lib/oop.h @@ -20,11 +20,15 @@ typedef intptr_t native_int; #define TAG_REF 0x0 #define TAG_FIXNUM 0x1 +/* literals are numbers, symbols, ranges, regexp */ #define TAG_LITERAL 0x2 + #define TAG_DATA 0x3 #define TAG(v) (((intptr_t)v) & TAG_MASK) +/* applies bit mask of given tag by setting 2 least significant bits */ #define APPLY_TAG(v, tag) ((OBJECT)(((intptr_t)v << TAG_SHIFT) | tag)) +/* removes tag bit mask from value by unshifting 2 least significant bits */ #define STRIP_TAG(v) (((intptr_t)v) >> TAG_SHIFT) #define DATA_P(v) (TAG(v) == TAG_DATA) @@ -213,19 +217,26 @@ A rubinius object can be followed by: /* Object access, lowest level. These read and set fields of an OBJECT * directly. They're built on to integrate with the GC properly. 
*/ - #define CLASS_OBJECT(obj) (obj->klass) +/* returns size of OBJECT */ #define SIZE_OF_OBJECT ((unsigned int)(sizeof(OBJECT))) - +/* returns number of fields obj has */ #define NUM_FIELDS(obj) (obj->field_count) +/* sets number of fields to obj */ #define SET_NUM_FIELDS(obj, fel) (obj->field_count = fel) +/* size of rubinius_object_t plus size of fields (all in bytes) */ #define SIZE_IN_BYTES_FIELDS(fel) ((unsigned int)(sizeof(struct rubinius_object_t) + \ fel*SIZE_OF_OBJECT)) +/* size of rubinius_object_t and fields in words */ #define SIZE_IN_WORDS_FIELDS(fel) (sizeof(struct rubinius_object_t)/SIZE_OF_OBJECT + fel) +/* size of object in bytes */ #define SIZE_IN_BYTES(obj) SIZE_IN_BYTES_FIELDS(obj->field_count) +/* size of fields only */ #define SIZE_OF_BODY(obj) (obj->field_count * SIZE_OF_OBJECT) #define ADDRESS_OF_FIELD(obj, fel) (&obj->field[fel]) +/* obtain object field directly by position */ #define NTH_FIELD_DIRECT(obj, fel) (obj->field[fel]) +/* set object field directly by position */ #define SET_FIELD_DIRECT(obj, fel, val) (obj->field[fel] = val) #define BYTES_OF(obj) ((char*)obj->field) @@ -259,18 +270,24 @@ to be a simple test for that bit pattern. 
#define Qtrue ((OBJECT)10L) #define Qundef ((OBJECT)18L) +/* true if v is Ruby false */ #define FALSE_P(v) ((OBJECT)(v) == (OBJECT)Qfalse) +/* true if v is Ruby true */ #define TRUE_P(v) ((OBJECT)(v) == (OBJECT)Qtrue) +/* true if v is Ruby nil */ #define NIL_P(v) ((OBJECT)(v) == (OBJECT)Qnil) +/* true if value of v is undefined */ #define UNDEF_P(v) ((OBJECT)(v) == (OBJECT)Qundef) +/* true if v is truthy (neither false nor nil): its low three bits are not 0x6 */ #define RTEST(v) (((uintptr_t)(v) & 0x7) != 0x6) - +/* true if v is a reference */ #define REFERENCE_P(v) (TAG(v) == TAG_REF) - +/* same semantics as REFERENCE_P, but first checks that v is non-zero */ #define REFERENCE2_P(v) (v && REFERENCE_P(v)) #define INDEXED(obj) (REFERENCE_P(obj) && !obj->StoresBytes) +/* copy flags not used by garbage collector */ static inline void object_copy_nongc_flags(OBJECT target, OBJECT source) { target->obj_type = source->obj_type; @@ -281,18 +298,25 @@ static inline void object_copy_nongc_flags(OBJECT target, OBJECT source) target->IsMeta = source->IsMeta; } +/* sets binary flags field to 0 */ #define CLEAR_FLAGS(obj) (obj)->all_flags = 0 +/* stack context has GC zone unspecified */ #define stack_context_p(obj) ((obj)->gc_zone == UnspecifiedZone) +/* setter and predicate for forwarded pointer flag, see baker.h and baker.c for more details */ #define SET_FORWARDED(obj) (obj)->Forwarded = TRUE +/* use this to check forwarded pointer on object: + * when object is copied from space to space by GC, + * forwarding pointer is left in old location + */ #define FORWARDED_P(obj) ((obj)->Forwarded) - +/* objects are getting old after surviving GC tracing */ #define AGE(obj) (obj->copy_count) #define CLEAR_AGE(obj) (obj->copy_count = 0) #define INCREMENT_AGE(obj) (obj->copy_count++) /* Object access. 
*/ -/* Setting the class of an object */ +/* setting the class of an object */ #define rbs_set_class(om, obj, cls) ({ \ OBJECT _o = (obj), _c = (cls); \ RUN_WB2(om, _o, _c); _o->klass = _c; }) @@ -331,10 +355,12 @@ static inline void object_copy_nongc_flags(OBJECT target, OBJECT source) RUN_WB2(om, _o, _v); \ SET_FIELD_DIRECT(_o, fel, _v); }) +/* alias macro for obtaining object field directly by position */ #define rbs_get_field(obj, fel) NTH_FIELD_DIRECT(obj, fel) #else /*DISABLE_CHECKS*/ +/* compared to _bad_reference prints more verbose error message */ static void _bad_reference2(OBJECT in, int fel) { printf("Attempted to access field %d in an object with %lu fields.\n", fel, (unsigned long)NUM_FIELDS(in)); @@ -346,8 +372,11 @@ static void _bad_reference2(OBJECT in, int fel) { #if EXTRA_PROTECTION +/* versions that check for reference */ + static void _bad_reference(OBJECT in) { printf("Attempted to access field of non-reference.\n"); + /* handle segfault */ if(current_machine->g_use_firesuit) { machine_handle_fire(FIRE_NULL); } @@ -369,7 +398,7 @@ static void _bad_reference(OBJECT in) { #else /*EXTRA_PROTECTION*/ -/* These are the typically used versions. The don't check for ref, they +/* These are the typically used versions. They don't check for ref, they the segfault handler do that. */ #define rbs_set_field(om, obj, fel, val) ({ \ @@ -400,6 +429,7 @@ static void _bad_reference(OBJECT in) { /* Type tests. */ #define RTYPE(obj,type) (REFERENCE_P(obj) && obj->obj_type == type) +/* shortcut, doing what Ruby's is_a? 
does */ #define RISA(obj,cls) (REFERENCE_P(obj) && ISA(obj,BASIC_CLASS(cls))) #define BIGNUM_P(obj) (RTYPE(obj, BignumType)) diff --git a/shotgun/lib/shotgun.h b/shotgun/lib/shotgun.h index 0f9fb5e..15a2c9b 100644 --- a/shotgun/lib/shotgun.h +++ b/shotgun/lib/shotgun.h @@ -2,7 +2,7 @@ #define RBS_SHOTGUN_H #define INTERNAL_DEBUG 0 - +/* whether to track various VM stats */ #define TRACK_STATS 0 #define DISABLE_CHECKS 1 // #define TIME_LOOKUP 1 diff --git a/shotgun/lib/state.h b/shotgun/lib/state.h index f070a4e..958a4b2 100644 --- a/shotgun/lib/state.h +++ b/shotgun/lib/state.h @@ -137,6 +137,7 @@ struct rubinius_state { rni_handle_table *handle_tbl; + /* pointer to bottom of the stack */ unsigned long *stack_bottom; struct hashtable *cleanup; @@ -146,9 +147,10 @@ struct rubinius_state { void *thread_infos; unsigned int event_id; + /* Stuff the sampling profiler uses, not critical for VM operations */ OBJECT *samples; int max_samples, cur_sample; - + /* again, profiler stats */ int excessive_tracing, gc_stats; int check_events, pending_threads, pending_events; diff --git a/shotgun/lib/string.c b/shotgun/lib/string.c index b50c926..ba8d07b 100644 --- a/shotgun/lib/string.c +++ b/shotgun/lib/string.c @@ -109,6 +109,7 @@ OBJECT string_append(STATE, OBJECT self, OBJECT other) { return self; } +/* returns pointer to the bytearray the string is based on */ char *string_byte_address(STATE, OBJECT self) { OBJECT data; diff --git a/shotgun/lib/subtend/nmc.h b/shotgun/lib/subtend/nmc.h index 8e88102..aaa02b0 100644 --- a/shotgun/lib/subtend/nmc.h +++ b/shotgun/lib/subtend/nmc.h @@ -4,7 +4,7 @@ #include "shotgun/lib/cpu.h" #include "shotgun/lib/subtend/nmethod.h" - +/* Rubinius native interface: native method context */ struct rni_nmc { int num_handles; int used; diff --git a/shotgun/main.c b/shotgun/main.c index 5fd16ed..da075c0 100644 --- a/shotgun/main.c +++ b/shotgun/main.c @@ -56,7 +56,7 @@ int main(int argc, char **argv) { /* We build up the environment information by looking around * 
the filesystem now. */ - /* Find the platform config */ + /* Find the platform configuration */ e->platform_config = search_for("RBX_PLATFORM_CONF", "platform.conf"); /* Find the bootstrap. */ @@ -65,6 +65,7 @@ int main(int argc, char **argv) { printf("Unable to find a bootstrap to load!\n"); return 1; } + /* Store bootstrap path in the environment */ e->bootstrap_path = archive; /* Find the platform. */ @@ -73,7 +74,7 @@ int main(int argc, char **argv) { printf("Unable to find a platform to load!\n"); return 1; } - + /* Store platform path in the environment */ e->platform_path = archive; /* Find the core. */ @@ -82,7 +83,7 @@ int main(int argc, char **argv) { printf("Unable to find a core to load!\n"); return 1; } - + /* Store core path in the environment */ e->core_path = archive; /* Load the loader.rbc */ @@ -91,9 +92,16 @@ int main(int argc, char **argv) { printf("Unable to find loader.rbc to load!\n"); return 1; } - + /* Store loader path in the environment */ e->loader_path = archive; + /* Loads the environment. + * + * It parses the configuration file + * and bundles + * + * + */ environment_load_machine(e, m); /* Done! */
From the Design Piracy series on my blog: