Thank you to anyone who has already donated - your generous donations helped make three months of treatment possible.

My brother Nate continues to fight stage IV Hodgkin's lymphoma. He's just 31, with a wife and baby girl. They have no active income (since he's been unable to return to work), no insurance, and cannot afford the treatment he needs. Nate and his family need your help. Please consider a donation, every dollar helps. Thanks.


(gdb) info threads
  19 Thread 16091  0x00007fbdc2350c5d in nanosleep () at ../sysdeps/unix/syscall-template.S:82
  18 Thread 16120  pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:211
  17 Thread 16119  pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:211
  16 Thread 16118  pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:211
  15 Thread 16117  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
  14 Thread 16116  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
  13 Thread 16115  0x00007fbdc2374c13 in *__GI___poll (fds=<value optimized out>, nfds=<value optimized out>, timeout=120000) at ../sysdeps/unix/sysv/linux/poll.c:87
  12 Thread 16114  pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:211
  11 Thread 16113  pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:211
  10 Thread 16112  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
  9 Thread 16111  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
  8 Thread 16110  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
  7 Thread 16109  pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:211
  6 Thread 16108  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
  5 Thread 16107  pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:211
  4 Thread 16104  0x00007fbdc2350c5d in nanosleep () at ../sysdeps/unix/syscall-template.S:82
  3 Thread 16103  0x00007fbdc2350c5d in nanosleep () at ../sysdeps/unix/syscall-template.S:82
  2 Thread 16102  0x00007fbdc282838d in accept () at ../sysdeps/unix/syscall-template.S:82
* 1 Thread 16099  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136

Thread 15 and 8 are in a deadlock. This occurs when a array job is in
the process of being generated and a second job is submitted with a
dependency on the first job is submitted.

(gdb) thread 15
[Switching to thread 15 (Thread 16117)]#0  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
136	in ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb) bt
#0  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1  0x00007fbdc2823179 in _L_lock_953 () from /lib/libpthread.so.0
#2  0x00007fbdc2822f9b in __pthread_mutex_lock (mutex=0x70cd800) at pthread_mutex_lock.c:61
#3  0x000000000040a6b4 in is_array (id=Unhandled dwarf expression opcode 0xf3
) at array_func.c:137
#4  0x0000000000425e3c in dispatch_request (sfds=65535, request=0x7fbd94048ff0) at process_request.c:927
#5  0x000000000040eaac in issue_to_svr (servern=Unhandled dwarf expression opcode 0xf3
) at issue_request.c:324
#6  0x000000000043a713 in send_depend_req (pjob=0x7c876d0, pparent=0x7fbd94043160, type=12, op=1, schedhint=0, postfunc=0x439d40 <post_doq>) at req_register.c:2262
#7  0x000000000043bbb9 in depend_on_que (pattr=Unhandled dwarf expression opcode 0xf3
) at req_register.c:1237
#8  0x000000000044a5c5 in svr_enquejob (pjob=0x7c876d0, has_sv_qs_mutex=0, prev_job_index=65) at svr_jobfunc.c:575
#9  0x00000000004121fc in job_clone_wt (cloned_id=Unhandled dwarf expression opcode 0xf3
) at job_func.c:1153
#10 0x000000000045acd2 in work_thread (a=0x7fff7fa38120) at u_threadpool.c:307
#11 0x00007fbdc28208ca in start_thread (arg=<value optimized out>) at pthread_create.c:300
#12 0x00007fbdc237fb6d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#13 0x0000000000000000 in ?? ()
(gdb) print *(pthread_mutex_t*)0x70cd800
$2 = {__data = {__lock = 2, __count = 0, __owner = 16110, __nusers = 1, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 
  __size = "\002\000\000\000\000\000\000\000\356>\000\000\001", '\000' <repeats 26 times>, __align = 2}
Lock is hold by thread 8

(gdb) thread 8
[Switching to thread 8 (Thread 16110)]#0  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
136	in ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb) bt
#0  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1  0x00007fbdc2823179 in _L_lock_953 () from /lib/libpthread.so.0
#2  0x00007fbdc2822f9b in __pthread_mutex_lock (mutex=0x7fbdb02858b0) at pthread_mutex_lock.c:61
#3  0x000000000044a61d in lock_ai_mutex (pa=0x7fbdb02103f0, id=Unhandled dwarf expression opcode 0xf3
) at svr_jobfunc.c:2940
#4  0x000000000040a77c in get_array (id=Unhandled dwarf expression opcode 0xf3
) at array_func.c:164
#5  0x000000000043a342 in req_registerarray (preq=0x7fbdb82e0170) at req_register.c:718
#6  0x0000000000425e4c in dispatch_request (sfds=65535, request=0x7fbdb82e0170) at process_request.c:929
#7  0x000000000040eaac in issue_to_svr (servern=Unhandled dwarf expression opcode 0xf3
) at issue_request.c:324
#8  0x000000000043a713 in send_depend_req (pjob=0x7fbdb82e32e0, pparent=0x7fbdb80a5ed0, type=12, op=1, schedhint=0, postfunc=0x439d40 <post_doq>) at req_register.c:2262
#9  0x000000000043bbb9 in depend_on_que (pattr=Unhandled dwarf expression opcode 0xf3
) at req_register.c:1237
#10 0x000000000044a5c5 in svr_enquejob (pjob=0x7fbdb82e32e0, has_sv_qs_mutex=0, prev_job_index=-1) at svr_jobfunc.c:575
#11 0x0000000000438cf4 in req_commit (preq=0x7fbdb81bfb80) at req_quejob.c:2141
#12 0x0000000000426119 in dispatch_request (sfds=11, request=0x7fbdb81bfb80) at process_request.c:741
#13 0x000000000042687e in process_request (chan=Unhandled dwarf expression opcode 0xf3
) at process_request.c:662
#14 0x0000000000422da8 in process_pbs_server_port (sock=11, is_scheduler_port=0) at pbsd_main.c:410
#15 0x0000000000422e9a in start_process_pbs_server_port (new_sock=Unhandled dwarf expression opcode 0xf3
) at pbsd_main.c:541
#16 0x000000000045acd2 in work_thread (a=0x7fff7fa38120) at u_threadpool.c:307
#17 0x00007fbdc28208ca in start_thread (arg=<value optimized out>) at pthread_create.c:300
#18 0x00007fbdc237fb6d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#19 0x0000000000000000 in ?? ()
(gdb)  print *(pthread_mutex_t*)0x7fbdb02858b0
$3 = {__data = {__lock = 2, __count = 0, __owner = 16117, __nusers = 1, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 
  __size = "\002\000\000\000\000\000\000\000\365>\000\000\001", '\000' <repeats 26 times>, __align = 2}
Lock is hold by thread 15

Thread 15 holds the ai lock. I do not know where the lock is aquired,
but it is release in job_clone_wt shortly after all jobs are enqueued.

(gdb) thread 8
[Switching to thread 8 (Thread 16110)]#0  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
136	in ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb) frame 4
#4  0x000000000040a77c in get_array (id=Unhandled dwarf expression opcode 0xf3
) at array_func.c:164
164	    lock_ai_mutex(pa, __func__, NULL, LOGLEVEL);
Current language:  auto
The current source language is "auto; currently c".
(gdb) print pa
$4 = (job_array *) 0x7fbdb02103f0

(gdb) thread 15
[Switching to thread 15 (Thread 16117)]#0  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
136	../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S: No such file or directory.
	in ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
Current language:  auto
The current source language is "auto; currently asm".
(gdb) frame 9
#9  0x00000000004121fc in job_clone_wt (cloned_id=Unhandled dwarf expression opcode 0xf3
) at job_func.c:1153
1153	      if ((rc = svr_enquejob(pjobclone, FALSE, prev_index)))
Current language:  auto
The current source language is "auto; currently c".
(gdb) print pa
$5 = (job_array *) 0x7fbdb02103f0