Discussion:
[ast-developers] Crash in signal code (|set_trapinfo()|) when processing signal storm...
Roland Mainz
2013-07-20 03:52:38 UTC
Permalink
Hi!

-----

The following testcase...
-- snip --
set -o nounset

integer i

compound c=(
compound child=(
float sum=0.0
)
)
integer -r pid=$$
integer -r numprocs=128

trap '(( c.child.sum-=1.7 ))' RTMIN
trap '(( c.child.sum-=3.3 ))' RTMAX
trap '(( c.child.sum+=.sh.sig.status ))' CHLD

for (( i=0 ; i < numprocs ; i++ )) ; do
{
sleep $((numprocs / 64.))
kill -q0 -s RTMAX ${pid}
kill -q0 -s RTMIN ${pid}
exit 5
} &
done

float start=$SECONDS
while ! wait ; do
/usr/bin/true

if (( (SECONDS-start) > 20 )) ; then
print '# Aborting wait loop...'
break
fi
done

if (( c.child.sum == 0.0 )) ; then
printf '# success.\n'
exit 0
else
printf 'sum for all signals=%f (should be "0")\n' \
c.child.sum
exit 1
fi

# notreached
-- snip --

... crashes ast-ksh.2013-06-28 on SuSE 12.3/AMD64/64bit like this:
-- snip --
[New LWP 9177]
Core was generated by
`../work/ast_ksh_20130628/build_i386_64bit_debug_patched/arch/linux.i386-64/bin/'.
Program terminated with signal 11, Segmentation fault.
#0 0x00000000004140f4 in set_trapinfo (shp=0x803200 <sh>, sig=64,
info=0x7fff6490c770)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/fault.c:102
102 jp->last->next = ip;
(gdb) where
#0 0x00000000004140f4 in set_trapinfo (shp=0x803200 <sh>, sig=64,
info=0x7fff6490c770)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/fault.c:102
#1 0x00000000004145b2 in sh_fault (sig=64, info=0x7fff6490c770,
context=0x7fff6490c640)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/fault.c:231
#2 <signal handler called>
#3 sh_fault (sig=0, info=0x0, context=0x0) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/fault.c:117
#4 <signal handler called>
#5 0x00007f8abf84d19a in __libc_waitpid (pid=-1,
stat_loc=0x7fff6490ccec, options=10) at
../sysdeps/unix/sysv/linux/waitpid.c:31
#6 0x00000000004295ff in job_reap (sig=0) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/jobs.c:464
#7 0x000000000042c48d in job_wait (pid=1) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/jobs.c:1700
#8 0x000000000042a634 in job_bwait (jobs=0x7f8ac0389318) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/jobs.c:921
#9 0x0000000000484dab in b_wait (n=1, argv=0x7f8ac0389318,
context=0x803758 <sh+1368>)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/bltins/misc.c:386
#10 0x0000000000468d7f in sh_exec (shp=0x803200 <sh>,
t=0x7f8ac03892b0, flags=512)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/xec.c:1357
#11 0x000000000046d5f0 in sh_exec (shp=0x803200 <sh>,
t=0x7f8ac0389280, flags=512)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/xec.c:2626
#12 0x000000000046cce1 in sh_exec (shp=0x803200 <sh>,
t=0x7f8ac0389240, flags=4) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/xec.c:2464
#13 0x000000000040f4e2 in exfile (shp=0x803200 <sh>,
iop=0x7f8ac034c4d0, fno=11) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/main.c:603
#14 0x000000000040e72d in sh_main (ac=2, av=0x7fff6490e258,
userinit=0x0) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/main.c:375
#15 0x000000000040d901 in main (argc=2, argv=0x7fff6490e258) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/pmain.c:45
-- snip --

----

Bye,
Roland
--
__ . . __
(o.\ \/ /.o) roland.mainz at nrubsig.org
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 3992797
(;O/ \/ \O;)
Roland Mainz
2013-07-20 04:11:56 UTC
Permalink
Post by Roland Mainz
The following testcase...
-- snip --
set -o nounset
integer i
compound c=(
compound child=(
float sum=0.0
)
)
integer -r pid=$$
integer -r numprocs=128
trap '(( c.child.sum-=1.7 ))' RTMIN
trap '(( c.child.sum-=3.3 ))' RTMAX
trap '(( c.child.sum+=.sh.sig.status ))' CHLD
for (( i=0 ; i < numprocs ; i++ )) ; do
{
sleep $((numprocs / 64.))
kill -q0 -s RTMAX ${pid}
kill -q0 -s RTMIN ${pid}
exit 5
} &
done
float start=$SECONDS
while ! wait ; do
/usr/bin/true
if (( (SECONDS-start) > 20 )) ; then
print '# Aborting wait loop...'
break
fi
done
if (( c.child.sum == 0.0 )) ; then
printf '# success.\n'
exit 0
else
printf 'sum for all signals=%f (should be "0")\n' \
c.child.sum
exit 1
fi
# notreached
-- snip --
-- snip --
[New LWP 9177]
Core was generated by
`../work/ast_ksh_20130628/build_i386_64bit_debug_patched/arch/linux.i386-64/bin/'.
Program terminated with signal 11, Segmentation fault.
#0 0x00000000004140f4 in set_trapinfo (shp=0x803200 <sh>, sig=64,
info=0x7fff6490c770)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/fault.c:102
102 jp->last->next = ip;
(gdb) where
#0 0x00000000004140f4 in set_trapinfo (shp=0x803200 <sh>, sig=64,
info=0x7fff6490c770)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/fault.c:102
#1 0x00000000004145b2 in sh_fault (sig=64, info=0x7fff6490c770,
context=0x7fff6490c640)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/fault.c:231
#2 <signal handler called>
#3 sh_fault (sig=0, info=0x0, context=0x0) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/fault.c:117
#4 <signal handler called>
#5 0x00007f8abf84d19a in __libc_waitpid (pid=-1,
stat_loc=0x7fff6490ccec, options=10) at
../sysdeps/unix/sysv/linux/waitpid.c:31
#6 0x00000000004295ff in job_reap (sig=0) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/jobs.c:464
#7 0x000000000042c48d in job_wait (pid=1) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/jobs.c:1700
#8 0x000000000042a634 in job_bwait (jobs=0x7f8ac0389318) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/jobs.c:921
#9 0x0000000000484dab in b_wait (n=1, argv=0x7f8ac0389318,
context=0x803758 <sh+1368>)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/bltins/misc.c:386
#10 0x0000000000468d7f in sh_exec (shp=0x803200 <sh>,
t=0x7f8ac03892b0, flags=512)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/xec.c:1357
#11 0x000000000046d5f0 in sh_exec (shp=0x803200 <sh>,
t=0x7f8ac0389280, flags=512)
at /home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/xec.c:2626
#12 0x000000000046cce1 in sh_exec (shp=0x803200 <sh>,
t=0x7f8ac0389240, flags=4) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/xec.c:2464
#13 0x000000000040f4e2 in exfile (shp=0x803200 <sh>,
iop=0x7f8ac034c4d0, fno=11) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/main.c:603
#14 0x000000000040e72d in sh_main (ac=2, av=0x7fff6490e258,
userinit=0x0) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/main.c:375
#15 0x000000000040d901 in main (argc=2, argv=0x7fff6490e258) at
/home/test001/work/ast_ksh_20130628/build_i386_64bit_debug_patched/src/cmd/ksh93/sh/pmain.c:45
-- snip --
Prototype patch which fixes the issue:
-- snip --
--- src/cmd/ksh93/sh/fault.c 2013-07-11 17:44:57.000000000 +0200
+++ src/cmd/ksh93/sh/fault.c 2013-07-20 05:51:00.413497251 +0200
@@ -86,7 +86,7 @@
if(info)
{
struct Siginfo *jp,*ip;
- ip = malloc(sizeof(struct Siginfo));
+ ip = calloc(1, sizeof(struct Siginfo));
ip->next = 0;
memcpy(&ip->info,info,sizeof(siginfo_t));
if(!(jp=(struct Siginfo*)shp->siginfo[sig]))
@@ -326,7 +326,7 @@
else
{
if(!shp->siginfo)
- shp->siginfo = (void**)calloc(sizeof(void*),shp->gd->sigmax);
+ shp->siginfo = (void**)calloc(sizeof(void*),shp->gd->sigmax+256);
flag |= SH_SIGFAULT;
if(sig==SIGALRM && fun!=SIG_DFL && fun!=(sh_sigfun_t)sh_fault)
signal(sig,fun);
-- snip --

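In this case |shp->gd->sigmax| is |64|, and $ ~/bin/ksh -c 'kill -l
RTMAX' # also returns |64|. The |shp->siginfo| array is allocated with
only |sigmax| (= 64) entries, i.e. valid indices 0..63, so the access
|shp->siginfo[sig]| with |sig == 64| (RTMAX) in |set_trapinfo()| reads
one element past the end of the array ...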

... in theory the following patch should fix the issue...
-- snip --
--- src/cmd/ksh93/sh/fault.c 2013-07-11 17:44:57.000000000 +0200
+++ src/cmd/ksh93/sh/fault.c 2013-07-20 06:00:32.020446072 +0200
@@ -284,7 +284,7 @@
}
tp++;
}
- shp->gd->sigmax = n++;
+ shp->gd->sigmax = ++n;
shp->st.trapcom = (char**)calloc(n,sizeof(char*));
shp->sigflag = (unsigned char*)calloc(n,sizeof(char));
shp->gd->sigmsg = (char**)calloc(n,sizeof(char*));
-- snip --

... but I am *NOT* sure whether this is the correct fix.

Erm... David... ping ... what do you think ?

----

Bye,
Roland
--
__ . . __
(o.\ \/ /.o) roland.mainz at nrubsig.org
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 3992797
(;O/ \/ \O;)
Loading...