Roland Mainz
2013-06-18 13:47:51 UTC
ast-ksh alpha 2013-06-13 source posted to
http://www.research.att.com/sw/download/alpha/
still a work in progress, but progress has been made in the
handling of signals in libast/vmalloc and ksh
more comments later this morning ...
The following testcase (from hell) triggers a valgrind hit...
-- snip --
# Stress test for signal/trap handling: numprocs background children each
# send sixteen real-time signals (RTMIN+0 .. RTMIN+15) to this shell, and the
# traps below adjust one member of a nested compound array. The script exits
# 0 when the accumulated sum ends up at (almost exactly) 0, and 1 otherwise.
set -o nounset
integer i
# Accumulator: a float member stored in a three-level compound array element.
compound c=(
compound -a child=(
[1024][2048][4096]=( float sum=0.0 )
)
)
# Read-only copies of this shell's PID and of the child count; both are handed
# to the children as cpid/cnumprocs in the loop further down.
integer -r pid=$$
integer -r numprocs=128
# Even-numbered RTMIN offsets subtract 3.3 through a nameref; odd-numbered
# offsets subtract 1.7 using the full variable path. One complete round of
# all sixteen signals therefore subtracts 8*3.3 + 8*1.7 = 40 from the sum.
trap 'nameref n=c.child[1024][2048][4096]; (( n.sum-=3.3 ))' RTMIN+0
trap '(( c.child[1024][2048][4096].sum-=1.7 ))' RTMIN+1
trap 'nameref n=c.child[1024][2048][4096]; (( n.sum-=3.3 ))' RTMIN+2
trap '(( c.child[1024][2048][4096].sum-=1.7 ))' RTMIN+3
trap 'nameref n=c.child[1024][2048][4096]; (( n.sum-=3.3 ))' RTMIN+4
trap '(( c.child[1024][2048][4096].sum-=1.7 ))' RTMIN+5
trap 'nameref n=c.child[1024][2048][4096]; (( n.sum-=3.3 ))' RTMIN+6
trap '(( c.child[1024][2048][4096].sum-=1.7 ))' RTMIN+7
trap 'nameref n=c.child[1024][2048][4096]; (( n.sum-=3.3 ))' RTMIN+8
trap '(( c.child[1024][2048][4096].sum-=1.7 ))' RTMIN+9
trap 'nameref n=c.child[1024][2048][4096]; (( n.sum-=3.3 ))' RTMIN+10
trap '(( c.child[1024][2048][4096].sum-=1.7 ))' RTMIN+11
trap 'nameref n=c.child[1024][2048][4096]; (( n.sum-=3.3 ))' RTMIN+12
trap '(( c.child[1024][2048][4096].sum-=1.7 ))' RTMIN+13
trap 'nameref n=c.child[1024][2048][4096]; (( n.sum-=3.3 ))' RTMIN+14
trap '(( c.child[1024][2048][4096].sum-=1.7 ))' RTMIN+15
# The CHLD trap adds .sh.sig.status to the sum. NOTE(review): presumably this
# is the exit status of the terminated child (each child ends with "exit 40"),
# which would cancel the 40 subtracted by that child's sixteen signals --
# confirm against the ksh93 documentation.
trap '(( c.child[1024][2048][4096].sum+=.sh.sig.status ))' CHLD
# Launch the children in the background. Each child sleeps cnumprocs/256.
# seconds, sends RTMIN+0 .. RTMIN+15 to the parent (cpid), reports any failed
# kill on stderr, and exits with status 40.
# NOTE(review): "kill -q 1" presumably queues each signal with the value 1 --
# confirm against the ksh93 kill builtin documentation.
for (( i=0 ; i < numprocs ; i++ )) ; do
VMALLOC_OPTIONS='' cpid=$pid cnumprocs=$numprocs exec ~/bin/ksh -c $'
sleep $((cnumprocs / 256.))
integer j
for (( j=0 ; j < 16 ; j++ )) ; do
kill -q 1 -s RTMIN+$j ${cpid} || print -u2 -f "fail %s\n" "RTMIN+$j"
done
exit 40' &
done
# Keep calling wait until it returns success, but give up once more than
# 60 seconds have elapsed since the loop started.
float start=$SECONDS
while ! wait ; do
/usr/bin/true
if (( (SECONDS-start) > 60 )) ; then
print '# Aborting wait loop...'
break
fi
done
# Verdict: success if the sum is within 1e-9 of zero. Either way the sum is
# printed in both decimal (%20.20f) and hexadecimal floating-point (%a) form.
if (( fabs(c.child[1024][2048][4096].sum-0.0) < 0.000000001 )) ; then
printf '# success (0.0 == %20.20f / %a).\n' \
c.child[1024][2048][4096].sum c.child[1024][2048][4096].sum
exit 0
else
printf 'sum for all signals=%20.20f (%a, should be "0")\n' \
c.child[1024][2048][4096].sum c.child[1024][2048][4096].sum
exit 1
fi
# notreached
-- snip --
... the valgrind hit itself looks like this:
-- snip --
==36381== Conditional jump or move depends on uninitialised value(s)
==36381== at 0x43B723: job_chldtrap (jobs.c:232)
==36381== by 0x427A04: sh_chktrap (fault.c:484)
==36381== by 0x481AF7: sh_exec (xec.c:2955)
==36381== by 0x47FEA5: sh_exec (xec.c:2466)
==36381== by 0x47EFA4: sh_exec (xec.c:2222)
==36381== by 0x40F3E4: exfile (main.c:599)
==36381== by 0x40E58B: sh_main (main.c:371)
==36381== by 0x40D6C0: main (pmain.c:45)
==36381== Uninitialised value was created by a heap allocation
==36381== at 0x4C29C83: _ast_malloc (vg_replace_malloc.c:1000)
==36381== by 0x43E34D: job_post (jobs.c:1421)
==36381== by 0x48236E: _sh_fork (xec.c:3158)
==36381== by 0x48284C: sh_fork (xec.c:3260)
==36381== by 0x47D1FD: sh_exec (xec.c:1695)
==36381== by 0x47FEA5: sh_exec (xec.c:2466)
==36381== by 0x47EFA4: sh_exec (xec.c:2222)
==36381== by 0x40F3E4: exfile (main.c:599)
==36381== by 0x40E58B: sh_main (main.c:371)
==36381== by 0x40D6C0: main (pmain.c:45)
-- snip --
... it seems to be an issue that |pw->p_wstat| in jobs.c line 232 has
no value assigned (and therefore the value is more or less random):
-- snip --
sorc = WIFSTOPPED(pw->p_wstat) || WIFCONTINUED(pw->p_wstat);
-- snip --
Erm... my guess is that the valgrind hit happens when the |pw->p_wstat|
field is probed before its value has been set (which only happens once
the child process quits)... right?
I worked around the issue with the following patch, but I'm not sure
whether this is the correct solution:
-- snip --
--- src/cmd/ksh93/sh/jobs.c 2013-06-14 22:55:44.000000000 +0200
+++ src/cmd/ksh93/sh/jobs.c 2013-06-18 15:31:26.009972939 +0200
@@ -1419,6 +1419,7 @@
freelist = pw->p_nxtjob;
else
pw = new_of(struct process,0);
+ pw->p_wstat = 0;
pw->p_flag = 0;
job.numpost++;
pw->p_exitval = job.exitval;
-- snip --
----
Bye,
Roland
--
__ . . __
(o.\ \/ /.o) roland.mainz at nrubsig.org
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 3992797
(;O/ \/ \O;)
__ . . __
(o.\ \/ /.o) roland.mainz at nrubsig.org
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 3992797
(;O/ \/ \O;)