lijo george
2017-05-16 12:38:06 UTC
Hi,
The "ksh -x" option behaves differently between the 2011 and 2012 versions
for the following script.
Here's a sample output for the testcase attached.
ksh93 2011 version
***********************************************************************
$ ksh --version
version sh (AT&T Research) 93u 2011-02-08
$ cat test.sh
#!/bin/ksh
echo "正常終了"
echo "正常終了正常終了"
echo "正常終了 正常終了"
echo "正常終了　正常終了"
exit 0
$ ksh -x test.sh
+ echo $'正常終了\r'
正常終了
+ echo $'正常終了正常終了\r'
正常終了正常終了
+ echo $'正常終了 正常終了\r'
正常終了 正常終了
+ echo $'正常終了　正常終了\r'
正常終了　正常終了
+ exit $'0\r'
$
ksh93 2012 version
***********************************************************************
# ksh --version
version sh (AT&T Research) 93u+ 2012-08-01
# cat test.sh
#!/bin/ksh
echo "正常終了"
echo "正常終了正常終了"
echo "正常終了 正常終了"
echo "正常終了　正常終了"
exit 0
# ksh -x test.sh
+ echo $'\u[6b63]\u[5e38]\u[7d42]\u[4e86]\r'
正常終了
+ echo $'\u[6b63]\u[5e38]\u[7d42]\u[4e86]\u[6b63]\u[5e38]\u[7d42]\u[4e86]\r'
正常終了正常終了
+ echo $'\u[6b63]\u[5e38]\u[7d42]\u[4e86] \u[6b63]\u[5e38]\u[7d42]\u[4e86]\r'
正常終了 正常終了
+ echo $'\u[6b63]\u[5e38]\u[7d42]\u[4e86]\u[3000]\u[6b63]\u[5e38]\u[7d42]\u[4e86]\r'
正常終了　正常終了
+ exit $'0\r'
#
Is this a bug or intended behavior?
I was going through the code changes which looked relevant and it looks
like it has been added for a reason. But I'm not able to figure out what
exactly the change is doing.
The following changes to the sh_fmtq function in string.c restore the
old behavior:
bash-4.1$ less trace.patch
--- INIT.2012-08-01.old/src/cmd/ksh93/sh/string.c 2017-03-20 05:24:11.355975870 -0700
+++ INIT.2012-08-01/src/cmd/ksh93/sh/string.c 2017-03-20 13:03:39.978917130 -0700
@@ -360,7 +360,7 @@
for(;c;c= mbchar(cp))
{
#if SHOPT_MULTIBYTE
- if(c=='\'' || c>=128 || c<0 || !iswprint(c))
+ if(c=='\'' || !iswprint(c))
#else
if(c=='\'' || !isprint(c))
#endif /* SHOPT_MULTIBYTE */
@@ -379,7 +379,6 @@
}
else
{
- int isbyte=0;
stakwrite("$'",2);
cp = string;
#if SHOPT_MULTIBYTE
@@ -416,26 +415,20 @@
break;
default:
#if SHOPT_MULTIBYTE
- isbyte = 0;
- if(c<0)
+ if(!iswprint(c))
{
- c = *((unsigned char *)op);
- cp = op+1;
- isbyte = 1;
- }
- if(mbwide() && ((cp-op)>1))
- {
- sfprintf(staksp,"\\u[%x]",c);
+ while(op<cp)
+ sfprintf(staksp,
+"\\%.3o",*(unsigned char*)op++);
continue;
}
- else if(!iswprint(c) || isbyte)
#else
if(!isprint(c))
-#endif
{
- sfprintf(staksp,"\\x%.2x",c);
+ sfprintf(staksp,"\\%.3o",c);
continue;
}
+#endif
state=0;
break;
}
Thanks,
Lijo
The "ksh -x" option behaves differently between the 2011 and 2012 versions
for the following script.
Here's a sample output for the testcase attached.
ksh93 2011 version
***********************************************************************
$ ksh --version
version sh (AT&T Research) 93u 2011-02-08
$ cat test.sh
#!/bin/ksh
echo "正常終了"
echo "正常終了正常終了"
echo "正常終了 正常終了"
echo "正常終了　正常終了"
exit 0
$ ksh -x test.sh
+ echo $'正常終了\r'
正常終了
+ echo $'正常終了正常終了\r'
正常終了正常終了
+ echo $'正常終了 正常終了\r'
正常終了 正常終了
+ echo $'正常終了　正常終了\r'
正常終了　正常終了
+ exit $'0\r'
$
ksh93 2012 version
***********************************************************************
# ksh --version
version sh (AT&T Research) 93u+ 2012-08-01
# cat test.sh
#!/bin/ksh
echo "正常終了"
echo "正常終了正常終了"
echo "正常終了 正常終了"
echo "正常終了　正常終了"
exit 0
# ksh -x test.sh
+ echo $'\u[6b63]\u[5e38]\u[7d42]\u[4e86]\r'
正常終了
+ echo $'\u[6b63]\u[5e38]\u[7d42]\u[4e86]\u[6b63]\u[5e38]\u[7d42]\u[4e86]\r'
正常終了正常終了
+ echo $'\u[6b63]\u[5e38]\u[7d42]\u[4e86] \u[6b63]\u[5e38]\u[7d42]\u[4e86]\r'
正常終了 正常終了
+ echo $'\u[6b63]\u[5e38]\u[7d42]\u[4e86]\u[3000]\u[6b63]\u[5e38]\u[7d42]\u[4e86]\r'
正常終了　正常終了
+ exit $'0\r'
#
Is this a bug or intended behavior?
I was going through the code changes which looked relevant and it looks
like it has been added for a reason. But I'm not able to figure out what
exactly the change is doing.
The following changes to the sh_fmtq function in string.c restore the
old behavior:
bash-4.1$ less trace.patch
--- INIT.2012-08-01.old/src/cmd/ksh93/sh/string.c 2017-03-20 05:24:11.355975870 -0700
+++ INIT.2012-08-01/src/cmd/ksh93/sh/string.c 2017-03-20 13:03:39.978917130 -0700
@@ -360,7 +360,7 @@
for(;c;c= mbchar(cp))
{
#if SHOPT_MULTIBYTE
- if(c=='\'' || c>=128 || c<0 || !iswprint(c))
+ if(c=='\'' || !iswprint(c))
#else
if(c=='\'' || !isprint(c))
#endif /* SHOPT_MULTIBYTE */
@@ -379,7 +379,6 @@
}
else
{
- int isbyte=0;
stakwrite("$'",2);
cp = string;
#if SHOPT_MULTIBYTE
@@ -416,26 +415,20 @@
break;
default:
#if SHOPT_MULTIBYTE
- isbyte = 0;
- if(c<0)
+ if(!iswprint(c))
{
- c = *((unsigned char *)op);
- cp = op+1;
- isbyte = 1;
- }
- if(mbwide() && ((cp-op)>1))
- {
- sfprintf(staksp,"\\u[%x]",c);
+ while(op<cp)
+ sfprintf(staksp,
+"\\%.3o",*(unsigned char*)op++);
continue;
}
- else if(!iswprint(c) || isbyte)
#else
if(!isprint(c))
-#endif
{
- sfprintf(staksp,"\\x%.2x",c);
+ sfprintf(staksp,"\\%.3o",c);
continue;
}
+#endif
state=0;
break;
}
Thanks,
Lijo