To: vim-dev@vim.org
Subject: Patch 6.0.244
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
MIME-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 8bit
------------

Patch 6.0.244
Problem:    Multi-byte: Problems with (illegal) UTF-8 characters in menus and
	    file names (e.g., icon text, status line).
Solution:   Correctly handle unprintable characters.  Catch illegal UTF-8
	    characters and replace them with <xx>.  Truncating the status line
	    wasn't done correctly at a multi-byte character. (Yasuhiro
	    Matsumoto)
	    Added correct_cmdspos() and transchar_byte().
Files:	    src/buffer.c, src/charset.c, src/ex_getln.c, src/gui.c,
	    src/message.c, src/proto/charset.pro, src/screen.c, src/vim.h


*** ../vim60.243/src/buffer.c	Tue Feb 12 10:13:15 2002
--- src/buffer.c	Thu Feb 21 10:50:09 2002
***************
*** 2504,2512 ****
  	    name = curbuf->b_ffname;
  	home_replace(shorthelp ? curbuf : NULL, name, p,
  					  (int)(IOSIZE - (p - buffer)), TRUE);
- 	/* the file name may contain unprintable characters, esp. when using
- 	 * multi-byte chars */
- 	trans_characters(buffer, IOSIZE);
      }
  
      sprintf((char *)buffer + STRLEN(buffer),
--- 2504,2509 ----
***************
*** 2735,2745 ****
  	    else		    /* use file name only in icon */
  		i_name = gettail(curbuf->b_ffname);
  	    *i_str = NUL;
! 	    /* Truncate name at 100 chars. */
! 	    if (STRLEN(i_name) > 100)
! 		i_name += STRLEN(i_name) - 100;
! 	    while (*i_name)
! 		STRCAT(i_str, transchar(*i_name++));
  	}
      }
  
--- 2732,2750 ----
  	    else		    /* use file name only in icon */
  		i_name = gettail(curbuf->b_ffname);
  	    *i_str = NUL;
! 	    /* Truncate name at 100 bytes. */
! 	    len = STRLEN(i_name);
! 	    if (len > 100)
! 	    {
! 		len -= 100;
! #ifdef FEAT_MBYTE
! 		if (has_mbyte)
! 		    len += (*mb_tail_off)(i_name, i_name + len) + 1;
! #endif
! 		i_name += len;
! 	    }
! 	    STRCPY(i_str, i_name);
! 	    trans_characters(i_str, IOSIZE);
  	}
      }
  
*** ../vim60.243/src/charset.c	Mon Feb 11 14:09:33 2002
--- src/charset.c	Thu Feb 21 12:15:54 2002
***************
*** 276,282 ****
  
  /*
   * Translate any special characters in buf[bufsize] in-place.
!  * If there is not enough room, not all characters will be translated.
   */
      void
  trans_characters(buf, bufsize)
--- 276,283 ----
  
  /*
   * Translate any special characters in buf[bufsize] in-place.
!  * The result is a string with only printable characters, but if there is not
!  * enough room, not all characters will be translated.
   */
      void
  trans_characters(buf, bufsize)
***************
*** 293,316 ****
      while (*buf != 0)
      {
  #ifdef FEAT_MBYTE
- 	char    bstr[7];
- 
  	/* Assume a multi-byte character doesn't need translation. */
  	if (has_mbyte && (trs_len = (*mb_ptr2len_check)(buf)) > 1)
  	    len -= trs_len;
  	else
  #endif
  	{
! #ifdef FEAT_MBYTE
! 	    /* catch illegal UTF-8 byte */
! 	    if (enc_utf8 && *buf >= 0x80)
! 	    {
! 		transchar_nonprint(bstr, *buf);
! 		trs = bstr;
! 	    }
! 	    else
! #endif
! 		trs = transchar(*buf);
  	    trs_len = (int)STRLEN(trs);
  	    if (trs_len > 1)
  	    {
--- 294,306 ----
      while (*buf != 0)
      {
  #ifdef FEAT_MBYTE
  	/* Assume a multi-byte character doesn't need translation. */
  	if (has_mbyte && (trs_len = (*mb_ptr2len_check)(buf)) > 1)
  	    len -= trs_len;
  	else
  #endif
  	{
! 	    trs = transchar_byte(*buf);
  	    trs_len = (int)STRLEN(trs);
  	    if (trs_len > 1)
  	    {
***************
*** 361,367 ****
  		if (l > 0)
  		    len += l;
  		else
! 		    ++len;	/* illegal byte sequence */
  	    }
  	}
  	res = alloc((unsigned)(len + 1));
--- 351,357 ----
  		if (l > 0)
  		    len += l;
  		else
! 		    len += 4;	/* illegal byte sequence */
  	    }
  	}
  	res = alloc((unsigned)(len + 1));
***************
*** 378,389 ****
  #ifdef FEAT_MBYTE
  	    if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
  	    {
! 		STRNCAT(res, p, l);
  		p += l;
  	    }
  	    else
  #endif
! 		STRCAT(res, transchar(*p++));
  	}
      }
      return res;
--- 368,379 ----
  #ifdef FEAT_MBYTE
  	    if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
  	    {
! 		STRNCAT(res, p, l);	/* append printable multi-byte char */
  		p += l;
  	    }
  	    else
  #endif
! 		STRCAT(res, transchar_byte(*p++));
  	}
      }
      return res;
***************
*** 430,448 ****
   * initialized, and initializing options may cause transchar() to be called!
   * When chartab_initialized == FALSE don't use chartab[].
   * Does NOT work for multi-byte characters, c must be <= 255.
   */
      char_u *
  transchar(c)
      int		c;
  {
-     static char_u	buf[7];
      int			i;
  
      i = 0;
      if (IS_SPECIAL(c))	    /* special key code, display as ~@ char */
      {
! 	buf[0] = '~';
! 	buf[1] = '@';
  	i = 2;
  	c = K_SECOND(c);
      }
--- 420,441 ----
   * initialized, and initializing options may cause transchar() to be called!
   * When chartab_initialized == FALSE don't use chartab[].
   * Does NOT work for multi-byte characters, c must be <= 255.
+  * Also doesn't work for the first byte of a multi-byte character, "c" must be
+  * a character!
   */
+ static char_u	transchar_buf[7];
+ 
      char_u *
  transchar(c)
      int		c;
  {
      int			i;
  
      i = 0;
      if (IS_SPECIAL(c))	    /* special key code, display as ~@ char */
      {
! 	transchar_buf[0] = '~';
! 	transchar_buf[1] = '@';
  	i = 2;
  	c = K_SECOND(c);
      }
***************
*** 459,471 ****
  		)) || (c < 256 && vim_isprintc_strict(c)))
      {
  	/* printable character */
! 	buf[i] = c;
! 	buf[i + 1] = NUL;
      }
      else
! 	transchar_nonprint(buf + i, c);
!     return buf;
  }
  
  /*
   * Convert non-printable character to two or more printable characters in
--- 452,482 ----
  		)) || (c < 256 && vim_isprintc_strict(c)))
      {
  	/* printable character */
! 	transchar_buf[i] = c;
! 	transchar_buf[i + 1] = NUL;
      }
      else
! 	transchar_nonprint(transchar_buf + i, c);
!     return transchar_buf;
  }
+ 
+ #if defined(FEAT_MBYTE) || defined(PROTO)
+ /*
+  * Like transchar(), but called with a byte instead of a character.  An illegal
+  * UTF-8 byte (enc_utf8 set, c >= 0x80) goes through transchar_nonprint().
+  */
+     char_u *
+ transchar_byte(c)
+     int		c;
+ {
+     if (enc_utf8 && c >= 0x80)
+     {
+ 	transchar_nonprint(transchar_buf, c);
+ 	return transchar_buf;
+     }
+     return transchar(c);
+ }
+ #endif
  
  /*
   * Convert non-printable character to two or more printable characters in
*** ../vim60.243/src/ex_getln.c	Mon Feb  4 22:30:34 2002
--- src/ex_getln.c	Thu Feb 21 11:30:52 2002
***************
*** 72,77 ****
--- 72,80 ----
  static int	cmdline_charsize __ARGS((int idx));
  static void	set_cmdspos __ARGS((void));
  static void	set_cmdspos_cursor __ARGS((void));
+ #ifdef FEAT_MBYTE
+ static void	correct_cmdspos __ARGS((int idx, int cells));
+ #endif
  static void	alloc_cmdbuff __ARGS((int len));
  static int	realloc_cmdbuff __ARGS((int len));
  #ifdef FEAT_WILDMENU
***************
*** 1078,1086 ****
  		    if (has_mbyte)
  		    {
  			/* Count ">" for double-wide char that doesn't fit. */
! 			if ((*mb_ptr2cells)(ccline.cmdbuff + ccline.cmdpos) > 1
! 				    && ccline.cmdspos % Columns + i > Columns)
! 			    ccline.cmdspos++;
  			ccline.cmdpos += (*mb_ptr2len_check)(ccline.cmdbuff
  							 + ccline.cmdpos) - 1;
  		    }
--- 1081,1087 ----
  		    if (has_mbyte)
  		    {
  			/* Count ">" for double-wide char that doesn't fit. */
! 			correct_cmdspos(ccline.cmdpos, i);
  			ccline.cmdpos += (*mb_ptr2len_check)(ccline.cmdbuff
  							 + ccline.cmdpos) - 1;
  		    }
***************
*** 1580,1590 ****
      {
  	c = cmdline_charsize(i);
  #ifdef FEAT_MBYTE
! 	/* Count ">" for double-wide char that doesn't fit. */
! 	if (has_mbyte
! 		&& (*mb_ptr2cells)(ccline.cmdbuff + i) > 1
! 		&& ccline.cmdspos % Columns + c > Columns)
! 	    ccline.cmdspos++;
  #endif
  	/* If the cmdline doesn't fit, put cursor on last visible char. */
  	if ((ccline.cmdspos += c) >= m)
--- 1581,1589 ----
      {
  	c = cmdline_charsize(i);
  #ifdef FEAT_MBYTE
! 	/* Count ">" for double-wide multi-byte char that doesn't fit. */
! 	if (has_mbyte)
! 	    correct_cmdspos(i, c);
  #endif
  	/* If the cmdline doesn't fit, put cursor on last visible char. */
  	if ((ccline.cmdspos += c) >= m)
***************
*** 1600,1605 ****
--- 1599,1621 ----
      }
  }
  
+ #ifdef FEAT_MBYTE
+ /*
+  * If the character at "idx", which is "cells" wide, is a multi-byte character
+  * that doesn't fit, increment ccline.cmdspos for the ">" that is displayed.
+  */
+     static void
+ correct_cmdspos(idx, cells)
+     int		idx;
+     int		cells;
+ {
+     if ((*mb_ptr2len_check)(ccline.cmdbuff + idx) > 1
+ 		&& (*mb_ptr2cells)(ccline.cmdbuff + idx) > 1
+ 		&& ccline.cmdspos % Columns + cells > Columns)
+ 	ccline.cmdspos++;
+ }
+ #endif
+ 
  /*
   * Get an Ex command line for the ":" command.
   */
***************
*** 2058,2067 ****
  		c = cmdline_charsize(ccline.cmdpos);
  #ifdef FEAT_MBYTE
  		/* count ">" for a double-wide char that doesn't fit. */
! 		if (has_mbyte
! 			&& (*mb_ptr2cells)(ccline.cmdbuff + ccline.cmdpos) > 1
! 			&& ccline.cmdspos % Columns + c > Columns)
! 		    ccline.cmdspos++;
  #endif
  		/* Stop cursor at the end of the screen */
  		if (ccline.cmdspos + c >= m)
--- 2074,2081 ----
  		c = cmdline_charsize(ccline.cmdpos);
  #ifdef FEAT_MBYTE
  		/* count ">" for a double-wide char that doesn't fit. */
! 		if (has_mbyte)
! 		    correct_cmdspos(ccline.cmdpos, c);
  #endif
  		/* Stop cursor at the end of the screen */
  		if (ccline.cmdspos + c >= m)
*** ../vim60.243/src/gui.c	Thu Feb 21 12:32:53 2002
--- src/gui.c	Thu Feb 21 12:11:52 2002
***************
*** 1398,1404 ****
  	    }
  	    else
  	    {
! 		str = transchar(s[i]);
  		if (str[0] && str[1])
  		    printf("<%s>", (char *)str);
  		else
--- 1398,1404 ----
  	    }
  	    else
  	    {
! 		str = transchar_byte(s[i]);
  		if (str[0] && str[1])
  		    printf("<%s>", (char *)str);
  		else
*** ../vim60.243/src/message.c	Thu Feb 21 12:58:20 2002
--- src/message.c	Thu Feb 21 12:52:20 2002
***************
*** 1060,1066 ****
  	return p + l;
      }
  #endif
!     msg_puts_attr(transchar(*p), attr);
      return p + 1;
  }
  
--- 1060,1066 ----
  	return p + l;
      }
  #endif
!     msg_puts_attr(transchar_byte(*p), attr);
      return p + 1;
  }
  
***************
*** 1110,1116 ****
  	else
  #endif
  	{
! 	    s = transchar(*str);
  	    if (attr == 0 && s[1] != NUL)
  		msg_puts_attr(s, hl_attr(HLF_8));	/* unprintable char */
  	    else
--- 1110,1116 ----
  	else
  #endif
  	{
! 	    s = transchar_byte(*str);
  	    if (attr == 0 && s[1] != NUL)
  		msg_puts_attr(s, hl_attr(HLF_8));	/* unprintable char */
  	    else
***************
*** 1357,1363 ****
  	    else if (c != NUL && (n = byte2cells(c)) > 1)
  	    {
  		n_extra = n - 1;
! 		p_extra = transchar(c);
  		c_extra = NUL;
  		c = *p_extra++;
  	    }
--- 1357,1363 ----
  	    else if (c != NUL && (n = byte2cells(c)) > 1)
  	    {
  		n_extra = n - 1;
! 		p_extra = transchar_byte(c);
  		c_extra = NUL;
  		c = *p_extra++;
  	    }
*** ../vim60.243/src/proto/charset.pro	Tue Sep 25 21:49:10 2001
--- src/proto/charset.pro	Thu Feb 21 12:15:35 2002
***************
*** 5,10 ****
--- 5,11 ----
  char_u *transstr __ARGS((char_u *s));
  void str_foldcase __ARGS((char_u *p));
  char_u *transchar __ARGS((int c));
+ char_u *transchar_byte __ARGS((int c));
  void transchar_nonprint __ARGS((char_u *buf, int c));
  void transchar_hex __ARGS((char_u *buf, int c));
  int byte2cells __ARGS((int b));
*** ../vim60.243/src/screen.c	Tue Feb  5 22:26:51 2002
--- src/screen.c	Thu Feb 21 12:10:20 2002
***************
*** 4559,4565 ****
  	    else
  #endif
  	    {
! 		STRCPY(buf + len, transchar(*s));
  		len += (int)STRLEN(buf + len);
  	    }
  	}
--- 4564,4570 ----
  	    else
  #endif
  	    {
! 		STRCPY(buf + len, transchar_byte(*s));
  		len += (int)STRLEN(buf + len);
  	    }
  	}
***************
*** 4739,4753 ****
  		    clen += (*mb_ptr2cells)(p + i);
  		/* Find first character that will fit.
  		 * Going from start to end is much faster for DBCS. */
! 		for (i = 0; p[i] != NUL && clen > this_ru_col - 1;
  					      i += (*mb_ptr2len_check)(p + i))
  		    clen -= (*mb_ptr2cells)(p + i);
  		if (i > 0)
  		{
  		    p = p + i - 1;
  		    *p = '<';
  		}
- 		len = clen;
  
  	    }
  	    else
--- 4744,4759 ----
  		    clen += (*mb_ptr2cells)(p + i);
  		/* Find first character that will fit.
  		 * Going from start to end is much faster for DBCS. */
! 		for (i = 0; p[i] != NUL && clen >= this_ru_col - 1;
  					      i += (*mb_ptr2len_check)(p + i))
  		    clen -= (*mb_ptr2cells)(p + i);
+ 		len = clen;
  		if (i > 0)
  		{
  		    p = p + i - 1;
  		    *p = '<';
+ 		    ++len;
  		}
  
  	    }
  	    else
*** ../vim60.243/src/vim.h	Sun Feb 17 23:22:34 2002
--- src/vim.h	Thu Feb 21 12:15:37 2002
***************
*** 1198,1203 ****
--- 1198,1204 ----
  #else
  # define MB_STRICMP(d, s)	STRICMP((d), (s))
  # define MB_STRNICMP(d, s, n)	STRNICMP((d), (s), (n))
+ # define transchar_byte(c)	transchar(c)
  #endif
  
  #define STRCAT(d, s)	    strcat((char *)(d), (char *)(s))
*** ../vim60.243/src/version.c	Thu Feb 21 12:58:20 2002
--- src/version.c	Thu Feb 21 13:51:56 2002
***************
*** 608,609 ****
--- 608,611 ----
  {   /* Add new patch number below this line */
+ /**/
+     244,
  /**/

-- 
"You're fired." (1980)
"You're laid off." (1985)
"You're downsized." (1990)
"You're rightsized." (1992)
				(Scott Adams - The Dilbert principle)

 ///  Bram Moolenaar -- Bram@moolenaar.net -- http://www.moolenaar.net  \\\
///   Creator of Vim -- http://vim.sf.net -- ftp://ftp.vim.org/pub/vim   \\\
\\\           Project leader for A-A-P -- http://www.a-a-p.org           ///
 \\\  Help me helping AIDS orphans in Uganda - http://iccf-holland.org  ///