			     BASH PATCH REPORT
			     =================

Bash-Release:	5.2
Patch-ID:	bash52-036

Bug-Reported-by:	Grisha Levit
Bug-Reference-ID:
Bug-Reference-URL:	https://lists.gnu.org/archive/html/bug-bash/2023-04/msg00082.html

Bug-Description:

When readline is accumulating bytes until it reads a complete multibyte
character, reading a byte that makes the multibyte character invalid can
result in discarding the bytes in the partial character.

Patch (apply with `patch -p0'):

*** ../bash-5.2-patched/lib/readline/text.c	Mon May  1 09:37:52 2023
--- lib/readline/text.c	Mon May 29 12:22:29 2023
***************
*** 86,90 ****
  rl_insert_text (const char *string)
  {
!   register int i, l;
  
    l = (string && *string) ? strlen (string) : 0;
--- 86,91 ----
  rl_insert_text (const char *string)
  {
!   register int i;
!   size_t l;
  
    l = (string && *string) ? strlen (string) : 0;
***************
*** 705,709 ****
  /* Insert the character C at the current location, moving point forward.
     If C introduces a multibyte sequence, we read the whole sequence and
!    then insert the multibyte char into the line buffer. */
  int
  _rl_insert_char (int count, int c)
--- 706,714 ----
  /* Insert the character C at the current location, moving point forward.
     If C introduces a multibyte sequence, we read the whole sequence and
!    then insert the multibyte char into the line buffer.
!    If C == 0, we immediately insert any pending partial multibyte character,
!    assuming that we have read a character that doesn't map to self-insert.
!    This doesn't completely handle characters that are part of a multibyte
!    character but map to editing functions. */
  int
  _rl_insert_char (int count, int c)
***************
*** 719,727 ****
  #endif
  
    if (count <= 0)
      return 0;
  
! #if defined (HANDLE_MULTIBYTE)
!   if (MB_CUR_MAX == 1 || rl_byte_oriented)
      {
        incoming[0] = c;
--- 724,749 ----
  #endif
  
+ #if !defined (HANDLE_MULTIBYTE)
    if (count <= 0)
      return 0;
+ #else
+   if (count < 0)
+     return 0;
+   if (count == 0)
+     {
+       if (pending_bytes_length == 0)
+ 	return 0;
+       if (stored_count <= 0)
+ 	stored_count = count;
+       else
+ 	count = stored_count;
  
!       memcpy (incoming, pending_bytes, pending_bytes_length);
!       incoming[pending_bytes_length] = '\0';
!       incoming_length = pending_bytes_length;
!       pending_bytes_length = 0;
!       memset (&ps, 0, sizeof (mbstate_t));
!     }
!   else if (MB_CUR_MAX == 1 || rl_byte_oriented)
      {
        incoming[0] = c;
***************
*** 731,734 ****
--- 753,759 ----
    else if (_rl_utf8locale && (c & 0x80) == 0)
      {
+       if (pending_bytes_length)
+ 	_rl_insert_char (0, 0);
+ 
        incoming[0] = c;
        incoming[1] = '\0';
***************
*** 765,769 ****
  	  incoming_length = 1;
  	  pending_bytes_length--;
! 	  memmove (pending_bytes, pending_bytes + 1, pending_bytes_length);
  	  /* Clear the state of the byte sequence, because in this case the
  	     effect of mbstate is undefined. */
--- 790,795 ----
  	  incoming_length = 1;
  	  pending_bytes_length--;
! 	  if (pending_bytes_length)
! 	    memmove (pending_bytes, pending_bytes + 1, pending_bytes_length);
  	  /* Clear the state of the byte sequence, because in this case the
  	     effect of mbstate is undefined. */
***************
*** 828,832 ****
--- 854,862 ----
        xfree (string);
  
+ #if defined (HANDLE_MULTIBYTE)
+       return (pending_bytes_length != 0);
+ #else
        return 0;
+ #endif
      }
  
***************
*** 861,864 ****
--- 891,896 ----
        incoming_length = 0;
        stored_count = 0;
+ 
+       return (pending_bytes_length != 0);
  #else /* !HANDLE_MULTIBYTE */
        char str[TEXT_COUNT_MAX+1];
***************
*** 874,880 ****
  	  count -= decreaser;
  	}
- #endif /* !HANDLE_MULTIBYTE */
  
        return 0;
      }
  
--- 906,912 ----
  	  count -= decreaser;
  	}
  
        return 0;
+ #endif /* !HANDLE_MULTIBYTE */
      }
  
***************
*** 904,910 ****
        stored_count = 0;
      }
! #endif
! 
    return 0;
  }
  
--- 936,944 ----
        stored_count = 0;
      }
! 
!   return (pending_bytes_length != 0);
! #else
    return 0;
+ #endif
  }
  
***************
*** 984,987 ****
--- 1018,1026 ----
      }
  
+   /* If we didn't insert n and there are pending bytes, we need to insert
+      them if _rl_insert_char didn't do that on its own. */
+   if (r == 1 && rl_insert_mode == RL_IM_INSERT)
+     r = _rl_insert_char (0, 0);		/* flush partial multibyte char */
+ 
    if (n != (unsigned short)-2)		/* -2 = sentinel value for having inserted N */
      {
***************
*** 1055,1058 ****
--- 1094,1099 ----
  rl_quoted_insert (int count, int key)
  {
+   int r;
+ 
    /* Let's see...should the callback interface futz with signal handling? */
  #if defined (HANDLE_SIGNALS)
***************
*** 1073,1085 ****
    if (count < 0)
      {
-       int r;
- 
        do
  	r = _rl_insert_next (1);
        while (r == 0 && ++count < 0);
-       return r;
      }
  
!   return _rl_insert_next (count);
  }
  
--- 1114,1128 ----
    if (count < 0)
      {
        do
  	r = _rl_insert_next (1);
        while (r == 0 && ++count < 0);
      }
+   else
+     r = _rl_insert_next (count);
  
!   if (r == 1)
!     _rl_insert_char (0, 0);	/* insert partial multibyte character */
! 
!   return r;
  }
  
*** ../bash-5.2/patchlevel.h	2020-06-22 14:51:03.000000000 -0400
--- patchlevel.h	2020-10-01 11:01:28.000000000 -0400
***************
*** 26,30 ****
     looks for to find the patch level (for the sccs version string). */
  
! #define PATCHLEVEL 35
  
  #endif /* _PATCHLEVEL_H_ */
--- 26,30 ----
     looks for to find the patch level (for the sccs version string). */
  
! #define PATCHLEVEL 36
  
  #endif /* _PATCHLEVEL_H_ */