make the rest of the justify code support multibyte characters

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2371 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
master
David Lawrence Ramsey 2005-03-15 05:44:03 +00:00
parent 666644efbc
commit 38156d4491
4 changed files with 120 additions and 38 deletions

View File

@ -175,12 +175,12 @@ CVS code -
paragraph-searching utility functions when possible instead of paragraph-searching utility functions when possible instead of
duplicating code. Also overhaul the justify code to make it duplicating code. Also overhaul the justify code to make it
leave the right number of spaces at the ends of the lines of a leave the right number of spaces at the ends of the lines of a
paragraph, to make it (partially) support multibyte paragraph, to make it support multibyte characters, and to
characters, and to make it simpler. Also, don't remove a make it simpler. Also, don't remove a space after a duplicate
space after a duplicate character in punct anymore, as it character in punct anymore, as it doesn't really make us more
doesn't really make us more compatible with Pico. New compatible with Pico. New functions mbstrchr(),
functions do_para_begin_void() and do_para_end_void(); changes do_para_begin_void(), and do_para_end_void(); changes to
to justify_format(), do_para_begin(), inpar(), do_para_end(), justify_format(), do_para_begin(), inpar(), do_para_end(),
break_line(), do_para_search() (renamed find_paragraph()), and break_line(), do_para_search() (renamed find_paragraph()), and
do_justify(); removal of breakable(). (DLR) do_justify(); removal of breakable(). (DLR)
- Still more steps toward full wide/multibyte character support. - Still more steps toward full wide/multibyte character support.

View File

@ -811,3 +811,48 @@ size_t mbstrnlen(const char *s, size_t maxlen)
nstrnlen(s, maxlen); nstrnlen(s, maxlen);
#endif #endif
} }
#ifndef DISABLE_JUSTIFY
/* This function is equivalent to strchr() for multibyte strings.  It
 * returns a pointer to the first occurrence in s of the character that
 * c points to, or NULL if that character doesn't occur in s.  Only the
 * first (possibly multibyte) character of c is looked at. */
char *mbstrchr(const char *s, char *c)
{
    assert(s != NULL && c != NULL);

#ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
	/* Scratch buffer handed to parse_mbchar() for the character
	 * currently under s. */
	char *s_mb = charalloc(MB_CUR_MAX);
	wchar_t ws, wc;
	int s_mb_len, c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);

	/* If c doesn't convert to a wide character, reset the
	 * conversion state and fall back to comparing its first
	 * byte. */
	if (c_mb_len <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wc = (unsigned char)*c;
	}

	while (*s != '\0') {
	    s_mb_len = parse_mbchar(s, s_mb, NULL, NULL);

	    /* Same fallback as above for an unconvertible character
	     * in s. */
	    if (mbtowc(&ws, s_mb, s_mb_len) <= 0) {
		mbtowc(NULL, NULL, 0);
		ws = (unsigned char)*s;
	    }

	    if (ws == wc)
		break;

	    s += s_mb_len;
	}

	free(s_mb);

	/* We only break out of the loop on a match, so reaching the
	 * terminator means that there was none.  Test the position
	 * rather than ws, since ws is never assigned when s is
	 * empty. */
	if (*s == '\0')
	    return NULL;

	return (char *)s;
    } else
#endif
	return strchr(s, *c);
}
#endif

View File

@ -2382,68 +2382,95 @@ void justify_format(filestruct *paragraph, size_t skip)
new_end = new_paragraph_data + skip; new_end = new_paragraph_data + skip;
while (*end != '\0') { while (*end != '\0') {
int end_len;
/* If this character is blank, make sure that it's a space with /* If this character is blank, make sure that it's a space with
* no blanks after it. */ * no blanks after it. */
if (is_blank_char(*end)) { if (is_blank_mbchar(end)) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
*new_end = ' '; *new_end = ' ';
new_end++; new_end++;
end++; end += end_len;
while (*end != '\0' && is_blank_char(*end)) { while (*end != '\0' && is_blank_mbchar(end)) {
end++; end_len = parse_mbchar(end, NULL, NULL, NULL);
shift++;
end += end_len;
shift += end_len;
#ifndef NANO_SMALL #ifndef NANO_SMALL
/* Keep track of the change in the current line. */ /* Keep track of the change in the current line. */
if (mark_beginbuf == paragraph && if (mark_beginbuf == paragraph &&
mark_beginx >= end - paragraph->data) mark_beginx >= end - paragraph->data)
mark_shift++; mark_shift += end_len;
#endif #endif
} }
/* If this character is punctuation optionally followed by a /* If this character is punctuation optionally followed by a
* bracket and then followed by blanks, make sure there are no * bracket and then followed by blanks, make sure there are no
* more than two blanks after it, and make sure that the blanks * more than two blanks after it, and make sure that the blanks
* are spaces. */ * are spaces. */
} else if (strchr(punct, *end) != NULL) { } else if (mbstrchr(punct, end) != NULL) {
*new_end = *end; end_len = parse_mbchar(end, NULL, NULL, NULL);
new_end++;
end++;
if (*end != '\0' && strchr(brackets, *end) != NULL) { while (end_len > 0) {
*new_end = *end; *new_end = *end;
new_end++; new_end++;
end++; end++;
end_len--;
} }
if (*end != '\0' && is_blank_char(*end)) { if (*end != '\0' && mbstrchr(brackets, end) != NULL) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
while (end_len > 0) {
*new_end = *end;
new_end++;
end++;
end_len--;
}
}
if (*end != '\0' && is_blank_mbchar(end)) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
*new_end = ' '; *new_end = ' ';
new_end++; new_end++;
end++; end += end_len;
} }
if (*end != '\0' && is_blank_char(*end)) { if (*end != '\0' && is_blank_mbchar(end)) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
*new_end = ' '; *new_end = ' ';
new_end++; new_end++;
end++; end += end_len;
} }
while (*end != '\0' && is_blank_char(*end)) { while (*end != '\0' && is_blank_mbchar(end)) {
end++; end_len = parse_mbchar(end, NULL, NULL, NULL);
shift++;
end += end_len;
shift += end_len;
#ifndef NANO_SMALL #ifndef NANO_SMALL
/* Keep track of the change in the current line. */ /* Keep track of the change in the current line. */
if (mark_beginbuf == paragraph && if (mark_beginbuf == paragraph &&
mark_beginx >= end - paragraph->data) mark_beginx >= end - paragraph->data)
mark_shift++; mark_shift += end_len;
#endif #endif
} }
/* If this character is neither blank nor punctuation, leave it /* If this character is neither blank nor punctuation, leave it
* alone. */ * alone. */
} else { } else {
*new_end = *end; end_len = parse_mbchar(end, NULL, NULL, NULL);
new_end++;
end++; while (end_len > 0) {
*new_end = *end;
new_end++;
end++;
end_len--;
}
} }
} }
@ -2743,11 +2770,11 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
* found with short enough display width. */ * found with short enough display width. */
ssize_t cur_loc = 0; ssize_t cur_loc = 0;
/* Current index in line. */ /* Current index in line. */
int line_len;
assert(line != NULL); assert(line != NULL);
while (*line != '\0' && goal >= 0) { while (*line != '\0' && goal >= 0) {
int line_len;
size_t pos = 0; size_t pos = 0;
line_len = parse_mbchar(line, NULL, NULL, &pos); line_len = parse_mbchar(line, NULL, NULL, &pos);
@ -2770,7 +2797,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
bool found_blank = FALSE; bool found_blank = FALSE;
while (*line != '\0') { while (*line != '\0') {
int line_len = parse_mbchar(line, NULL, NULL, NULL); line_len = parse_mbchar(line, NULL, NULL, NULL);
if (is_blank_mbchar(line)) { if (is_blank_mbchar(line)) {
if (!found_blank) if (!found_blank)
@ -2786,11 +2813,18 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
} }
} }
/* Perhaps the character after blank_loc is a blank. But because /* Move to the last blank after blank_loc, if there is one. */
* of justify_format(), there can be only two adjacent. */ line -= cur_loc;
if (*(line - cur_loc + blank_loc + 1) == ' ' || line += blank_loc;
*(line - cur_loc + blank_loc + 1) == '\0') line_len = parse_mbchar(line, NULL, NULL, NULL);
blank_loc++; line += line_len;
while (*line != '\0' && is_blank_mbchar(line)) {
line_len = parse_mbchar(line, NULL, NULL, NULL);
line += line_len;
blank_loc += line_len;
}
return blank_loc; return blank_loc;
} }

View File

@ -207,6 +207,9 @@ size_t mbstrlen(const char *s);
size_t nstrnlen(const char *s, size_t maxlen); size_t nstrnlen(const char *s, size_t maxlen);
#endif #endif
size_t mbstrnlen(const char *s, size_t maxlen); size_t mbstrnlen(const char *s, size_t maxlen);
#ifndef DISABLE_JUSTIFY
/* Multibyte-aware counterpart of strchr(); only declared when
 * DISABLE_JUSTIFY is not defined. */
char *mbstrchr(const char *s, char *c);
#endif
/* Public functions in color.c. */ /* Public functions in color.c. */
#ifdef ENABLE_COLOR #ifdef ENABLE_COLOR