make the rest of the justify code support multibyte characters

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2371 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
master
David Lawrence Ramsey 2005-03-15 05:44:03 +00:00
parent 666644efbc
commit 38156d4491
4 changed files with 120 additions and 38 deletions

View File

@ -175,12 +175,12 @@ CVS code -
paragraph-searching utility functions when possible instead of
duplicating code. Also overhaul the justify code to make it
leave the right number of spaces at the ends of the lines of a
paragraph, to make it (partially) support multibyte
characters, and to make it simpler. Also, don't remove a
space after a duplicate character in punct anymore, as it
doesn't really make us more compatible with Pico. New
functions do_para_begin_void() and do_para_end_void(); changes
to justify_format(), do_para_begin(), inpar(), do_para_end(),
paragraph, to make it support multibyte characters, and to
make it simpler. Also, don't remove a space after a duplicate
character in punct anymore, as it doesn't really make us more
compatible with Pico. New functions mbstrchr(),
do_para_begin_void(), and do_para_end_void(); changes to
justify_format(), do_para_begin(), inpar(), do_para_end(),
break_line(), do_para_search() (renamed find_paragraph()), and
do_justify(); removal of breakable(). (DLR)
- Still more steps toward full wide/multibyte character support.

View File

@ -811,3 +811,48 @@ size_t mbstrnlen(const char *s, size_t maxlen)
nstrnlen(s, maxlen);
#endif
}
#ifndef DISABLE_JUSTIFY
/* This function is equivalent to strchr() for multibyte strings. */
char *mbstrchr(const char *s, char *c)
{
assert(s != NULL && c != NULL);
#ifdef NANO_WIDE
if (!ISSET(NO_UTF8)) {
char *s_mb = charalloc(MB_CUR_MAX);
const char *q = s;
wchar_t ws, wc;
int s_mb_len, c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);
if (c_mb_len <= 0) {
mbtowc(NULL, NULL, 0);
wc = (unsigned char)*c;
}
while (*s != '\0') {
s_mb_len = parse_mbchar(s, s_mb, NULL, NULL);
if (mbtowc(&ws, s_mb, s_mb_len) <= 0) {
mbtowc(NULL, NULL, 0);
ws = (unsigned char)*s;
}
if (ws == wc)
break;
s += s_mb_len;
q += s_mb_len;
}
free(s_mb);
if (ws != wc)
q = NULL;
return (char *)q;
} else
#endif
return strchr(s, *c);
}
#endif

View File

@ -2382,68 +2382,95 @@ void justify_format(filestruct *paragraph, size_t skip)
new_end = new_paragraph_data + skip;
while (*end != '\0') {
int end_len;
/* If this character is blank, make sure that it's a space with
* no blanks after it. */
if (is_blank_char(*end)) {
if (is_blank_mbchar(end)) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
*new_end = ' ';
new_end++;
end++;
end += end_len;
while (*end != '\0' && is_blank_char(*end)) {
end++;
shift++;
while (*end != '\0' && is_blank_mbchar(end)) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
end += end_len;
shift += end_len;
#ifndef NANO_SMALL
/* Keep track of the change in the current line. */
if (mark_beginbuf == paragraph &&
mark_beginx >= end - paragraph->data)
mark_shift++;
mark_shift += end_len;
#endif
}
/* If this character is punctuation optionally followed by a
* bracket and then followed by blanks, make sure there are no
* more than two blanks after it, and make sure that the blanks
* are spaces. */
} else if (strchr(punct, *end) != NULL) {
*new_end = *end;
new_end++;
end++;
} else if (mbstrchr(punct, end) != NULL) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
if (*end != '\0' && strchr(brackets, *end) != NULL) {
while (end_len > 0) {
*new_end = *end;
new_end++;
end++;
end_len--;
}
if (*end != '\0' && is_blank_char(*end)) {
if (*end != '\0' && mbstrchr(brackets, end) != NULL) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
while (end_len > 0) {
*new_end = *end;
new_end++;
end++;
end_len--;
}
}
if (*end != '\0' && is_blank_mbchar(end)) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
*new_end = ' ';
new_end++;
end++;
end += end_len;
}
if (*end != '\0' && is_blank_char(*end)) {
if (*end != '\0' && is_blank_mbchar(end)) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
*new_end = ' ';
new_end++;
end++;
end += end_len;
}
while (*end != '\0' && is_blank_char(*end)) {
end++;
shift++;
while (*end != '\0' && is_blank_mbchar(end)) {
end_len = parse_mbchar(end, NULL, NULL, NULL);
end += end_len;
shift += end_len;
#ifndef NANO_SMALL
/* Keep track of the change in the current line. */
if (mark_beginbuf == paragraph &&
mark_beginx >= end - paragraph->data)
mark_shift++;
/* Keep track of the change in the current line. */
if (mark_beginbuf == paragraph &&
mark_beginx >= end - paragraph->data)
mark_shift += end_len;
#endif
}
/* If this character is neither blank nor punctuation, leave it
* alone. */
} else {
*new_end = *end;
new_end++;
end++;
end_len = parse_mbchar(end, NULL, NULL, NULL);
while (end_len > 0) {
*new_end = *end;
new_end++;
end++;
end_len--;
}
}
}
@ -2743,11 +2770,11 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
* found with short enough display width. */
ssize_t cur_loc = 0;
/* Current index in line. */
int line_len;
assert(line != NULL);
while (*line != '\0' && goal >= 0) {
int line_len;
size_t pos = 0;
line_len = parse_mbchar(line, NULL, NULL, &pos);
@ -2770,7 +2797,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
bool found_blank = FALSE;
while (*line != '\0') {
int line_len = parse_mbchar(line, NULL, NULL, NULL);
line_len = parse_mbchar(line, NULL, NULL, NULL);
if (is_blank_mbchar(line)) {
if (!found_blank)
@ -2786,11 +2813,18 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
}
}
/* Perhaps the character after blank_loc is a blank. But because
* of justify_format(), there can be only two adjacent. */
if (*(line - cur_loc + blank_loc + 1) == ' ' ||
*(line - cur_loc + blank_loc + 1) == '\0')
blank_loc++;
/* Move to the last blank after blank_loc, if there is one. */
line -= cur_loc;
line += blank_loc;
line_len = parse_mbchar(line, NULL, NULL, NULL);
line += line_len;
while (*line != '\0' && is_blank_mbchar(line)) {
line_len = parse_mbchar(line, NULL, NULL, NULL);
line += line_len;
blank_loc += line_len;
}
return blank_loc;
}

View File

@ -207,6 +207,9 @@ size_t mbstrlen(const char *s);
size_t nstrnlen(const char *s, size_t maxlen);
#endif
size_t mbstrnlen(const char *s, size_t maxlen);
#ifndef DISABLE_JUSTIFY
char *mbstrchr(const char *s, char *c);
#endif
/* Public functions in color.c. */
#ifdef ENABLE_COLOR