make the rest of the justify code support multibyte characters
git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2371 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
master
parent
666644efbc
commit
38156d4491
12
ChangeLog
12
ChangeLog
|
@ -175,12 +175,12 @@ CVS code -
|
|||
paragraph-searching utility functions when possible instead of
|
||||
duplicating code. Also overhaul the justify code to make it
|
||||
leave the right number of spaces at the ends of the lines of a
|
||||
paragraph, to make it (partially) support multibyte
|
||||
characters, and to make it simpler. Also, don't remove a
|
||||
space after a duplicate character in punct anymore, as it
|
||||
doesn't really make us more compatible with Pico. New
|
||||
functions do_para_begin_void() and do_para_end_void(); changes
|
||||
to justify_format(), do_para_begin(), inpar(), do_para_end(),
|
||||
paragraph, to make it support multibyte characters, and to
|
||||
make it simpler. Also, don't remove a space after a duplicate
|
||||
character in punct anymore, as it doesn't really make us more
|
||||
compatible with Pico. New functions mbstrchr(),
|
||||
do_para_begin_void(), and do_para_end_void(); changes to
|
||||
justify_format(), do_para_begin(), inpar(), do_para_end(),
|
||||
break_line(), do_para_search() (renamed find_paragraph()), and
|
||||
do_justify(); removal of breakable(). (DLR)
|
||||
- Still more steps toward full wide/multibyte character support.
|
||||
|
|
45
src/chars.c
45
src/chars.c
|
@ -811,3 +811,48 @@ size_t mbstrnlen(const char *s, size_t maxlen)
|
|||
nstrnlen(s, maxlen);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef DISABLE_JUSTIFY
|
||||
/* This function is equivalent to strchr() for multibyte strings.  It
 * returns a pointer to the first occurrence in s of the (possibly
 * multibyte) character that c points to, or NULL if that character
 * does not occur in s.
 *
 * Note that, unlike strchr(), the UTF-8 branch never examines the
 * terminating null of s, so searching for the null character there
 * yields NULL.  An empty s also yields NULL. */
char *mbstrchr(const char *s, char *c)
{
    assert(s != NULL && c != NULL);

#ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
        char *s_mb = charalloc(MB_CUR_MAX);
            /* Scratch buffer handed to parse_mbchar(); presumably it
             * receives a copy of the character at s -- TODO confirm
             * against parse_mbchar(). */
        wchar_t ws, wc;
        int s_mb_len;

        /* Decode the character we're looking for.  If it can't be
         * decoded, reset mbtowc()'s conversion state and fall back to
         * comparing against its first byte. */
        if (mbtowc(&wc, c, MB_CUR_MAX) <= 0) {
            mbtowc(NULL, NULL, 0);
            wc = (unsigned char)*c;
        }

        /* Step through s one multibyte character at a time, stopping
         * at the first character that matches wc. */
        while (*s != '\0') {
            s_mb_len = parse_mbchar(s, s_mb, NULL, NULL);

            if (mbtowc(&ws, s_mb, s_mb_len) <= 0) {
                mbtowc(NULL, NULL, 0);
                ws = (unsigned char)*s;
            }

            if (ws == wc)
                break;

            s += s_mb_len;
        }

        free(s_mb);

        /* We found a match only if we stopped before the terminating
         * null.  Testing *s instead of comparing ws and wc again keeps
         * us from reading ws when the loop never ran (empty s), in
         * which case ws was never assigned. */
        return (*s != '\0') ? (char *)s : NULL;
    } else
#endif
        return strchr(s, *c);
}
|
||||
#endif
|
||||
|
|
98
src/nano.c
98
src/nano.c
|
@ -2382,68 +2382,95 @@ void justify_format(filestruct *paragraph, size_t skip)
|
|||
new_end = new_paragraph_data + skip;
|
||||
|
||||
while (*end != '\0') {
|
||||
int end_len;
|
||||
|
||||
/* If this character is blank, make sure that it's a space with
|
||||
* no blanks after it. */
|
||||
if (is_blank_char(*end)) {
|
||||
if (is_blank_mbchar(end)) {
|
||||
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||
|
||||
*new_end = ' ';
|
||||
new_end++;
|
||||
end++;
|
||||
end += end_len;
|
||||
|
||||
while (*end != '\0' && is_blank_char(*end)) {
|
||||
end++;
|
||||
shift++;
|
||||
while (*end != '\0' && is_blank_mbchar(end)) {
|
||||
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||
|
||||
end += end_len;
|
||||
shift += end_len;
|
||||
|
||||
#ifndef NANO_SMALL
|
||||
/* Keep track of the change in the current line. */
|
||||
if (mark_beginbuf == paragraph &&
|
||||
mark_beginx >= end - paragraph->data)
|
||||
mark_shift++;
|
||||
mark_shift += end_len;
|
||||
#endif
|
||||
}
|
||||
/* If this character is punctuation optionally followed by a
|
||||
* bracket and then followed by blanks, make sure there are no
|
||||
* more than two blanks after it, and make sure that the blanks
|
||||
* are spaces. */
|
||||
} else if (strchr(punct, *end) != NULL) {
|
||||
*new_end = *end;
|
||||
new_end++;
|
||||
end++;
|
||||
} else if (mbstrchr(punct, end) != NULL) {
|
||||
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||
|
||||
if (*end != '\0' && strchr(brackets, *end) != NULL) {
|
||||
while (end_len > 0) {
|
||||
*new_end = *end;
|
||||
new_end++;
|
||||
end++;
|
||||
end_len--;
|
||||
}
|
||||
|
||||
if (*end != '\0' && is_blank_char(*end)) {
|
||||
if (*end != '\0' && mbstrchr(brackets, end) != NULL) {
|
||||
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||
|
||||
while (end_len > 0) {
|
||||
*new_end = *end;
|
||||
new_end++;
|
||||
end++;
|
||||
end_len--;
|
||||
}
|
||||
}
|
||||
|
||||
if (*end != '\0' && is_blank_mbchar(end)) {
|
||||
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||
|
||||
*new_end = ' ';
|
||||
new_end++;
|
||||
end++;
|
||||
end += end_len;
|
||||
}
|
||||
|
||||
if (*end != '\0' && is_blank_char(*end)) {
|
||||
if (*end != '\0' && is_blank_mbchar(end)) {
|
||||
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||
|
||||
*new_end = ' ';
|
||||
new_end++;
|
||||
end++;
|
||||
end += end_len;
|
||||
}
|
||||
|
||||
while (*end != '\0' && is_blank_char(*end)) {
|
||||
end++;
|
||||
shift++;
|
||||
while (*end != '\0' && is_blank_mbchar(end)) {
|
||||
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||
|
||||
end += end_len;
|
||||
shift += end_len;
|
||||
|
||||
#ifndef NANO_SMALL
|
||||
/* Keep track of the change in the current line. */
|
||||
if (mark_beginbuf == paragraph &&
|
||||
mark_beginx >= end - paragraph->data)
|
||||
mark_shift++;
|
||||
/* Keep track of the change in the current line. */
|
||||
if (mark_beginbuf == paragraph &&
|
||||
mark_beginx >= end - paragraph->data)
|
||||
mark_shift += end_len;
|
||||
#endif
|
||||
}
|
||||
/* If this character is neither blank nor punctuation, leave it
|
||||
* alone. */
|
||||
} else {
|
||||
*new_end = *end;
|
||||
new_end++;
|
||||
end++;
|
||||
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||
|
||||
while (end_len > 0) {
|
||||
*new_end = *end;
|
||||
new_end++;
|
||||
end++;
|
||||
end_len--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2743,11 +2770,11 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
|
|||
* found with short enough display width. */
|
||||
ssize_t cur_loc = 0;
|
||||
/* Current index in line. */
|
||||
int line_len;
|
||||
|
||||
assert(line != NULL);
|
||||
|
||||
while (*line != '\0' && goal >= 0) {
|
||||
int line_len;
|
||||
size_t pos = 0;
|
||||
|
||||
line_len = parse_mbchar(line, NULL, NULL, &pos);
|
||||
|
@ -2770,7 +2797,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
|
|||
bool found_blank = FALSE;
|
||||
|
||||
while (*line != '\0') {
|
||||
int line_len = parse_mbchar(line, NULL, NULL, NULL);
|
||||
line_len = parse_mbchar(line, NULL, NULL, NULL);
|
||||
|
||||
if (is_blank_mbchar(line)) {
|
||||
if (!found_blank)
|
||||
|
@ -2786,11 +2813,18 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
|
|||
}
|
||||
}
|
||||
|
||||
/* Perhaps the character after blank_loc is a blank. But because
|
||||
* of justify_format(), there can be only two adjacent. */
|
||||
if (*(line - cur_loc + blank_loc + 1) == ' ' ||
|
||||
*(line - cur_loc + blank_loc + 1) == '\0')
|
||||
blank_loc++;
|
||||
/* Move to the last blank after blank_loc, if there is one. */
|
||||
line -= cur_loc;
|
||||
line += blank_loc;
|
||||
line_len = parse_mbchar(line, NULL, NULL, NULL);
|
||||
line += line_len;
|
||||
|
||||
while (*line != '\0' && is_blank_mbchar(line)) {
|
||||
line_len = parse_mbchar(line, NULL, NULL, NULL);
|
||||
|
||||
line += line_len;
|
||||
blank_loc += line_len;
|
||||
}
|
||||
|
||||
return blank_loc;
|
||||
}
|
||||
|
|
|
@ -207,6 +207,9 @@ size_t mbstrlen(const char *s);
|
|||
size_t nstrnlen(const char *s, size_t maxlen);
|
||||
#endif
|
||||
size_t mbstrnlen(const char *s, size_t maxlen);
|
||||
#ifndef DISABLE_JUSTIFY
|
||||
char *mbstrchr(const char *s, char *c);
|
||||
#endif
|
||||
|
||||
/* Public functions in color.c. */
|
||||
#ifdef ENABLE_COLOR
|
||||
|
|
Loading…
Reference in New Issue