make the rest of the justify code support multibyte characters
git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2371 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
master
parent
666644efbc
commit
38156d4491
12
ChangeLog
12
ChangeLog
|
@ -175,12 +175,12 @@ CVS code -
|
||||||
paragraph-searching utility functions when possible instead of
|
paragraph-searching utility functions when possible instead of
|
||||||
duplicating code. Also overhaul the justify code to make it
|
duplicating code. Also overhaul the justify code to make it
|
||||||
leave the right number of spaces at the ends of the lines of a
|
leave the right number of spaces at the ends of the lines of a
|
||||||
paragraph, to make it (partially) support multibyte
|
paragraph, to make it support multibyte characters, and to
|
||||||
characters, and to make it simpler. Also, don't remove a
|
make it simpler. Also, don't remove a space after a duplicate
|
||||||
space after a duplicate character in punct anymore, as it
|
character in punct anymore, as it doesn't really make us more
|
||||||
doesn't really make us more compatible with Pico. New
|
compatible with Pico. New functions mbstrchr(),
|
||||||
functions do_para_begin_void() and do_para_end_void(); changes
|
do_para_begin_void(), and do_para_end_void(); changes to
|
||||||
to justify_format(), do_para_begin(), inpar(), do_para_end(),
|
justify_format(), do_para_begin(), inpar(), do_para_end(),
|
||||||
break_line(), do_para_search() (renamed find_paragraph()), and
|
break_line(), do_para_search() (renamed find_paragraph()), and
|
||||||
do_justify(); removal of breakable(). (DLR)
|
do_justify(); removal of breakable(). (DLR)
|
||||||
- Still more steps toward full wide/multibyte character support.
|
- Still more steps toward full wide/multibyte character support.
|
||||||
|
|
45
src/chars.c
45
src/chars.c
|
@ -811,3 +811,48 @@ size_t mbstrnlen(const char *s, size_t maxlen)
|
||||||
nstrnlen(s, maxlen);
|
nstrnlen(s, maxlen);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef DISABLE_JUSTIFY
|
||||||
|
/* This function is the multibyte counterpart of strchr(): it looks in
 * the string s for the first occurrence of the character that c points
 * to, and returns a pointer to that occurrence, or NULL if there isn't
 * one.
 *
 * When NANO_WIDE is defined and NO_UTF8 isn't set, characters are
 * compared as wide characters, and reaching the end of s always yields
 * NULL (so, unlike strchr(), searching for '\0' there returns NULL).
 * Otherwise, this is just strchr(s, *c), and only the first byte of c
 * is considered. */
char *mbstrchr(const char *s, char *c)
{
    assert(s != NULL && c != NULL);

#ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
	/* Scratch buffer handed to parse_mbchar() for the current
	 * character of s (presumably filled with that character's
	 * bytes -- confirm against parse_mbchar()). */
	char *s_mb = charalloc(MB_CUR_MAX);
	wchar_t ws, wc;
	int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);

	/* If c doesn't start with a valid non-null multibyte character,
	 * reset mbtowc()'s conversion state and fall back to comparing
	 * against c's first byte. */
	if (c_mb_len <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wc = (unsigned char)*c;
	}

	while (*s != '\0') {
	    int s_mb_len = parse_mbchar(s, s_mb, NULL, NULL);

	    /* Same fallback as above for a character of s that can't be
	     * converted. */
	    if (mbtowc(&ws, s_mb, s_mb_len) <= 0) {
		mbtowc(NULL, NULL, 0);
		ws = (unsigned char)*s;
	    }

	    if (ws == wc)
		break;

	    s += s_mb_len;
	}

	free(s_mb);

	/* The loop only stops short of the terminator when it found a
	 * match.  Testing *s here instead of comparing ws and wc keeps
	 * us from reading ws uninitialized when s is an empty string
	 * and the loop body never ran. */
	return (*s == '\0') ? NULL : (char *)s;
    } else
#endif
	return strchr(s, *c);
}
|
||||||
|
#endif
|
||||||
|
|
80
src/nano.c
80
src/nano.c
|
@ -2382,68 +2382,95 @@ void justify_format(filestruct *paragraph, size_t skip)
|
||||||
new_end = new_paragraph_data + skip;
|
new_end = new_paragraph_data + skip;
|
||||||
|
|
||||||
while (*end != '\0') {
|
while (*end != '\0') {
|
||||||
|
int end_len;
|
||||||
|
|
||||||
/* If this character is blank, make sure that it's a space with
|
/* If this character is blank, make sure that it's a space with
|
||||||
* no blanks after it. */
|
* no blanks after it. */
|
||||||
if (is_blank_char(*end)) {
|
if (is_blank_mbchar(end)) {
|
||||||
|
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||||
|
|
||||||
*new_end = ' ';
|
*new_end = ' ';
|
||||||
new_end++;
|
new_end++;
|
||||||
end++;
|
end += end_len;
|
||||||
|
|
||||||
while (*end != '\0' && is_blank_char(*end)) {
|
while (*end != '\0' && is_blank_mbchar(end)) {
|
||||||
end++;
|
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||||
shift++;
|
|
||||||
|
end += end_len;
|
||||||
|
shift += end_len;
|
||||||
|
|
||||||
#ifndef NANO_SMALL
|
#ifndef NANO_SMALL
|
||||||
/* Keep track of the change in the current line. */
|
/* Keep track of the change in the current line. */
|
||||||
if (mark_beginbuf == paragraph &&
|
if (mark_beginbuf == paragraph &&
|
||||||
mark_beginx >= end - paragraph->data)
|
mark_beginx >= end - paragraph->data)
|
||||||
mark_shift++;
|
mark_shift += end_len;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
/* If this character is punctuation optionally followed by a
|
/* If this character is punctuation optionally followed by a
|
||||||
* bracket and then followed by blanks, make sure there are no
|
* bracket and then followed by blanks, make sure there are no
|
||||||
* more than two blanks after it, and make sure that the blanks
|
* more than two blanks after it, and make sure that the blanks
|
||||||
* are spaces. */
|
* are spaces. */
|
||||||
} else if (strchr(punct, *end) != NULL) {
|
} else if (mbstrchr(punct, end) != NULL) {
|
||||||
|
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||||
|
|
||||||
|
while (end_len > 0) {
|
||||||
*new_end = *end;
|
*new_end = *end;
|
||||||
new_end++;
|
new_end++;
|
||||||
end++;
|
end++;
|
||||||
|
end_len--;
|
||||||
|
}
|
||||||
|
|
||||||
if (*end != '\0' && strchr(brackets, *end) != NULL) {
|
if (*end != '\0' && mbstrchr(brackets, end) != NULL) {
|
||||||
|
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||||
|
|
||||||
|
while (end_len > 0) {
|
||||||
*new_end = *end;
|
*new_end = *end;
|
||||||
new_end++;
|
new_end++;
|
||||||
end++;
|
end++;
|
||||||
|
end_len--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*end != '\0' && is_blank_char(*end)) {
|
if (*end != '\0' && is_blank_mbchar(end)) {
|
||||||
|
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||||
|
|
||||||
*new_end = ' ';
|
*new_end = ' ';
|
||||||
new_end++;
|
new_end++;
|
||||||
end++;
|
end += end_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*end != '\0' && is_blank_char(*end)) {
|
if (*end != '\0' && is_blank_mbchar(end)) {
|
||||||
|
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||||
|
|
||||||
*new_end = ' ';
|
*new_end = ' ';
|
||||||
new_end++;
|
new_end++;
|
||||||
end++;
|
end += end_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (*end != '\0' && is_blank_char(*end)) {
|
while (*end != '\0' && is_blank_mbchar(end)) {
|
||||||
end++;
|
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||||
shift++;
|
|
||||||
|
end += end_len;
|
||||||
|
shift += end_len;
|
||||||
|
|
||||||
#ifndef NANO_SMALL
|
#ifndef NANO_SMALL
|
||||||
/* Keep track of the change in the current line. */
|
/* Keep track of the change in the current line. */
|
||||||
if (mark_beginbuf == paragraph &&
|
if (mark_beginbuf == paragraph &&
|
||||||
mark_beginx >= end - paragraph->data)
|
mark_beginx >= end - paragraph->data)
|
||||||
mark_shift++;
|
mark_shift += end_len;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
/* If this character is neither blank nor punctuation, leave it
|
/* If this character is neither blank nor punctuation, leave it
|
||||||
* alone. */
|
* alone. */
|
||||||
} else {
|
} else {
|
||||||
|
end_len = parse_mbchar(end, NULL, NULL, NULL);
|
||||||
|
|
||||||
|
while (end_len > 0) {
|
||||||
*new_end = *end;
|
*new_end = *end;
|
||||||
new_end++;
|
new_end++;
|
||||||
end++;
|
end++;
|
||||||
|
end_len--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2743,11 +2770,11 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
|
||||||
* found with short enough display width. */
|
* found with short enough display width. */
|
||||||
ssize_t cur_loc = 0;
|
ssize_t cur_loc = 0;
|
||||||
/* Current index in line. */
|
/* Current index in line. */
|
||||||
|
int line_len;
|
||||||
|
|
||||||
assert(line != NULL);
|
assert(line != NULL);
|
||||||
|
|
||||||
while (*line != '\0' && goal >= 0) {
|
while (*line != '\0' && goal >= 0) {
|
||||||
int line_len;
|
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
|
|
||||||
line_len = parse_mbchar(line, NULL, NULL, &pos);
|
line_len = parse_mbchar(line, NULL, NULL, &pos);
|
||||||
|
@ -2770,7 +2797,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
|
||||||
bool found_blank = FALSE;
|
bool found_blank = FALSE;
|
||||||
|
|
||||||
while (*line != '\0') {
|
while (*line != '\0') {
|
||||||
int line_len = parse_mbchar(line, NULL, NULL, NULL);
|
line_len = parse_mbchar(line, NULL, NULL, NULL);
|
||||||
|
|
||||||
if (is_blank_mbchar(line)) {
|
if (is_blank_mbchar(line)) {
|
||||||
if (!found_blank)
|
if (!found_blank)
|
||||||
|
@ -2786,11 +2813,18 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Perhaps the character after blank_loc is a blank. But because
|
/* Move to the last blank after blank_loc, if there is one. */
|
||||||
* of justify_format(), there can be only two adjacent. */
|
line -= cur_loc;
|
||||||
if (*(line - cur_loc + blank_loc + 1) == ' ' ||
|
line += blank_loc;
|
||||||
*(line - cur_loc + blank_loc + 1) == '\0')
|
line_len = parse_mbchar(line, NULL, NULL, NULL);
|
||||||
blank_loc++;
|
line += line_len;
|
||||||
|
|
||||||
|
while (*line != '\0' && is_blank_mbchar(line)) {
|
||||||
|
line_len = parse_mbchar(line, NULL, NULL, NULL);
|
||||||
|
|
||||||
|
line += line_len;
|
||||||
|
blank_loc += line_len;
|
||||||
|
}
|
||||||
|
|
||||||
return blank_loc;
|
return blank_loc;
|
||||||
}
|
}
|
||||||
|
|
|
@ -207,6 +207,9 @@ size_t mbstrlen(const char *s);
|
||||||
size_t nstrnlen(const char *s, size_t maxlen);
|
size_t nstrnlen(const char *s, size_t maxlen);
|
||||||
#endif
|
#endif
|
||||||
size_t mbstrnlen(const char *s, size_t maxlen);
|
size_t mbstrnlen(const char *s, size_t maxlen);
|
||||||
|
#ifndef DISABLE_JUSTIFY
|
||||||
|
char *mbstrchr(const char *s, char *c);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Public functions in color.c. */
|
/* Public functions in color.c. */
|
||||||
#ifdef ENABLE_COLOR
|
#ifdef ENABLE_COLOR
|
||||||
|
|
Loading…
Reference in New Issue