new feature: add the option --wordchars, to set extra word characters

This allows the user to specify which other characters, besides the
default alphanumeric ones, should be considered as part of a word, so
that word operations like Ctrl+Left and Ctrl+Right will pass them by.

Using this option overrides the option --wordbounds.

This fulfills https://savannah.gnu.org/bugs/?47283.
master
Benno Schulenberg 2016-06-30 18:02:45 +02:00
parent d88423eaae
commit 6f12992cea
10 changed files with 74 additions and 14 deletions

View File

@ -148,9 +148,14 @@ keystroke instead of 25. Note that \fB\-c\fP overrides this.
Show the current version number and exit.
.TP
.BR \-W ", " \-\-wordbounds
Detect word boundaries more accurately by treating punctuation
Detect word boundaries differently by treating punctuation
characters as part of a word.
.TP
.BR "\-X ""\fIcharacters\fB""" ", " "\-\-wordchars=""" \fIcharacters """
Specify which other characters (besides the normal alphanumeric ones)
should be considered as part of a word. This overrides option
\fB\-W\fR (\fB\-\-wordbounds\fR).
.TP
.BR \-Y\ \fIname\fR ", " \-\-syntax= \fIname
Specify the name of the syntax highlighting to use from among the ones
defined in the \fInanorc\fP files.

View File

@ -253,8 +253,13 @@ Set the two characters used to indicate the presence of tabs and
spaces. They must be single-column characters.
.TP
.B set wordbounds
Detect word boundaries more accurately by treating punctuation
Detect word boundaries differently by treating punctuation
characters as parts of words.
.TP
.B set wordchars \fIstring\fP
Specify which other characters (besides the normal alphanumeric ones)
should be considered as parts of words. This overrides the option
\fBwordbounds\fR.
.SH SYNTAX HIGHLIGHTING
Coloring the different syntactic elements of a file

View File

@ -178,10 +178,15 @@
## The default otherwise:
# set whitespace ">."
## Detect word boundaries more accurately by treating punctuation
## Detect word boundaries differently by treating punctuation
## characters as parts of words.
# set wordbounds
## The characters (besides alphanumeric ones) that should be considered
## as parts of words. This option does not have a default value. When
## set, it overrides option 'set wordbounds'.
# set wordchars "<_>."
## Paint the interface elements of nano.
## This is an example; by default there are no colors.

View File

@ -231,9 +231,15 @@ Show the current version number and exit.
@item -W
@itemx --wordbounds
Detect word boundaries more accurately by treating punctuation
Detect word boundaries differently by treating punctuation
characters as parts of words.
@item -X "@var{characters}"
@itemx --wordchars="@var{characters}"
Specify which other characters (besides the normal alphanumeric ones)
should be considered as parts of words. This overrides option
@option{-W} (@option{--wordbounds}).
@item -Y @var{name}
@itemx --syntax=@var{name}
Specify a specific syntax from the nanorc files to use for highlighting.
@ -831,9 +837,14 @@ spaces. They must be single-column characters. The default pair
for a UTF-8 locale is @t{"»·"}, and for other locales @t{">."}.
@item set wordbounds
Detect word boundaries more accurately by treating punctuation
Detect word boundaries differently by treating punctuation
characters as part of a word.
@item set wordchars "@var{string}"
Specify which other characters (besides the normal alphanumeric ones)
should be considered as parts of words. This overrides the option
@code{wordbounds}.
@end table
@node Syntax Highlighting

View File

@ -183,15 +183,26 @@ bool is_punct_mbchar(const char *c)
return ispunct((unsigned char)*c);
}
/* Return TRUE when the given multibyte character c is a word-forming
 * character (that is: alphanumeric, or listed in word_chars, or
 * punctuation when allow_punct is TRUE), and FALSE otherwise.  When
 * word_chars is set and non-empty, it alone decides about
 * non-alphanumeric characters and allow_punct is ignored. */
bool is_word_mbchar(const char *c, bool allow_punct)
{
    assert(c != NULL);

    /* A terminating NUL is never part of a word.  Without this guard
     * the strstr() lookup below could be handed an empty needle, and
     * an empty needle matches any haystack. */
    if (*c == '\0')
        return FALSE;

    if (is_alnum_mbchar(c))
        return TRUE;

    if (word_chars != NULL && *word_chars != '\0') {
        bool wordforming;
        char *symbol = charalloc(MB_CUR_MAX + 1);
        int symlen = parse_mbchar(c, symbol, NULL);

        /* Terminate the copied character so it can serve as a
         * needle for strstr(). */
        symbol[symlen] = '\0';
        wordforming = (strstr(word_chars, symbol) != NULL);

        /* The copy was needed only for the lookup; release it so
         * that repeated calls do not leak memory. */
        free(symbol);

        return wordforming;
    }

    return (allow_punct && is_punct_mbchar(c));
}
/* Return the visible representation of control character c. */

View File

@ -124,6 +124,9 @@ size_t quotelen;
#endif
#endif
char *word_chars = NULL;
/* Nonalphanumeric characters that also form words. */
bool nodelay_mode = FALSE;
/* Are we checking for a cancel while doing something? */
@ -1669,6 +1672,7 @@ void thanks_for_all_the_fish(void)
delwin(edit);
delwin(bottomwin);
free(word_chars);
#ifndef DISABLE_JUSTIFY
free(quotestr);
#ifdef HAVE_REGEX_H

View File

@ -860,6 +860,8 @@ void usage(void)
#ifndef NANO_TINY
print_opt("-W", "--wordbounds",
N_("Detect word boundaries more accurately"));
print_opt("-X", "--wordchars",
N_("Which other characters are word parts"));
#endif
#ifndef DISABLE_COLOR
if (!ISSET(RESTRICTED))
@ -1995,6 +1997,7 @@ int main(int argc, char **argv)
{"smooth", 0, NULL, 'S'},
{"quickblank", 0, NULL, 'U'},
{"wordbounds", 0, NULL, 'W'},
{"wordchars", 1, NULL, 'X'},
{"autoindent", 0, NULL, 'i'},
{"cut", 0, NULL, 'k'},
{"unix", 0, NULL, 'u'},
@ -2040,11 +2043,11 @@ int main(int argc, char **argv)
while ((optchr =
#ifdef HAVE_GETOPT_LONG
getopt_long(argc, argv,
"ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$",
"ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$",
long_options, NULL)
#else
getopt(argc, argv,
"ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$")
"ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$")
#endif
) != -1) {
switch (optchr) {
@ -2146,6 +2149,9 @@ int main(int argc, char **argv)
case 'W':
SET(WORD_BOUNDS);
break;
case 'X':
word_chars = mallocstrcpy(word_chars, optarg);
break;
#endif
#ifndef DISABLE_COLOR
case 'Y':
@ -2279,6 +2285,7 @@ int main(int argc, char **argv)
#endif
#ifndef NANO_TINY
char *backup_dir_cpy = backup_dir;
char *word_chars_cpy = word_chars;
#endif
#ifndef DISABLE_JUSTIFY
char *quotestr_cpy = quotestr;
@ -2297,6 +2304,7 @@ int main(int argc, char **argv)
#endif
#ifndef NANO_TINY
backup_dir = NULL;
word_chars = NULL;
#endif
#ifndef DISABLE_JUSTIFY
quotestr = NULL;
@ -2327,6 +2335,10 @@ int main(int argc, char **argv)
free(backup_dir);
backup_dir = backup_dir_cpy;
}
if (word_chars_cpy != NULL) {
free(word_chars);
word_chars = word_chars_cpy;
}
#endif
#ifndef DISABLE_JUSTIFY
if (quotestr_cpy != NULL) {

View File

@ -91,7 +91,10 @@ extern size_t quotelen;
#endif
#endif /* !DISABLE_JUSTIFY */
extern char *word_chars;
extern bool nodelay_mode;
extern char *answer;
extern ssize_t tabsize;

View File

@ -102,6 +102,7 @@ static const rcoption rcopts[] = {
{"unix", MAKE_IT_UNIX},
{"whitespace", 0},
{"wordbounds", WORD_BOUNDS},
{"wordchars", 0},
#endif
#ifndef DISABLE_COLOR
{"titlecolor", 0},
@ -1177,6 +1178,9 @@ void parse_rcfile(FILE *rcstream
if (strcasecmp(rcopts[i].name, "backupdir") == 0)
backup_dir = option;
else
if (strcasecmp(rcopts[i].name, "wordchars") == 0)
word_chars = option;
else
#endif
#ifndef DISABLE_SPELLER
if (strcasecmp(rcopts[i].name, "speller") == 0)

View File

@ -294,8 +294,8 @@ bool is_separate_word(size_t position, size_t length, const char *buf)
* word isn't an alphanumeric character, and if we're at
* the end of the line or the character after the word isn't an
* alphanumeric character, we have a whole word. */
retval = (position == 0 || !is_word_mbchar(before, FALSE)) &&
(word_end == strlen(buf) || !is_word_mbchar(after, FALSE));
retval = (position == 0 || !is_alnum_mbchar(before)) &&
(word_end == strlen(buf) || !is_alnum_mbchar(after));
free(before);
free(after);