new feature: add the option --wordchars, to set extra word characters

This allows the user to specify which other characters, besides the default alphanumeric ones, should be considered as part of a word, so that word operations like Ctrl+Left and Ctrl+Right will pass them by. Using this option overrides the option --wordbounds. This fulfills https://savannah.gnu.org/bugs/?47283.
2016-06-30 18:02:45 +02:00 · 2016-06-30 18:02:45 +02:00 · 6f12992cea
parent d88423eaae
commit 6f12992cea
10 changed files with 74 additions and 14 deletions
--- a/doc/man/nano.1
+++ b/doc/man/nano.1
@ -148,9 +148,14 @@ keystroke instead of 25.  Note that \fB\-c\fP overrides this.
 Show the current version number and exit.
 .TP
 .BR \-W ", " \-\-wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as part of a word.
 .TP
 .BR "\-X ""\fIcharacters\fB""" ", " "\-\-wordchars=""" \fIcharacters """
 Specify which other characters (besides the normal alphanumeric ones)
 should be considered as part of a word.  This overrides option
 \fB\-W\fR (\fB\-\-wordbounds\fR).
 .TP
 .BR \-Y\ \fIname\fR ", " \-\-syntax= \fIname
 Specify the name of the syntax highlighting to use from among the ones
 defined in the \fInanorc\fP files.
--- a/doc/man/nanorc.5
+++ b/doc/man/nanorc.5
@ -253,8 +253,13 @@ Set the two characters used to indicate the presence of tabs and
 spaces.  They must be single-column characters.
 .TP
 .B set wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as parts of words.
 .TP
 .B set wordchars \fIstring\fP
 Specify which other characters (besides the normal alphanumeric ones)
 should be considered as parts of words.  This overrides the option
 \fBwordbounds\fR.
 .SH SYNTAX HIGHLIGHTING
 Coloring the different syntactic elements of a file
--- a/doc/nanorc.sample.in
+++ b/doc/nanorc.sample.in
@ -178,10 +178,15 @@
 ## The default otherwise:
 # set whitespace ">."
-## Detect word boundaries more accurately by treating punctuation
+## Detect word boundaries differently by treating punctuation
 ## characters as parts of words.
 # set wordbounds
 ## The characters (besides alphanumeric ones) that should be considered
 ## as parts of words.  This option does not have a default value.  When
 ## set, it overrides option 'set wordbounds'.
 # set wordchars "<_>."
 ## Paint the interface elements of nano.
 ## This is an example; by default there are no colors.
--- a/doc/texinfo/nano.texi
+++ b/doc/texinfo/nano.texi
@ -231,9 +231,15 @@ Show the current version number and exit.
@item -W
@itemx --wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as parts of words.
@item -X "@var{characters}"
@itemx --wordchars="@var{characters}"
 Specify which other characters (besides the normal alphanumeric ones)
 should be considered as parts of words.  This overrides option
@option{-W} (@option{--wordbounds}).
@item -Y @var{name}
@itemx --syntax=@var{name}
 Specify a specific syntax from the nanorc files to use for highlighting.
@ -831,9 +837,14 @@ spaces.  They must be single-column characters.  The default pair
 for a UTF-8 locale is @t{"»·"}, and for other locales @t{">."}.
@item set wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as part of a word.
@item set wordchars "@var{string}"
 Specify which other characters (besides the normal alphanumeric ones)
 should be considered as parts of words.  This overrides the option
@code{wordbounds}.
@end table
@node Syntax Highlighting
--- a/src/chars.c
+++ b/src/chars.c
@ -183,15 +183,26 @@ bool is_punct_mbchar(const char *c)
 	return ispunct((unsigned char)*c);
 }
-/* Return TRUE for a multibyte character found in a word (currently only
+/* Return TRUE when the given multibyte character c is a word-forming
- * an alphanumeric or punctuation character, and only the latter if
+ * character (that is: alphanumeric, or specified in wordchars, or
- * allow_punct is TRUE) and FALSE otherwise. */
+ * punctuation when allow_punct is TRUE), and FALSE otherwise. */
 bool is_word_mbchar(const char *c, bool allow_punct)
 {
    assert(c != NULL);
-    return is_alnum_mbchar(c) || (allow_punct ? is_punct_mbchar(c) :
+    if (is_alnum_mbchar(c))
-	FALSE);
+	return TRUE;
    /* When extra word characters were specified, they alone decide
     * (besides alphanumerics) whether c is word-forming; allow_punct
     * is then not consulted. */
    if (word_chars != NULL && *word_chars != '\0') {
 	bool wordforming;
 	char *symbol = charalloc(MB_CUR_MAX + 1);
 	int symlen = parse_mbchar(c, symbol, NULL);
 	/* Terminate the copied character so it can be searched for
 	 * as a substring of word_chars. */
 	symbol[symlen] = '\0';
 	wordforming = (strstr(word_chars, symbol) != NULL);
 	/* Release the scratch buffer before returning; without this,
 	 * every call with word_chars set would leak it. */
 	free(symbol);
 	return wordforming;
    }
    return (allow_punct && is_punct_mbchar(c));
 }
 /* Return the visible representation of control character c. */
--- a/src/global.c
+++ b/src/global.c
@ -124,6 +124,9 @@ size_t quotelen;
 #endif
 #endif
 char *word_chars = NULL;
 	/* Nonalphanumeric characters that also form words. */
 bool nodelay_mode = FALSE;
 	/* Are we checking for a cancel while doing something? */
@ -1669,6 +1672,7 @@ void thanks_for_all_the_fish(void)
    delwin(edit);
    delwin(bottomwin);
    free(word_chars);
 #ifndef DISABLE_JUSTIFY
    free(quotestr);
 #ifdef HAVE_REGEX_H
--- a/src/nano.c
+++ b/src/nano.c
@ -860,6 +860,8 @@ void usage(void)
 #ifndef NANO_TINY
    print_opt("-W", "--wordbounds",
 	N_("Detect word boundaries more accurately"));
    print_opt("-X", "--wordchars",
 	N_("Which other characters are word parts"));
 #endif
 #ifndef DISABLE_COLOR
    if (!ISSET(RESTRICTED))
@ -1995,6 +1997,7 @@ int main(int argc, char **argv)
 	{"smooth", 0, NULL, 'S'},
 	{"quickblank", 0, NULL, 'U'},
 	{"wordbounds", 0, NULL, 'W'},
 	{"wordchars", 1, NULL, 'X'},
 	{"autoindent", 0, NULL, 'i'},
 	{"cut", 0, NULL, 'k'},
 	{"unix", 0, NULL, 'u'},
@ -2040,11 +2043,11 @@ int main(int argc, char **argv)
    while ((optchr =
 #ifdef HAVE_GETOPT_LONG
 	getopt_long(argc, argv,
-		"ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$",
+		"ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$",
 		long_options, NULL)
 #else
 	getopt(argc, argv,
-		"ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$")
+		"ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$")
 #endif
 		) != -1) {
 	switch (optchr) {
@ -2146,6 +2149,9 @@ int main(int argc, char **argv)
 	    case 'W':
 		SET(WORD_BOUNDS);
 		break;
 	    case 'X':
 		word_chars = mallocstrcpy(word_chars, optarg);
 		break;
 #endif
 #ifndef DISABLE_COLOR
 	    case 'Y':
@ -2279,6 +2285,7 @@ int main(int argc, char **argv)
 #endif
 #ifndef NANO_TINY
 	char *backup_dir_cpy = backup_dir;
 	char *word_chars_cpy = word_chars;
 #endif
 #ifndef DISABLE_JUSTIFY
 	char *quotestr_cpy = quotestr;
@ -2297,6 +2304,7 @@ int main(int argc, char **argv)
 #endif
 #ifndef NANO_TINY
 	backup_dir = NULL;
 	word_chars = NULL;
 #endif
 #ifndef DISABLE_JUSTIFY
 	quotestr = NULL;
@ -2327,6 +2335,10 @@ int main(int argc, char **argv)
 	    free(backup_dir);
 	    backup_dir = backup_dir_cpy;
 	}
 	if (word_chars_cpy != NULL) {
 	    free(word_chars);
 	    word_chars = word_chars_cpy;
 	}
 #endif
 #ifndef DISABLE_JUSTIFY
 	if (quotestr_cpy != NULL) {
--- a/src/proto.h
+++ b/src/proto.h
@ -91,7 +91,10 @@ extern size_t quotelen;
 #endif
 #endif /* !DISABLE_JUSTIFY */
 extern char *word_chars;
 extern bool nodelay_mode;
 extern char *answer;
 extern ssize_t tabsize;
--- a/src/rcfile.c
+++ b/src/rcfile.c
@ -102,6 +102,7 @@ static const rcoption rcopts[] = {
    {"unix", MAKE_IT_UNIX},
    {"whitespace", 0},
    {"wordbounds", WORD_BOUNDS},
    {"wordchars", 0},
 #endif
 #ifndef DISABLE_COLOR
    {"titlecolor", 0},
@ -1177,6 +1178,9 @@ void parse_rcfile(FILE *rcstream
 	if (strcasecmp(rcopts[i].name, "backupdir") == 0)
 	    backup_dir = option;
 	else
 	if (strcasecmp(rcopts[i].name, "wordchars") == 0)
 	    word_chars = option;
 	else
 #endif
 #ifndef DISABLE_SPELLER
 	if (strcasecmp(rcopts[i].name, "speller") == 0)
--- a/src/utils.c
+++ b/src/utils.c
@ -294,8 +294,8 @@ bool is_separate_word(size_t position, size_t length, const char *buf)
     * word isn't a non-punctuation "word" character, and if we're at
     * the end of the line or the character after the word isn't a
     * non-punctuation "word" character, we have a whole word. */
-    retval = (position == 0 || !is_word_mbchar(before, FALSE)) &&
+    retval = (position == 0 || !is_alnum_mbchar(before)) &&
-		(word_end == strlen(buf) || !is_word_mbchar(after, FALSE));
+		(word_end == strlen(buf) || !is_alnum_mbchar(after));
    free(before);
    free(after);