new feature: add the option --wordchars, to set extra word characters

This allows the user to specify which other characters, besides the default alphanumeric ones, should be considered as part of a word, so that word operations like Ctrl+Left and Ctrl+Right will pass them by. Using this option overrides the option --wordbounds. This fulfills https://savannah.gnu.org/bugs/?47283.
2016-06-30 18:02:45 +02:00 · 2016-06-30 18:02:45 +02:00 · 6f12992cea
parent d88423eaae
commit 6f12992cea
10 changed files with 74 additions and 14 deletions
--- a/doc/man/nano.1
+++ b/doc/man/nano.1
@ -148,9 +148,14 @@ keystroke instead of 25.  Note that \fB\-c\fP overrides this.
 Show the current version number and exit.
 .TP
 .BR \-W ", " \-\-wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as part of a word.
 .TP
+.BR "\-X ""\fIcharacters\fB""" ", " "\-\-wordchars=""" \fIcharacters """
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as part of a word.  This overrides option
+\fB\-W\fR (\fB\-\-wordbounds\fR).
+.TP
 .BR \-Y\ \fIname\fR ", " \-\-syntax= \fIname
 Specify the name of the syntax highlighting to use from among the ones
 defined in the \fInanorc\fP files.
--- a/doc/man/nanorc.5
+++ b/doc/man/nanorc.5
@ -253,8 +253,13 @@ Set the two characters used to indicate the presence of tabs and
 spaces.  They must be single-column characters.
 .TP
 .B set wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as parts of words.
+.TP
+.B set wordchars \fIstring\fP
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as parts of words.  This overrides the option
+\fBwordbounds\fR.

 .SH SYNTAX HIGHLIGHTING
 Coloring the different syntactic elements of a file
--- a/doc/nanorc.sample.in
+++ b/doc/nanorc.sample.in
@ -178,10 +178,15 @@
 ## The default otherwise:
 # set whitespace ">."

-## Detect word boundaries more accurately by treating punctuation
+## Detect word boundaries differently by treating punctuation
 ## characters as parts of words.
 # set wordbounds

+## The characters (besides alphanumeric ones) that should be considered
+## as parts of words.  This option does not have a default value.  When
+## set, it overrides option 'set wordbounds'.
+# set wordchars "<_>."
+

 ## Paint the interface elements of nano.
 ## This is an example; by default there are no colors.
--- a/doc/texinfo/nano.texi
+++ b/doc/texinfo/nano.texi
@ -231,9 +231,15 @@ Show the current version number and exit.

@item -W
@itemx --wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as parts of words.

+@item -X "@var{characters}"
+@itemx --wordchars="@var{characters}"
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as parts of words.  This overrides option
+@option{-W} (@option{--wordbounds}).
+
@item -Y @var{name}
@itemx --syntax=@var{name}
 Specify a specific syntax from the nanorc files to use for highlighting.
@ -831,9 +837,14 @@ spaces.  They must be single-column characters.  The default pair
 for a UTF-8 locale is @t{"»·"}, and for other locales @t{">."}.

@item set wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as part of a word.

+@item set wordchars "@var{string}"
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as parts of words.  This overrides the option
+@code{wordbounds}.
+
@end table

@node Syntax Highlighting
--- a/src/chars.c
+++ b/src/chars.c
@ -183,15 +183,26 @@ bool is_punct_mbchar(const char *c)
 	return ispunct((unsigned char)*c);
 }

-/* Return TRUE for a multibyte character found in a word (currently only
- * an alphanumeric or punctuation character, and only the latter if
- * allow_punct is TRUE) and FALSE otherwise. */
+/* Return TRUE when the multibyte character c is word-forming: when it is
+ * alphanumeric, or else when it occurs in a nonempty word_chars, or else
+ * (only if word_chars is unset or empty) punctuation with allow_punct. */
 bool is_word_mbchar(const char *c, bool allow_punct)
 {
    assert(c != NULL);

-    return is_alnum_mbchar(c) || (allow_punct ? is_punct_mbchar(c) :
-	FALSE);
+    if (is_alnum_mbchar(c))
+	return TRUE;
+
+    if (word_chars != NULL && *word_chars != '\0') {
+	char *symbol = charalloc(MB_CUR_MAX + 1);
+	int symlen = parse_mbchar(c, symbol, NULL);
+	bool wordforming;
+	symbol[symlen] = '\0';
+	wordforming = (strstr(word_chars, symbol) != NULL);
+	free(symbol);	/* Each call makes its own copy, so release it. */
+	return wordforming;
+    }
+    return (allow_punct && is_punct_mbchar(c));
 }

 /* Return the visible representation of control character c. */
--- a/src/global.c
+++ b/src/global.c
@ -124,6 +124,9 @@ size_t quotelen;
 #endif
 #endif

+char *word_chars = NULL;
+	/* Nonalphanumeric characters that also form words. */
+
 bool nodelay_mode = FALSE;
 	/* Are we checking for a cancel while doing something? */

@ -1669,6 +1672,7 @@ void thanks_for_all_the_fish(void)
    delwin(edit);
    delwin(bottomwin);

+    free(word_chars);
 #ifndef DISABLE_JUSTIFY
    free(quotestr);
 #ifdef HAVE_REGEX_H
--- a/src/nano.c
+++ b/src/nano.c
@ -860,6 +860,8 @@ void usage(void)
 #ifndef NANO_TINY
    print_opt("-W", "--wordbounds",
 	N_("Detect word boundaries more accurately"));
+    print_opt("-X", "--wordchars",
+	N_("Which other characters are word parts"));
 #endif
 #ifndef DISABLE_COLOR
    if (!ISSET(RESTRICTED))
@ -1995,6 +1997,7 @@ int main(int argc, char **argv)
 	{"smooth", 0, NULL, 'S'},
 	{"quickblank", 0, NULL, 'U'},
 	{"wordbounds", 0, NULL, 'W'},
+	{"wordchars", 1, NULL, 'X'},
 	{"autoindent", 0, NULL, 'i'},
 	{"cut", 0, NULL, 'k'},
 	{"unix", 0, NULL, 'u'},
@ -2040,11 +2043,11 @@ int main(int argc, char **argv)
    while ((optchr =
 #ifdef HAVE_GETOPT_LONG
 	getopt_long(argc, argv,
-		"ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$",
+		"ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$",
 		long_options, NULL)
 #else
 	getopt(argc, argv,
-		"ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$")
+		"ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$")
 #endif
 		) != -1) {
 	switch (optchr) {
@ -2146,6 +2149,9 @@ int main(int argc, char **argv)
 	    case 'W':
 		SET(WORD_BOUNDS);
 		break;
+	    case 'X':
+		word_chars = mallocstrcpy(word_chars, optarg);
+		break;
 #endif
 #ifndef DISABLE_COLOR
 	    case 'Y':
@ -2279,6 +2285,7 @@ int main(int argc, char **argv)
 #endif
 #ifndef NANO_TINY
 	char *backup_dir_cpy = backup_dir;
+	char *word_chars_cpy = word_chars;
 #endif
 #ifndef DISABLE_JUSTIFY
 	char *quotestr_cpy = quotestr;
@ -2297,6 +2304,7 @@ int main(int argc, char **argv)
 #endif
 #ifndef NANO_TINY
 	backup_dir = NULL;
+	word_chars = NULL;
 #endif
 #ifndef DISABLE_JUSTIFY
 	quotestr = NULL;
@ -2327,6 +2335,10 @@ int main(int argc, char **argv)
 	    free(backup_dir);
 	    backup_dir = backup_dir_cpy;
 	}
+	if (word_chars_cpy != NULL) {
+	    free(word_chars);
+	    word_chars = word_chars_cpy;
+	}
 #endif
 #ifndef DISABLE_JUSTIFY
 	if (quotestr_cpy != NULL) {
--- a/src/proto.h
+++ b/src/proto.h
@ -91,7 +91,10 @@ extern size_t quotelen;
 #endif
 #endif /* !DISABLE_JUSTIFY */

+extern char *word_chars;
+
 extern bool nodelay_mode;
+
 extern char *answer;

 extern ssize_t tabsize;
--- a/src/rcfile.c
+++ b/src/rcfile.c
@ -102,6 +102,7 @@ static const rcoption rcopts[] = {
    {"unix", MAKE_IT_UNIX},
    {"whitespace", 0},
    {"wordbounds", WORD_BOUNDS},
+    {"wordchars", 0},
 #endif
 #ifndef DISABLE_COLOR
    {"titlecolor", 0},
@ -1177,6 +1178,9 @@ void parse_rcfile(FILE *rcstream
 	if (strcasecmp(rcopts[i].name, "backupdir") == 0)
 	    backup_dir = option;
 	else
+	if (strcasecmp(rcopts[i].name, "wordchars") == 0)
+	    word_chars = option;
+	else
 #endif
 #ifndef DISABLE_SPELLER
 	if (strcasecmp(rcopts[i].name, "speller") == 0)
--- a/src/utils.c
+++ b/src/utils.c
@ -294,8 +294,8 @@ bool is_separate_word(size_t position, size_t length, const char *buf)
     * word isn't a non-punctuation "word" character, and if we're at
     * the end of the line or the character after the word isn't a
     * non-punctuation "word" character, we have a whole word. */
-    retval = (position == 0 || !is_word_mbchar(before, FALSE)) &&
-		(word_end == strlen(buf) || !is_word_mbchar(after, FALSE));
+    retval = (position == 0 || !is_alnum_mbchar(before)) &&
+		(word_end == strlen(buf) || !is_alnum_mbchar(after));

    free(before);
    free(after);