From e9ac1d793059e18541fbe8aac6eb312e53afb376 Mon Sep 17 00:00:00 2001
From: David Lawrence Ramsey <pooka109@gmail.com>
Date: Sun, 13 Mar 2005 03:28:37 +0000
Subject: [PATCH] overhaul the justify code to make it leave the right number
 of spaces at the ends of the lines of a paragraph, and also to make it
 simpler

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2354 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
---
 ChangeLog   |   4 +
 src/nano.c  | 524 +++++++++++++++++++++++++---------------------------
 src/proto.h |   3 +-
 3 files changed, 260 insertions(+), 271 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0d00ad04..e46e4bb0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -171,6 +171,10 @@ CVS code -
 	  parse_syntax(), parse_colors(), parse_rcfile(), do_rcfile(),
 	  etc. (David Benbennick)  DLR: Rename colortoint() to
 	  color_to_int(), and add a few miscellaneous tweaks.
+	- Overhaul the justify code to make it leave the right number of
+	  spaces at the ends of the lines of a paragraph, and also to
+	  make it simpler.  Changes to justify_format() and
+	  do_justify(); removal of breakable(). (DLR)
 	- Still more steps toward full wide/multibyte character support.
 	  Make whitespace display mode work with multibyte characters,
 	  and add a few related documentation updates.  New function
diff --git a/src/nano.c b/src/nano.c
index d1207ff1..6d8a1207 100644
--- a/src/nano.c
+++ b/src/nano.c
@@ -2354,8 +2354,8 @@ size_t indent_length(const char *line)
 #ifndef DISABLE_JUSTIFY
 /* justify_format() replaces tabs with spaces and multiple spaces by 1
  * (except it maintains 2 after a non-repeated character in punct
- * followed by a character in brackets).  Note that the terminating \0
- * counts as a space.
+ * optionally followed by a character in brackets, and removes all
+ * spaces at the end of the line).
  *
  * justify_format() might make line->data shorter, and change the actual
  * pointer with null_at().
@@ -2363,74 +2363,146 @@ size_t indent_length(const char *line)
  * justify_format() will not look at the first skip characters of line.
  * skip should be at most strlen(line->data).  The character at
  * line[skip + 1] must not be whitespace. */
-void justify_format(filestruct *line, size_t skip)
+void justify_format(filestruct *paragraph, size_t skip)
 {
-    char *back, *front;
+    char *end, *new_end, *new_paragraph_data;
+    size_t shift = 0;
+#ifndef NANO_SMALL
+    size_t mark_shift = 0;
+#endif
 
     /* These four asserts are assumptions about the input data. */
-    assert(line != NULL);
-    assert(line->data != NULL);
-    assert(skip < strlen(line->data));
-    assert(!is_blank_char(line->data[skip]));
-
-    back = line->data + skip;
-    for (front = back; ; front++) {
-	bool remove_space = FALSE;
-	    /* Do we want to remove this space? */
-
-	if (*front == '\t')
-	    *front = ' ';
-
-	/* These tests are safe since line->data + skip is not a
-	 * space. */
-	if ((*front == '\0' || *front == ' ') && *(front - 1) == ' ') {
-	    const char *bob = back - 2;
-
-	    remove_space = TRUE;
-	    for (; bob >= line->data + skip; bob--) {
-		if (strchr(punct, *bob) != NULL) {
-		    /* If this character is in punct, don't remove the
-		     * space unless this character and the character
-		     * before it are the same. */
-		    remove_space = (bob > line->data + skip &&
-			*bob == *(bob - 1));
-		    break;
-		}
-		if (strchr(brackets, *bob) == NULL)
-		    break;
+    assert(paragraph != NULL);
+    assert(paragraph->data != NULL);
+    assert(skip < strlen(paragraph->data));
+    assert(!is_blank_char(paragraph->data[skip]));
+
+    end = paragraph->data + skip;
+    new_paragraph_data = charalloc(strlen(paragraph->data) + 1);
+    charcpy(new_paragraph_data, paragraph->data, skip);
+    new_end = new_paragraph_data + skip;
+
+    while (*end != '\0') {
+	/* If this character is blank, make sure that it's a space with
+	 * no blanks after it. */
+	if (is_blank_char(*end)) {
+	    *new_end = ' ';
+	    new_end++;
+	    end++;
+
+	    while (*end != '\0' && is_blank_char(*end)) {
+		end++;
+		shift++;
+#ifndef NANO_SMALL
+		if (mark_beginbuf == paragraph &&
+			mark_beginx >= end - paragraph->data)
+		    mark_shift++;
+#endif
 	    }
-	}
+	/* If this character is punctuation, there are two ways we can
+	 * handle it. */
+	} else if (strchr(punct, *end) != NULL) {
+	    *new_end = *end;
+	    new_end++;
+	    end++;
+
+	    /* If this character is punctuation followed by itself and
+	     * optionally followed by a bracket, make sure there is no
+	     * more than one blank after it, and make sure that the
+	     * blank is a space. */
+	    if (*end != '\0' && *end == *(end - 1)) {
+		*new_end = *end;
+		new_end++;
+		end++;
+
+		if (*end != '\0' && strchr(brackets, *end) != NULL) {
+		    *new_end = *end;
+		    new_end++;
+		    end++;
+		}
+
+		if (*end != '\0' && is_blank_char(*end)) {
+		    *new_end = ' ';
+		    new_end++;
+		    end++;
+		}
 
-	if (remove_space) {
-	    /* Now *front is a space we want to remove.  We do that by
-	     * simply failing to assign it to *back. */
+		while (*end != '\0' && is_blank_char(*end)) {
+		    end++;
+		    shift++;
 #ifndef NANO_SMALL
-	    if (mark_beginbuf == line && back - line->data < mark_beginx)
-		mark_beginx--;
+		if (mark_beginbuf == paragraph &&
+			mark_beginx >= end - paragraph->data)
+		    mark_shift++;
 #endif
-	    if (*front == '\0')
-		*(back - 1) = '\0';
+		}
+	    /* If this character is punctuation optionally followed by a
+	     * bracket and then followed by spaces, make sure there are
+	     * no more than two blanks after it, and make sure that the
+	     * blanks are spaces. */
+	    } else {
+		if (*end != '\0' && strchr(brackets, *end) != NULL) {
+		    *new_end = *end;
+		    new_end++;
+		    end++;
+		}
+
+		if (*end != '\0' && is_blank_char(*end)) {
+		    *new_end = ' ';
+		    new_end++;
+		    end++;
+		}
+
+		if (*end != '\0' && is_blank_char(*end)) {
+		    *new_end = ' ';
+		    new_end++;
+		    end++;
+		}
+
+		while (*end != '\0' && is_blank_char(*end)) {
+		    end++;
+		    shift++;
+#ifndef NANO_SMALL
+		if (mark_beginbuf == paragraph &&
+			mark_beginx >= end - paragraph->data)
+		    mark_shift++;
+#endif
+		}
+	    }
 	} else {
-	    *back = *front;
-	    back++;
+	    *new_end = *end;
+	    new_end++;
+	    end++;
 	}
-	if (*front == '\0')
-	    break;
     }
 
-    back--;
+    assert(*end == '\0');
+
+    *new_end = *end;
 
-    assert(*back == '\0' && *front == '\0');
+    while (new_end > new_paragraph_data + skip &&
+	*(new_end - 1) == ' ') {
+	new_end--;
+	shift++;
+    }
+
+    if (shift > 0) {
+	totsize -= shift;
+	null_at(&new_paragraph_data, new_end - new_paragraph_data);
+	free(paragraph->data);
+	paragraph->data = new_paragraph_data;
 
-    /* Now back is the new end of line->data. */
-    if (back != front) {
-	totsize -= front - back;
-	null_at(&line->data, back - line->data);
 #ifndef NANO_SMALL
-	if (mark_beginbuf == line && back - line->data < mark_beginx)
-	    mark_beginx = back - line->data;
+	/* Adjust the mark coordinates to compensate for the change in
+	 * the current line. */
+	if (mark_beginbuf == paragraph) {
+	    mark_beginx -= mark_shift;
+	    if (mark_beginx > new_end - new_paragraph_data)
+		mark_beginx = new_end - new_paragraph_data;
+	}
 #endif
-    }
+    } else
+	free(new_paragraph_data);
 }
 
 /* The "quote part" of a line is the largest initial substring matching
@@ -2668,25 +2740,6 @@ filestruct *backup_lines(filestruct *first_line, size_t par_len, size_t
     return first_line;
 }
 
-/* Is it possible to break line at or before goal? */
-bool breakable(const char *line, ssize_t goal)
-{
-    while (*line != '\0' && goal >= 0) {
-	size_t pos = 0;
-
-	if (is_blank_char(*line))
-	    return TRUE;
-
-	line += parse_mbchar(line, NULL, NULL, &pos);
-
-	goal -= pos;
-    }
-
-    /* If goal is not negative, the whole line (one word) was short
-     * enough. */
-    return (goal >= 0);
-}
-
 /* We are trying to break a chunk off line.  We find the last space such
  * that the display length to there is at most goal + 1.  If there is no
  * such space, and force is TRUE, then we find the first space.  Anyway,
@@ -2871,10 +2924,6 @@ void do_justify(bool full_justify)
     filestruct *last_par_line;
 	/* Will be the line containing the newline after the last line
 	 * of the result.  Also for restoring after unjustify. */
-    bool allow_respacing;
-	/* Whether we should change the spacing at the end of a line
-	 * after justifying it.  This should be TRUE whenever we move
-	 * to the next line after justifying the current line. */
 
     /* We save these global variables to be restored if the user
      * unjustifies.  Note that we don't need to save totlines. */
@@ -2897,11 +2946,18 @@ void do_justify(bool full_justify)
     last_par_line = current;
 
     while (TRUE) {
+	size_t i;
+	    /* Generic loop variable. */
 	size_t quote_len;
 	    /* Length of the initial quotation of the paragraph we
 	     * justify. */
+	size_t indent_len;
+	    /* Length of the initial indentation of the paragraph we
+	     * justify. */
 	size_t par_len;
-	    /* Number of lines in that paragraph. */
+	    /* Number of lines in the paragraph we justify. */
+	ssize_t break_pos;
+	    /* Where we will break lines. */
 
 	/* Find the first line of the paragraph to be justified.  That
 	 * is the start of this paragraph if we're in one, or the start
@@ -2923,213 +2979,143 @@ void do_justify(bool full_justify)
 	    }
 	}
 
-	/* Next step, we loop through the lines of this paragraph,
-	 * justifying each one individually. */
-	for (; par_len > 0; current_y++, par_len--) {
-	    size_t indent_len;
-		/* Generic indentation length. */
-	    size_t line_len;
-	    size_t display_len;
-		/* The width of current in screen columns. */
-	    ssize_t break_pos;
-		/* Where we will break the line. */
-
-	    /* We'll be moving to the next line after justifying the
-	     * current line in almost all cases, so allow changing the
-	     * spacing at the ends of justified lines by default. */
-	    allow_respacing = TRUE;
-
-	    indent_len = quote_len + indent_length(current->data +
+	/* If we haven't already done it, copy the original paragraph(s)
+	 * to the justify buffer. */
+	if (first_par_line == NULL)
+	    first_par_line = backup_lines(current, full_justify ?
+		filebot->lineno - current->lineno : par_len, quote_len);
+
+	/* Next step, we tack all the lines of the paragraph together,
+	 * skipping the quoting and indentation on all lines after the
+	 * first. */
+	for (i = 0; i < par_len - 1; i++) {
+	    filestruct *next_line = current->next;
+	    size_t line_len = strlen(current->data);
+	    size_t next_line_len = strlen(current->next->data);
+
+	    indent_len = quote_len + indent_length(current->next->data +
 		quote_len);
+	    next_line_len -= indent_len;
+	    totsize -= indent_len;
+
+	    if (line_len > 0 && current->data[line_len - 1] != ' ') {
+		line_len++;
+		current->data = charealloc(current->data, line_len + 1);
+		current->data[line_len - 1] = ' ';
+		current->data[line_len] = '\0';
+		totsize++;
+	    }
 
-	    /* If we haven't already done it, copy the original
-	     * paragraph to the justify buffer. */
-	    if (first_par_line == NULL)
-		first_par_line = backup_lines(current, full_justify ?
-			filebot->lineno - current->lineno : par_len,
-			quote_len);
-
-	    /* Now we call justify_format() on the current line of the
-	     * paragraph, which will remove excess spaces from it and
-	     * change tabs to spaces. */
-	    justify_format(current, quote_len +
-		indent_length(current->data + quote_len));
+	    current->data = charealloc(current->data, line_len +
+		next_line_len + 1);
+	    strcat(current->data, next_line->data + indent_len);
+
+	    /* Don't destroy edittop! */
+	    if (edittop == next_line)
+		edittop = current;
 
-	    line_len = strlen(current->data);
-	    display_len = strlenpt(current->data);
-
-	    if (display_len > fill) {
-		/* The line is too long.  Try to wrap it to the next. */
-	        break_pos = break_line(current->data + indent_len,
-			fill - strnlenpt(current->data, indent_len),
-			TRUE);
-		if (break_pos == -1 ||
-			break_pos + indent_len == line_len)
-		    /* We can't break the line, or don't need to, so
-		     * just go on to the next. */
-		    goto continue_loc;
-		break_pos += indent_len;
-
-		assert(break_pos < line_len);
-
-		if (par_len == 1) {
-		    /* There is no next line in this paragraph.  We make
-		     * a new line and copy text after break_pos into
-		     * it. */
-		    splice_node(current, make_new_node(current),
-			current->next);
-		    /* In a non-quoted paragraph, we copy the indent
-		     * only if AUTOINDENT is turned on. */
-		    if (quote_len == 0
-#ifndef NANO_SMALL
-			&& !ISSET(AUTOINDENT)
-#endif
-			)
-			    indent_len = 0;
-		    current->next->data = charalloc(indent_len +
-			line_len - break_pos);
-		    strncpy(current->next->data, current->data,
-			indent_len);
-		    strcpy(current->next->data + indent_len,
-			current->data + break_pos + 1);
-
-		    assert(strlen(current->next->data) ==
-			indent_len + line_len - break_pos - 1);
-
-		    totlines++;
-		    totsize += indent_len;
-		    par_len++;
-		} else {
-		    size_t next_line_len = strlen(current->next->data);
-
-		    indent_len = quote_len +
-			indent_length(current->next->data + quote_len);
-		    current->next->data =
-			charealloc(current->next->data, next_line_len +
-			line_len - break_pos + 1);
-
-		    charmove(current->next->data + indent_len +
-			line_len - break_pos, current->next->data +
-			indent_len, next_line_len - indent_len + 1);
-		    strcpy(current->next->data + indent_len,
-			current->data + break_pos + 1);
-		    current->next->data[indent_len + line_len -
-			break_pos - 1] = ' ';
 #ifndef NANO_SMALL
-		    if (mark_beginbuf == current->next) {
-			if (mark_beginx < indent_len)
-			    mark_beginx = indent_len;
-			mark_beginx += line_len - break_pos;
-		    }
+	    /* Adjust the mark coordinates to compensate for the change
+	     * in the next line. */
+	    if (mark_beginbuf == next_line) {
+		mark_beginbuf = current;
+		mark_beginx += line_len;
+	    }
 #endif
-		}
+
+	    unlink_node(next_line);
+	    delete_node(next_line);
+
+	    /* If we've removed the next line, we need to go through
+	     * this line again. */
+	    i--;
+
+	    par_len--;
+	    totlines--;
+	    totsize--;
+	}
+
+	/* Now we call justify_format() on the paragraph, which will
+	 * remove excess spaces from it and change tabs to spaces. */
+	justify_format(current, quote_len +
+		indent_length(current->data + quote_len));
+
+	while (par_len > 0 && strlenpt(current->data) > fill) {
+	    size_t line_len = strlen(current->data);
+
+	    indent_len = quote_len + indent_length(current->data +
+		quote_len);
+
+	    /* If this line is too long, try to wrap it to the next line
+	     * to make it short enough. */
+	    break_pos = break_line(current->data + indent_len,
+		fill - strnlenpt(current->data, indent_len), TRUE);
+
+	    /* We can't break the line, or don't need to, so get out. */
+	    if (break_pos == -1 || break_pos + indent_len == line_len)
+		break;
+
+	    break_pos += indent_len;
+
+	    assert(break_pos < line_len);
+
+	    /* Make a new line and copy the text after where we broke
+	     * this line to the beginning of the new line. */
+	    splice_node(current, make_new_node(current), current->next);
+
+	    /* If this paragraph is non-quoted, and autoindent isn't
+	     * turned on, set the indentation length to zero so that the
+	     * indentation is treated as part of the line. */
+	    if (quote_len == 0
 #ifndef NANO_SMALL
-		if (mark_beginbuf == current &&
-			mark_beginx > break_pos) {
-		    mark_beginbuf = current->next;
-		    mark_beginx -= break_pos + 1 - indent_len;
-		}
+		&& !ISSET(AUTOINDENT)
 #endif
-		null_at(&current->data, break_pos);
+		)
+		indent_len = 0;
 
-		/* Go to the next line. */
-		current = current->next;
-	    } else if (display_len < fill && par_len > 1) {
-		size_t next_line_len;
+	    current->next->data = charalloc(indent_len + line_len -
+		break_pos);
+	    charcpy(current->next->data, current->data, indent_len);
+	    strcpy(current->next->data + indent_len, current->data +
+		break_pos + 1);
 
-		indent_len = quote_len +
-			indent_length(current->next->data + quote_len);
-		/* If we can't pull a word from the next line up to this
-		 * one, just go on. */
-		if (!breakable(current->next->data + indent_len,
-			fill - display_len - 1))
-		    goto continue_loc;
+	    assert(strlen(current->next->data) == indent_len + line_len - break_pos - 1);
 
-		break_pos = break_line(current->next->data + indent_len,
-			fill - display_len - 1, FALSE);
+	    par_len++;
+	    totlines++;
+	    totsize += indent_len;
 
-		assert(break_pos != -1);
-
-		current->data = charealloc(current->data,
-			line_len + break_pos + 2);
-		current->data[line_len] = ' ';
-		strncpy(current->data + line_len + 1,
-			current->next->data + indent_len, break_pos);
-		current->data[line_len + break_pos + 1] = '\0';
 #ifndef NANO_SMALL
-		if (mark_beginbuf == current->next) {
-		    if (mark_beginx < indent_len + break_pos) {
-			mark_beginbuf = current;
-			if (mark_beginx <= indent_len)
-			    mark_beginx = line_len + 1;
-			else
-			    mark_beginx = line_len + 1 + mark_beginx -
-				indent_len;
-		    } else
-			mark_beginx -= break_pos + 1;
-		}
+	    /* Adjust the mark coordinates to compensate for the change
+	     * in the current line. */
+	    if (mark_beginbuf == current && mark_beginx > break_pos) {
+		mark_beginbuf = current->next;
+		mark_beginx -= break_pos + 1 - indent_len;
+	    }
 #endif
-		next_line_len = strlen(current->next->data);
-		if (indent_len + break_pos == next_line_len) {
-		    filestruct *line = current->next;
-
-		    /* Don't destroy edittop! */
-		    if (line == edittop)
-			edittop = current;
-
-		    unlink_node(line);
-		    delete_node(line);
-		    totlines--;
-		    totsize -= indent_len;
-		    current_y--;
-
-		    /* Don't go to the next line.  Accordingly, don't
-		     * allow changing the spacing at the end of the
-		     * previous justified line, so that we don't end up
-		     * doing it more than once on the same line. */
-		    allow_respacing = FALSE;
-		} else {
-		    charmove(current->next->data + indent_len,
-			current->next->data + indent_len + break_pos + 1,
-			next_line_len - break_pos - indent_len);
-		    null_at(&current->next->data,
-			next_line_len - break_pos);
-
-		    /* Go to the next line. */
-		    current = current->next;
-		}
-	    } else
-  continue_loc:
-		/* Go to the next line. */
-		current = current->next;
 
-	    /* We've moved to the next line after justifying the
-	     * current line.  If the justified line was not the last
-	     * line of the paragraph, add a space to the end of it to
-	     * replace the one removed or left out by justify_format().
-	     * If it was the last line of the paragraph, and
-	     * justify_format() left a space on the end of it, remove
-	     * the space. */
-	    if (allow_respacing) {
-		size_t prev_line_len = strlen(current->prev->data);
-
-		if (par_len > 1) {
-		    current->prev->data =
-			charealloc(current->prev->data,
-			prev_line_len + 2);
-		    current->prev->data[prev_line_len] = ' ';
-		    current->prev->data[prev_line_len + 1] = '\0';
-		    totsize++;
-		} else if (par_len == 1 &&
-			current->prev->data[prev_line_len - 1] == ' ') {
-		    current->prev->data =
-			charealloc(current->prev->data, prev_line_len);
-		    current->prev->data[prev_line_len - 1] = '\0';
-		    totsize--;
-		}
+	    /* Break the line.  If this isn't the last line of the
+	     * paragraph, add a space after where we break it. */
+	    null_at(&current->data, break_pos);
+	    if (par_len > 1) {
+		current->data = charealloc(current->data,
+			break_pos + 2);
+		current->data[break_pos] = ' ';
+		current->data[break_pos + 1] = '\0';
+		totsize++;
 	    }
+
+	    /* Go to the next line. */
+	    par_len--;
+	    current_y++;
+	    current = current->next;
 	}
 
+	/* Go to the next line, the line after the last line of the
+	 * paragraph. */
+	current_y++;
+	current = current->next;
+
 	/* We've just justified a paragraph. If we're not justifying the
 	 * entire file, break out of the loop.  Otherwise, continue the
 	 * loop so that we justify all the paragraphs in the file. */
diff --git a/src/proto.h b/src/proto.h
index d0694acc..38abba63 100644
--- a/src/proto.h
+++ b/src/proto.h
@@ -414,7 +414,7 @@ void do_spell(void);
 size_t indent_length(const char *line);
 #endif
 #ifndef DISABLE_JUSTIFY
-void justify_format(filestruct *line, size_t skip);
+void justify_format(filestruct *paragraph, size_t skip);
 size_t quote_length(const char *line);
 bool quotes_match(const char *a_line, size_t a_quote, const char
 	*b_line);
@@ -426,7 +426,6 @@ bool inpar(const char *str);
 void do_para_end(void);
 filestruct *backup_lines(filestruct *first_line, size_t par_len, size_t
 	quote_len);
-bool breakable(const char *line, ssize_t goal);
 ssize_t break_line(const char *line, ssize_t goal, bool force);
 bool do_para_search(size_t *const quote, size_t *const par);
 void do_justify(bool full_justify);
-- 
GitLab