Fix line-splitting bug in addNewlines.

author: deva <deva> 2010-05-19 10:53:45 +0000
committer: deva <deva> 2010-05-19 10:53:45 +0000
commit: 0ee7ea5ea7b1105c5c7291ff3e88424dbf00eb8b (patch)
tree: 8c06bcd25a2241db9a51605335fb06fc4a0fa480 /server/src
parent: ef49d45a08bed5aafeede360762c895d9aece7f2 (diff)
1 files changed, 40 insertions, 26 deletions
diff --git a/server/src/journalwriter.cc b/server/src/journalwriter.cc
index db6939c..c73a841 100644
--- a/server/src/journalwriter.cc
+++ b/server/src/journalwriter.cc
@@ -38,7 +38,7 @@ static inline bool iswhitespace(char c)
 /**
  * Remove all spaces, tabs and newline trailing the string.
  */
-static std::string stripTrailingWhitepace(std::string str)
+static std::string stripTrailingWhitepace(const std::string &str)
 {
   if(str == "") return str;
 
@@ -50,7 +50,7 @@ static std::string stripTrailingWhitepace(std::string str)
   return str.substr(0, end);
 }
 
-static bool isInsideUTF8(std::string str, size_t idx)
+static bool isInsideUTF8(const std::string &str, size_t idx)
 {
   // Two byte character
   if(idx > 0 &&
@@ -92,7 +92,7 @@ static bool isInsideUTF8(std::string str, size_t idx)
   return false;
 }
 
-static size_t UTF8Length(std::string str)
+static size_t UTF8Length(const std::string &str)
 {
   size_t size = 0;
   for(size_t i = 0; i < str.size(); i++) {
@@ -103,39 +103,53 @@ static size_t UTF8Length(std::string str)
 
 /**
  * Find all lines longer than 'width', and insert a newline in the
- * first backward occurring space.
+ * nearest preceding whitespace. Force-split any line that contains none.
  */
-static std::string addNewlines(std::string str, size_t width)
+static std::string addNewlines(const std::string &str, size_t width)
 {
   std::string output;
-
-  std::string fraction;
-  size_t linelen = 0;
+  size_t len = 0;
   for(size_t i = 0; i < str.size(); i++) {
+    char c = str[i];
+
+    /*
+    fprintf(stderr, "i: %d, char: '%c', width: %d, len: %d, output: '%s'\n",
+            i, c, width, len, output.c_str());
+    */
 
-    fraction += str[i];
+    output += c;
 
     if(isInsideUTF8(str, i)) continue;
-    
-    if(iswhitespace(str[i]) ) {
-      if(linelen + UTF8Length(fraction) - 1 > width) {
-        output[output.size() - 1] = '\n';
-        linelen = 0;
-      }
 
-      output += fraction;
-      linelen += UTF8Length(fraction);
-      fraction = "";
-    }
+    len++;
+    if(c == '\n') len = 0;
 
-    if(str[i] == '\n') linelen = 0;
-  }
+    // Try to split line at whitespace.
+    if(len > width) {
+      size_t p = 0;
+      while(p < width) {
+        p++;
 
-  if(linelen + UTF8Length(fraction) > width) {
-    output[output.size() - 1] = '\n';
-    linelen = 0;
+        size_t pos = output.size() - p;
+
+        if(isInsideUTF8(output, pos)) continue;
+
+        if(iswhitespace(output[pos])) {
+          output[pos] = '\n';
+          len = UTF8Length(output.substr(pos+1));
+          break;
+        }
+      }
+    }
+
+    // Force split line at current pos.
+    if(len > width) {
+      // Overwrite the just-appended character with a newline, then re-append it so it starts the new line.
+      output[output.size()-1] = '\n';
+      output += c;
+      len = 1;
+    }
   }
-  output += fraction;
 
   return output;
 }
@@ -285,7 +299,7 @@ TEST_EQUAL_STR(addNewlines
 
 TEST_EQUAL_STR(addNewlines
                ("abc Loremipsum", 6),
-                "abc Lo\nremips\num",
+                "abc\nLoremi\npsum",
                "Test single linesplit inside word.");
 
 TEST_TRUE(isInsideUTF8("ø", 1), "Test positive utf8 match.");
author	deva <deva>	2010-05-19 10:53:45 +0000
committer	deva <deva>	2010-05-19 10:53:45 +0000
commit	0ee7ea5ea7b1105c5c7291ff3e88424dbf00eb8b (patch)
tree	8c06bcd25a2241db9a51605335fb06fc4a0fa480 /server/src
parent	ef49d45a08bed5aafeede360762c895d9aece7f2 (diff)