[Home]WikiPatches/ImprovedTextFormatting

UseModWiki | WikiPatches | RecentChanges | Preferences

This patch fixes many problems related to text formatting [1] [2], such as non-compliant <p>..</p> tags and incorrect list nesting.

Admittedly, the patch is long enough that it likely contains a bug, but this is the limit of my ingenuity. If someone has a better solution, I would like to hear about it. I hope the patch helps anyone who wants compliant HTML output.

--TakuyaMurata

Yes: because UseMod doesn't support nested <pre></pre>, the display is corrupted.

It is hard to change this patch, so I will just add a few comments here. If you have a </pre> in the source, that breaks formatting, so I suggest you change </pre> to </pre" . "> or something similar... Plus I think that the newline handling can be changed: s/[\r\n]/\n/g is the same as s/\r/\n/g. -- AlexSchroeder

Thanks for the comments. I agree that the patch is complicated, but I don't know how it can be done in a simpler way. --TakuyaMurata

--- wiki_orig.cgi	2002-12-25 18:56:20.000000000 -0600
+++ wiki_text.cgi	2002-12-25 23:59:59.000000000 -0600
@@ -1163,13 +1163,174 @@
   }
   $pageText = &QuoteHtml($pageText);
   $pageText =~ s/\\ *\r?\n/ /g;          # Join lines with backslash at end
-  $pageText = &CommonMarkup($pageText, 1, 0);   # Multi-line markup
-  $pageText = &WikiLinesToHtml($pageText);      # Line-oriented markup
+  #$pageText = &CommonMarkup($pageText, 1, 0);   # Multi-line markup
+
+  $pageText =~ s/$FS[123]//g;       # Remove separators (paranoia)
+  $pageText =~ s/\r\n/\n/g;
+  $pageText =~ s/\r/\n/g;           # In Mac only '\r' is a newline.
+  $pageText = &GetParsedPage(\$pageText);
+
   $pageText =~ s/$FS(\d+)$FS/$SaveUrl{$1}/ge;   # Restore saved text
   $pageText =~ s/$FS(\d+)$FS/$SaveUrl{$1}/ge;   # Restore nested saved text
   return $pageText;
 }
 
+sub regex_prematch {   # Returns the part of $string that precedes the most recent successful regex match.
+  my ($string) = @_;   # callers pass the same string they have just matched against
+  return substr($string, 0, $-[0]);   # $-[0] is the start offset of the last successful match
+}
+
+sub regex_postmatch {   # Returns the part of $string that follows the most recent successful regex match.
+  my ($string) = @_;    # callers pass the same string they have just matched against
+  return substr($string, $+[0]);   # $+[0] is the end offset of the last successful match
+}
+
+sub GetParsedInline {   # Runs CommonMarkup over one text fragment and strips surrounding whitespace.
+  my ($t) = @_;         # $t: the raw text of a paragraph, list item or definition
+  $t = &CommonMarkup($t, 1, 1);    # $useImage = 1, $doLines = 1 ("do both")
+  $t =~ s/\A\s+//g;     # trim leading whitespace
+  $t =~ s/\s+\Z//g;     # trim trailing whitespace
+  return $t;            # the marked-up, trimmed fragment
+}
+
+sub GetParsedFlow {   # Converts non-list text (headings, rules, preformatted runs, paragraphs) to HTML, recursively.
+  my ($t) = @_;       # $t: newline-separated wiki text; returns an HTML string ('' for empty input)
+  my ($left, $right, $depth);
+  my ($ThinLine) = 0; # NOTE(review): always 0 in this sub, so the "if ($ThinLine)" branch below never runs
+
+  return '' unless defined($t) && length($t);   # skip only truly empty input; the text "0" is still content
+
+  if ($t =~ /^(\=+) +([^\n]+) +\1(?:\n|\Z)/m) {   # a line of the form "== Title ==" with equal runs of "="
+    $left = &regex_prematch($t);
+    $depth = length($1);
+    $depth = 6  if ($depth > 6);   # HTML has no heading level beyond h6
+    return &GetParsedFlow($left) . "<h$depth>$2</h$depth>\n" . GetParsedFlow(&regex_postmatch($t));   # NOTE(review): title is emitted without GetParsedInline -- confirm intended
+  }
+
+  if ($ThinLine) {
+    if ($t =~ /----+/) {   # four or more dashes anywhere in the text
+      $left = &regex_prematch($t);
+      $t = &regex_postmatch($t);
+      return &GetParsedFlow($left) . "<hr noshade size=\"1\" />\n" . GetParsedFlow($t);
+    }
+    if ($t =~ /====+/) {   # four or more equals signs anywhere in the text
+      $left = &regex_prematch($t);
+      $t = &regex_postmatch($t);
+      return &GetParsedFlow($left) . "<hr noshade size=\"2\" />\n" . GetParsedFlow($t);
+    }
+  }
+  else {
+    if ($t =~ /----+/) {   # four or more dashes anywhere in the text
+      $left = &regex_prematch($t);
+      $t = &regex_postmatch($t);
+      return &GetParsedFlow($left) . "<hr />\n" . GetParsedFlow($t);
+    }
+  }
+
+  if ($t =~ /((?:^ +[^\n]+(?:\n|\Z))+)/m) {   # one or more consecutive lines that start with a space
+    $left = &regex_prematch($t);
+    $t = &regex_postmatch($t);
+    my ($pre) = $1;
+    $pre =~ s/^ +//mg;   # drop the leading spaces of every preformatted line
+    return &GetParsedFlow($left) . "<pre>$pre</pre" . ">\n" . GetParsedFlow($t);   # closing tag is written in two pieces so it never appears literally in this source
+  }
+
+  ($left, $right) = split(/\n\s*\n/, $t, 2);   # split at the first blank line
+  if (defined($right) && length($right)) {     # length test, so a remainder of "0" still becomes its own paragraph
+    return &GetParsedFlow($left) . &GetParsedFlow($right);
+  }
+
+  $t =~ s/\n/ /g;     # join wrapped lines with a space (the author suspected some browsers glue two lines together)
+  $t =~ s/\A\s+//g;   # trim
+  $t =~ s/\s+\Z//g;   # trim
+  return length($t) ? ("<p>" . &GetParsedInline($t) . "</p>\n") : '';   # no empty <p>; "0" is a valid paragraph
+}
+
+sub GetParsedPage {   # Recursive list parser: emits nested ul/ol/dl markup and hands all other text to GetParsedFlow.
+  my ($ref_text, $depth, $tag) = @_;   # ref to the unparsed text (consumed in place), nesting depth, enclosing list tag; the top-level call passes only the ref
+  my (%item_tag) = ('' => '', 'dl' => 'dd', 'ol' => 'li', 'ul' => 'li',);   # element that must wrap a list nested inside each list type
+  my (%list_tag) = ('*' => 'ul', '#' => 'ol', );
+  my ($html, $left);
+  my ($t) = $$ref_text;
+
+  return '' unless defined($t) && length($t);   # stop on empty input only; the text "0" is still content
+
+  if ($t =~ /^((?:\*+)|(?:\#+)) *([^\n]*)(?:\n|\Z)/m) {   # first bullet or numbered list line
+    $left = &regex_prematch($t);
+    if (!$left) {   # the list line is the very first line of the remaining text
+      my ($en_tag) = $list_tag{substr($1, 0, 1)};
+      if (length($1) > $depth) {   # deeper than the current level: open a new list without consuming the line
+        $html  = "<$en_tag>\n";
+        $html .= &GetParsedPage($ref_text, $depth + 1, $en_tag);
+        $html .= "</$en_tag>\n";
+        if ($item_tag{$tag}) {   # a list nested in another list must sit inside an item element
+          $html = "<$item_tag{$tag}>$html</$item_tag{$tag}>\n";
+        }
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      elsif (length($1) == $depth && $tag eq $en_tag) {   # same level and same list type: emit one item
+        $$ref_text = substr($$ref_text, $+[0]);  # consume
+        $html  = length($2) ? "<li>" . &GetParsedInline($2) . "</li>\n" : '';   # length test, so an item "0" is still wrapped in li
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      return $html;   # undef when the line belongs to a shallower level or another list type; the caller handles it
+    }
+    $t = $left;   # a list starts later: from here on only look at the text before it
+  }
+
+  if ($t =~ /^(;+)([^:\n]*)\:([^\n]*)(?:\n|\Z)/m) {   # first ";term:definition" line
+    $left = &regex_prematch($t);
+    if (!$left) {
+      if (length($1) > $depth) {
+        $html  = "<dl>\n";
+        $html .= &GetParsedPage($ref_text, $depth + 1, 'dl');
+        $html .= "</dl>\n";
+        if ($item_tag{$tag}) {
+          $html = "<$item_tag{$tag}>$html</$item_tag{$tag}>\n";
+        }
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      elsif (length($1) == $depth && $tag eq 'dl') {
+        $$ref_text = substr($$ref_text, $+[0]);  # consume
+        $html  = length($2) ? ("<dt>" . &GetParsedInline($2) . "</dt>\n") : '';   # length test: a term "0" is still a term
+        $html .= length($3) ? ("<dd>" . &GetParsedInline($3) . "</dd>\n") : '';   # likewise for the definition
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      return $html;
+    }
+    $t = $left;
+  }
+
+  if ($t =~ /^(\:+)([^\n]*)(?:\n|\Z)/m) {   # first ":indented" line, rendered as a dd without a dt
+    $left = &regex_prematch($t);
+    if (!$left) {
+      if (length($1) > $depth) {
+        $html  = "<dl>\n";
+        $html .= &GetParsedPage($ref_text, $depth + 1, 'dl');
+        $html .= "</dl>\n";
+        if ($item_tag{$tag}) {
+          $html = "<$item_tag{$tag}>$html</$item_tag{$tag}>\n";
+        }
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      elsif (length($1) == $depth && $tag eq 'dl') {
+        $$ref_text = substr($$ref_text, $+[0]);  # consume
+        $html .= length($2) ? ("<dd>" . &GetParsedInline($2) . "</dd>\n") : '';   # length test: a body "0" is still content
+        $html .= &GetParsedPage($ref_text, $depth, $tag);
+      }
+      return $html;
+    }
+    $t = $left;
+  }
+
+  if ($depth > 0) {   # inside a list, non-list text ends the list; it stays unconsumed for the outer level
+    return '';
+  }
+
+  $$ref_text = substr($$ref_text, length($t));   # top level: consume the non-list text up to the next list line
+  return &GetParsedFlow($t) . &GetParsedPage($ref_text, $depth, $tag);
+}
+
 sub CommonMarkup {
   my ($text, $useImage, $doLines) = @_;
   local $_ = $text;
@@ -1233,66 +1394,10 @@
     # by matching the inner quotes for the strong pattern.
     s/('*)'''(.*?)'''/$1<strong>$2<\/strong>/g;
     s/''(.*?)''/<em>$1<\/em>/g;
-    if ($UseHeadings) {
-      s/(^|\n)\s*(\=+)\s+([^\n]+)\s+\=+/&WikiHeading($1, $2, $3)/geo;
-    }
   }
   return $_;
 }
 
-sub WikiLinesToHtml {
-  my ($pageText) = @_;
-  my ($pageHtml, @htmlStack, $code, $depth, $oldCode);
-
-  @htmlStack = ();
-  $depth = 0;
-  $pageHtml = "";
-  foreach (split(/\n/, $pageText)) {  # Process lines one-at-a-time
-    $_ .= "\n";
-    if (s/^(\;+)([^:]+\:?)\:/<dt>$2<dd>/) {
-      $code = "DL";
-      $depth = length $1;
-    } elsif (s/^(\:+)/<dt><dd>/) {
-      $code = "DL";
-      $depth = length $1;
-    } elsif (s/^(\*+)/<li>/) {
-      $code = "UL";
-      $depth = length $1;
-    } elsif (s/^(\#+)/<li>/) {
-      $code = "OL";
-      $depth = length $1;
-    } elsif (/^[ \t].*\S/) {
-      $code = "PRE";
-      $depth = 1;
-    } else {
-      $depth = 0;
-    }
-    while (@htmlStack > $depth) {   # Close tags as needed
-      $pageHtml .=  "</" . pop(@htmlStack) . ">\n";
-    }
-    if ($depth > 0) {
-      $depth = $IndentLimit  if ($depth > $IndentLimit);
-      if (@htmlStack) {  # Non-empty stack
-        $oldCode = pop(@htmlStack);
-        if ($oldCode ne $code) {
-          $pageHtml .= "</$oldCode><$code>\n";
-        }
-        push(@htmlStack, $code);
-      }
-      while (@htmlStack < $depth) {
-        push(@htmlStack, $code);
-        $pageHtml .= "<$code>\n";
-      }
-    }
-    s/^\s*$/<p>\n/;                        # Blank lines become <p> tags
-    $pageHtml .= &CommonMarkup($_, 1, 2);  # Line-oriented common markup
-  }
-  while (@htmlStack > 0) {       # Clear stack
-    $pageHtml .=  "</" . pop(@htmlStack) . ">\n";
-  }
-  return $pageHtml;
-}
-
 sub QuoteHtml {
   my ($html) = @_;
 
@@ -1512,14 +1617,6 @@
   return $url;
 }
 
-sub WikiHeading {
-  my ($pre, $depth, $text) = @_;
-
-  $depth = length($depth);
-  $depth = 6  if ($depth > 6);
-  return $pre . "<H$depth>$text</H$depth>\n";
-}
-
 # ==== Difference markup and HTML ====
 sub GetDiffHTML {
   my ($diffType, $id, $rev, $newText) = @_;


UseModWiki | WikiPatches | RecentChanges | Preferences
Edit text of this page | View other revisions | Search MetaWiki
Last edited October 7, 2007 6:35 am by JuanmaMP (diff)
Search: