)>}]
شركة التطبيقات المتكاملة لتصميم وبرمجة البرمجيات الخاصة ش.ش.و.
Integrated Applications Programming Company
Home » Code Library » Html (Ia.Cl.Model)

Public general use code classes and xml files that we've compiled and used over the years:

Handle HTML encoding, decoding functions.

using System.Globalization;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

    4: namespace Ia.Cl.Model
    5: {
    6:     ////////////////////////////////////////////////////////////////////////////
    7:  
    8:     /// <summary publish="true">
    9:     /// Handle HTML encoding, decoding functions.
   10:     /// </summary>
   11:     /// <remarks> 
   12:     /// Copyright � 2001-2018 Jasem Y. Al-Shamlan (info@ia.com.kw), Integrated Applications - Kuwait. All Rights Reserved.
   13:     ///
   14:     /// This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
   15:     /// the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
   16:     ///
   17:     /// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   18:     /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
   19:     /// 
   20:     /// You should have received a copy of the GNU General Public License along with this library. If not, see http://www.gnu.org/licenses.
   21:     /// 
   22:     /// Copyright notice: This notice may not be removed or altered from any source distribution.
   23:     /// </remarks> 
   24:     public class Html
   25:     {
   26:         private static readonly Regex regexBetweenTags = new Regex(@">\s+<", RegexOptions.Compiled);
   27:         private static readonly Regex regexLineBreaks = new Regex(@"\n\s+", RegexOptions.Compiled);
   28:  
   29:         ////////////////////////////////////////////////////////////////////////////
   30:  
   31:         /// <summary>
   32:         ///
   33:         /// </summary>
   34:         public Html() { }
   35:  
   36:         ////////////////////////////////////////////////////////////////////////////
   37:  
   38:         ///<summary>
   39:         ///
   40:         /// </summary>
   41:         /// <param name="s"></param>
   42:         /// <returns></returns>
   43:         /// <remark>http://www.west-wind.com/weblog/posts/2009/Feb/05/Html-and-Uri-String-Encoding-without-SystemWeb</remark>
   44:         public static string HtmlEncode(string s)
   45:         {
   46: #if WFA
   47:  
   48:             if (s == null) return null;
   49:  
   50:             StringBuilder sb = new StringBuilder(s.Length);
   51:  
   52:             int len = s.Length;
   53:  
   54:             for (int i = 0; i < len; i++)
   55:             {
   56:                 switch (s[i])
   57:                 {
   58:                     case '<': sb.Append("&lt;"); break;
   59:                     case '>': sb.Append("&gt;"); break;
   60:                     case '"': sb.Append("&quot;"); break;
   61:                     case '&': sb.Append("&amp;"); break;
   62:                     default:
   63:                         if (s[i] > 159)
   64:                         {
   65:                             // decimal numeric entity
   66:                             sb.Append("&#");
   67:                             sb.Append(((int)s[i]).ToString(CultureInfo.InvariantCulture));
   68:                             sb.Append(";");
   69:                         }
   70:                         else sb.Append(s[i]);
   71:                         break;
   72:                 }
   73:             }
   74:  
   75:             return sb.ToString();
   76: #else
   77:             return WebUtility.HtmlEncode(s);
   78: #endif
   79:         }
   80:  
   81:         ////////////////////////////////////////////////////////////////////////////
   82:  
   83:         ///<summary>
   84:         ///
   85:         /// </summary>
   86:         public static string HtmlDecode(string s)
   87:         {
   88: #if WFA
   89:             s = s.Replace("&lt;","<");
   90:             s = s.Replace("&gt;",">");
   91:             s = s.Replace("&quot;",@"""");
   92:             s = s.Replace("&amp;","&");
   93:  
   94:             return s;
   95: #else
   96:             return WebUtility.HtmlDecode(s);
   97: #endif
   98:         }
   99:  
  100:         ////////////////////////////////////////////////////////////////////////////
  101:  
  102:         /// <summary>
  103:         ///
  104:         /// </summary>
  105:         public static string Encode(string s)
  106:         {
  107:             s = HtmlEncode(s);
  108:  
  109:             // database requirement:
  110:             s = s.Replace(@"'", @"_#039_");
  111:             s = s.Replace(@"?", @"_#063_");
  112:  
  113:             return s;
  114:         }
  115:  
  116:         ////////////////////////////////////////////////////////////////////////////
  117:  
  118:         /// <summary>
  119:         ///
  120:         /// </summary>
  121:         public static string Decode(string s)
  122:         {
  123:             // database requirement:
  124:             s = s.Replace(@"_#063_", @"?");
  125:             s = s.Replace(@"_#039_", @"'");
  126:  
  127:             s = HtmlDecode(s);
  128:  
  129:             return s;
  130:         }
  131:  
  132:         ////////////////////////////////////////////////////////////////////////////
  133:  
  134:         /// <summary>
  135:         ///
  136:         /// </summary>
  137:         public static string DecodeRemoveNLLF(string s)
  138:         {
  139:             // database requirement:
  140:  
  141:             s = s.Replace(@"_#063_", @"?");
  142:             s = s.Replace(@"_#039_", @"'");
  143:  
  144:             s = HtmlDecode(s);
  145:  
  146:             s = s.Replace("\n\r", " ");
  147:             s = s.Replace("\r\n", " ");
  148:             s = s.Replace("\n", " ");
  149:             s = s.Replace("\r", " ");
  150:  
  151:             return s;
  152:         }
  153:  
  154:         ////////////////////////////////////////////////////////////////////////////
  155:  
  156:         /// <summary>
  157:         ///
  158:         /// </summary>
  159:         public static string XmlEncode(string s)
  160:         {
  161:             s = HtmlEncode(s);
  162:  
  163:             s = s.Replace(@"'", @"_#039_");
  164:             s = s.Replace(@"\", @"_#092_");
  165:             s = s.Replace(@"?", @"_#063_");
  166:  
  167:             /*
  168:             &amp;  =  &
  169:             &lt;   =  <
  170:             &gt;   =  >
  171:             &quot; =  "
  172:             &apos; =  '
  173:             */
  174:  
  175:             // XML requirement:
  176:             s = s.Replace("&", "_amp_");
  177:             s = s.Replace(">", "_gt_");
  178:             s = s.Replace("<", "_lt_");
  179:  
  180:             return s;
  181:         }
  182:  
  183:         ////////////////////////////////////////////////////////////////////////////
  184:  
  185:         /// <summary>
  186:         ///
  187:         /// </summary>
  188:         public static string XmlDecode(string s)
  189:         {
  190:             // XML requirement
  191:             s = s.Replace("_gt_", ">");
  192:             s = s.Replace("_lt_", "<");
  193:             s = s.Replace("_amp_", "&");
  194:  
  195:             s = s.Replace(@"_#039_", @"'");
  196:             s = s.Replace(@"_#092_", @"\");
  197:             s = s.Replace(@"_#063_", @"?");
  198:  
  199:             s = HtmlDecode(s);
  200:             return s;
  201:         }
  202:  
  203:         ////////////////////////////////////////////////////////////////////////////
  204:  
  205:         /// <summary>
  206:         ///
  207:         /// </summary>
  208:         public static string Code(string code)
  209:         {
  210:             // this displays an HTML code in regular text
  211:             /*
  212:             s=s.Replace("_gt_",">");
  213:             s=s.Replace("_lt_","<");
  214:             s=s.Replace("_amp_","&");
  215:  
  216:             s=s.Replace(@"_#039_",@"'");
  217:             s=s.Replace(@"_#092_",@"\");
  218:             s=s.Replace(@"_#063_",@"?");
  219:             */
  220:  
  221:             code = HtmlEncode(code);
  222:             return code;
  223:         }
  224:  
  225:         ////////////////////////////////////////////////////////////////////////////
  226:  
  227:         /// <summary>
  228:         ///
  229:         /// </summary>
  230:         public static string StripHtml(string source)
  231:         {
  232:             try
  233:             {
  234:                 string result;
  235:  
  236:                 // Remove HTML Development formatting
  237:                 // Replace line breaks with space
  238:                 // because browsers inserts space
  239:                 result = source.Replace("\r", " ");
  240:  
  241:                 // Replace line breaks with space
  242:                 // because browsers inserts space
  243:                 result = result.Replace("\n", " ");
  244:  
  245:                 // Remove step-formatting
  246:                 result = result.Replace("\t", string.Empty);
  247:  
  248:                 // Remove repeating speces becuase browsers ignore them
  249:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"( )+", " ");
  250:  
  251:                 // Remove the header (prepare first by clearing attributes)
  252:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*head([^>])*>", "<head>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  253:  
  254:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*head( )*>)", "</head>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  255:  
  256:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(<head>).*(</head>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  257:  
  258:                 // remove all scripts (prepare first by clearing attributes)
  259:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*script([^>])*>", "<script>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  260:  
  261:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*script( )*>)", "</script>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  262:  
  263:                 //result = System.Text.RegularExpressions.Regex.Replace(result, @"(<script>)([^(<script>\.</script>)])*(</script>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  264:  
  265:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"(<script>).*(</script>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  266:  
  267:                 // remove all styles (prepare first by clearing attributes)
  268:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*style([^>])*>", "<style>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  269:  
  270:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*style( )*>)", "</style>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  271:  
  272:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(<style>).*(</style>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  273:  
  274:                 // insert tabs in spaces of <td> tags
  275:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*td([^>])*>", "\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  276:  
  277:                 // insert line breaks in places of <BR> and <LI> tags
  278:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*br( )*>", "\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  279:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*li( )*>", "\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  280:  
  281:                 // insert line paragraphs (double line breaks) in place
  282:                 // if <P>, <DIV> and <TR> tags
  283:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*div([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  284:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*tr([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  285:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*p([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  286:  
  287:                 // Remove remaining tags like <a>, links, images, // comments etc - anything thats enclosed inside < >
  288:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<[^>]*>", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  289:  
  290:                 // replace special characters:
  291:                 result = System.Text.RegularExpressions.Regex.Replace(result, @" ", " ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  292:  
  293:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&bull;", " * ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  294:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&lsaquo;", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  295:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&rsaquo;", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  296:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&trade;", "(tm)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  297:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&frasl;", "/", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  298:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&lt;", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  299:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&gt;", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  300:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&copy;", "(c)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  301:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&reg;", "(r)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  302:  
  303:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&nbsp;", " ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  304:  
  305:                 // Remove all others. More can be added, see
  306:                 // http://hotwired.lycos.com/webmonkey/reference/special_characters/
  307:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&(.{2,6});", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  308:  
  309:                 // for testng
  310:                 //System.Text.RegularExpressions.Regex.Replace(result, 
  311:                 //      this.txtRegex.Text,string.Empty, 
  312:                 //      System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  313:  
  314:                 // make line breaking consistent
  315:                 result = result.Replace("\n", "\r");
  316:  
  317:                 // Remove extra line breaks and tabs:
  318:                 // replace over 2 breaks with 2 and over 4 tabs with 4. 
  319:                 // Prepare first to remove any whitespaces inbetween
  320:                 // the escaped characters and remove redundant tabs inbetween linebreaks
  321:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)( )+(\r)", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  322:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\t)( )+(\t)", "\t\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  323:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\t)( )+(\r)", "\t\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  324:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)( )+(\t)", "\r\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  325:  
  326:                 // Remove redundant tabs
  327:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)(\t)+(\r)", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  328:  
  329:                 // Remove multible tabs followind a linebreak with just one tab
  330:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)(\t)+", "\r\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  331:  
  332:                 // Initial replacement target string for linebreaks
  333:                 string breaks = "\r\r\r";
  334:                 // Initial replacement target string for tabs
  335:                 string tabs = "\t\t\t\t\t";
  336:  
  337:                 for (int index = 0; index < result.Length; index++)
  338:                 {
  339:                     result = result.Replace(breaks, "\r\r");
  340:                     result = result.Replace(tabs, "\t\t\t\t");
  341:                     breaks = breaks + "\r";
  342:                     tabs = tabs + "\t";
  343:                 }
  344:  
  345:                 // Thats it.
  346:                 return result;
  347:  
  348:             }
  349:             catch
  350:             {
  351:                 //MessageBox.Show("Error");
  352:                 return null;
  353:             }
  354:         }
  355:  
  356:         ////////////////////////////////////////////////////////////////////////////
  357:  
  358:         /// <summary>
  359:         ///
  360:         /// </summary>
  361:         public static string TextToHtml(string source)
  362:         {
  363:             // clean regular text format pages and return an equivalent html format
  364:  
  365:             string s;
  366:  
  367:             s = Decode(source);
  368:             //s = Ia.Cl.Models.Html.Html_Strip(s);
  369:             s = Regex.Replace(s, @"\.", @". ");
  370:             s = Regex.Replace(s, @"[ ]+", @" ");
  371:             s = s.Replace("\r", "");
  372:             s = s.Replace("\n+", "\n");
  373:             //s = "<p>" + s.Replace("\n", "</p>\n<p>") + "</p>";
  374:             /*
  375:             s = s.Replace("\n", "</p>\n<p>");
  376:  
  377:             // clean up
  378:             u = sb.ToString();
  379:             u = Regex.Replace(u, @"^\s+", "");
  380:             u = Regex.Replace(u, @">\s+", ">");
  381:             u = Regex.Replace(u, @"\s+<", "<");
  382:             u = Regex.Replace(u, @"\s+", " ");
  383:             u = Regex.Replace(u, @"\n+", @"<br/>"); // keep newlines
  384:             //u = Regex.Replace(u, @"</ul>(.+?)</ul>", "</ul><p>$1</p></ul>");
  385:             //u = Regex.Replace(u, @"</ul>(.+?)</p>", "</ul><p>$1</p></p>");
  386:             //u = u.Replace(@"�", "<p/>&nbsp;&nbsp;&nbsp;�&nbsp;");
  387:             */
  388:  
  389:             return s;
  390:         }
  391:  
  392:         ////////////////////////////////////////////////////////////////////////////
  393:  
  394:         /// <summary>
  395:         ///
  396:         /// </summary>
  397:         public static string TextToHtml2(string source)
  398:         {
  399:             // clean regular text format pages and return an equivalent html format
  400:  
  401:             string s;
  402:  
  403:             s = Decode(source);
  404:             //s = Ia.Cl.Models.Html.Html_Strip(s);
  405:             s = Regex.Replace(s, @"\.", @". ");
  406:             s = Regex.Replace(s, @"[ ]+", @" ");
  407:             s = s.Replace("\r", "");
  408:             s = s.Replace("\n+", "\n");
  409:             s = "<p>" + s.Replace("\n", "</p>\n<p>") + "</p>";
  410:  
  411:             /*
  412:             s = s.Replace("\n", "</p>\n<p>");
  413:  
  414:             // clean up
  415:             u = sb.ToString();
  416:             u = Regex.Replace(u, @"^\s+", "");
  417:             u = Regex.Replace(u, @">\s+", ">");
  418:             u = Regex.Replace(u, @"\s+<", "<");
  419:             u = Regex.Replace(u, @"\s+", " ");
  420:             u = Regex.Replace(u, @"\n+", @"<br/>"); // keep newlines
  421:             //u = Regex.Replace(u, @"</ul>(.+?)</ul>", "</ul><p>$1</p></ul>");
  422:             //u = Regex.Replace(u, @"</ul>(.+?)</p>", "</ul><p>$1</p></p>");
  423:             //u = u.Replace(@"�", "<p/>&nbsp;&nbsp;&nbsp;�&nbsp;");
  424:             */
  425:  
  426:             return s;
  427:         }
  428:  
  429:         ////////////////////////////////////////////////////////////////////////////
  430:  
  431:         /// <summary>
  432:         ///
  433:         /// </summary>
  434:         public static string TextToHtmlAndOl_Ul_LiToBr(string source)
  435:         {
  436:             // clean regular text format pages and return an equivalent html format
  437:  
  438:             string s;
  439:  
  440:             s = Decode(source);
  441:             s = Regex.Replace(s, @"\.", @". ");
  442:             s = Regex.Replace(s, @"[ ]+", @" ");
  443:             s = s.Replace("\r", "");
  444:             s = s.Replace("\n+", "\n");
  445:  
  446:             s = s.Replace("<ol>", "<br/> <br/>");
  447:             s = s.Replace("</ol>", "");
  448:             s = s.Replace("<ul>", "<br/> <br/>");
  449:             s = s.Replace("</ul>", "");
  450:             s = s.Replace("<li>", "-");
  451:             s = s.Replace("</li>", "<br/>");
  452:  
  453:             return s;
  454:         }
  455:  
  456:         ////////////////////////////////////////////////////////////////////////////
  457:  
  458:         /// <summary>
  459:         ///
  460:         /// <see href="http://madskristensen.net/post/remove-whitespace-from-your-pages"/>
  461:         /// </summary>
  462:         public static string RemoveWhitespaceFromHtml(string html)
  463:         {
  464:             // for now we will skip if page has <pre>
  465:  
  466:             if (!html.Contains("<pre>"))
  467:             {
  468:                 html = regexBetweenTags.Replace(html, "> <");
  469:                 html = regexLineBreaks.Replace(html, string.Empty);
  470:             }
  471:  
  472:             return html.Trim();
  473:         }
  474:  
  475:         ////////////////////////////////////////////////////////////////////////////
  476:         ////////////////////////////////////////////////////////////////////////////
  477:     }
  478: }