)>}]
شركة التطبيقات المتكاملة لتصميم وبرمجة البرمجيات الخاصة ش.ش.و.
Integrated Applications Programming Company
Home » Code Library » Html (Ia.Cl.Models)

General-purpose public code classes and XML files that we have compiled and used over the years:

Handles HTML encoding and decoding.

using System;
using System.Globalization;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

    4: namespace Ia.Cl.Models
    5: {
    6:     ////////////////////////////////////////////////////////////////////////////
    7:  
    8:     /// <summary publish="true">
    9:     /// Handle HTML encoding, decoding functions.
   10:     /// </summary>
   11:     /// <remarks> 
   12:     /// Copyright � 2001-2018 Jasem Y. Al-Shamlan (info@ia.com.kw), Integrated Applications - Kuwait. All Rights Reserved.
   13:     ///
   14:     /// This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
   15:     /// the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
   16:     ///
   17:     /// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   18:     /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
   19:     /// 
   20:     /// You should have received a copy of the GNU General Public License along with this library. If not, see http://www.gnu.org/licenses.
   21:     /// 
   22:     /// Copyright notice: This notice may not be removed or altered from any source distribution.
   23:     /// </remarks> 
   24:     public class Html
   25:     {
   26:         private static readonly Regex regexBetweenTags = new Regex(@">\s+<", RegexOptions.Compiled);
   27:         private static readonly Regex regexLineBreaks = new Regex(@"\n\s+", RegexOptions.Compiled);
   28:  
   29:         ////////////////////////////////////////////////////////////////////////////
   30:  
   31:         /// <summary>
   32:         ///
   33:         /// </summary>
   34:         public Html() { }
   35:  
   36:         ////////////////////////////////////////////////////////////////////////////
   37:  
   38:         ///<summary>
   39:         ///
   40:         /// </summary>
   41:         /// <param name="s"></param>
   42:         /// <returns></returns>
   43:         /// <remark>http://www.west-wind.com/weblog/posts/2009/Feb/05/Html-and-Uri-String-Encoding-without-SystemWeb</remark>
   44:         public static string HtmlEncode(string s)
   45:         {
   46: #if WFA
   47:  
   48:             if (s == null) return null;
   49:  
   50:             StringBuilder sb = new StringBuilder(s.Length);
   51:  
   52:             int len = s.Length;
   53:  
   54:             for (int i = 0; i < len; i++)
   55:             {
   56:                 switch (s[i])
   57:                 {
   58:                     case '<': sb.Append("&lt;"); break;
   59:                     case '>': sb.Append("&gt;"); break;
   60:                     case '"': sb.Append("&quot;"); break;
   61:                     case '&': sb.Append("&amp;"); break;
   62:                     default:
   63:                         if (s[i] > 159)
   64:                         {
   65:                             // decimal numeric entity
   66:                             sb.Append("&#");
   67:                             sb.Append(((int)s[i]).ToString(CultureInfo.InvariantCulture));
   68:                             sb.Append(";");
   69:                         }
   70:                         else sb.Append(s[i]);
   71:                         break;
   72:                 }
   73:             }
   74:  
   75:             return sb.ToString();
   76: #else
   77:             return WebUtility.HtmlEncode(s);
   78: #endif
   79:         }
   80:  
   81:         ////////////////////////////////////////////////////////////////////////////
   82:  
   83:         ///<summary>
   84:         ///
   85:         /// </summary>
   86:         public static string HtmlDecode(string s)
   87:         {
   88: #if WFA
   89:             s = s.Replace("&lt;","<");
   90:             s = s.Replace("&gt;",">");
   91:             s = s.Replace("&quot;",@"""");
   92:             s = s.Replace("&amp;","&");
   93:  
   94:             return s;
   95: #else
   96:             return WebUtility.HtmlDecode(s);
   97: #endif
   98:         }
   99:  
  100:         ////////////////////////////////////////////////////////////////////////////
  101:  
  102:         /// <summary>
  103:         ///
  104:         /// </summary>
  105:         public static string Encode(string s)
  106:         {
  107:             if (!string.IsNullOrEmpty(s))
  108:             {
  109:                 s = HtmlEncode(s);
  110:  
  111:                 // database requirement:
  112:                 s = s.Replace(@"'", @"_#039_");
  113:                 s = s.Replace(@"?", @"_#063_");
  114:             }
  115:  
  116:             return s;
  117:         }
  118:  
  119:         ////////////////////////////////////////////////////////////////////////////
  120:  
  121:         /// <summary>
  122:         ///
  123:         /// </summary>
  124:         public static string Decode(string s)
  125:         {
  126:             if (!string.IsNullOrEmpty(s))
  127:             {
  128:                 // database requirement:
  129:                 s = s.Replace(@"_#063_", @"?");
  130:                 s = s.Replace(@"_#039_", @"'");
  131:  
  132:                 s = HtmlDecode(s);
  133:             }
  134:  
  135:             return s;
  136:         }
  137:  
  138:         ////////////////////////////////////////////////////////////////////////////
  139:  
  140:         /// <summary>
  141:         ///
  142:         /// </summary>
  143:         public static string DecodeRemoveNLLF(string s)
  144:         {
  145:             if (!string.IsNullOrEmpty(s))
  146:             {
  147:                 // database requirement:
  148:  
  149:                 s = s.Replace(@"_#063_", @"?");
  150:                 s = s.Replace(@"_#039_", @"'");
  151:  
  152:                 s = HtmlDecode(s);
  153:  
  154:                 s = s.Replace("\n\r", " ");
  155:                 s = s.Replace("\r\n", " ");
  156:                 s = s.Replace("\n", " ");
  157:                 s = s.Replace("\r", " ");
  158:             }
  159:  
  160:             return s;
  161:         }
  162:  
  163:         ////////////////////////////////////////////////////////////////////////////
  164:  
  165:         /// <summary>
  166:         ///
  167:         /// </summary>
  168:         public static string XmlEncode(string s)
  169:         {
  170:             if (!string.IsNullOrEmpty(s))
  171:             {
  172:                 s = HtmlEncode(s);
  173:  
  174:                 s = s.Replace(@"'", @"_#039_");
  175:                 s = s.Replace(@"\", @"_#092_");
  176:                 s = s.Replace(@"?", @"_#063_");
  177:  
  178:                 /*
  179:                 &amp;  =  &
  180:                 &lt;   =  <
  181:                 &gt;   =  >
  182:                 &quot; =  "
  183:                 &apos; =  '
  184:                 */
  185:  
  186:                 // XML requirement:
  187:                 s = s.Replace("&", "_amp_");
  188:                 s = s.Replace(">", "_gt_");
  189:                 s = s.Replace("<", "_lt_");
  190:             }
  191:  
  192:             return s;
  193:         }
  194:  
  195:         ////////////////////////////////////////////////////////////////////////////
  196:  
  197:         /// <summary>
  198:         ///
  199:         /// </summary>
  200:         public static string XmlDecode(string s)
  201:         {
  202:             if (!string.IsNullOrEmpty(s))
  203:             {
  204:                 // XML requirement
  205:                 s = s.Replace("_gt_", ">");
  206:                 s = s.Replace("_lt_", "<");
  207:                 s = s.Replace("_amp_", "&");
  208:  
  209:                 s = s.Replace(@"_#039_", @"'");
  210:                 s = s.Replace(@"_#092_", @"\");
  211:                 s = s.Replace(@"_#063_", @"?");
  212:  
  213:                 s = HtmlDecode(s);
  214:             }
  215:  
  216:             return s;
  217:         }
  218:  
  219:         ////////////////////////////////////////////////////////////////////////////
  220:  
  221:         /// <summary>
  222:         ///
  223:         /// </summary>
  224:         public static string Code(string code)
  225:         {
  226:             if (!string.IsNullOrEmpty(code))
  227:             {
  228:                 // this displays an HTML code in regular text
  229:                 /*
  230:                 s=s.Replace("_gt_",">");
  231:                 s=s.Replace("_lt_","<");
  232:                 s=s.Replace("_amp_","&");
  233:  
  234:                 s=s.Replace(@"_#039_",@"'");
  235:                 s=s.Replace(@"_#092_",@"\");
  236:                 s=s.Replace(@"_#063_",@"?");
  237:                 */
  238:  
  239:                 code = HtmlEncode(code);
  240:             }
  241:  
  242:             return code;
  243:         }
  244:  
  245:         ////////////////////////////////////////////////////////////////////////////
  246:  
  247:         /// <summary>
  248:         ///
  249:         /// </summary>
  250:         public static string StripHtml(string source)
  251:         {
  252:             try
  253:             {
  254:                 string result;
  255:  
  256:                 // Remove HTML Development formatting
  257:                 // Replace line breaks with space
  258:                 // because browsers inserts space
  259:                 result = source.Replace("\r", " ");
  260:  
  261:                 // Replace line breaks with space
  262:                 // because browsers inserts space
  263:                 result = result.Replace("\n", " ");
  264:  
  265:                 // Remove step-formatting
  266:                 result = result.Replace("\t", string.Empty);
  267:  
  268:                 // Remove repeating speces becuase browsers ignore them
  269:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"( )+", " ");
  270:  
  271:                 // Remove the header (prepare first by clearing attributes)
  272:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*head([^>])*>", "<head>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  273:  
  274:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*head( )*>)", "</head>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  275:  
  276:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(<head>).*(</head>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  277:  
  278:                 // remove all scripts (prepare first by clearing attributes)
  279:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*script([^>])*>", "<script>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  280:  
  281:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*script( )*>)", "</script>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  282:  
  283:                 //result = System.Text.RegularExpressions.Regex.Replace(result, @"(<script>)([^(<script>\.</script>)])*(</script>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  284:  
  285:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"(<script>).*(</script>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  286:  
  287:                 // remove all styles (prepare first by clearing attributes)
  288:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*style([^>])*>", "<style>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  289:  
  290:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*style( )*>)", "</style>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  291:  
  292:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(<style>).*(</style>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  293:  
  294:                 // insert tabs in spaces of <td> tags
  295:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*td([^>])*>", "\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  296:  
  297:                 // insert line breaks in places of <BR> and <LI> tags
  298:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*br( )*>", "\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  299:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*li( )*>", "\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  300:  
  301:                 // insert line paragraphs (double line breaks) in place
  302:                 // if <P>, <DIV> and <TR> tags
  303:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*div([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  304:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*tr([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  305:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*p([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  306:  
  307:                 // Remove remaining tags like <a>, links, images, // comments etc - anything thats enclosed inside < >
  308:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"<[^>]*>", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  309:  
  310:                 // replace special characters:
  311:                 result = System.Text.RegularExpressions.Regex.Replace(result, @" ", " ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  312:  
  313:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&bull;", " * ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  314:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&lsaquo;", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  315:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&rsaquo;", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  316:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&trade;", "(tm)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  317:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&frasl;", "/", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  318:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&lt;", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  319:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&gt;", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  320:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&copy;", "(c)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  321:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&reg;", "(r)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  322:  
  323:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&nbsp;", " ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  324:  
  325:                 // Remove all others. More can be added, see
  326:                 // http://hotwired.lycos.com/webmonkey/reference/special_characters/
  327:                 result = System.Text.RegularExpressions.Regex.Replace(result, @"&(.{2,6});", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  328:  
  329:                 // for testng
  330:                 //System.Text.RegularExpressions.Regex.Replace(result, 
  331:                 //      this.txtRegex.Text,string.Empty, 
  332:                 //      System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  333:  
  334:                 // make line breaking consistent
  335:                 result = result.Replace("\n", "\r");
  336:  
  337:                 // Remove extra line breaks and tabs:
  338:                 // replace over 2 breaks with 2 and over 4 tabs with 4. 
  339:                 // Prepare first to remove any whitespaces inbetween
  340:                 // the escaped characters and remove redundant tabs inbetween linebreaks
  341:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)( )+(\r)", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  342:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\t)( )+(\t)", "\t\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  343:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\t)( )+(\r)", "\t\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  344:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)( )+(\t)", "\r\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  345:  
  346:                 // Remove redundant tabs
  347:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)(\t)+(\r)", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  348:  
  349:                 // Remove multible tabs followind a linebreak with just one tab
  350:                 result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)(\t)+", "\r\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
  351:  
  352:                 // Initial replacement target string for linebreaks
  353:                 string breaks = "\r\r\r";
  354:                 // Initial replacement target string for tabs
  355:                 string tabs = "\t\t\t\t\t";
  356:  
  357:                 for (int index = 0; index < result.Length; index++)
  358:                 {
  359:                     result = result.Replace(breaks, "\r\r");
  360:                     result = result.Replace(tabs, "\t\t\t\t");
  361:                     breaks = breaks + "\r";
  362:                     tabs = tabs + "\t";
  363:                 }
  364:  
  365:                 // Thats it.
  366:                 return result;
  367:  
  368:             }
  369:             catch
  370:             {
  371:                 //MessageBox.Show("Error");
  372:                 return null;
  373:             }
  374:         }
  375:  
  376:         ////////////////////////////////////////////////////////////////////////////
  377:  
  378:         /// <summary>
  379:         ///
  380:         /// </summary>
  381:         public static string TextToHtml(string source)
  382:         {
  383:             // clean regular text format pages and return an equivalent html format
  384:  
  385:             string s;
  386:  
  387:             s = Decode(source);
  388:             //s = Ia.Cl.Models.Html.Html_Strip(s);
  389:             s = Regex.Replace(s, @"\.", @". ");
  390:             s = Regex.Replace(s, @"[ ]+", @" ");
  391:             s = s.Replace("\r", "");
  392:             s = s.Replace("\n+", "\n");
  393:             //s = "<p>" + s.Replace("\n", "</p>\n<p>") + "</p>";
  394:             /*
  395:             s = s.Replace("\n", "</p>\n<p>");
  396:  
  397:             // clean up
  398:             u = sb.ToString();
  399:             u = Regex.Replace(u, @"^\s+", "");
  400:             u = Regex.Replace(u, @">\s+", ">");
  401:             u = Regex.Replace(u, @"\s+<", "<");
  402:             u = Regex.Replace(u, @"\s+", " ");
  403:             u = Regex.Replace(u, @"\n+", @"<br/>"); // keep newlines
  404:             //u = Regex.Replace(u, @"</ul>(.+?)</ul>", "</ul><p>$1</p></ul>");
  405:             //u = Regex.Replace(u, @"</ul>(.+?)</p>", "</ul><p>$1</p></p>");
  406:             //u = u.Replace(@"�", "<p/>&nbsp;&nbsp;&nbsp;�&nbsp;");
  407:             */
  408:  
  409:             return s;
  410:         }
  411:  
  412:         ////////////////////////////////////////////////////////////////////////////
  413:  
  414:         /// <summary>
  415:         ///
  416:         /// </summary>
  417:         public static string TextToHtml2(string source)
  418:         {
  419:             // clean regular text format pages and return an equivalent html format
  420:  
  421:             string s;
  422:  
  423:             s = Decode(source);
  424:             //s = Ia.Cl.Models.Html.Html_Strip(s);
  425:             s = Regex.Replace(s, @"\.", @". ");
  426:             s = Regex.Replace(s, @"[ ]+", @" ");
  427:             s = s.Replace("\r", "");
  428:             s = s.Replace("\n+", "\n");
  429:             s = "<p>" + s.Replace("\n", "</p>\n<p>") + "</p>";
  430:  
  431:             /*
  432:             s = s.Replace("\n", "</p>\n<p>");
  433:  
  434:             // clean up
  435:             u = sb.ToString();
  436:             u = Regex.Replace(u, @"^\s+", "");
  437:             u = Regex.Replace(u, @">\s+", ">");
  438:             u = Regex.Replace(u, @"\s+<", "<");
  439:             u = Regex.Replace(u, @"\s+", " ");
  440:             u = Regex.Replace(u, @"\n+", @"<br/>"); // keep newlines
  441:             //u = Regex.Replace(u, @"</ul>(.+?)</ul>", "</ul><p>$1</p></ul>");
  442:             //u = Regex.Replace(u, @"</ul>(.+?)</p>", "</ul><p>$1</p></p>");
  443:             //u = u.Replace(@"�", "<p/>&nbsp;&nbsp;&nbsp;�&nbsp;");
  444:             */
  445:  
  446:             return s;
  447:         }
  448:  
  449:         ////////////////////////////////////////////////////////////////////////////
  450:  
  451:         /// <summary>
  452:         ///
  453:         /// </summary>
  454:         public static string TextToHtmlAndOl_Ul_LiToBr(string source)
  455:         {
  456:             // clean regular text format pages and return an equivalent html format
  457:  
  458:             string s;
  459:  
  460:             s = Decode(source);
  461:             s = Regex.Replace(s, @"\.", @". ");
  462:             s = Regex.Replace(s, @"[ ]+", @" ");
  463:             s = s.Replace("\r", "");
  464:             s = s.Replace("\n+", "\n");
  465:  
  466:             s = s.Replace("<ol>", "<br/> <br/>");
  467:             s = s.Replace("</ol>", "");
  468:             s = s.Replace("<ul>", "<br/> <br/>");
  469:             s = s.Replace("</ul>", "");
  470:             s = s.Replace("<li>", "-");
  471:             s = s.Replace("</li>", "<br/>");
  472:  
  473:             return s;
  474:         }
  475:  
  476:         ////////////////////////////////////////////////////////////////////////////
  477:  
  478:         /// <summary>
  479:         ///
  480:         /// <see href="http://madskristensen.net/post/remove-whitespace-from-your-pages"/>
  481:         /// </summary>
  482:         public static string RemoveWhitespaceFromHtml(string html)
  483:         {
  484:             // for now we will skip if page has <pre>
  485:  
  486:             if (!html.Contains("<pre>"))
  487:             {
  488:                 html = regexBetweenTags.Replace(html, "> <");
  489:                 html = regexLineBreaks.Replace(html, string.Empty);
  490:             }
  491:  
  492:             return html.Trim();
  493:         }
  494:  
  495:         ////////////////////////////////////////////////////////////////////////////
  496:         ////////////////////////////////////////////////////////////////////////////
  497:     }
  498: }