// Network C#

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.ComponentModel;
namespace NearForums
{
  public static class Utils
  {
    /// 
    /// sanitize any potentially dangerous tags from the provided raw HTML input using 
    /// a whitelist based approach, leaving the "safe" HTML tags
    /// CODESNIPPET:4100A61A-1711-4366-B0B0-144D1179A937 / http://refactormycode.com/codes/333-sanitize-html
    /// 

    /// Html to sanitize
    /// Regex containing the allowed name of the html elements. For example: em|h(2|3|4)|strong|p
    public static string SanitizeHtml(string html, string whiteListTags = "b(lockquote)?|code|d(d|t|l|el)|em|h(1|2|3|4)|i|kbd|li|ol|p(re)?|s(ub|up|trong|trike)?|ul|a|img")
    {
      #region Regex definitions
      Regex tagsRegex = new Regex("<[^>]*(>|$)",
        RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.Compiled);
      Regex cleanupRegex = new Regex("((?<=<\\w+[^>]*)(?!\\shref|\\sclass|\\srel|\\stitle|\\sclass|\\swidth|\\sheight|\\salt|\\ssrc)(\\s[\\w-]+)=[\"']?((?:.(?![\"']?\\s+(?:\\S+)=|[>\"']))+.)[\"']?)|((?<=]*)\\sclass=\"MsoNormal\")",
          RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnoreCase | RegexOptions.Compiled);
      Regex whitelistRegex = new Regex("^$|^<(b|h)r\\s?/?>$",
        RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace);
      Regex whitelistAnchorRegex = new Regex(@"
      ^      href=""(\#\w+|(https?|ftp)://[-a-z0-9+&@#/%?=~_|!:,.;\(\)]+)""
      (
      (\sclass=""([\w-]+)"")|(\stitle=""[^""<>]+"")|
      (\srel=""nofollow""))*
      \s?>$|
      ^$",
        RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace);
      Regex whitelistImageRegex = new Regex(@"
      ^      src=""https?://[-a-z0-9+&@#/%?=~_|!:,.;\(\)]+""
      ((\swidth=""\d{1,3}"")|
      (\sheight=""\d{1,3}"")|
      (\salt=""[^""<>]*"")|
      (\stitle=""[^""<>]*""))*
      \s?/?>$",
        RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace);
      #endregion
      if (String.IsNullOrEmpty(html))
        return html;
      //Do a previous cleanup, for not not allowed attributes included comming from word
      html = cleanupRegex.Replace(html, "");
      string tagname;
      Match tag;
      // match every HTML tag in the input
      MatchCollection tags = tagsRegex.Matches(html);
      for (int i = tags.Count - 1; i > -1; i--)
      {
        tag = tags[i];
        tagname = tag.Value.ToLowerInvariant();
        if (!(whitelistRegex.IsMatch(tagname) || whitelistAnchorRegex.IsMatch(tagname) || whitelistImageRegex.IsMatch(tagname)))
        {
          html = html.Remove(tag.Index, tag.Length);
          System.Diagnostics.Debug.WriteLine("tag sanitized: " + tagname);
        }
      }
      return html;
    }
  }
}