Regex is useful for parsing HTML fragments—particularly when the document may be imperfectly formed:
using System;
using System.Text.RegularExpressions;
/// <summary>
/// Demonstrates extracting an element's tag name and text content from a small
/// HTML fragment with a regular expression that uses named groups and a
/// named backreference.
/// </summary>
class Program
{
    /// <summary>
    /// Matches a single HTML element in a sample string and writes the captured
    /// tag name and inner text to the console, one per line.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // The pattern is built from three verbatim pieces so each part can be
        // commented separately.
        string r =
            @"<(?'tag'\w+?).*>" +  // match the opening tag, and name it 'tag'
            @"(?'text'.*?)" +      // lazily match the text content, and name it 'text'
            @"</\k'tag'>";         // match the closing tag, which must repeat 'tag'

        // Sample fragment: one element whose closing tag mirrors the opening tag.
        string text = "<h1>hello</h1>";

        Match m = Regex.Match(text, r);

        Console.WriteLine(m.Groups["tag"]);   // h1
        Console.WriteLine(m.Groups["text"]);  // hello
    }
}
The output:
h1
hello