<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="ScrapeHeadings" %>
Scraping Headings
File: Default.aspx.cs
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Net;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>
/// Code-behind for the heading scraper page. When the search button is clicked,
/// the page at the URL typed into txtUrl is downloaded and every HTML heading
/// element found in it is listed (HTML-encoded) in litContent.
/// </summary>
public partial class ScrapeHeadings : System.Web.UI.Page
{
    // Matches a single heading element, h1 through h6, including any attributes on
    // the opening tag. The backreference \1 forces the closing tag to have the same
    // level as the opening tag, and the lazy ".+?" stops at the first closing tag so
    // two headings on one line are reported separately.
    // NOTE(review): the pattern in the original listing was garbled (its markup had
    // been stripped, leaving ".+ "); this is a reconstruction based on the page's
    // stated purpose of scraping headings - confirm against the intended behavior.
    private static readonly Regex HeadingPattern =
        new Regex(@"<h([1-6])[^>]*>.+?</h\1>", RegexOptions.IgnoreCase);

    /// <summary>
    /// Clears the output literal at the start of every request.
    /// </summary>
    /// <param name="sender">The page raising the event.</param>
    /// <param name="e">Unused event data.</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        litContent.Text = "";
    }

    /// <summary>
    /// Downloads the page at the URL entered in txtUrl and writes each heading found
    /// in it to litContent, one per line. Shows an error message when the URL is not
    /// an absolute http/https address or the download fails.
    /// </summary>
    /// <param name="sender">The button raising the event.</param>
    /// <param name="e">Unused event data.</param>
    protected void btnSearch_Click(object sender, EventArgs e)
    {
        // Only absolute http/https addresses are accepted. WebClient would otherwise
        // also resolve file:// URIs and local paths, letting a visitor read files
        // from the web server.
        Uri target;
        bool isWebAddress =
            Uri.TryCreate(txtUrl.Text.Trim(), UriKind.Absolute, out target)
            && (target.Scheme == Uri.UriSchemeHttp || target.Scheme == Uri.UriSchemeHttps);
        if (!isWebAddress)
        {
            ShowConnectionError();
            return;
        }

        try
        {
            string content;
            // WebClient is IDisposable; release it as soon as the download finishes.
            using (WebClient client = new WebClient())
            {
                content = client.DownloadString(target);
            }

            // Build the output once instead of re-assigning litContent.Text per match.
            StringBuilder output = new StringBuilder();
            foreach (Match m in HeadingPattern.Matches(content))
            {
                // Encode the match so the scraped markup is displayed as text
                // rather than rendered (or executed) in this page.
                output.Append(HttpUtility.HtmlEncode(m.Value));
                output.Append("<br />");
            }
            litContent.Text = output.ToString();
        }
        catch (WebException ex)
        {
            // Network-level failure (DNS, refused connection, HTTP error status, ...).
            // Record the details in the page trace instead of discarding them.
            Trace.Warn("ScrapeHeadings", "Download failed", ex);
            ShowConnectionError();
        }
    }

    // Displays the "could not connect" message. The user-supplied URL is HTML-encoded
    // because it is echoed back into the page.
    private void ShowConnectionError()
    {
        litContent.Text = "Could not connect to " + HttpUtility.HtmlEncode(txtUrl.Text);
    }
}