Using the Regex Class in ASP.NET

Regex (short for "regular expression") is a pattern-matching tool that is available in most programming languages. In this article, I am using C#. Let's assume you want to extract valuable information from a webpage for research purposes; you can use a regex to pull that data out of the downloaded HTML.
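In the following example, the user enters a URL into the text box and then clicks the button to extract the headings from the downloaded page. The first listing is the page markup (RegX.aspx) and the second is its code-behind (RegX.aspx.cs). The code is largely self-explanatory.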


<%@ Page Language="C#" AutoEventWireup="true" CodeFile="RegX.aspx.cs" Inherits="RegX" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
   <title>Untitled Page</title>
</head>
<body>
   <!-- Heading-extractor page. The Page directive at the top of this file names
        RegX.aspx.cs as the code file and RegX as the class this page inherits. -->
   <form id="form1" runat="server">
       <div>
           <!-- Input area: the URL to download, plus the button that starts the search. -->
           <asp:Panel ID="panUrl" runat="server" GroupingText="Search" CssClass="myPanel">
               Enter Url:
               <asp:TextBox ID="txtUrl" runat="server" Columns="50" />
               <br />
               <!-- Clicking this button invokes btnSearch_Click in the code-behind class. -->
               <asp:Button ID="btnSearch" runat="server" OnClick="btnSearch_Click" Text="Search" />
           </asp:Panel>
           <p>
           </p>
           <!-- Output area: the code-behind writes the HTML-encoded heading tags it
                finds (or an error message) into litContent. -->
           <asp:Panel  ID="panHeadings" runat="server" GroupingText="Headings in this Url" CssClass="myPanel">
               <asp:Literal ID="litContent" runat="server" />
           </asp:Panel>
       </div>
   </form>
</body>
</html>
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Text.RegularExpressions;
using System.Net;

/// <summary>
/// Code-behind for RegX.aspx. Downloads the page at the URL typed into
/// <c>txtUrl</c>, extracts every HTML heading element (h1-h6) with a regular
/// expression, and shows the HTML-encoded matches in <c>litContent</c>.
/// </summary>
public partial class RegX : System.Web.UI.Page
{
    /// <summary>
    /// Matches a complete heading element, e.g. <c>&lt;h2 class="x"&gt;Title&lt;/h2&gt;</c>.
    /// <list type="bullet">
    /// <item><c>([1-6])</c> captures the heading level and <c>\1</c> requires the
    /// closing tag to use the same level.</item>
    /// <item><c>\b[^&gt;]*</c> allows attributes on the opening tag.</item>
    /// <item><c>.+?</c> is lazy so that several headings on one line are returned
    /// as separate matches instead of one match spanning all of them.</item>
    /// <item><c>Singleline</c> lets a heading's content wrap across lines.</item>
    /// </list>
    /// Built once and shared, because the pattern never changes between requests.
    /// </summary>
    private static readonly Regex HeadingPattern = new Regex(
        @"<h([1-6])\b[^>]*>.+?</h\1\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);

    /// <summary>
    /// Each time the page loads, empty the literal control so results from a
    /// previous search are not shown again.
    /// </summary>
    /// <param name="sender">The page raising the event.</param>
    /// <param name="e">Unused event data.</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        litContent.Text = string.Empty;
    }

    /// <summary>
    /// Event handler for the search button: validates the entered URL, downloads
    /// the page, and lists every heading element found in it.
    /// </summary>
    /// <param name="sender">The button raising the event.</param>
    /// <param name="e">Unused event data.</param>
    protected void btnSearch_Click(object sender, EventArgs e)
    {
        // The URL is user input that is echoed back into the page, so it must be
        // HTML-encoded before being placed in the literal.
        string failureMessage = "Could not connect to " + HttpUtility.HtmlEncode(txtUrl.Text);

        // Accept only absolute http/https URLs. Without this check WebClient would
        // also accept file:// URLs and local paths, exposing files on the server.
        // NOTE(review): this does not stop requests to internal network hosts;
        // add an allow-list if the page is exposed to untrusted users.
        Uri target;
        if (!Uri.TryCreate(txtUrl.Text.Trim(), UriKind.Absolute, out target) || !IsHttpScheme(target))
        {
            litContent.Text = failureMessage;
            return;
        }

        string content;
        try
        {
            // WebClient is IDisposable; dispose it as soon as the download completes.
            using (WebClient client = new WebClient())
            {
                content = client.DownloadString(target);
            }
        }
        catch (WebException)
        {
            // Unresponsive host, DNS failure, HTTP error status, and similar.
            litContent.Text = failureMessage;
            return;
        }

        // HTML-encode each matched tag so it is displayed as text rather than
        // rendered, one heading per line.
        System.Text.StringBuilder output = new System.Text.StringBuilder();
        foreach (Match heading in HeadingPattern.Matches(content))
        {
            output.Append(HttpUtility.HtmlEncode(heading.Value)).Append("<br/>");
        }

        litContent.Text = output.ToString();
    }

    /// <summary>
    /// Returns true when <paramref name="uri"/> uses the http or https scheme.
    /// </summary>
    /// <param name="uri">An absolute URI.</param>
    private static bool IsHttpScheme(Uri uri)
    {
        return string.Equals(uri.Scheme, Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase)
            || string.Equals(uri.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
    }
}

Post a Comment

Please do not post spam links in the comment box 😊

Previous Post Next Post

Blog ads

CodeGuru