Reputation: 11335
I'm using this code:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
namespace HtmlParser
{
    /// <summary>
    /// Main form. On construction it downloads a web page with HtmlAgilityPack,
    /// looks up a single element by its id and lists every external link found
    /// inside that element in <c>richTextBox1</c>, one link per line.
    /// </summary>
    public partial class Form1 : Form
    {
        // URL of the page that is downloaded and parsed.
        private const string PageUrl = "http://www.walla.co.il";

        // Id of the HTML element whose links are listed. "mynode" is only a
        // placeholder: replace it with the id attribute of a real element on the
        // target page, otherwise GetElementbyId returns null and nothing is shown.
        private const string TargetNodeId = "mynode";

        // The HtmlWeb class is a utility class to get the HTML over HTTP
        HtmlWeb htmlWeb = new HtmlWeb();
        // Parsed document for PageUrl. Fully qualified because
        // System.Windows.Forms also declares a type named HtmlDocument.
        HtmlAgilityPack.HtmlDocument document;
        // The element identified by TargetNodeId, or null when the page has none.
        HtmlNode someNode;

        /// <summary>
        /// Builds the form, downloads <see cref="PageUrl"/> and fills
        /// <c>richTextBox1</c> with the external links found under the target node.
        /// </summary>
        public Form1()
        {
            InitializeComponent();

            document = htmlWeb.Load(PageUrl);
            someNode = document.GetElementbyId(TargetNodeId);

            // If there is no node with that Id there is nothing to list.
            if (someNode == null)
            {
                return;
            }

            // Collect every matching link first; assigning richTextBox1.Text inside
            // the loop would overwrite the previous link on each iteration and leave
            // only the last one visible.
            StringBuilder externalLinks = new StringBuilder();
            foreach (HtmlNode link in someNode.Descendants("a"))
            {
                // Returns the empty string when the anchor has no href attribute.
                string href = link.GetAttributeValue("href", string.Empty);
                if (IsExternalLink(href))
                {
                    externalLinks.AppendLine(href);
                }
            }

            // Leave the text box untouched when no external link was found.
            if (externalLinks.Length > 0)
            {
                richTextBox1.Text = externalLinks.ToString();
            }
        }

        /// <summary>
        /// Tells whether <paramref name="href"/> is an absolute http or https URL.
        /// </summary>
        /// <param name="href">The value of an anchor's href attribute; may be empty.</param>
        /// <returns>
        /// <c>true</c> when the value starts with "http://" or "https://"; the
        /// comparison is ordinal and ignores case because URL schemes are
        /// case-insensitive.
        /// </returns>
        private static bool IsExternalLink(string href)
        {
            if (string.IsNullOrEmpty(href))
            {
                return false;
            }

            return href.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || href.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Load event handler for the form. Intentionally empty: all the work is
        /// done in the constructor.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
        }
    }
}
But it never gets past this line:
someNode = document.GetElementbyId("mynode");
I set a breakpoint on this line and it gives me the message "No Source Is Available". If I don't use a breakpoint, nothing happens: the program runs and I don't get any errors, but it also doesn't work.
What should I do? I don't understand what I should put there instead of "mynode".
Upvotes: 0
Views: 144
Reputation: 838916
The problem is trying to use regex to parse HTML.
The specific cause of the error is that you have a ?
and a newline character that shouldn't be there, and this causes the regular expression to be invalid.
You can fix it by using HtmlAgilityPack instead.
Upvotes: 2