Private Sub Page_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load 'Put user code to initialize the page here End Sub Private Sub CategoriesURL_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles CategoriesURL.Click Dim URLs As String = "http://" & (txtsource.Text) Dim oRequest As WebRequest = WebRequest.Create(URLs) Dim oResponse As WebResponse = oRequest.GetResponse() Dim oStream As Stream = oResponse.GetResponseStream() Dim oStreamReader As New StreamReader(oStream, Encoding.UTF8) Dim comm As SqlCommand Dim adapter As SqlDataReader Dim connection As String Dim strConn As SqlConnection Dim ds As DataSet Dim querystring As String Dim strStart As String Dim strEnd As String strStart = 0 strEnd = 0 myString = oStreamReader.ReadToEnd() strStart = InStr(myString, txtStart.Text) strEnd = InStr(myString, txtstop.Text) myString = Mid(myString, strStart, strEnd - strStart) ' oResponse.Close() 'oStreamReader.Close() ' this is a regular expression to check for the urls Dim r As Regex = New Regex("a.*href\s*=\s*(?:""(?<1>[^""]*)""|(?<1>\S+))", RegexOptions.IgnoreCase Or RegexOptions.Compiled) ' get all the matches depending upon the regular expression Dim mcl As Match = r.Match(myString) Dim sMatch As String Dim cat As String While mcl.Success sMatch = ProcessURL(mcl.Groups(1).ToString, URLs) aMatch.Add(sMatch) mcl = mcl.NextMatch() End While Dim slink As String For Each slink In aMatch Dim b As String = slink.ToString ' Add the extracted urls to the array list a.Add(b) Next chkURL.DataSource = a chkURL.DataBind() ' assign arraylist to the datasource connection = "server=JOANNE;uid=sa;pwd=;database=VoiceNews" strConn = New SqlConnection(connection) querystring = "select CategoriesName, CategoriesID from Categories" comm = New SqlCommand comm.CommandText = querystring comm.Connection = strConn strConn.Open() adapter = comm.ExecuteReader() ctg.DataSource = adapter ctg.DataTextField = "CategoriesName" ctg.DataValueField = "CategoriesID" ctg.DataBind() strConn.Close() adapter.Close() End Sub Private Function ProcessURL(ByVal sInput As String, ByVal sURL As String) 'Find out if the sURL has a "/" after the Domain Name 'If not, give a "/" at the end 'First, check out for any slash after the 'Double Dashes of the http:// 'If there is NO slash, then end the sURL string with a SLASH If InStr(8, sURL, "/") = 0 Then sURL += "/" End If 'FILTERING 'Filter down to the Domain Name Directory from the Right Dim iCount As Integer For iCount = sURL.Length To 1 Step -1 If Mid(sURL, iCount, 1) = "/" Then sURL = Left(sURL, iCount) Exit For End If Next 'Filter out the ">" from the Left 'For iCount = sInput.Length To 1 Step -1 ' If Mid(sInput, iCount, 1) = ">" Then 'sInput = Left(sInput, iCount - 1) ' ' ElseIf Mid(sInput, iCount, 1) = ">" Then 'Stop and Take the Char before sInput = Left(sInput, iCount - 1) ' Exit For ' End If ' Next For iCount = 1 To sInput.Length If Mid(sInput, iCount, 4) = ">" Then sInput = Left(sInput, iCount - 1) 'Stop and Take the Char before Exit For End If Next 'Filter out unnecessary Characters sInput = sInput.Replace("<", Chr(39)) sInput = sInput.Replace(">", Chr(39)) sInput = sInput.Replace(""", Chr(34)) sInput = sInput.Replace("'", "") sInput = sInput.Replace("'", Chr(39)) If (sInput.IndexOf("http://") < 0) Then If (Not (sInput.StartsWith("/")) And Not (sURL.EndsWith("/"))) Then Return sURL & "/" & sInput Else If (sInput.StartsWith("/")) And (sURL.EndsWith("/")) Then Return sURL.Substring(0, sURL.Length - 1) + sInput Else Return sURL + sInput End If End If Else Return sInput End If End Function Sub LoadData() Dim strConn As String = "server=JOANNE;uid=sa;pwd=;database=VoiceNews" Dim strSQL As String = "Select CategoriesDetail.CategoriesSourceID ,CategoriesDetail.CategoriesURL ,CategoriesDetail.CategoriesID, Categories.CategoriesName From CategoriesDetail, Categories WHERE CategoriesDetail.CategoriesID=Categories.CategoriesID" Dim cmd As New SqlCommand(strSQL, New SqlConnection(strConn)) cmd.Connection.Open() DataGrid1.DataSource = cmd.ExecuteReader DataGrid1.DataBind() cmd.Connection.Close() cmd.Connection.Dispose() End Sub Private Sub addurl_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles addurl.Click Dim item As ListItem Dim comm As SqlCommand ' Dim adapter As SqlDataAdapter Dim connection As String Dim strConn As SqlConnection Dim ds As DataSet ' Dim querystring As String For Each item In chkURL.Items If item.Selected Then connection = "server=JOANNE;uid=sa;pwd=;database=VoiceNews" strConn = New SqlConnection(connection) 'querystring = "insert into(CategoriesURL, CategoriesType) values (@CategoriesURL, @CategoriesType)" comm = New SqlCommand("INSERT INTO CategoriesDetail(CategoriesURL,CategoriesID)VALUES('" & item.Text & "','" & ctg.SelectedItem.Value & "')", strConn) 'comm.CommandText = querystring ' comm.Connection = strConn 'ds = New DataSet 'comm.Parameters.Add("@CategoriesURL", SqlDbType.VarChar).Value = chkURL.Items(count).Value 'comm.Parameters.Add("@CategoriesType", SqlDbType.VarChar).Value = ctg.SelectedItem.Value comm.Connection.Open() comm.ExecuteNonQuery() comm.Connection.Close() End If Next chkURL.ClearSelection() LoadData() End Sub Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click Response.Redirect("NewsExtract.aspx") End Sub
News Source URL Start Extract ON Stop ON