在用WebBrowser编程实现网页操作自动化时,常要分析网页Html,例如网页在加载数据时,常会显示“系统处理中,请稍候..”,我们需要在数据加载完成后才能继续下一步操作,如何抓取这个信息的网页html元素变化,从而判断数据加载完毕呢?用IE开发者工具是不可能抓取到的,太快了。(当然,设置足够长的延时,也是可以实现的,只是不够科学及稳妥,毕竟有时因为网络原因,数据加载时间可能超过原来设定时间,其次,设置延时过长也导致程序不够友好)
实现的办法:
1、先用“系统处理中”查找(泛查找),并在找到html中,再细找缩小html元素范围。
bb = FindHtmlElement("系统处理中", ExtendedWebBrowser1.Document, "", "InnerText", false)
2、添加一个Timer控件,设定100毫秒。根据 1中找到的元素,进行不断抓取,并将抓到的结果输出到文本。
3、将2中输出,导入Excel,进行筛选,并从中找到重复次数少的行,便是数据加载、加载完成之间的变化。
- Private Sub TimerProgress_Tick(sender As Object, e As EventArgs) Handles TimerProgress.Tick
-
- If Gethtmel Then
-
- Dim bb As HtmlElement
-
- bb = FindHtmlElement("all_jzts", ExtendedWebBrowser1.Document, "div", "id", True)
-
- If Not bb Is Nothing Then
-
- 'WriteRunLog("Style : " + bb.Style)
-
- WriteRunLog(bb.OuterHtml)
-
- Else
-
- WriteRunLog("all_jzts没找到")
-
- End If
-
- bb = FindHtmlElement("jzts", ExtendedWebBrowser1.Document, "div", "id", True)
-
- If Not bb Is Nothing Then
-
- 'WriteRunLog("Style : " + bb.Style)
-
- WriteRunLog(bb.OuterHtml)
-
- Else
-
- WriteRunLog("jzts没找到")
-
- End If
-
- 'Gethtmel = False
-
- End If
-
- '系统处理中,请稍候...
-
- Application.DoEvents()
-
- End Sub
- Function FindHtmlElement(ByVal FindText As String, ByVal doc As HtmlDocument, ByVal cTagName As String, ByVal cGetAttribute As String, Optional ByVal StrictMatching As Boolean = False) As HtmlElement
-
- 'cTagName:检索具有指定 html 标记的元素,标记需要输入完整的,缺省时查找所有。
-
- '例如:,不能只输入"i",需要输入"input"
-
- 'cGetAttribute :比较的属性类型,取值为:Id、InnerText、Name、title、classname、value、
-
- 'Id、InnerText可以通过GetAttribute获取,也可以通过HtmlElement.Id、HtmlElement.InnerText获取,所以代码简化为用GetAttribute获取。
-
- 'doc:WebBrowserExt1.Document
-
- 'GetAttribute("classname") '例如显示class="commonTable"的值commonTable
-
- 'StrictMatching:True严格匹配FindText
-
- 'WriteRunLog("FindHtmlElement开始:" + FindText)
-
- Try
-
- Dim i, k As Integer
-
- FindHtmlElement = Nothing
-
- FindHtmlElementOfDocument = doc
-
- If doc Is Nothing Then '2023.11.15在递归调用中,因为有些iFrames还未真正加载,从而导致传入的doc = doc.Window.Frames.Item(k).Document 为 Nothing ,从而引发异常:未将对象引用设置到对象的实例。
-
- Exit Function
-
- End If
-
-
-
- If LCase(cGetAttribute) = "innertext" Then 'InnerText必须严格匹配,否则找到的结果是错误的。
-
- ’StrictMatching = True
-
- End If
-
-
-
- If cTagName <> "" Then
-
- Dim EE As HtmlElementCollection = doc.GetElementsByTagName(cTagName)
-
- For i = 0 To EE.Count - 1
-
- If InStr(EE.Item(i).GetAttribute(cGetAttribute), FindText) > 0 _
-
- And (Not StrictMatching Or InStr(FindText, EE.Item(i).GetAttribute(cGetAttribute)) > 0) Then
-
-
-
- FindHtmlElement = EE.Item(i)
-
- 'WriteRunLog("Loop1")
-
- 'WriteRunLog("FindHtmlElement结束0")
-
- Exit Function '找到就退出
-
- End If
-
- Next
-
- Else
-
- For i = 0 To doc.All.Count - 1
-
- If InStr(doc.All.Item(i).GetAttribute(cGetAttribute), FindText) > 0 _
-
- And (Not StrictMatching Or InStr(FindText, doc.All.Item(i).GetAttribute(cGetAttribute)) > 0) And (cTagName = "" Or LCase(cTagName) = LCase(doc.All.Item(i).TagName)) Then
-
- FindHtmlElement = doc.All.Item(i)
-
- 'WriteRunLog("Loop1")
-
- 'WriteRunLog("FindHtmlElement结束0")
-
- Exit Function '找到就退出
-
- End If
-
- Next
-
- End If
-
- '上面没找到,进行递归调用,递归会查找所有嵌套的Frame。
-
- For k = 0 To doc.Window.Frames.Count - 1
-
- 'If k = 0 Then
-
- ' WriteRunLog("递归调用 doc.Window.Frames.Count:" + doc.Window.Frames.Count.ToString) 'For Test
-
- 'End If
-
- '2018.3.14 直接 递归调用
-
- 'WriteRunLog("递归调用:" + Str(k))
-
- ' WriteRunLog("doc.Window.Frames.Item(k).Name:" + doc.Window.Frames.Item(k).Name)
-
- FindHtmlElementOfDocument = doc.Window.Frames.Item(k).Document
-
- FindHtmlElement = FindHtmlElement(FindText, doc.Window.Frames.Item(k).Document, cTagName, cGetAttribute, StrictMatching)
-
- If Not FindHtmlElement Is Nothing Then '找到就退出循环
-
- 'WriteRunLog("FindHtmlElement结束1")
-
- Exit Function
-
- End If
-
- Next
-
- Catch ex As Exception
-
- FindHtmlElement = Nothing
-
- WriteRunLog("FindHtmlElement发生异常:" + ex.Message)
-
- End Try
-
- End Function
-
-
- Sub WriteRunLog(ByVal MyMsg As String)
-
- 'Using w As StreamWriter = File.AppendText("RunLog.txt")
-
- Dim w As StreamWriter
-
- If File.Exists("RunLog.txt") Then
-
- If My.Computer.FileSystem.GetFileInfo("RunLog.txt").Length > 10485760 Then '2017.5.4 文件大于10M,清0
-
- w = File.CreateText("RunLog.txt")
-
- w.Write("文件大于10M,置0从头开始!")
-
- w.Write(Chr(9))
-
- Else
-
- w = File.AppendText("RunLog.txt")
-
- End If
-
- Else
-
- w = File.CreateText("RunLog.txt")
-
- End If
-
- w.Write(Now)
-
- w.Write(Chr(9)) '插入Tab键
-
- w.WriteLine(MyMsg)
-
- w.Flush()
-
- w.Close()
-
- 'End Using
-
- End Sub