How to load specific range of data from html into excel?

E

Eric

Does anyone have any suggestions on how to load specific range of data from
html into excel?
For example, http://www.hkex.com.hk/markdata/quot/d100114e.htm#short_selling
I would like to load this link into excel, but this page is too long, and
the limitation of row for excel is 66536, so the specific section of html
cannot be loaded, does anyone have any suggestions on how to load the
specific section (short_selling) of html into excel?
Thanks in advance for any suggestions
Eric
 
P

Peter T

That's a 6+mb file, not practical to try a webQuery.

There are probably different approaches, the following one goes something
like this -

start downloading the file to memory in chunks
look for the 2nd "short_selling" (pos2)
then look for "Total" (pos3)
Start adding chunks from Total to a big string
Stop when "-------" is found (pos4)

Split the big string and dump to cells
Might want to do a bit more to put all the values in a table, but that's the
easy part which I'll leave to you.

This is very much bespoke, things could easily change which would require
the code to be modified. As of time of posting seems to work well,
particularly bearing in mind the size of the file

Const INTERNET_OPEN_TYPE_PRECONFIG = 0
Const INTERNET_FLAG_EXISTING_CONNECT = &H20000000

Private Declare Function InternetOpen Lib "wininet.dll" Alias _
"InternetOpenA" ( _
ByVal lpszAgent As String, _
ByVal dwAccessType As Long, _
ByVal lpszProxy As String, _
ByVal lpszProxyBypass As String, _
ByVal dwFlags As Long) As Long
Private Declare Function InternetOpenUrl Lib "wininet.dll" Alias _
"InternetOpenUrlA" ( _
ByVal hInternet As Long, _
ByVal lpszUrl As String, _
ByVal lpszHeaders As String, _
ByVal dwHeadersLength As Long, _
ByVal dwFlags As Long, _
ByRef dwContext As Long) As Long
Private Declare Function InternetCloseHandle Lib "wininet.dll" ( _
ByRef hInternet As Long) As Boolean ' byref or byval ??
Private Declare Function InternetReadFile Lib "wininet.dll" ( _
ByVal hFile As Long, _
ByVal lpBuffer As String, _
ByVal dwNumberOfBytesToRead As Long, _
ByRef lpdwNumberOfBytesRead As Long) As Integer
'' note ByVal lpBuffer As String, not ByRef as Any which can crash

Sub Short_selling_To_Cells()
Dim bGrab As Boolean
Dim iRes As Integer
Dim i As Long, nLen As Long
Dim pos1 As Long, pos2 As Long, pos3 As Long, pos4 As Long
Dim hInternetSession As Long, hUrl As Long
Dim nBytes As Long
Dim Buffer As String
Dim bigBuffer As String
Dim sErr As String
Dim arr() As String
Dim cnt As Long
Const cURL As String = "http://www.hkex.com.hk/markdata/quot/d100114e.htm"
Const cANC As String = "short_selling"

On Error GoTo errH

Range("A:F").Clear

'' assumes IE installed
hInternetSession = InternetOpen("IExpore.exe", _
INTERNET_OPEN_TYPE_PRECONFIG, _
vbNullString, vbNullString, 0)
If hInternetSession = 0 Then Err.Raise 10100

hUrl = InternetOpenUrl(hInternetSession, cURL, vbNullString, _
0, INTERNET_FLAG_EXISTING_CONNECT, 0)
If hUrl = 0 Then Err.Raise 10200

Buffer = Space(4096)
bigBuffer = Space(4096& * 8)

Do

iRes = InternetReadFile(hUrl, Buffer, Len(Buffer), nBytes)

If nBytes = 0 Or iRes = 0 Then Exit Do

If bGrab = False Then

If pos1 = 0 Then
pos1 = InStr(1, Buffer, cANC, vbTextCompare)
If pos1 And pos1 < 4096 Then
pos2 = InStr(pos1 + 1, Buffer, cANC, vbTextCompare)
End If
ElseIf pos2 = 0 Then

pos2 = InStr(1, Buffer, cANC, vbTextCompare)

If pos2 And pos2 < 4096 Then
pos3 = InStr(pos2 + 1, Buffer, "Total", vbTextCompare)
End If

ElseIf pos2 Then
pos3 = InStr(1, Buffer, "Total") + 1

End If

If pos3 Then
bGrab = True
If pos3 < (4096) Then
Buffer = Mid$(Buffer, pos3, Len(Buffer))
End If
End If

End If

If bGrab Then

pos4 = InStr(1, Buffer, "----------")

If pos4 Then
Buffer = Left$(Buffer, pos4 - 1)
End If


If nLen + Len(Buffer) > Len(bigBuffer) Then
bigBuffer = bigBuffer & Space(4096& * 8)
End If

Mid$(bigBuffer, nLen + 1, Len(Buffer)) = Buffer

nLen = nLen + Len(Buffer)

If pos4 Then
Exit Do
End If

End If

Loop


If bGrab Then
bigBuffer = Left$(bigBuffer, nLen)
arr = Split(bigBuffer, vbCrLf)

With Range("A1:A" & UBound(arr) + 1)
.Font.Name = "Courier New"
' .Value = arr
End With

For i = 0 To UBound(arr)
Cells(i + 1, 1) = arr(i)
Next
End If

done:

If hUrl Then InternetCloseHandle hUrl
If hInternetSession Then InternetCloseHandle hInternetSession

Exit Sub

errH:
Select Case Erl
Case 10100
sErr = "Error calling InternetOpen"
Case 10200
sErr = "Error calling InternetOpenUrl function"
Case Else
sErr = Err.Description
End Select

MsgBox sErr
' Stop
' Resume
Resume done

End Sub

Regards,
Peter T
 
M

Matthew Herbert

Does anyone have any suggestions on how to load specific range of data from
html into excel?
For example,http://www.hkex.com.hk/markdata/quot/d100114e.htm#short_selling
I would like to load this link into excel, but this page is too long, and
the limitation of row for excel is 66536, so the specific section of html
cannot be loaded, does anyone have any suggestions on how to load the
specific section (short_selling) of html into excel?
Thanks in advance for any suggestions
Eric

Eric,

Here is another way to get the data using XML. The macro
"GetShortData" will put the data onto the ActiveSheet, anchored in A1.

Best,

Matthew Herbert

Sub GetShortData()
Dim strRes As String
Dim strFind1 As String
Dim strFind2 As String
Dim lngPosStart As Long
Dim lngPosEnd As Long
Dim varArr As Variant
Dim Rng As Range
Const c_strURL As String = "http://www.hkex.com.hk/markdata/quot/
d100114e.htm"

strRes = GetXMLHTTP(c_strURL)

strFind1 = "SHORT SELLING TURNOVER - DAILY REPORT"
strFind2 = "PREVIOUS DAY'S ADJUSTED SHORT SELLING TURNOVER"

'get the second instance of strFind1
lngPosStart = InStr(1, strRes, strFind1)
lngPosStart = InStr(lngPosStart + 1, strRes, strFind1)

lngPosEnd = InStr(lngPosStart, strRes, strFind2)
strRes = Mid(strRes, lngPosStart, lngPosEnd - lngPosStart)

varArr = Split(strRes, vbCrLf)

Set Rng = ActiveSheet.Range("A1")
Set Rng = Range(Rng, Rng.Offset(UBound(varArr), 0))

Rng.Value = Application.Transpose(varArr)

End Sub

Function GetXMLHTTP(strURL As String) As String

Dim objXMLHTTP As Object
Dim strText As String

If strURL = "" Then
GetXMLHTTP = ""
Exit Function
End If

Set objXMLHTTP = CreateObject("Microsoft.XMLHTTP")

With objXMLHTTP
.Open "GET", strURL, False
.Send
strText = .responseText
End With

If objXMLHTTP.statusText = "OK" Then
GetXMLHTTP = strText
Else
GetXMLHTTP = ""
End If

End Function
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Top