Automating Web Query import

R

Roger on Excel

Thanks guys for all your help.

I am incredibly thankful for all your time trying to help me.

I wish you all the best.

Have a great weekend,

Roger
 
J

Joel

I wanted to do try to get the data using Ron's method to compare techniques.
I have had problems with using the getElementsByTagname() method. I always
seem to have problems finding all the data. Thought I would try again.

The IE method started working out easier than the Query method. The
Internet Method was even better at doing some error checks and finding
missing properties. Got everything working with 5 chemicals. Then increase
to 10 and found some missing properties in Important Items. Made Some minor
changes. Then went to 20 chemicals and got stuck. It took me 8 hours to
figure out the correct methods to get the last chemical working properly. It
seems there is minor differences between the chemical webpages that weren't
obvious.

Here is the new code

Const ChemNameCol = 1
Const GenericNameCol = ChemNameCol + 1
Const ISCSCol = GenericNameCol + 1
Const CASNoCol = ISCSCol + 1
Const RTECSNoCol = CASNoCol + 1
Const UNNoCol = RTECSNoCol + 1
Const ECNoCol = UNNoCol + 1
Const MolecularFormCol = ECNoCol + 1
Const AltNameCol = MolecularFormCol + 1
Const MoleMassCol = AltNameCol + 1

'Group of 3 Columns
Const FireHazCol = MoleMassCol + 1
Const ExplosHazCol = FireHazCol + 3
Const ExposureCol = ExplosHazCol + 3
Const InhalCol = ExposureCol + 3
Const SkinCol = InhalCol + 3
Const EyesCol = SkinCol + 3
Const IngestCol = EyesCol + 3

Const SpillDisposCol = IngestCol + 3
Const PackCol = SpillDisposCol + 1
Const EmergRespCol = PackCol + 1
Const SafeStorCol = EmergRespCol + 1

Const PhysStateCol = SafeStorCol + 1
Const RoutesCol = PhysStateCol + 1
Const ChemDangCol = RoutesCol + 1
Const InhalRiskCol = ChemDangCol + 1
Const OccupatCol = InhalRiskCol + 1
Const ShortTermCol = OccupatCol + 1
Const LongTermCol = ShortTermCol + 1
Const PhysDangerCol = LongTermCol + 1

Const PhysicPropCol = PhysDangerCol + 1
Const EnvironCol = PhysicPropCol + 1
Const NoteCol = EnvironCol + 1
Const LastCol = NoteCol

Sub MakeHeaders()

With Sheets("Data")

.Cells(1, ChemNameCol) = "Chemical Name"
.Cells(1, GenericNameCol) = "Generic Name(s)"
.Cells(1, ISCSCol) = "ISCS No"
.Cells(1, CASNoCol) = "CAS No"
.Cells(1, RTECSNoCol) = "RTECS No"
.Cells(1, UNNoCol) = "UN No"
.Cells(1, ECNoCol) = "EC No"
.Cells(1, MolecularFormCol) = "Molucular Formula"
.Cells(1, AltNameCol) = "Alternate Names"
.Columns(AltNameCol).WrapText = True
.Cells(1, MoleMassCol) = "Molecular Mass"

.Range(.Cells(1, FireHazCol), .Cells(1, FireHazCol + 2)).MergeCells = True
.Cells(1, FireHazCol) = "Fire Hazard"
.Cells(1, FireHazCol).HorizontalAlignment = xlCenter
.Cells(2, FireHazCol) = "Acute Hazard/Symptoms"
.Cells(2, FireHazCol + 1) = "Prevention"
.Cells(2, FireHazCol + 2) = "First Aid/Fire Fighting"

.Range(.Cells(1, ExplosHazCol), _
.Cells(1, ExplosHazCol + 2)).MergeCells = True
.Cells(1, ExplosHazCol) = "Explosion Hazard"
.Cells(1, ExplosHazCol).HorizontalAlignment = xlCenter
.Cells(2, ExplosHazCol) = "Acute Hazard/Symptoms"
.Cells(2, ExplosHazCol + 1) = "Prevention"
.Cells(2, ExplosHazCol + 2) = "First Aid/Fire Fighting"

.Range(.Cells(1, ExposureCol), _
.Cells(1, ExposureCol + 2)).MergeCells = True
.Cells(1, ExposureCol) = "Exposure"
.Cells(1, ExposureCol).HorizontalAlignment = xlCenter
.Cells(2, ExposureCol) = "Acute Hazard/Symptoms"
.Cells(2, ExposureCol + 1) = "Prevention"
.Cells(2, ExposureCol + 2) = "First Aid/Fire Fighting"

.Range(.Cells(1, InhalCol), .Cells(1, InhalCol + 2)).MergeCells = True
.Cells(1, InhalCol) = "Inhalation Exposure"
.Cells(1, InhalCol).HorizontalAlignment = xlCenter
.Cells(2, InhalCol) = "Acute Hazard/Symptoms"
.Cells(2, InhalCol + 1) = "Prevention"
.Cells(2, InhalCol + 2) = "First Aid/Fire Fighting"

.Range(.Cells(1, SkinCol), .Cells(1, SkinCol + 2)).MergeCells = True
.Cells(1, SkinCol) = "Skin Exposure"
.Cells(1, SkinCol).HorizontalAlignment = xlCenter
.Cells(2, SkinCol) = "Acute Hazard/Symptoms"
.Cells(2, SkinCol + 1) = "Prevention"
.Cells(2, SkinCol + 2) = "First Aid/Fire Fighting"

.Range(.Cells(1, EyesCol), .Cells(1, EyesCol + 2)).MergeCells = True
.Cells(1, EyesCol) = "Eyes Exposure"
.Cells(1, EyesCol).HorizontalAlignment = xlCenter
.Cells(2, EyesCol) = "Acute Hazard/Symptoms"
.Cells(2, EyesCol + 1) = "Prevention"
.Cells(2, EyesCol + 2) = "First Aid/Fire Fighting"

.Range(.Cells(1, IngestCol), .Cells(1, IngestCol + 2)).MergeCells = True
.Cells(1, IngestCol) = "Ingestion Exposure"
.Cells(1, IngestCol).HorizontalAlignment = xlCenter
.Cells(2, IngestCol) = "Acute Hazard/Symptoms"
.Cells(2, IngestCol + 1) = "Prevention"
.Cells(2, IngestCol + 2) = "First Aid/Fire Fighting"

.Cells(1, SpillDisposCol) = "Spillage Disposal"
.Cells(1, PackCol) = "Packaging and Labelling"
.Columns(PackCol).WrapText = True
.Cells(1, EmergRespCol) = "Emergency Response"
.Cells(1, SafeStorCol) = "Safe Storage"
.Columns(SafeStorCol).WrapText = True

.Cells(1, PhysStateCol) = "Physical State; Appearance"
.Cells(1, RoutesCol) = "Routes of Exposure"
.Cells(1, ChemDangCol) = "Chemical Dangers"
.Cells(1, InhalRiskCol) = "Inhalation Risk"
.Cells(1, OccupatCol) = "Occupational exposure limits"
.Cells(1, ShortTermCol) = "Effects of short-term exposure"
.Cells(1, LongTermCol) = "Effects of long-term or repeated exposure"
.Cells(1, PhysDangerCol) = "Physical Dangers"

.Cells(1, PhysicPropCol) = "PHYSICAL PROPERTIES"
.Cells(1, EnvironCol) = "ENVIRONMENTAL DATA"
.Cells(1, NoteCol) = "NOTES"

Range("A1:A" & LastCol).EntireColumn.AutoFit
End With

End Sub

Sub GetData2()

Found = False
For Each sht In Sheets
If sht.Name = "Data" Then
Found = True
Exit For
End If
Next sht
If Found = False Then
Set DataSht = Sheets.Add(after:=Sheets(Sheets.Count))
DataSht.Name = "Data"
Else
Set DataSht = Sheets("Data")
DataSht.Cells.ClearContents
End If

Call MakeHeaders

Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True

Set ChemicalSht = Sheets("Chemicals")
With ChemicalSht
FirstRow = 1
LastRow = .Range("A" & Rows.Count).End(xlUp).Row
LastRow = 20
Set Chemicals = .Range("A" & FirstRow & ":A" & LastRow)

NewRowCount = FirstRow + 3
For Each Chemical In Chemicals

URL = Chemical.Offset(0, 1)

'get web page
ie.Navigate2 URL
Do While ie.readyState <> 4
DoEvents
Loop

Do While ie.busy = True
DoEvents
Loop

Set cTables = ie.Document.getElementsByTagname("table")
Call MoveData2(cTables, Chemical, NewRowCount)
NewRowCount = NewRowCount + 1

Next Chemical
.Range("A1:A" & LastCol).EntireColumn.AutoFit

End With

DataSht.Columns(NoteCol).ColumnWidth = 50
DataSht.Rows("1:" & NewRowCount).VerticalAlignment = xlTop

End Sub

Sub MoveData2(cTables, Chemical, RowCount)

Dim StrNumber As String

LF = Chr(10)
CR = Chr(13)


'With Sheets("Test")
' .Cells.ClearContents
' RowCount = 1
' For Each itm In cTables
' .Range("A" & RowCount) = itm.classname
' .Range("B" & RowCount) = itm.innertext

' RowCount = RowCount + 1
' Next itm
'End With

Set DataSht = Sheets("Data")
'Use ICSC: to get chemical names
With DataSht
.Cells(RowCount, ChemNameCol) = Chemical

GenericText = cTables.Item(3).innertext
GenericText = Replace(GenericText, CR, "")
'Move Generic Name

.Cells(RowCount, GenericNameCol) = _
Trim(Left(GenericText, InStr(GenericText, ":") - 1))

'Move ISCS
.Cells(1, ISCSCol).NumberFormat = "@"
.Cells(RowCount, ISCSCol) = _
Trim(Mid(GenericText, InStr(GenericText, ":") + 1))

Alternate = cTables.Item(4).innertext
Alternate = Replace(Alternate, CR, "")
'Move Alternate Name
.Cells(RowCount, AltNameCol) = Alternate

'Get Additional ID Numbers
ID = cTables.Item(5).innertext
SplitData = Split(ID, CR)
For i = LBound(SplitData) To UBound(SplitData)
'get each line line
itm = Trim(SplitData(i))
itm = Replace(itm, LF, "")
'if no colon sign then molecular mass
If InStr(itm, ":") > 0 Then
'split name and number using colon sign
StrNumber = Trim(Mid(itm, InStr(itm, ":") + 1))
itm = Trim(Left(itm, InStr(itm, ":") - 1))
ID = Trim(Mid(ID, InStr(ID, CR) + 1))

Select Case itm
Case "CAS No":
'Move CASAN Name
.Cells(RowCount, CASNoCol).NumberFormat = "@"
.Cells(RowCount, CASNoCol) = StrNumber

Case "RTECS No":
'Move RTECS
.Cells(RowCount, RTECSNoCol).NumberFormat = "@"
.Cells(RowCount, RTECSNoCol) = StrNumber

Case "UN No":
'Move UN No
.Cells(RowCount, UNNoCol).NumberFormat = "@"
.Cells(RowCount, UNNoCol) = StrNumber

Case "EC No":
.Cells(RowCount, ECNoCol).NumberFormat = "@"

'split string number from molecular formula
StrNumber = _
Trim(Left(StrNumber, InStr(StrNumber, " ") - 1))

'Move EC No
.Cells(RowCount, ECNoCol) = StrNumber

Case "Molecular mass":
.Cells(RowCount, MoleMassCol) = StrNumber

End Select
End If
Next i

'Get molecular formula
Molecular = cTables.Item(5).Cells.Item(2).innertext
'Remove extra data in front of chemical formula
If Left(Molecular, 1) = "(" Then
Molecular = Mid(Molecular, InStr(Molecular, CR) + 1)
End If
If InStr(Molecular, CR) > 0 Then
Molecular = _
Trim(Left(Molecular, InStr(Molecular, CR) - 1))
Else
Molecular = Trim(Molecular)
End If
'Move Molecular Formula
Molecular = Molecular

For Each TableRow In cTables.Item(6).Rows
Select Case UCase(TableRow.Cells(0).innertext)
Case "FIRE":
'Move Fire hazard
.Cells(RowCount, FireHazCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, FireHazCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, FireHazCol + 2) = _
TableRow.Cells(3).innertext
Case "EXPLOSION":
'Move Explosion Hazard
.Cells(RowCount, ExplosHazCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, ExplosHazCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, ExplosHazCol + 2) = _
TableRow.Cells(3).innertext
Case "EXPOSURE":
'Move Exposure
.Cells(RowCount, ExposureCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, ExposureCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, ExposureCol + 2) = _
TableRow.Cells(3).innertext
Case "INHALATION":
'Move Inhalation Exposure
.Cells(RowCount, InhalCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, InhalCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, InhalCol + 2) = _
TableRow.Cells(3).innertext
Case "SKIN":
'Move Skin Exposure
.Cells(RowCount, SkinCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, SkinCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, SkinCol + 2) = _
TableRow.Cells(3).innertext
Case "EYES":
'Move Eyes Exposure
.Cells(RowCount, EyesCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, EyesCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, EyesCol + 2) = _
TableRow.Cells(3).innertext
Case "INGESTION":
'Move Ingestion Exposure
.Cells(RowCount, IngestCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, IngestCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, IngestCol + 2) = _
TableRow.Cells(3).innertext
End Select
Next TableRow

Set SpillRow = cTables.Item(7).Rows(1)

'Move SPILLAGE DISPOSAL
.Cells(RowCount, SpillDisposCol) = _
SpillRow.Cells(0).innertext

'Move PACKAGING & LABELING
'Combine columns 2 & 3 together
If SpillRow.Cells.Length = 2 Then
Pack = SpillRow.Cells(1).innertext
Else
Pack = SpillRow.Cells(1).innertext & _
LF & SpillRow.Cells(2).innertext
End If
Pack = Replace(Pack, CR, "")
.Cells(RowCount, PackCol) = Pack

Set EmergencyRow = cTables.Item(8).Rows(1)

'Move Emergency Response
Emergency = EmergencyRow.Cells(0).innertext
Emergency = Replace(Emergency, CR, "")
.Cells(RowCount, EmergRespCol) = Emergency

'Move Safe Storage
.Cells(RowCount, SafeStorCol) = _
EmergencyRow.Cells(1).innertext

Set ImportantRow = cTables.Item(9).Rows(1)
Cols = ImportantRow.Cells.Length
For ColCount = 0 To (Cols - 1)
Set cell = ImportantRow.Cells(ColCount)
Set B = cell.getElementsByTagname("B")
Set P = cell.getElementsByTagname("P")
First = True
Done = False
Do
If First = True Then
Title = B(0).innertext
Title = Trim(Replace(Title, ":", ""))
Title = Trim(Replace(Title, LF, ""))
Detail = B(0).nextsibling.nextsibling.data
PCount = 0
First = False
Else
Item = P(PCount).innertext
If Item <> "" Then
Title = Left(Item, InStr(Item, CR) - 1)
Title = Trim(Replace(Title, ":", ""))
Title = Trim(Replace(Title, LF, ""))

Detail = Mid(Item, InStr(Item, CR) + 1)
If Left(Detail, 1) = LF Then
Detail = Mid(Detail, 2)
End If
Else
Title = ""
End If
PCount = PCount + 1
End If

If Title <> "" Then
Select Case UCase(Title)
Case "PHYSICAL STATE; APPEARANCE":
'Move Physical State; Appearance
.Cells(RowCount, PhysStateCol) = Detail
Case "ROUTES OF EXPOSURE":
'Move Routes of exposure
.Cells(RowCount, RoutesCol) = Detail
Case "CHEMICAL DANGERS":
'Move Chemical dangers
.Cells(RowCount, ChemDangCol) = Detail
Case "INHALATION RISK":
'Move Inhalation risk
.Cells(RowCount, InhalRiskCol) = Detail
Case "OCCUPATIONAL EXPOSURE LIMITS":
'Move Occupational exposure limits
.Cells(RowCount, OccupatCol) = Detail
Case "EFFECTS OF SHORT-TERM EXPOSURE":
'Move Effects of short-term exposure
.Cells(RowCount, ShortTermCol) = Detail
Case "EFFECTS OF LONG-TERM OR REPEATED EXPOSURE":
'Move Effects of long-term or repeated exposure
.Cells(RowCount, LongTermCol) = Detail
Case "PHYSICAL DANGERS":
'Move Physical Danagers
.Cells(RowCount, PhysDangerCol) = Detail
Case Else:
MsgBox ("Unknown Detail : " & _
Title & " : Stop")

Stop
End Select
End If

If PCount >= P.Length Then
Done = True
End If

Loop While Done = False
Next ColCount

'Move Physical Properties
Physical = cTables.Item(10).Rows(1).Cells(0).innertext
Physical = Replace(Physical, CR, "")
.Cells(RowCount, PhysicPropCol) = Physical

'Move ENVIRONMENTAL DATA
Environmental = cTables.Item(10).Rows(1).Cells(1).innertext
Environmental = Replace(Environmental, CR, "")
.Cells(RowCount, EnvironCol) = Environmental

'Move NOTES
Notes = cTables.Item(11).Rows(1).innertext
Notes = Replace(Notes, CR, "")
.Cells(RowCount, NoteCol) = Notes

End With

End Sub
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Top