I wanted to do try to get the data using Ron's method to compare techniques.
I have had problems with using the getElementsByTagname() method. I always
seem to have problems finding all the data. Thought I would try again.
The IE method started working out easier than the Query method. The
Internet Method was even better at doing some error checks and finding
missing properties. Got everything working with 5 chemicals. Then increase
to 10 and found some missing properties in Important Items. Made Some minor
changes. Then went to 20 chemicals and got stuck. It took me 8 hours to
figure out the correct methods to get the last chemical working properly. It
seems there is minor differences between the chemical webpages that weren't
obvious.
Here is the new code
Const ChemNameCol = 1
Const GenericNameCol = ChemNameCol + 1
Const ISCSCol = GenericNameCol + 1
Const CASNoCol = ISCSCol + 1
Const RTECSNoCol = CASNoCol + 1
Const UNNoCol = RTECSNoCol + 1
Const ECNoCol = UNNoCol + 1
Const MolecularFormCol = ECNoCol + 1
Const AltNameCol = MolecularFormCol + 1
Const MoleMassCol = AltNameCol + 1
'Group of 3 Columns
Const FireHazCol = MoleMassCol + 1
Const ExplosHazCol = FireHazCol + 3
Const ExposureCol = ExplosHazCol + 3
Const InhalCol = ExposureCol + 3
Const SkinCol = InhalCol + 3
Const EyesCol = SkinCol + 3
Const IngestCol = EyesCol + 3
Const SpillDisposCol = IngestCol + 3
Const PackCol = SpillDisposCol + 1
Const EmergRespCol = PackCol + 1
Const SafeStorCol = EmergRespCol + 1
Const PhysStateCol = SafeStorCol + 1
Const RoutesCol = PhysStateCol + 1
Const ChemDangCol = RoutesCol + 1
Const InhalRiskCol = ChemDangCol + 1
Const OccupatCol = InhalRiskCol + 1
Const ShortTermCol = OccupatCol + 1
Const LongTermCol = ShortTermCol + 1
Const PhysDangerCol = LongTermCol + 1
Const PhysicPropCol = PhysDangerCol + 1
Const EnvironCol = PhysicPropCol + 1
Const NoteCol = EnvironCol + 1
Const LastCol = NoteCol
Sub MakeHeaders()
With Sheets("Data")
.Cells(1, ChemNameCol) = "Chemical Name"
.Cells(1, GenericNameCol) = "Generic Name(s)"
.Cells(1, ISCSCol) = "ISCS No"
.Cells(1, CASNoCol) = "CAS No"
.Cells(1, RTECSNoCol) = "RTECS No"
.Cells(1, UNNoCol) = "UN No"
.Cells(1, ECNoCol) = "EC No"
.Cells(1, MolecularFormCol) = "Molucular Formula"
.Cells(1, AltNameCol) = "Alternate Names"
.Columns(AltNameCol).WrapText = True
.Cells(1, MoleMassCol) = "Molecular Mass"
.Range(.Cells(1, FireHazCol), .Cells(1, FireHazCol + 2)).MergeCells = True
.Cells(1, FireHazCol) = "Fire Hazard"
.Cells(1, FireHazCol).HorizontalAlignment = xlCenter
.Cells(2, FireHazCol) = "Acute Hazard/Symptoms"
.Cells(2, FireHazCol + 1) = "Prevention"
.Cells(2, FireHazCol + 2) = "First Aid/Fire Fighting"
.Range(.Cells(1, ExplosHazCol), _
.Cells(1, ExplosHazCol + 2)).MergeCells = True
.Cells(1, ExplosHazCol) = "Explosion Hazard"
.Cells(1, ExplosHazCol).HorizontalAlignment = xlCenter
.Cells(2, ExplosHazCol) = "Acute Hazard/Symptoms"
.Cells(2, ExplosHazCol + 1) = "Prevention"
.Cells(2, ExplosHazCol + 2) = "First Aid/Fire Fighting"
.Range(.Cells(1, ExposureCol), _
.Cells(1, ExposureCol + 2)).MergeCells = True
.Cells(1, ExposureCol) = "Exposure"
.Cells(1, ExposureCol).HorizontalAlignment = xlCenter
.Cells(2, ExposureCol) = "Acute Hazard/Symptoms"
.Cells(2, ExposureCol + 1) = "Prevention"
.Cells(2, ExposureCol + 2) = "First Aid/Fire Fighting"
.Range(.Cells(1, InhalCol), .Cells(1, InhalCol + 2)).MergeCells = True
.Cells(1, InhalCol) = "Inhalation Exposure"
.Cells(1, InhalCol).HorizontalAlignment = xlCenter
.Cells(2, InhalCol) = "Acute Hazard/Symptoms"
.Cells(2, InhalCol + 1) = "Prevention"
.Cells(2, InhalCol + 2) = "First Aid/Fire Fighting"
.Range(.Cells(1, SkinCol), .Cells(1, SkinCol + 2)).MergeCells = True
.Cells(1, SkinCol) = "Skin Exposure"
.Cells(1, SkinCol).HorizontalAlignment = xlCenter
.Cells(2, SkinCol) = "Acute Hazard/Symptoms"
.Cells(2, SkinCol + 1) = "Prevention"
.Cells(2, SkinCol + 2) = "First Aid/Fire Fighting"
.Range(.Cells(1, EyesCol), .Cells(1, EyesCol + 2)).MergeCells = True
.Cells(1, EyesCol) = "Eyes Exposure"
.Cells(1, EyesCol).HorizontalAlignment = xlCenter
.Cells(2, EyesCol) = "Acute Hazard/Symptoms"
.Cells(2, EyesCol + 1) = "Prevention"
.Cells(2, EyesCol + 2) = "First Aid/Fire Fighting"
.Range(.Cells(1, IngestCol), .Cells(1, IngestCol + 2)).MergeCells = True
.Cells(1, IngestCol) = "Ingestion Exposure"
.Cells(1, IngestCol).HorizontalAlignment = xlCenter
.Cells(2, IngestCol) = "Acute Hazard/Symptoms"
.Cells(2, IngestCol + 1) = "Prevention"
.Cells(2, IngestCol + 2) = "First Aid/Fire Fighting"
.Cells(1, SpillDisposCol) = "Spillage Disposal"
.Cells(1, PackCol) = "Packaging and Labelling"
.Columns(PackCol).WrapText = True
.Cells(1, EmergRespCol) = "Emergency Response"
.Cells(1, SafeStorCol) = "Safe Storage"
.Columns(SafeStorCol).WrapText = True
.Cells(1, PhysStateCol) = "Physical State; Appearance"
.Cells(1, RoutesCol) = "Routes of Exposure"
.Cells(1, ChemDangCol) = "Chemical Dangers"
.Cells(1, InhalRiskCol) = "Inhalation Risk"
.Cells(1, OccupatCol) = "Occupational exposure limits"
.Cells(1, ShortTermCol) = "Effects of short-term exposure"
.Cells(1, LongTermCol) = "Effects of long-term or repeated exposure"
.Cells(1, PhysDangerCol) = "Physical Dangers"
.Cells(1, PhysicPropCol) = "PHYSICAL PROPERTIES"
.Cells(1, EnvironCol) = "ENVIRONMENTAL DATA"
.Cells(1, NoteCol) = "NOTES"
Range("A1:A" & LastCol).EntireColumn.AutoFit
End With
End Sub
Sub GetData2()
Found = False
For Each sht In Sheets
If sht.Name = "Data" Then
Found = True
Exit For
End If
Next sht
If Found = False Then
Set DataSht = Sheets.Add(after:=Sheets(Sheets.Count))
DataSht.Name = "Data"
Else
Set DataSht = Sheets("Data")
DataSht.Cells.ClearContents
End If
Call MakeHeaders
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
Set ChemicalSht = Sheets("Chemicals")
With ChemicalSht
FirstRow = 1
LastRow = .Range("A" & Rows.Count).End(xlUp).Row
LastRow = 20
Set Chemicals = .Range("A" & FirstRow & ":A" & LastRow)
NewRowCount = FirstRow + 3
For Each Chemical In Chemicals
URL = Chemical.Offset(0, 1)
'get web page
ie.Navigate2 URL
Do While ie.readyState <> 4
DoEvents
Loop
Do While ie.busy = True
DoEvents
Loop
Set cTables = ie.Document.getElementsByTagname("table")
Call MoveData2(cTables, Chemical, NewRowCount)
NewRowCount = NewRowCount + 1
Next Chemical
.Range("A1:A" & LastCol).EntireColumn.AutoFit
End With
DataSht.Columns(NoteCol).ColumnWidth = 50
DataSht.Rows("1:" & NewRowCount).VerticalAlignment = xlTop
End Sub
Sub MoveData2(cTables, Chemical, RowCount)
Dim StrNumber As String
LF = Chr(10)
CR = Chr(13)
'With Sheets("Test")
' .Cells.ClearContents
' RowCount = 1
' For Each itm In cTables
' .Range("A" & RowCount) = itm.classname
' .Range("B" & RowCount) = itm.innertext
' RowCount = RowCount + 1
' Next itm
'End With
Set DataSht = Sheets("Data")
'Use ICSC: to get chemical names
With DataSht
.Cells(RowCount, ChemNameCol) = Chemical
GenericText = cTables.Item(3).innertext
GenericText = Replace(GenericText, CR, "")
'Move Generic Name
.Cells(RowCount, GenericNameCol) = _
Trim(Left(GenericText, InStr(GenericText, ":") - 1))
'Move ISCS
.Cells(1, ISCSCol).NumberFormat = "@"
.Cells(RowCount, ISCSCol) = _
Trim(Mid(GenericText, InStr(GenericText, ":") + 1))
Alternate = cTables.Item(4).innertext
Alternate = Replace(Alternate, CR, "")
'Move Alternate Name
.Cells(RowCount, AltNameCol) = Alternate
'Get Additional ID Numbers
ID = cTables.Item(5).innertext
SplitData = Split(ID, CR)
For i = LBound(SplitData) To UBound(SplitData)
'get each line line
itm = Trim(SplitData(i))
itm = Replace(itm, LF, "")
'if no colon sign then molecular mass
If InStr(itm, ":") > 0 Then
'split name and number using colon sign
StrNumber = Trim(Mid(itm, InStr(itm, ":") + 1))
itm = Trim(Left(itm, InStr(itm, ":") - 1))
ID = Trim(Mid(ID, InStr(ID, CR) + 1))
Select Case itm
Case "CAS No":
'Move CASAN Name
.Cells(RowCount, CASNoCol).NumberFormat = "@"
.Cells(RowCount, CASNoCol) = StrNumber
Case "RTECS No":
'Move RTECS
.Cells(RowCount, RTECSNoCol).NumberFormat = "@"
.Cells(RowCount, RTECSNoCol) = StrNumber
Case "UN No":
'Move UN No
.Cells(RowCount, UNNoCol).NumberFormat = "@"
.Cells(RowCount, UNNoCol) = StrNumber
Case "EC No":
.Cells(RowCount, ECNoCol).NumberFormat = "@"
'split string number from molecular formula
StrNumber = _
Trim(Left(StrNumber, InStr(StrNumber, " ") - 1))
'Move EC No
.Cells(RowCount, ECNoCol) = StrNumber
Case "Molecular mass":
.Cells(RowCount, MoleMassCol) = StrNumber
End Select
End If
Next i
'Get molecular formula
Molecular = cTables.Item(5).Cells.Item(2).innertext
'Remove extra data in front of chemical formula
If Left(Molecular, 1) = "(" Then
Molecular = Mid(Molecular, InStr(Molecular, CR) + 1)
End If
If InStr(Molecular, CR) > 0 Then
Molecular = _
Trim(Left(Molecular, InStr(Molecular, CR) - 1))
Else
Molecular = Trim(Molecular)
End If
'Move Molecular Formula
Molecular = Molecular
For Each TableRow In cTables.Item(6).Rows
Select Case UCase(TableRow.Cells(0).innertext)
Case "FIRE":
'Move Fire hazard
.Cells(RowCount, FireHazCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, FireHazCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, FireHazCol + 2) = _
TableRow.Cells(3).innertext
Case "EXPLOSION":
'Move Explosion Hazard
.Cells(RowCount, ExplosHazCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, ExplosHazCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, ExplosHazCol + 2) = _
TableRow.Cells(3).innertext
Case "EXPOSURE":
'Move Exposure
.Cells(RowCount, ExposureCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, ExposureCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, ExposureCol + 2) = _
TableRow.Cells(3).innertext
Case "INHALATION":
'Move Inhalation Exposure
.Cells(RowCount, InhalCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, InhalCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, InhalCol + 2) = _
TableRow.Cells(3).innertext
Case "SKIN":
'Move Skin Exposure
.Cells(RowCount, SkinCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, SkinCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, SkinCol + 2) = _
TableRow.Cells(3).innertext
Case "EYES":
'Move Eyes Exposure
.Cells(RowCount, EyesCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, EyesCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, EyesCol + 2) = _
TableRow.Cells(3).innertext
Case "INGESTION":
'Move Ingestion Exposure
.Cells(RowCount, IngestCol) = _
TableRow.Cells(1).innertext
.Cells(RowCount, IngestCol + 1) = _
TableRow.Cells(2).innertext
.Cells(RowCount, IngestCol + 2) = _
TableRow.Cells(3).innertext
End Select
Next TableRow
Set SpillRow = cTables.Item(7).Rows(1)
'Move SPILLAGE DISPOSAL
.Cells(RowCount, SpillDisposCol) = _
SpillRow.Cells(0).innertext
'Move PACKAGING & LABELING
'Combine columns 2 & 3 together
If SpillRow.Cells.Length = 2 Then
Pack = SpillRow.Cells(1).innertext
Else
Pack = SpillRow.Cells(1).innertext & _
LF & SpillRow.Cells(2).innertext
End If
Pack = Replace(Pack, CR, "")
.Cells(RowCount, PackCol) = Pack
Set EmergencyRow = cTables.Item(8).Rows(1)
'Move Emergency Response
Emergency = EmergencyRow.Cells(0).innertext
Emergency = Replace(Emergency, CR, "")
.Cells(RowCount, EmergRespCol) = Emergency
'Move Safe Storage
.Cells(RowCount, SafeStorCol) = _
EmergencyRow.Cells(1).innertext
Set ImportantRow = cTables.Item(9).Rows(1)
Cols = ImportantRow.Cells.Length
For ColCount = 0 To (Cols - 1)
Set cell = ImportantRow.Cells(ColCount)
Set B = cell.getElementsByTagname("B")
Set P = cell.getElementsByTagname("P")
First = True
Done = False
Do
If First = True Then
Title = B(0).innertext
Title = Trim(Replace(Title, ":", ""))
Title = Trim(Replace(Title, LF, ""))
Detail = B(0).nextsibling.nextsibling.data
PCount = 0
First = False
Else
Item = P(PCount).innertext
If Item <> "" Then
Title = Left(Item, InStr(Item, CR) - 1)
Title = Trim(Replace(Title, ":", ""))
Title = Trim(Replace(Title, LF, ""))
Detail = Mid(Item, InStr(Item, CR) + 1)
If Left(Detail, 1) = LF Then
Detail = Mid(Detail, 2)
End If
Else
Title = ""
End If
PCount = PCount + 1
End If
If Title <> "" Then
Select Case UCase(Title)
Case "PHYSICAL STATE; APPEARANCE":
'Move Physical State; Appearance
.Cells(RowCount, PhysStateCol) = Detail
Case "ROUTES OF EXPOSURE":
'Move Routes of exposure
.Cells(RowCount, RoutesCol) = Detail
Case "CHEMICAL DANGERS":
'Move Chemical dangers
.Cells(RowCount, ChemDangCol) = Detail
Case "INHALATION RISK":
'Move Inhalation risk
.Cells(RowCount, InhalRiskCol) = Detail
Case "OCCUPATIONAL EXPOSURE LIMITS":
'Move Occupational exposure limits
.Cells(RowCount, OccupatCol) = Detail
Case "EFFECTS OF SHORT-TERM EXPOSURE":
'Move Effects of short-term exposure
.Cells(RowCount, ShortTermCol) = Detail
Case "EFFECTS OF LONG-TERM OR REPEATED EXPOSURE":
'Move Effects of long-term or repeated exposure
.Cells(RowCount, LongTermCol) = Detail
Case "PHYSICAL DANGERS":
'Move Physical Danagers
.Cells(RowCount, PhysDangerCol) = Detail
Case Else:
MsgBox ("Unknown Detail : " & _
Title & " : Stop")
Stop
End Select
End If
If PCount >= P.Length Then
Done = True
End If
Loop While Done = False
Next ColCount
'Move Physical Properties
Physical = cTables.Item(10).Rows(1).Cells(0).innertext
Physical = Replace(Physical, CR, "")
.Cells(RowCount, PhysicPropCol) = Physical
'Move ENVIRONMENTAL DATA
Environmental = cTables.Item(10).Rows(1).Cells(1).innertext
Environmental = Replace(Environmental, CR, "")
.Cells(RowCount, EnvironCol) = Environmental
'Move NOTES
Notes = cTables.Item(11).Rows(1).innertext
Notes = Replace(Notes, CR, "")
.Cells(RowCount, NoteCol) = Notes
End With
End Sub