L
Liz
Hi,
I have written a macro designed to scrub an organization's name so I can
better match/compare its value. However, this macro takes 5 to 10 minutes to
process on over 1000 records (where the org name is just in one column).
I am using the Select Case method to pull out the last word to check and
remove certain common words. This seems to take the longest. Is there a
better way to write the macro?
Macro is below (Excel 2007):
Sub Scrub_Org_Name()
Dim sName1 As String
Dim sName2 As String
Dim iName2 As Integer
Dim sLessSpaces As String
Dim iLessSpaces As Integer
Dim sLastword As String
Dim iLast As Integer
Dim sLast As String
Dim iLastRow As Long
Dim iRowCount As Long
Dim sNameCol As String
Dim Result As String
Dim iReady As Integer
iLastRow = Cells(Rows.Count, "A").End(xlUp).Row
iRowCount = 2
sNameCol = InputBox("Enter Column Letter for Organization Name.",
"Organization Name Column", "Q")
'copy column for Organization Name to Column A
Columns(sNameCol).Select
Selection.Copy
Columns("A:A").Select
Selection.Insert Shift:=xlToRight
Range("A1").Select
ActiveCell.FormulaR1C1 = "Initial Scrub"
'Insert ScrubName Column
Columns("A:A").Select
Selection.Insert Shift:=xlToRight
Range("A1").Select
ActiveCell.FormulaR1C1 = "Scrubbed Org Name" & Chr(10) & "(Keywords
removed)"
'PERFORM INITIAL SCRUB
'__________________________________________________________________
'Find all periods and commas and "the"'s and other words that can be removed
entirely from names and remove from column B
Columns("B:B").Select
Selection.Replace What:=".", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:=",", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="The ", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Incorporated", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="GMBH", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Corporation", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Limited", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="LTD", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="LLC", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="LLP", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Industries", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Univ ", Replacement:="University ",
LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="-", Replacement:=" ", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
'PERFORM SECONDARY SCRUB (this Do/Loop method takes much longer than inital
scrub,
' therefore, if you can perform a scrub against the
whole
' name, without worrying about multple instances,
then
' perform the desired scrub using the initial scrub
' method of Search/Replace)
'____________________________________________________________________
'Run through all rows of data and trim leading and trailing spaces, plus
scrub out key words.
Do While iLastRow >= iRowCount
sName1 = Range("B" & iRowCount).Value
sName2 = UCase(Trim(sName1))
iName2 = Len(sName2)
sLessSpaces = Application.WorksheetFunction.Substitute(sName2, " ", "")
iLessSpaces = Len(sLessSpaces)
'Test for multiple words in company name
If (iName2 - iLessSpaces) = 0 Then
'If none found then then leave as is
Result = sName2
Else
'Converts the last space in a company name to a "^".
'The instance of the last space is defined by the the diff between iName2
and iLessSpaces
'(which is the length of name with spaces, less the length of name without
spaces)
sLast = Application.WorksheetFunction.Substitute(sName2, " ", "^",
iName2 - iLessSpaces)
iLast = Application.WorksheetFunction.Find("^", sLast) + 1
'Lastword is equal to the word starting at the position of the "^" above +1
sLastword = UCase(Mid(sLast, iLast, 256))
'Search for each of these types of last words below and delete them off of
the trimmed name, also make the result uppercase.
'The amount of positions to delete at the end is equal to the length of
characters plus 1 for the space before the last word.
'This approach will only delete the word if it is the last word in the name,
unlike the search and replace all approach above created by the initial scrub.
Select Case (sLastword)
Case "INC"
Result = Left(sName2, iName2 - 4)
Case "USA"
Result = Left(sName2, iName2 - 4)
Case "INTERNATIONAL"
Result = Left(sName2, iName2 - 14)
Case "PC"
Result = Left(sName2, iName2 - 3)
Case "APPLIANCES"
Result = Left(sName2, iName2 - 12)
Case "SUPPLIES"
Result = Left(sName2, iName2 - 9)
Case "SUPPLY"
Result = Left(sName2, iName2 - 7)
Case "COMPANY"
Result = Left(sName2, iName2 - 8)
Case "CORP"
Result = Left(sName2, iName2 - 5)
Case "CO"
Result = Left(sName2, iName2 - 3)
Case "IGT"
Result = Left(sName2, iName2 - 4)
Case "SERVICES"
Result = Left(sName2, iName2 - 10)
Case "SERVICES"
Result = Left(sName2, iName2 - 9)
Case "TECHNOLOGIES"
Result = Left(sName2, iName2 - 13)
Case "IND"
Result = Left(sName2, iName2 - 4)
Case Else
Result = sName2
End Select
End If
'Paste scrubbed results to Column A
Range("A" & iRowCount) = Result
'set next row to be evaluated
iRowCount = iRowCount + 1
Loop
'Delete the Initial Scrub Column and only leave the results for the
Secondary Scrub
Columns("B:B").Select
Selection.Delete Shift:=xlToLeft
End Sub
I have written a macro designed to scrub an organization's name so I can
better match/compare its value. However, this macro takes 5 to 10 minutes to
process on over 1000 records (where the org name is just in one column).
I am using the Select Case method to pull out the last word to check and
remove certain common words. This seems to take the longest. Is there a
better way to write the macro?
Macro is below (Excel 2007):
Sub Scrub_Org_Name()
Dim sName1 As String
Dim sName2 As String
Dim iName2 As Integer
Dim sLessSpaces As String
Dim iLessSpaces As Integer
Dim sLastword As String
Dim iLast As Integer
Dim sLast As String
Dim iLastRow As Long
Dim iRowCount As Long
Dim sNameCol As String
Dim Result As String
Dim iReady As Integer
iLastRow = Cells(Rows.Count, "A").End(xlUp).Row
iRowCount = 2
sNameCol = InputBox("Enter Column Letter for Organization Name.",
"Organization Name Column", "Q")
'copy column for Organization Name to Column A
Columns(sNameCol).Select
Selection.Copy
Columns("A:A").Select
Selection.Insert Shift:=xlToRight
Range("A1").Select
ActiveCell.FormulaR1C1 = "Initial Scrub"
'Insert ScrubName Column
Columns("A:A").Select
Selection.Insert Shift:=xlToRight
Range("A1").Select
ActiveCell.FormulaR1C1 = "Scrubbed Org Name" & Chr(10) & "(Keywords
removed)"
'PERFORM INITIAL SCRUB
'__________________________________________________________________
'Find all periods and commas and "the"'s and other words that can be removed
entirely from names and remove from column B
Columns("B:B").Select
Selection.Replace What:=".", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:=",", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="The ", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Incorporated", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="GMBH", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Corporation", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Limited", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="LTD", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="LLC", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="LLP", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Industries", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Univ ", Replacement:="University ",
LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="-", Replacement:=" ", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
'PERFORM SECONDARY SCRUB (this Do/Loop method takes much longer than inital
scrub,
' therefore, if you can perform a scrub against the
whole
' name, without worrying about multple instances,
then
' perform the desired scrub using the initial scrub
' method of Search/Replace)
'____________________________________________________________________
'Run through all rows of data and trim leading and trailing spaces, plus
scrub out key words.
Do While iLastRow >= iRowCount
sName1 = Range("B" & iRowCount).Value
sName2 = UCase(Trim(sName1))
iName2 = Len(sName2)
sLessSpaces = Application.WorksheetFunction.Substitute(sName2, " ", "")
iLessSpaces = Len(sLessSpaces)
'Test for multiple words in company name
If (iName2 - iLessSpaces) = 0 Then
'If none found then then leave as is
Result = sName2
Else
'Converts the last space in a company name to a "^".
'The instance of the last space is defined by the the diff between iName2
and iLessSpaces
'(which is the length of name with spaces, less the length of name without
spaces)
sLast = Application.WorksheetFunction.Substitute(sName2, " ", "^",
iName2 - iLessSpaces)
iLast = Application.WorksheetFunction.Find("^", sLast) + 1
'Lastword is equal to the word starting at the position of the "^" above +1
sLastword = UCase(Mid(sLast, iLast, 256))
'Search for each of these types of last words below and delete them off of
the trimmed name, also make the result uppercase.
'The amount of positions to delete at the end is equal to the length of
characters plus 1 for the space before the last word.
'This approach will only delete the word if it is the last word in the name,
unlike the search and replace all approach above created by the initial scrub.
Select Case (sLastword)
Case "INC"
Result = Left(sName2, iName2 - 4)
Case "USA"
Result = Left(sName2, iName2 - 4)
Case "INTERNATIONAL"
Result = Left(sName2, iName2 - 14)
Case "PC"
Result = Left(sName2, iName2 - 3)
Case "APPLIANCES"
Result = Left(sName2, iName2 - 12)
Case "SUPPLIES"
Result = Left(sName2, iName2 - 9)
Case "SUPPLY"
Result = Left(sName2, iName2 - 7)
Case "COMPANY"
Result = Left(sName2, iName2 - 8)
Case "CORP"
Result = Left(sName2, iName2 - 5)
Case "CO"
Result = Left(sName2, iName2 - 3)
Case "IGT"
Result = Left(sName2, iName2 - 4)
Case "SERVICES"
Result = Left(sName2, iName2 - 10)
Case "SERVICES"
Result = Left(sName2, iName2 - 9)
Case "TECHNOLOGIES"
Result = Left(sName2, iName2 - 13)
Case "IND"
Result = Left(sName2, iName2 - 4)
Case Else
Result = sName2
End Select
End If
'Paste scrubbed results to Column A
Range("A" & iRowCount) = Result
'set next row to be evaluated
iRowCount = iRowCount + 1
Loop
'Delete the Initial Scrub Column and only leave the results for the
Secondary Scrub
Columns("B:B").Select
Selection.Delete Shift:=xlToLeft
End Sub