G
Geoff K
Hi
The aim is to identify anomalous strings in user data using a Regex pattern
but as a complete newbie I have got bogged down.
My test code is below but as you will see there are some "bad" results.
The criteria - a string can be:
text only = Head, Dept Head
text plus numerics = Class 8-10, Class 8, 6th form, Head KS6
a single dot
If numerics are included then:
a single or double digit must be followed by a hyphen or an ordinal =
1st, 10-12, 15th
there must be no lead zeros = 0 or 01270
ordinals must not be hyphenated = 2nd-3rd, 2nd-4
My Regex doesn't yet include a test for text only and wondered if that had
to be a separate process?
I would be grateful of any help to complete the pattern.
T.I.A.
Geoff
Sub TestPattern()
Dim objRegExp As Object
Dim i As Long
'''some test and result strings
Dim arrString() As Variant
Dim arrResult() As Variant
arrString = Array("String", "0", "class 1-", "8-", "8-10", _
"12", "12-15", "165", "12-165", "0 12", "Class 8", "1st class", _
"2nd class", "3rd class", "6th form", "40th class", "400th class", _
"Head", "Head KS6", ".")
arrResult = Array("Result", "ok", "ok", "ok", "ok", "ok", "ok", _
"ok", "bad", "bad", "bad", "ok", "ok", "ok", "ok", "ok", "ok", _
"bad", "bad", "bad")
'''set pattern
Set objRegExp = CreateObject("Vbscript.RegExp")
With objRegExp
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "\b[1-9](-|st|nd|rd|th)\b|\b[1-9][0-9](|-|st|nd|rd|th)\b"
End With
With Sheets(1)
'''setup test strings
.Range("a:c").ClearContents
.Range("a:c").NumberFormat = "@"
.Cells(1, "B") = "Regex Test"
For i = LBound(arrString) To UBound(arrString)
.Cells(i + 1, "A") = arrString(i)
.Cells(i + 1, "C") = arrResult(i)
Next
'''run test pattern
For i = LBound(arrString) + 1 To UBound(arrString)
If objRegExp.Test(.Cells(i + 1, 1)) Then
.Cells(i + 1, "B") = "valid"
Else
.Cells(i + 1, "B") = "invalid"
End If
Next
End With
End Sub
The aim is to identify anomalous strings in user data using a Regex pattern
but as a complete newbie I have got bogged down.
My test code is below but as you will see there are some "bad" results.
The criteria - a string can be:
text only = Head, Dept Head
text plus numerics = Class 8-10, Class 8, 6th form, Head KS6
a single dot
If numerics are included then:
a single or double digit must be followed by a hyphen or an ordinal =
1st, 10-12, 15th
there must be no lead zeros = 0 or 01270
ordinals must not be hyphenated = 2nd-3rd, 2nd-4
My Regex doesn't yet include a test for text only and wondered if that had
to be a separate process?
I would be grateful of any help to complete the pattern.
T.I.A.
Geoff
Sub TestPattern()
Dim objRegExp As Object
Dim i As Long
'''some test and result strings
Dim arrString() As Variant
Dim arrResult() As Variant
arrString = Array("String", "0", "class 1-", "8-", "8-10", _
"12", "12-15", "165", "12-165", "0 12", "Class 8", "1st class", _
"2nd class", "3rd class", "6th form", "40th class", "400th class", _
"Head", "Head KS6", ".")
arrResult = Array("Result", "ok", "ok", "ok", "ok", "ok", "ok", _
"ok", "bad", "bad", "bad", "ok", "ok", "ok", "ok", "ok", "ok", _
"bad", "bad", "bad")
'''set pattern
Set objRegExp = CreateObject("Vbscript.RegExp")
With objRegExp
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "\b[1-9](-|st|nd|rd|th)\b|\b[1-9][0-9](|-|st|nd|rd|th)\b"
End With
With Sheets(1)
'''setup test strings
.Range("a:c").ClearContents
.Range("a:c").NumberFormat = "@"
.Cells(1, "B") = "Regex Test"
For i = LBound(arrString) To UBound(arrString)
.Cells(i + 1, "A") = arrString(i)
.Cells(i + 1, "C") = arrResult(i)
Next
'''run test pattern
For i = LBound(arrString) + 1 To UBound(arrString)
If objRegExp.Test(.Cells(i + 1, 1)) Then
.Cells(i + 1, "B") = "valid"
Else
.Cells(i + 1, "B") = "invalid"
End If
Next
End With
End Sub