Hey guys - again
My challenge today is as follow:
Sheet = "TEXT"
In column A i have several celles with texts/comments - words/
sentences
I want to split all the texts into singel words and make a word-list
of all used words of this column A - and copy the words into a new
sheet (sheet = "WORDS") column A as my complete word-list.
I have no idea of how to do this.
After producing this word-list I will remove the doublettes and sort
it - this of course I CAN do bmy own "skills".
Will anyone please help me?
Kindly regards
Snoopy
You can do this with a macro.
To enter this Macro (Sub), <alt-F11> opens the Visual Basic Editor.
Ensure your project is highlighted in the Project Explorer window.
Then, from the top menu, select Insert/Module and
paste the code below into the window that opens.
To use this Macro (Sub), <alt-F8> opens the macro dialog box. Select the macro
by name, and <RUN>.
Note that the destination and source are hard-coded. This can, and should, be
changed to meet your specific requirements.
Also, the output can be sorted alphabetically by word; or numerically by word
count. See the included comments for how to alter that.
There are significantly faster sorts available, which may be appropriate
depending on the amount of data you have.
But this should give you a start.
=======================================================
Option Explicit
Option Compare Text
Sub UniqueWordList()
Dim rSrc As Range, rDest As Range, c As Range
Dim cWordList As Collection
Dim res() As Variant
Dim w() As String
Dim i As Long
Set cWordList = New Collection
Set rSrc = Selection
Set rDest = Range("C1")
rDest.EntireColumn.NumberFormat = "@"
For Each c In rSrc
w = Split(c.Value)
For i = 0 To UBound(w)
w(i) = StripWord(w(i))
If Not w(i) = "" Then
On Error Resume Next
cWordList.Add Item:=w(i), Key:=w(i)
On Error GoTo 0
End If
Next i
Next c
'transfer words to results array
ReDim res(1 To cWordList.Count, 0 To 1)
For i = 1 To cWordList.Count
res(i, 0) = cWordList(i)
Next i
'get counts
For i = LBound(res) To UBound(res)
For Each c In rSrc
res(i, 1) = res(i, 1) + CountWord(c.Value, res(i, 0))
Next c
Next i
'sort alpha: d=0; sort numeric d=1
'there are various ways of sorting
BubbleSort res, 0
rDest.CurrentRegion.Clear
For i = LBound(res) To UBound(res)
rDest.Offset(i, 0).NumberFormat = "@"
rDest.Offset(i, 0).Value = res(i, 0)
'For just lowercase output, use:
'rDest.Offset(i, 0).Value = LCase(res(i, 0))
rDest.Offset(i, 1).Value = res(i, 1)
Next i
End Sub
Private Function StripWord(s As String) As String
Dim re As Object, mc As Object, M As Object
Set re = CreateObject("vbscript.regexp")
re.IgnoreCase = True
re.Global = True
re.Pattern = "[-\w]{2,}"
If re.Test(s) = True Then
Set mc = re.Execute(s)
StripWord = mc(0).Value
End If
Set re = Nothing
End Function
Private Function CountWord(ByVal s As String, sPat) As Long
Dim re As Object, mc As Object
Set re = CreateObject("vbscript.regexp")
re.Global = True
re.IgnoreCase = True
re.Pattern = "\b" & sPat & "\b"
Set mc = re.Execute(s)
CountWord = mc.Count
End Function
Private Sub BubbleSort(TempArray As Variant, D As Long) 'd is 0 based dimension
Dim temp(0, 1) As Variant
Dim i As Integer
Dim NoExchanges As Integer
' Loop until no more "exchanges" are made.
Do
NoExchanges = True
' Loop through each element in the array.
For i = LBound(TempArray) To UBound(TempArray) - 1
' If the element is less than the element
' following it, exchange the two elements.
' change "<" to ">" to sort ascending
If TempArray(i, D) > TempArray(i + 1, D) Then
NoExchanges = False
temp(0, 0) = TempArray(i, 0)
temp(0, 1) = TempArray(i, 1)
TempArray(i, 0) = TempArray(i + 1, 0)
TempArray(i, 1) = TempArray(i + 1, 1)
TempArray(i + 1, 0) = temp(0, 0)
TempArray(i + 1, 1) = temp(0, 1)
End If
Next i
Loop While Not (NoExchanges)
End Sub
======================================
--ron