detect and delete duplicate words

H

Helmut Weber

Hi Shakeel,

word is a fuzzy concept of fuzzy natural language...
duplicates in a format
is pretty difficult, as you would have to specify
all format properties
how do i detect and delete same words

that is the easiest part:

Sub Test5612()
Dim rWrd1 As Range
Dim rWrd2 As Range
For Each rWrd1 In ActiveDocument.Range.Words
For Each rWrd2 In ActiveDocument.Range.Words
If rWrd1.Text = rWrd2.Text And _
rWrd1.Start <> rWrd2.Start Then
rWrd2.Delete
End If
Next
Next
End Sub


--

Greetings from Bavaria, Germany

Helmut Weber, MVP WordVBA

Vista Small Business, Office XP
 
R

Russ

Shakeel and Helmut,

As an exercise, I came up with another method to delete duplicate words.
It uses a collection to store each unique word as it comes upon the word
then it deletes all instances of that unique word to speed up the 'for each'
loop.

Sub Delete_Duplicate_Words()
Dim aRange As Word.Range
Dim aWordCollection As Collection
Dim aWord As Variant
Dim aString As String

Set aWordCollection = New Collection
Set aRange = ActiveDocument.Content
Application.ScreenUpdating = False
'Add words to collection
For Each aWord In aRange.words
If aWord <> " " And aWord <> vbCr Then
aString = Trim(aWord)
aWordCollection.Add aString
With ActiveDocument.Content.Find
.Text = aWord
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
End With
End If
Next aWord
ActiveDocument.Content.Delete
'Type words back into document
For Each aWord In aWordCollection
With Selection
.TypeText Text:=aWord
.TypeParagraph
End With
Next aWord
'Clean up document
With ActiveDocument.Content.Find
.Text = "[!a-zA-Z0-9\n ]@\n"
.MatchWildcards = True
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "\n \n"
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
.Text = "\n{2,}"
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
End With
Do While ActiveDocument.Paragraphs.Last.Range.Characters.Count = 1
ActiveDocument.Paragraphs.Last.Range.Delete
Loop
Do While ActiveDocument.Paragraphs.First.Range.Characters.Count = 1
ActiveDocument.Paragraphs.First.Range.Delete
Loop

Application.ScreenUpdating = True
Set aWord = Nothing
Set aWordCollection = Nothing
End Sub
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Top