D
David Turner
I have some code which attempts to tidy up Word documents converted from or
pasted from PDF so that the text wraps correctly without hard returns at the
ends of lines.
It basically does a wildcard search for lines not ending in stop
punctuation (.,:;?!) and beginning with a lower case letter on the next line.
It should thus find lines like:
The quick red fox[hard/soft return]
jumped over the lazy brown cow
then remove the hard/soft return and add a space to wrap the text onto one
line.
I've also tried to arrange for it to ignore any bulleted lines
(ListParagraphs) lines starting with a lower case letter.
The code seems to work OK in the body of the document but fails for some
reason in text boxes. The text is selected but then the selection moves
outside the text box.
Can anyone see what's going wrong?
Any help would be greatly appreciated.
Thanks.
Sub PDFTidy()
Dim rDoc As Range
Dim rTmp As Range
Dim rShp As Range
Dim pos1 As Long
Dim pos2 As Long
Dim sTmp As String
Dim pText As String
Dim iShpCnt As Long
pText = "([!^13^l\.\:\;\!\?])[^13^l]([a-z])"
Set rDoc = ActiveDocument.Range
Set rTmp = Selection.Range
With rDoc.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = pText
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = True
.MatchWholeWord = False
.MatchWildcards = True
.MatchSoundsLike = False
.MatchAllWordForms = False
While .Execute
pos1 = rDoc.Start
rTmp.Start = pos1
pos2 = rDoc.End
rTmp.End = pos2
rTmp.Select
Selection.Collapse Direction:=wdCollapseEnd
If Selection.Paragraphs(1).Range.ListParagraphs.Count = 0 Then
Selection.Start = pos1
rTmp.Select 'for testing [F8]
sTmp = rTmp.Text
sTmp = Replace(sTmp, vbCr, " ")
rTmp.Text = sTmp
End If
rDoc.Collapse Direction:=wdCollapseEnd
rDoc.End = ActiveDocument.Range.End
Wend
End With
For iShpCnt = ActiveDocument.Shapes.Count To 1 Step -1
With ActiveDocument.Shapes(iShpCnt)
If .Type = msoTextBox Then
If ActiveDocument.Shapes(iShpCnt).TextFrame.HasText = True
Then
Set rShp =
ActiveDocument.Shapes(iShpCnt).TextFrame.TextRange
MsgBox
ActiveDocument.Shapes(iShpCnt).TextFrame.TextRange.Text
'rShp.Select
With rShp.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = pText
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = True
.MatchWholeWord = False
.MatchWildcards = True
.MatchSoundsLike = False
.MatchAllWordForms = False
While .Execute
pos1 = rShp.Start
rTmp.Start = pos1
pos2 = rShp.End
rTmp.End = pos2
rTmp.Select ; Code fails here: the selection is
outside the text box
Selection.Collapse Direction:=wdCollapseEnd
If
Selection.Paragraphs(1).Range.ListParagraphs.Count = 0 Then
Selection.Start = pos1
rTmp.Select 'for testing [F8]
sTmp = rTmp.Text
sTmp = Replace(sTmp, vbCr, " ")
rTmp.Text = sTmp
End If
rShp.Collapse Direction:=wdCollapseEnd
Wend
End With
End If
End If
End With
Next iShpCnt
End Sub
pasted from PDF so that the text wraps correctly without hard returns at the
ends of lines.
It basically does a wildcard search for lines not ending in stop
punctuation (.,:;?!) and beginning with a lower case letter on the next line.
It should thus find lines like:
The quick red fox[hard/soft return]
jumped over the lazy brown cow
then remove the hard/soft return and add a space to wrap the text onto one
line.
I've also tried to arrange for it to ignore any bulleted lines
(ListParagraphs) lines starting with a lower case letter.
The code seems to work OK in the body of the document but fails for some
reason in text boxes. The text is selected but then the selection moves
outside the text box.
Can anyone see what's going wrong?
Any help would be greatly appreciated.
Thanks.
Sub PDFTidy()
Dim rDoc As Range
Dim rTmp As Range
Dim rShp As Range
Dim pos1 As Long
Dim pos2 As Long
Dim sTmp As String
Dim pText As String
Dim iShpCnt As Long
pText = "([!^13^l\.\:\;\!\?])[^13^l]([a-z])"
Set rDoc = ActiveDocument.Range
Set rTmp = Selection.Range
With rDoc.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = pText
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = True
.MatchWholeWord = False
.MatchWildcards = True
.MatchSoundsLike = False
.MatchAllWordForms = False
While .Execute
pos1 = rDoc.Start
rTmp.Start = pos1
pos2 = rDoc.End
rTmp.End = pos2
rTmp.Select
Selection.Collapse Direction:=wdCollapseEnd
If Selection.Paragraphs(1).Range.ListParagraphs.Count = 0 Then
Selection.Start = pos1
rTmp.Select 'for testing [F8]
sTmp = rTmp.Text
sTmp = Replace(sTmp, vbCr, " ")
rTmp.Text = sTmp
End If
rDoc.Collapse Direction:=wdCollapseEnd
rDoc.End = ActiveDocument.Range.End
Wend
End With
For iShpCnt = ActiveDocument.Shapes.Count To 1 Step -1
With ActiveDocument.Shapes(iShpCnt)
If .Type = msoTextBox Then
If ActiveDocument.Shapes(iShpCnt).TextFrame.HasText = True
Then
Set rShp =
ActiveDocument.Shapes(iShpCnt).TextFrame.TextRange
MsgBox
ActiveDocument.Shapes(iShpCnt).TextFrame.TextRange.Text
'rShp.Select
With rShp.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = pText
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = True
.MatchWholeWord = False
.MatchWildcards = True
.MatchSoundsLike = False
.MatchAllWordForms = False
While .Execute
pos1 = rShp.Start
rTmp.Start = pos1
pos2 = rShp.End
rTmp.End = pos2
rTmp.Select ; Code fails here: the selection is
outside the text box
Selection.Collapse Direction:=wdCollapseEnd
If
Selection.Paragraphs(1).Range.ListParagraphs.Count = 0 Then
Selection.Start = pos1
rTmp.Select 'for testing [F8]
sTmp = rTmp.Text
sTmp = Replace(sTmp, vbCr, " ")
rTmp.Text = sTmp
End If
rShp.Collapse Direction:=wdCollapseEnd
Wend
End With
End If
End If
End With
Next iShpCnt
End Sub