Reputation: 501
I have documents written in old Kazakh font (Kazakhstan), using win98. Nowadays we are using Times New Roman, but this font shows strange unicode characters. I can use substitution (Ctrl + H) to change all symbols to Times New Roman encoding, but we have 42 (84 in both cases) letters.
For example I have all symbols from old font in first line, and all symbols from new font at the second line in the same order.
Can someone write an example script that will read this two lines char by char, making something like a dictionary in Java then do a global substitution.
Update
Thanks Roman Plischke!
I wrote a macro that recursively applies to all *.doc files in some folder.
Sub Substitution()
'
' Substitution of the chars from font Times/Kazakh
' to Times New Roman
' Chars to substitute are 176-255 bytes, 73 and 105 byte
Dim sTab As String
sTab = "£ª½¥¡¯Ž¼º¾´¢¿žÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
Selection.Find.Font.Shadow = False
Selection.Find.Replacement.Font.Shadow = False
For i = 1 To Len(sTab)
With Selection.Find
.Text = ChrW(i + 175)
.Replacement.Text = Mid(sTab, i, 1)
.Forward = True
.Wrap = wdFindContinue
.Format = True
.MatchCase = True
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
Selection.Find.Text = Selection.Find.Text
Next i
Selection.Find.Execute Replace:=wdReplaceAll
With Selection.Find
.Text = ChrW(105)
.Replacement.Text = "³"
.Forward = True
.Wrap = wdFindContinue
.Format = True
.MatchCase = True
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
Selection.Find.Text = Selection.Find.Text
With Selection.Find
.Text = ChrW(73)
.Replacement.Text = "²"
.Forward = True
.Wrap = wdFindContinue
.Format = True
.MatchCase = True
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
Selection.Find.Text = Selection.Find.Text
' kazakh language
Selection.WholeStory
Selection.LanguageID = WdLanguageID.wdKazakh
Application.CheckLanguage = False
Selection.Collapse Direction:=wdCollapseStart
End Sub
' Function that Call Substitution() for all documents
' in folder vDirectory
Sub LoopDirectory()
Dim vDirectory As String
Dim oDoc As Document
vDirectory = "E:\soft\Dedushka\not\"
vFile = Dir(vDirectory & "*.doc")
Do While vFile <> ""
Set oDoc = Documents.Open(FileName:=vDirectory & vFile)
Debug.Print ActiveDocument.Name + " Started"
Call Zamena
Debug.Print ActiveDocument.Name + " Finish"
oDoc.Close SaveChanges:=True
vFile = Dir
Loop
End Sub
Upvotes: 0
Views: 2747
Reputation: 1072
I used for similar conversions this subroutine. The "heart" of code is a definition of string sTab. This string contains all charactesr for code 127 and above. Fill this string by new characters one by one.
If you have a code table of the old Kazakh coding, it is very simple: type in the VBA editor all characters starting 127 char. VBA editor works in Unicode, so this works.
If you have not a code table, you have to get an old code of each character (try select this character and press Alt+X) and write it manually in the string at right position.
In both cases, for unused (or unusually) character you may fill a space or other character.
The rest of code replaces each character with code above 127 for new character from sTab.
Sub Convert()
Dim sTab As String
Dim sKod As String
Dim i As Long
Dim ch As String
'new chars 127-255:
'note: for each character above 127 fill in this table unicode character
sTab = "ÄÃãÉ¥ÖÜá¹ÈäèÆæéŸÏí“”ëEóeôöoúÌìü†°Ê£§•¶ß®©™ê¨‡gIlÎ__îK__³Ll¼¾ÅåNnѬVñÒ_«»… òÕOõO–—“”‘’÷_OÀàØ‹›øRrŠ‚„šŒœÁÍŽžUÓÔuÙÚùÛûUuÝýk¯£¿G¡"
'clear all shadow - we use this attrib as flag for changed characters
Selection.Find.ClearFormatting
Selection.Find.Font.Shadow = True
Selection.Find.replacement.ClearFormatting
Selection.Find.replacement.Font.Shadow = False
With Selection.Find
.Text = ""
.replacement.Text = ""
.Forward = True
.Wrap = wdFindContinue
.Format = True
.MatchCase = True
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
'changing characters by codetable
Selection.Find.Font.Shadow = False
Selection.Find.replacement.Font.Shadow = True
For i = 1 To Len(sTab)
With Selection.Find
ch = Chr(126 + i)
If ch = "^" Then ch = "^^"
.Text = ch
ch = Mid(sTab, i, 1)
If ch = "^" Then ch = "^^"
.replacement.Text = ch
.Forward = True
.Wrap = wdFindContinue
.Format = True
.MatchCase = True
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
Selection.Find.Text = Selection.Find.Text
Next i
'clear shadows
Selection.Find.Font.Shadow = True
Selection.Find.replacement.Font.Shadow = False
With Selection.Find
.Text = ""
.replacement.Text = ""
.Forward = True
.Wrap = wdFindContinue
.Format = True
.MatchCase = True
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
' kazakh language
Selection.WholeStory
Selection.LanguageID = WdLanguageID.wdKazakh
Application.CheckLanguage = False
Selection.Collapse Direction:=wdCollapseStart
End Sub
Upvotes: 1