Reputation: 33
I'm struggling with the problem of translating the Mazovia encoding (an old Polish encoding) to Unicode. I've used an implementation of a Mazovia encoding class found here: a link. I use this class to translate values read from DBF files with this method:
/// <summary>
/// Re-interprets <paramref name="text"/> as Mazovia-encoded data and returns the decoded string.
/// </summary>
/// <param name="text">Text whose characters are turned back into bytes using code page 852.</param>
/// <returns>The bytes of <paramref name="text"/> decoded with the custom Mazovia encoding.</returns>
/// <remarks>
/// NOTE(review): this presumably relies on the data provider having decoded the raw DBF bytes
/// as code page 852 - confirm on the machines where "?" shows up in the result.
/// </remarks>
public static string AsUnicode(string text)
{
    System.Text.Encoding codePage852 = System.Text.Encoding.GetEncoding(852);
    System.Text.Encoding utf16 = System.Text.Encoding.Unicode;

    // Step 1: recover the single-byte representation of the incoming text.
    byte[] rawBytes = codePage852.GetBytes(text);

    // Step 2: treat those bytes as Mazovia and transcode them to UTF-16 bytes.
    byte[] utf16Bytes = System.Text.Encoding.Convert(new Mazovia(), utf16, rawBytes);

    // Step 3: materialise the UTF-16 bytes as a string.
    return utf16.GetString(utf16Bytes);
}
Before that I used the method below, but somehow it stopped working after a few weeks:
public static string AsLatin2(string text) {
string result = System.Text.Encoding.GetEncoding(1250).GetString(
System.Text.Encoding.Convert(new Mazovia(),
System.Text.Encoding.GetEncoding(1250),
new Mazovia().GetBytes(text)));
return result;
}
The new solution worked fine on my PC and on a couple of others I have tested it on; however, it does not work on all devices. On some of them it still produces characters like "?" instead of "Ł".
The encoding works on both Polish and English Windows 7/8, but on some machines it does not work, and I have no idea what the problem might be. Has anyone had a similar problem with encoding classes - different behaviour on different Windows OS instances?
This is the whole Encoding implementation of Mazovia I'm using:
/// <summary>
/// Encoder that translates Unicode characters into single bytes of the Mazovia code page.
/// </summary>
public sealed class MazoviaEncoder : Encoder
{
    // Byte emitted for characters that have no entry in the translation table (ASCII '?').
    private const byte ReplacementByte = 0x3F;

    // Unicode character -> Mazovia byte. Built once and never modified afterwards,
    // so a single table is shared by every encoder instance.
    private static readonly Dictionary<char, byte> Translator = BuildTranslator();

    internal MazoviaEncoder()
    {
    }

    // Builds the character-to-byte lookup from the ASCII range and the Mazovia upper half.
    private static Dictionary<char, byte> BuildTranslator()
    {
        Dictionary<char, byte> map = new Dictionary<char, byte>(256);
        for (byte i = 0x00; i < 0x80; i++)
        {
            char c = (char)i;
            map.Add(c, i); // characters below 128 are standard ASCII codes
        }
        for (byte i = 0x00; i < 0x80; i++)
        {
            char c = (char)MazoviaAsUnicode.Content[i];
            map.Add(c, (byte)(i + 0x80)); // characters above 127 follow Mazovia - the translation table is required
        }
        return map;
    }

    /// <summary>
    /// Returns the number of bytes produced for <paramref name="count"/> characters.
    /// </summary>
    /// <returns>Always <paramref name="count"/>; the other arguments are not inspected.</returns>
    public override int GetByteCount(char[] chars, int index, int count, bool flush)
    {
        // Mazovia is a single-byte code page, so the number of bytes for a text of length "count"
        // equals that length (one two-byte Unicode character becomes one Mazovia byte).
        return count;
    }

    /// <summary>
    /// Encodes <paramref name="charCount"/> characters starting at <paramref name="charIndex"/>
    /// into <paramref name="bytes"/> starting at <paramref name="byteIndex"/>.
    /// </summary>
    /// <returns>The number of bytes written, which equals <paramref name="charCount"/>.</returns>
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool flush)
    {
        byte b;
        for (int i = 0; i < charCount; i++)
        {
            // Characters without a Mazovia equivalent are written as '?'.
            if (!Translator.TryGetValue(chars[charIndex + i], out b))
            {
                b = ReplacementByte;
            }
            bytes[byteIndex + i] = b;
        }
        return charCount;
    }
}
/// <summary>
/// Translation table for the upper half of the Mazovia code page.
/// Entry <c>i</c> is the Unicode code point that the encoder and decoder in this file
/// associate with Mazovia byte <c>0x80 + i</c>.
/// </summary>
internal static class MazoviaAsUnicode
{
// 128 entries, eight per row; the trailing comment gives the Mazovia byte range of each row.
public static readonly short[] Content = {
0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x0105, 0x00E7, // 0x80-0x87
0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x0107, 0x00C4, 0x0104, // 0x88-0x8F
0x0118, 0x0119, 0x0142, 0x00F4, 0x00F6, 0x0106, 0x00FB, 0x00F9, // 0x90-0x97
0x015A, 0x00D6, 0x00DC, 0x00A2, 0x0141, 0x00A5, 0x015B, 0x0192, // 0x98-0x9F
0x0179, 0x017B, 0x00F3, 0x00D3, 0x0144, 0x0143, 0x017A, 0x017C, // 0xA0-0xA7
0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, // 0xA8-0xAF
0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, // 0xB0-0xB7
0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, // 0xB8-0xBF
0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, // 0xC0-0xC7
0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, // 0xC8-0xCF
0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, // 0xD0-0xD7
0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, // 0xD8-0xDF
0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, // 0xE0-0xE7
0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229, // 0xE8-0xEF
0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, // 0xF0-0xF7
0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0}; // 0xF8-0xFF
}
/// <summary>
/// Decoder that translates single bytes of the Mazovia code page into Unicode characters.
/// </summary>
public sealed class MazoviaDecoder : Decoder
{
    // Mazovia byte -> Unicode character, indexed by the byte value. Built once and never
    // modified afterwards, so a single table is shared by every decoder instance.
    private static readonly char[] Translator = BuildTranslator();

    internal MazoviaDecoder()
    {
    }

    // Builds the 256-entry lookup: identity for ASCII, translation table for the upper half.
    private static char[] BuildTranslator()
    {
        char[] table = new char[256];
        for (int i = 0x00; i < 0x80; i++)
        {
            // characters below 128 are standard ASCII codes
            table[i] = (char)i;
            // characters above 127 follow Mazovia - the translation table is required
            table[i + 0x80] = (char)MazoviaAsUnicode.Content[i];
        }
        return table;
    }

    /// <summary>
    /// Returns the number of characters produced for <paramref name="count"/> bytes.
    /// </summary>
    /// <returns>Always <paramref name="count"/>; the other arguments are not inspected.</returns>
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        // Mazovia is a single-byte code page, so the number of characters for "count" bytes
        // equals that count (one Mazovia byte becomes one two-byte character).
        return count;
    }

    /// <summary>
    /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/>
    /// into <paramref name="chars"/> starting at <paramref name="charIndex"/>.
    /// </summary>
    /// <returns>The number of characters written, which equals <paramref name="byteCount"/>.</returns>
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        for (int i = 0; i < byteCount; i++)
        {
            chars[charIndex + i] = Translator[bytes[byteIndex + i]];
        }
        return byteCount;
    }
}
/// <summary>
/// <see cref="System.Text.Encoding"/> implementation for the Mazovia code page,
/// delegating the actual work to <see cref="MazoviaEncoder"/> and <see cref="MazoviaDecoder"/>.
/// </summary>
public class Mazovia : System.Text.Encoding
{
    // The encoder and decoder only read their translation tables after construction,
    // so one instance of each serves every conversion instead of allocating per call.
    private static readonly MazoviaEncoder SharedEncoder = new MazoviaEncoder();
    private static readonly MazoviaDecoder SharedDecoder = new MazoviaDecoder();

    /// <summary>Creates a new, independent Mazovia encoder.</summary>
    public static MazoviaEncoder GetMazoviaEncoder()
    {
        return new MazoviaEncoder();
    }

    /// <summary>Creates a new, independent Mazovia decoder.</summary>
    public static MazoviaDecoder GetMazoviaDecoder()
    {
        return new MazoviaDecoder();
    }

    /// <summary>Returns the number of bytes needed to encode the given character range.</summary>
    public override int GetByteCount(char[] chars, int index, int count)
    {
        return SharedEncoder.GetByteCount(chars, index, count, false);
    }

    /// <summary>Encodes the given character range into <paramref name="bytes"/>.</summary>
    /// <returns>The number of bytes written.</returns>
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
    {
        return SharedEncoder.GetBytes(chars, charIndex, charCount, bytes, byteIndex, false);
    }

    /// <summary>Returns the number of characters produced by decoding the given byte range.</summary>
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        return SharedDecoder.GetCharCount(bytes, index, count);
    }

    /// <summary>Decodes the given byte range into <paramref name="chars"/>.</summary>
    /// <returns>The number of characters written.</returns>
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        return SharedDecoder.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
    }

    /// <summary>Returns <paramref name="charCount"/>: every character encodes to exactly one byte.</summary>
    public override int GetMaxByteCount(int charCount)
    {
        return charCount;
    }

    /// <summary>Returns <paramref name="byteCount"/>: every byte decodes to exactly one character.</summary>
    public override int GetMaxCharCount(int byteCount)
    {
        return byteCount;
    }
}
/// <summary>
/// Entry points that apply the Mazovia decoding to SQL Server values and to plain strings.
/// </summary>
public class External
{
    /// <summary>
    /// Decodes the non-Unicode bytes of <paramref name="text"/> as Mazovia.
    /// </summary>
    /// <param name="text">SQL string whose non-Unicode byte representation is decoded.</param>
    /// <returns>The decoded text, or <c>SqlString.Null</c> when <paramref name="text"/> is null.</returns>
    [Microsoft.SqlServer.Server.SqlFunction()]
    public static SqlString AsMazovia(SqlString text)
    {
        if (text.IsNull)
        {
            return SqlString.Null;
        }

        byte[] b = text.GetNonUnicodeBytes();
        char[] c = new char[b.Length];
        new Mazovia().GetChars(b, 0, b.Length, c, 0);

        // Build the string straight from the decoded buffer; no per-character copy is needed.
        return new SqlString(new string(c));
    }

    /// <summary>
    /// Re-interprets <paramref name="text"/> as Mazovia-encoded data and returns the decoded string.
    /// </summary>
    /// <param name="text">Text whose characters are turned back into bytes using code page 852.</param>
    /// <returns>The bytes of <paramref name="text"/> decoded with the Mazovia encoding.</returns>
    /// <remarks>
    /// NOTE(review): this presumably relies on the data provider having decoded the raw DBF bytes
    /// as code page 852 - confirm on the machines where "?" shows up in the result.
    /// </remarks>
    public static string AsUnicode(string text)
    {
        byte[] raw = System.Text.Encoding.GetEncoding(852).GetBytes(text);

        // Decoding directly yields the same string as converting to UTF-16 bytes and reading
        // them back: the Mazovia table contains no surrogate code units.
        return new Mazovia().GetString(raw);
    }
}
The code I'm using to load data from the .DBF file looks like this:
// Reads the NAME column of TABLE through the Visual FoxPro OLE DB provider into a DataSet.
string strAccessConn = @"Provider=vfpoledb;uid=admin;Collating Sequence=machine;Data Source=...\latin2;CodePage=852";
string strAccessSelect = "SELECT NAME FROM TABLE";
DataSet myDataSet = new DataSet();
// "using" disposes the connection, command and adapter even when Open or Fill throws,
// so the connection is never left open.
using (OleDbConnection myAccessConn = new OleDbConnection(strAccessConn))
using (OleDbCommand myAccessCommand = new OleDbCommand(strAccessSelect, myAccessConn))
using (OleDbDataAdapter myDataAdapter = new OleDbDataAdapter(myAccessCommand))
{
    myAccessConn.Open();
    myDataAdapter.Fill(myDataSet, "TABLE");
}
// The stray "return null;" that used to sit here made everything below unreachable.
DataTableCollection dta = myDataSet.Tables;
foreach (DataTable dt in dta)
{
...
}
That is one of the many providers I've used, via both ODBC and OLE DB.
Upvotes: 1
Views: 586
Reputation: 8145
I'm not sure what exactly is going wrong in your case.
Although some articles say that it should be possible to read it using the Microsoft OLE DB Provider for Visual FoxPro (e.g. Stack Overflow: How do i read a dbase file and apply different decoding?), fortunately the DBF file format is documented (e.g. Stack Overflow: Code or Tools to Export FoxPro to dBase) and it is possible to be read without any hard-to-configure 3rd party tools.
One reliable executable specification of the file format in the Delphi
language is available in the now-abandoned repository tDBF component for Delphi and BCB - Browse /TDbf Win32_Linux at SourceForge.net.
Another now-abandoned executable specification of the file format in the C#
language (with unknown reliability) is available at https://github.com/ekonbenefits/dotnetdbf.
In particular https://github.com/ekonbenefits/dotnetdbf/blob/master/DotNetDBF/DBFReader.cs#L258 is the place where your custom-decoder might be plugged in or you might plug your Mazovia
class into https://github.com/ekonbenefits/dotnetdbf/blob/master/DotNetDBF/DBFBase.cs#L27
Using your own C# reader under your full control should give you OS-version independence and reliability; besides that, your users will not be forced into some driver version/32-bit/64-bit hell.
Upvotes: 1