Reputation: 337
I am querying a server for some information (returned as JSON), inter alia a list of names, and one of those names returns containing weird characters:
Ðемања Матејић
This is how it should be:
Немања Матејић
I have tried the following:
json_decode
and directly take the name and insert it into my UTF8-encoded MySQL database.... to no avail - the value in the database still remains flawed.
How to solve this?
Edit:
Running SHOW VARIABLES LIKE '%character%'
returns
character_set_client utf8
character_set_connection utf8
character_set_database utf8
character_set_filesystem binary
character_set_results utf8
character_set_server latin1
character_set_system utf8
character_sets_dir /data/mysql/fuentez/share/mysql/charsets/
Is it possibly because character_set_server is latin1?
Upvotes: 0
Views: 525
Reputation: 325
It may be handy to have a JSON encoder that copes with Russian symbols:
A custom json_encode function:
<?php
/**
 * Encode a PHP value as JSON text, writing every non-ASCII character of the
 * Basic Multilingual Plane (U+0080..U+FFFF) as a \uXXXX escape.
 *
 * Strings are treated as UTF-8 regardless of mb_internal_encoding().
 * Characters above U+FFFF are passed through unescaped.
 *
 * @param mixed $value Integer, float, string, bool, null, array or object.
 *                     Arrays whose keys are exactly 0..n-1 in order become
 *                     JSON arrays; every other array, and every object,
 *                     becomes a JSON object.
 * @return string JSON text, or '' for unsupported types such as resources.
 */
function json_encode_my($value)
{
    if (is_int($value)) {
        return (string)$value;
    }
    if (is_float($value)) {
        // Some locales render the decimal separator as a comma; JSON needs a dot.
        return str_replace(',', '.', (string)$value);
    }
    if (is_null($value)) {
        return 'null';
    }
    if (is_bool($value)) {
        return $value ? 'true' : 'false';
    }

    if (is_string($value)) {
        // strtr() substitutes all pairs in a single pass, so a backslash
        // produced by one escape can never be re-escaped by another.
        // "\x08" is the backspace character: PHP has no "\b" string escape.
        $value = strtr($value, array(
            '\\'   => '\\\\',
            '/'    => '\/',
            '"'    => '\"',
            "\r"   => '\r',
            "\n"   => '\n',
            "\x08" => '\b',
            "\f"   => '\f',
            "\t"   => '\t',
        ));

        // Map U+0080..U+FFFF to numeric entities so the code point can be read back.
        $convmap = array(0x80, 0xFFFF, 0, 0xFFFF);
        $result = '';
        $length = mb_strlen($value, 'UTF-8');
        for ($i = 0; $i < $length; $i++) {
            $mb_char = mb_substr($value, $i, 1, 'UTF-8');

            // JSON forbids raw control characters inside strings; the ones
            // with short escapes were handled above, the rest get \u00XX.
            if (strlen($mb_char) === 1 && ord($mb_char) < 0x20) {
                $result .= sprintf('\u%04x', ord($mb_char));
                continue;
            }

            $entity = mb_encode_numericentity($mb_char, $convmap, 'UTF-8');
            if (preg_match('/\A&#(\d+);\z/', $entity, $match)) {
                $result .= sprintf('\u%04x', (int)$match[1]);
            } else {
                $result .= $mb_char;
            }
        }
        return '"' . $result . '"';
    }

    if (is_array($value)) {
        // A list is an array whose keys are exactly 0, 1, 2, ... in order.
        $with_keys = false;
        $expected = 0;
        foreach (array_keys($value) as $key) {
            if ($key !== $expected++) {
                $with_keys = true;
                break;
            }
        }
    } elseif (is_object($value)) {
        $with_keys = true;
    } else {
        return '';
    }

    // Recurse with this function (not the built-in json_encode) so nested
    // values are encoded by exactly the same rules as top-level ones.
    $result = array();
    if ($with_keys) {
        foreach ($value as $key => $v) {
            $result[] = json_encode_my((string)$key) . ':' . json_encode_my($v);
        }
        return '{' . implode(',', $result) . '}';
    }

    foreach ($value as $v) {
        $result[] = json_encode_my($v);
    }
    return '[' . implode(',', $result) . ']';
}
?>
Upvotes: 0
Reputation: 839234
You stored the data in the database as latin1 instead of UTF-8.
For example, the two-character string Ðµ
encoded as latin1 becomes 0xd0 0xb5
which is the UTF-8 encoding of the Cyrillic letter е
.
Upvotes: 1