convert text back from utf8 to windows 1251 (russian and ukrainian letters)
/**
 * Convert UTF-8 text to Windows-1251 (Russian and Ukrainian letters).
 *
 * ASCII bytes are copied unchanged. Two-byte UTF-8 sequences are decoded to a
 * code point and mapped to the matching Windows-1251 byte; two-byte code points
 * that have no mapping here are replaced by "?". Bytes that are not part of a
 * well-formed two-byte sequence (stray continuation bytes, 3- and 4-byte
 * sequences, truncated leads) are dropped.
 *
 * @param string $fcontents UTF-8 encoded text.
 * @return string Windows-1251 encoded text.
 */
function Utf8ToWin($fcontents) {
    // Letters outside the contiguous U+0410..U+044F block: code point => cp1251 byte.
    static $special = array(
        0x0401 => 168, // Ё
        0x0451 => 184, // ё
        0x0456 => 179, // і
        0x0406 => 178, // І
        0x0454 => 186, // є
        0x0404 => 170, // Є
        0x0457 => 191, // ї
        0x0407 => 175, // Ї
        0x0491 => 180, // ґ
        0x0490 => 165, // Ґ
    );

    $out = '';
    $length = strlen($fcontents);
    for ($c = 0; $c < $length; $c++) {
        $i = ord($fcontents[$c]);
        if ($i <= 127) {
            $out .= $fcontents[$c];
            continue;
        }
        if (($i >> 5) != 6) {
            // Not the lead byte of a two-byte sequence.
            continue;
        }
        if ($c + 1 >= $length || (ord($fcontents[$c + 1]) & 0xC0) != 0x80) {
            // Truncated or malformed sequence: drop the lead and re-examine the next byte.
            continue;
        }
        $c++;
        // All five payload bits of the lead byte are significant.
        $code = (($i & 0x1F) << 6) | (ord($fcontents[$c]) & 0x3F);
        if ($code >= 0x0410 && $code <= 0x044F) {
            // А..я map linearly onto 192..255.
            $out .= chr($code - 848);
        } elseif (isset($special[$code])) {
            $out .= chr($special[$code]);
        } else {
            $out .= '?';
        }
    }
    return $out;
}
convert_cyr_string
(PHP 4, PHP 5)
convert_cyr_string — Convert from one Cyrillic character set to another
Description
string convert_cyr_string
( string $str
, string $from
, string $to
)
Converts from one Cyrillic character set to another.
Parameters
- str
-
The string to be converted.
- from
-
The source Cyrillic character set, as a single character.
- to
-
The target Cyrillic character set, as a single character.
Supported characters are:
- k - koi8-r
- w - windows-1251
- i - iso8859-5
- a - x-cp866
- d - x-cp866
- m - x-mac-cyrillic
Return Values
Returns the converted string.
Notes
Note: This function is binary-safe.
convert_cyr_string
web at uit dot kiev dot ua
13-May-2008 03:51
13-May-2008 03:51
web at uit dot kiev dot ua
24-Apr-2008 04:42
24-Apr-2008 04:42
real convert windows-1251 to utf-8 (ukrainian, russian)
/**
 * Convert Windows-1251 text (Russian and Ukrainian letters) to UTF-8.
 *
 * ASCII bytes are copied unchanged. Bytes above 127 that are not one of the
 * letters handled below are dropped.
 *
 * @param string $s Windows-1251 encoded text.
 * @return string UTF-8 encoded text.
 */
function win2utf($s) {
    // cp1251 byte => UTF-8 sequence for letters outside the 192..255 block.
    static $special = array(
        184 => "\xD1\x91", // ё
        168 => "\xD0\x81", // Ё
        179 => "\xD1\x96", // і
        178 => "\xD0\x86", // І
        191 => "\xD1\x97", // ї
        175 => "\xD0\x87", // Ї
        186 => "\xD1\x94", // є
        170 => "\xD0\x84", // Є
        180 => "\xD2\x91", // ґ
        165 => "\xD2\x90", // Ґ
    );

    $t = '';
    for ($i = 0, $m = strlen($s); $i < $m; $i++) {
        $c = ord($s[$i]);
        if ($c <= 127) {
            $t .= chr($c);
        } elseif ($c >= 240) {
            // р..я
            $t .= chr(209) . chr($c - 112);
        } elseif ($c >= 192) {
            // А..п
            $t .= chr(208) . chr($c - 48);
        } elseif (isset($special[$c])) {
            $t .= $special[$c];
        }
    }
    return $t;
}
apoc at ukr dot net
17-Sep-2007 07:43
17-Sep-2007 07:43
:) what about NUMBER!!!???
/**
 * Convert between UTF-8 and Windows-1251 (Russian, Ukrainian letters and №).
 *
 * The conversion is done with strtr() so the string is translated in a single
 * pass; bytes inserted by one replacement are never matched by another.
 *
 * @param string $str  Text to convert.
 * @param string $type 'w' converts UTF-8 to Windows-1251, 'u' converts
 *                     Windows-1251 to UTF-8; any other value returns $str unchanged.
 * @return string Converted text.
 */
function Utf8Win($str,$type="w")
{
    static $conv = null;
    if (!is_array($conv))
    {
        $toWin = array();
        for ($x = 128; $x <= 143; $x++)
        {
            $toWin[chr(209).chr($x)] = chr($x + 112); // р..я
        }
        for ($x = 144; $x <= 191; $x++)
        {
            $toWin[chr(208).chr($x)] = chr($x + 48);  // А..п
        }
        $toWin[chr(208).chr(129)] = chr(168); // Ё
        $toWin[chr(209).chr(145)] = chr(184); // ё
        $toWin[chr(208).chr(135)] = chr(175); // Ї
        $toWin[chr(209).chr(151)] = chr(191); // ї
        $toWin[chr(208).chr(134)] = chr(178); // І
        $toWin[chr(209).chr(150)] = chr(179); // і
        $toWin[chr(210).chr(144)] = chr(165); // Ґ
        $toWin[chr(210).chr(145)] = chr(180); // ґ
        $toWin[chr(208).chr(132)] = chr(170); // Є
        $toWin[chr(209).chr(148)] = chr(186); // є
        $toWin[chr(226).chr(132).chr(150)] = chr(185); // №
        $conv = array('w' => $toWin, 'u' => array_flip($toWin));
    }
    if ($type == 'w' || $type == 'u')
    {
        return strtr($str, $conv[$type]);
    }
    return $str;
}
Vasyl Skotona
15-Sep-2007 06:28
15-Sep-2007 06:28
A better function to convert cp1251 string to utf8.
Works with russian and ukrainian text.
/**
 * Convert a Windows-1251 string (Russian and Ukrainian text) to UTF-8.
 *
 * Bytes with no entry in the conversion table are copied unchanged.
 *
 * @param string $str Windows-1251 encoded text.
 * @return string UTF-8 encoded text ("" for empty input).
 */
function unicod($str) {
    $conv = array();
    for ($x = 128; $x <= 143; $x++) $conv[$x + 112] = chr(209) . chr($x); // р..я
    for ($x = 144; $x <= 191; $x++) $conv[$x + 48] = chr(208) . chr($x);  // А..п
    $conv[184] = chr(209) . chr(145); // ё
    $conv[168] = chr(208) . chr(129); // Ё
    $conv[179] = chr(209) . chr(150); // і
    $conv[178] = chr(208) . chr(134); // І
    $conv[191] = chr(209) . chr(151); // ї
    $conv[175] = chr(208) . chr(135); // Ї
    $conv[186] = chr(209) . chr(148); // є
    $conv[170] = chr(208) . chr(132); // Є
    $conv[180] = chr(210) . chr(145); // ґ
    $conv[165] = chr(210) . chr(144); // Ґ

    // Start from an empty string so empty input yields "" rather than null.
    $nstr = '';
    for ($i = 0, $m = strlen($str); $i < $m; $i++) {
        $code = ord($str[$i]);
        $nstr .= isset($conv[$code]) ? $conv[$code] : $str[$i];
    }
    return $nstr;
}
Sote Korveziroski
24-May-2006 11:53
24-May-2006 11:53
I made a mistake; please remove this test line:
echo "<p>".ord($xchr)."</p>\n";
code should be like this:
// Modificated by tapin13
// Corrected by Timuretis
// Corrected by Sote for macedonian cyrillic
// Convert win-1251 to utf-8
/**
 * Encode a Windows-1251 string for output, with Macedonian Cyrillic support.
 *
 * Bytes above 191 become decimal numeric character references (byte + 848);
 * the Macedonian letters listed in the table are replaced by the literal
 * letter; every other byte is copied unchanged.
 *
 * @param string $str Windows-1251 encoded text.
 * @return string Encoded text.
 */
function unicode_mk_cyr($str) {
    // cp1251 byte => literal replacement letter.
    $letters = array(
        129 => "Ѓ",
        163 => "Ј",
        138 => "Љ",
        140 => "Њ",
        143 => "Џ",
        141 => "Ќ",
        189 => "Ѕ",
        188 => "ј",
        131 => "ѓ",
        190 => "ѕ",
        154 => "љ",
        156 => "њ",
        159 => "џ",
        157 => "ќ",
    );

    $encode = "";
    $total = strlen($str);
    for ($pos = 0; $pos < $total; $pos++) {
        $byte = ord($str[$pos]);
        if ($byte > 191) {
            $encode .= "&#" . ($byte + 848) . ";";
        } elseif (isset($letters[$byte])) {
            $encode .= $letters[$byte];
        } else {
            $encode .= $str[$pos];
        }
    }
    return $encode;
}
Sote Korveziroski
24-May-2006 07:24
24-May-2006 07:24
Only this code works OK for me, for translating win-1251 to utf-8 for macedonian letters!
// Modificated by tapin13
// Corrected by Timuretis
// Corrected by Sote for macedonian cyrillic
// Convert win-1251 to utf-8
/**
 * Encode a Windows-1251 string for output, with Macedonian Cyrillic support.
 *
 * Bytes above 191 become decimal numeric character references (byte + 848);
 * the Macedonian letters listed in the table are replaced by the literal
 * letter; every other byte is copied unchanged. The function prints nothing:
 * the debugging echo of every byte value has been removed.
 *
 * @param string $str Windows-1251 encoded text.
 * @return string Encoded text.
 */
function unicode_mk_cyr($str) {
    // cp1251 byte => literal replacement letter.
    $letters = array(
        129 => "Ѓ",
        163 => "Ј",
        138 => "Љ",
        140 => "Њ",
        143 => "Џ",
        141 => "Ќ",
        189 => "Ѕ",
        188 => "ј",
        131 => "ѓ",
        190 => "ѕ",
        154 => "љ",
        156 => "њ",
        159 => "џ",
        157 => "ќ",
    );

    $encode = "";
    for ($ii = 0, $len = strlen($str); $ii < $len; $ii++) {
        $code = ord($str[$ii]);
        if ($code > 191) {
            $encode .= "&#" . ($code + 848) . ";";
        } elseif (isset($letters[$code])) {
            $encode .= $letters[$code];
        } else {
            $encode .= $str[$ii];
        }
    }
    return $encode;
}
zehyaat] yandex dotru
23-Mar-2006 09:15
23-Mar-2006 09:15
Sorry for my previous post: the function to use is array_flip, not array_reverse. Corrected function:
/**
 * Convert Russian text between Windows-1251 and UTF-8.
 *
 * @param string $str  Text to convert.
 * @param string $type 'u' converts Windows-1251 to UTF-8 (default), 'w' converts
 *                     UTF-8 to Windows-1251; any other value returns $str unchanged.
 * @return string Converted text.
 */
function Encode($str,$type='u')
{
    // Array keys are quoted strings: bare u / w are undefined constants.
    $conv = array('u' => array());
    for ($x = 192; $x <= 239; $x++)
        $conv['u'][chr($x)] = chr(208).chr($x - 48);   // А..п
    for ($x = 240; $x <= 255; $x++)
        $conv['u'][chr($x)] = chr(209).chr($x - 112);  // р..я
    $conv['u'][chr(168)] = chr(208).chr(129);          // Ё
    $conv['u'][chr(184)] = chr(209).chr(145);          // ё (U+0451 is D1 91)
    $conv['w'] = array_flip($conv['u']);
    if ($type == 'w' || $type == 'u')
        return strtr($str, $conv[$type]);
    else
        return $str;
}
Sorry for my English ;)
zehya [at] yandex dotru
23-Mar-2006 08:58
23-Mar-2006 08:58
cathody at mail dot ru(27-Jul-2005 06:41)
Your function doesn't work on my PC.
This one works:
/**
 * Convert Russian text between Windows-1251 and UTF-8.
 *
 * @param string $str  Text to convert.
 * @param string $type 'u' converts Windows-1251 to UTF-8, 'w' converts UTF-8 to
 *                     Windows-1251; any other value returns $str unchanged.
 * @return string Converted text.
 */
function Encode2($str,$type)
{
    // Array keys are quoted strings: bare u / w are undefined constants.
    $conv = array('u' => array());
    for ($x = 192; $x <= 239; $x++)
        $conv['u'][chr($x)] = chr(208).chr($x - 48);   // А..п
    for ($x = 240; $x <= 255; $x++)
        $conv['u'][chr($x)] = chr(209).chr($x - 112);  // р..я
    $conv['u'][chr(168)] = chr(208).chr(129);          // Ё
    $conv['u'][chr(184)] = chr(209).chr(145);          // ё (U+0451 is D1 91)
    // The reverse table swaps keys and values; array_reverse() would only
    // reorder the entries and leave the direction unchanged.
    $conv['w'] = array_flip($conv['u']);
    if ($type == 'w' || $type == 'u')
        return strtr($str, $conv[$type]);
    else
        return $str;
}
sidor <sidor at sidor dot nnov dot ru>
09-Mar-2006 06:23
09-Mar-2006 06:23
Sorry for my English
100% worked function for convertion string to utf-8. In this implementation support main cyrilic encodings (cp1251, koi8-r, cp866, mac) For supporting another codepages - just add needed codepage in $recode array (codes in UCS-4. Add just second part of codetable). Second argument for this function for cyrilic codepages - like in convert_cyr_string function ('k','w','a','d','m')
Written in accordance with RFC 2279.
Created by Andrey A Sidorenko aka sidor
http://sidor.nnov.ru/str2utf.txt
Timuretis
06-Nov-2005 03:56
06-Nov-2005 03:56
// Modificated by tapin13
// Corrected by Timuretis
// Convert win-1251 to utf-8
/**
 * Encode the Russian letters of a Windows-1251 string as HTML numeric
 * character references.
 *
 * Bytes above 191 (А..я) become "&#N;" with N = byte + 848. Ё (168) and
 * ё (184) become "&#1025;" and "&#1105;", so the result does not depend on
 * the encoding this source file is saved in. Other bytes are copied unchanged.
 *
 * @param string $str Windows-1251 encoded text.
 * @return string Text with Russian letters replaced by numeric references.
 */
function unicode_russian($str) {
    $encode = "";
    for ($ii = 0, $len = strlen($str); $ii < $len; $ii++) {
        $code = ord($str[$ii]);
        if ($code > 191) {
            $encode .= "&#" . ($code + 848) . ";";
        } elseif ($code == 168) {
            $encode .= "&#1025;"; // Ё
        } elseif ($code == 184) {
            $encode .= "&#1105;"; // ё
        } else {
            $encode .= $str[$ii];
        }
    }
    return $encode;
}
tapin13 at atilian dot co dot il
18-Oct-2005 11:20
18-Oct-2005 11:20
// Modificated by tapin13
// Convert win-1251 to utf-8
/**
 * Encode a Windows-1251 string for output.
 *
 * Bytes above 191 become decimal numeric character references (byte + 848);
 * bytes 168 and 184 are replaced by the literal letters Ё and ё; all other
 * bytes are copied unchanged.
 *
 * @param string $str Windows-1251 encoded text.
 * @return string Encoded text.
 */
function unicode_russian($str) {
    $encode = "";
    $total = strlen($str);
    for ($pos = 0; $pos < $total; $pos++) {
        $byte = ord($str[$pos]);
        if ($byte > 191) {
            $piece = "&#" . ($byte + 848) . ";";
        } elseif ($byte == 168) {
            $piece = "Ё";
        } elseif ($byte == 184) {
            $piece = "ё";
        } else {
            $piece = $str[$pos];
        }
        $encode .= $piece;
    }
    return $encode;
}
webmaster at chassidus dot ru
30-Aug-2005 07:51
30-Aug-2005 07:51
//I've also built the same way for hebrew to utf converting
/**
 * Convert single-byte Hebrew text to UTF-8.
 *
 * ASCII bytes are copied unchanged; bytes 224 and above are emitted as the
 * two-byte sequence chr(215).chr(byte - 80). Bytes 128..223 are dropped.
 *
 * @param string $s Single-byte encoded Hebrew text.
 * @return string UTF-8 encoded text ("" for empty input).
 */
function heb2utf($s) {
    // Start from an empty string so empty input yields "" rather than null.
    $t = '';
    for ($i = 0, $m = strlen($s); $i < $m; $i++) {
        $c = ord($s[$i]);
        if ($c <= 127) { $t .= chr($c); continue; }
        if ($c >= 224) { $t .= chr(215).chr($c - 80); continue; }
    }
    return $t;
}
//Simple unicoder and decoder for hebrew and russian:
/**
 * Encode the Hebrew letters of a single-byte string as HTML numeric
 * character references.
 *
 * Bytes above 223 become "&#N;" with N = byte + 1264; other bytes are copied
 * unchanged.
 *
 * @param string $str Single-byte encoded Hebrew text.
 * @return string Encoded text ("" for empty input).
 */
function unicode_hebrew($str) {
    // Start from an empty string so empty input yields "" rather than null.
    $encode = "";
    for ($ii = 0, $len = strlen($str); $ii < $len; $ii++) {
        $code = ord($str[$ii]);
        if ($code > 223) {
            $encode .= "&#" . ($code + 1264) . ";";
        } else {
            $encode .= $str[$ii];
        }
    }
    return $encode;
}
/**
 * Encode the Russian letters of a Windows-1251 string as HTML numeric
 * character references.
 *
 * Bytes above 191 become "&#N;" with N = byte + 848; other bytes are copied
 * unchanged.
 *
 * @param string $str Windows-1251 encoded text.
 * @return string Encoded text ("" for empty input).
 */
function unicode_russian($str) {
    // Start from an empty string so empty input yields "" rather than null.
    $encode = "";
    for ($ii = 0, $len = strlen($str); $ii < $len; $ii++) {
        $code = ord($str[$ii]);
        if ($code > 191) {
            $encode .= "&#" . ($code + 848) . ";";
        } else {
            $encode .= $str[$ii];
        }
    }
    return $encode;
}
/**
 * Decode Hebrew numeric character references back to single-byte characters.
 *
 * Every "&#N;" with decimal N in 1456..1514 is replaced by chr(N - 1264).
 * All other text, including other references and incomplete "&#" sequences,
 * is left exactly as it was.
 *
 * @param string $str Text containing "&#N;" references.
 * @return string Decoded text.
 */
function decode_unicoded_hebrew($str) {
    // A regex match guarantees only real "&#digits;" references are touched;
    // plain text that merely contains ";" or "&#" is never rewritten.
    return preg_replace_callback('/&#([0-9]+);/', function ($m) {
        $code = (int) $m[1];
        if ($code >= 1456 && $code <= 1514) { // hebrew
            return chr($code - 1264);
        }
        return $m[0]; // other unicode: keep the reference
    }, $str);
}
/**
 * Decode Russian numeric character references back to Windows-1251 bytes.
 *
 * Every "&#N;" with decimal N in 1040..1103 is replaced by chr(N - 848).
 * All other text, including other references and incomplete "&#" sequences,
 * is left exactly as it was.
 *
 * @param string $str Text containing "&#N;" references.
 * @return string Decoded text.
 */
function decode_unicoded_russian($str) {
    // A regex match guarantees only real "&#digits;" references are touched;
    // plain text that merely contains ";" or "&#" is never rewritten.
    return preg_replace_callback('/&#([0-9]+);/', function ($m) {
        $code = (int) $m[1];
        if ($code >= 1040 && $code <= 1103) {
            return chr($code - 848);
        }
        return $m[0];
    }, $str);
}
cathody at mail dot ru
27-Jul-2005 03:41
27-Jul-2005 03:41
Praising other people for their efforts to write a convenient UTF8 to Win-1251 functions may I mention that, since str_replace allows arrays as parameters, the function may be rewritten in a slightly efficient way (moreover, the array generated may be stored for performance improvement):
<?php
/**
 * Convert Russian text between UTF-8 and Windows-1251.
 *
 * The translation tables are built once and cached. strtr() is used so the
 * string is translated in a single pass; bytes inserted by one replacement
 * are never matched by another.
 *
 * @param string $str  Text to convert.
 * @param string $type 'w' encodes from UTF-8 to Windows-1251, 'u' encodes from
 *                     Windows-1251 to UTF-8; any other value returns $str unchanged.
 * @return string Converted text.
 */
function Encode ( $str, $type )
{
    static $conv = null;
    if (!is_array ( $conv ))
    {
        $toWin = array ();
        for ( $x = 128; $x <= 143; $x++ )
        {
            $toWin[chr(209).chr($x)] = chr($x + 112); // р..я
        }
        for ( $x = 144; $x <= 191; $x++ )
        {
            $toWin[chr(208).chr($x)] = chr($x + 48);  // А..п
        }
        $toWin[chr(208).chr(129)] = chr(168); // Ё
        $toWin[chr(209).chr(145)] = chr(184); // ё
        $conv = array ( 'w' => $toWin, 'u' => array_flip ( $toWin ) );
    }
    if ( $type == 'w' || $type == 'u' )
        return strtr ( $str, $conv[$type] );
    else
        return $str;
}
?>
artyomch at coolfold dot com
26-Apr-2005 10:38
26-Apr-2005 10:38
I needed a code for taking UTF8 encoded string from DB and printing it in Win1251 encoded HTML. The problem was that I had to print not just english & cyrillic characters, but all characters stored in UTF encoded string (in my case the DB contained english, russian & hebrew characters).
After reading carefully the UTF8 manual, I've written the following code, that converts all non-win1251 characters into html entities (&#XXXX;).
/**
 * Convert UTF-8 text to Windows-1251 for HTML output.
 *
 * ASCII and the Russian letters (А..я, Ё, ё) are emitted as single
 * Windows-1251 bytes. Every other decoded code point is emitted as a decimal
 * numeric character reference "&#N;". Malformed or truncated sequences are
 * skipped without consuming the byte that follows them.
 *
 * @param string $str_src UTF-8 encoded text.
 * @return string Windows-1251 text with numeric references for other characters.
 */
function utf8_2_win1251 ($str_src)
{
    $str_dst = "";
    $length = strlen($str_src);
    $i = 0;
    while ($i < $length)
    {
        $code_src1 = ord($str_src[$i]);
        $i++;
        if ($code_src1 <= 127)
        {
            $str_dst .= chr($code_src1);
            continue;
        }

        // Number of continuation bytes and the payload bits of the lead byte.
        if (($code_src1 & 0xE0) == 0xC0)
        {
            $extra = 1;
            $code_dst = $code_src1 & 0x1F;
        }
        elseif (($code_src1 & 0xF0) == 0xE0)
        {
            $extra = 2;
            $code_dst = $code_src1 & 0x0F;
        }
        elseif (($code_src1 & 0xF8) == 0xF0)
        {
            $extra = 3;
            $code_dst = $code_src1 & 0x07;
        }
        else
        {
            continue;
        }

        $valid = true;
        for (; $extra > 0; $extra--)
        {
            // Stop at end of input or at a byte that is not 10xxxxxx; that byte
            // is left in place so it is examined as the start of the next character.
            if ($i >= $length || (ord($str_src[$i]) & 0xC0) != 0x80)
            {
                $valid = false;
                break;
            }
            $code_dst = ($code_dst << 6) | (ord($str_src[$i]) & 0x3F);
            $i++;
        }
        if (!$valid || !$code_dst)
        {
            continue;
        }

        if ($code_dst == 0x401)
        {
            $str_dst .= chr(168); // Ё
        }
        elseif ($code_dst == 0x451)
        {
            $str_dst .= chr(184); // ё
        }
        elseif ( ($code_dst >= 0x410) && ($code_dst <= 0x44F) )
        {
            $str_dst .= chr ($code_dst - 848);
        }
        else
        {
            $str_dst .= "&#{$code_dst};";
        }
    }
    return $str_dst;
}
felix[at]tvpro.net.ru
13-Feb-2005 09:57
13-Feb-2005 09:57
Check this code -- excellent for converting win-1251 to UTF-8
just one fix!!!
if ($c==184) { $t.=chr(209).chr(145); continue; };
Anything more it is not necessary.
It is grateful to threed [at] koralsoft.com
28-Jul-2003 03:37
i tried all functions here to convert from cp1251 to unicode, but they don't work. i think that this work :
<?php
/**
 * Convert Russian Windows-1251 text to UTF-8.
 *
 * ASCII bytes are copied unchanged; А..я, Ё and ё are converted. Any other
 * byte above 127 is dropped.
 *
 * @param string $s Windows-1251 encoded text.
 * @return string UTF-8 encoded text ("" for empty input).
 */
function win3utf($s) {
    $t = '';
    for ($i = 0, $m = strlen($s); $i < $m; $i++) {
        $c = ord($s[$i]);
        if ($c <= 127) { $t .= chr($c); continue; }
        if ($c >= 192 && $c <= 239) { $t .= chr(208).chr($c - 48); continue; }  // А..п
        if ($c >= 240) { $t .= chr(209).chr($c - 112); continue; }              // р..я
        if ($c == 184) { $t .= chr(209).chr(145); continue; }                   // ё is D1 91
        if ($c == 168) { $t .= chr(208).chr(129); continue; }                   // Ё
    }
    return $t;
}
?>
info at newstrack dot ru
02-Feb-2005 01:50
02-Feb-2005 01:50
Most useful conversion class is here http://mikolajj.republika.pl/
recommended for all
webmaster [ at ] platinumgeneration dot com
11-Jan-2005 03:36
11-Jan-2005 03:36
Here's a WORKING function to convert from UTF-8 to Windows-1251, if your hosting provider does not support iconv
<?php
/**
 * Convert UTF-8 text to Windows-1251 (Russian letters) without iconv.
 *
 * NOTE(review): the function declaration line was missing from this snippet
 * (it ended with "return $out; }" but had no header); the name used here is a
 * placeholder chosen to describe the snippet - confirm against the caller.
 *
 * ASCII bytes are copied unchanged. Two-byte sequences for А..я, Ё and ё are
 * mapped to their Windows-1251 byte; other two-byte code points become "?".
 * Bytes that are not part of a well-formed two-byte sequence are dropped.
 *
 * @param string $s UTF-8 encoded text.
 * @return string Windows-1251 encoded text.
 */
function utf8_to_cp1251($s) {
    $out = '';
    $length = strlen($s);
    for ($c = 0; $c < $length; $c++) {
        $i = ord($s[$c]);
        if ($i <= 127) {
            $out .= $s[$c];
            continue;
        }
        if (($i >> 5) != 6) {
            // Not the lead byte of a two-byte sequence.
            continue;
        }
        if ($c + 1 >= $length || (ord($s[$c + 1]) & 0xC0) != 0x80) {
            // Truncated or malformed sequence: drop the lead only.
            continue;
        }
        $c++;
        // All five payload bits of the lead byte are significant.
        $new_i = (($i & 0x1F) << 6) | (ord($s[$c]) & 0x3F);
        if ($new_i == 1025) {
            $out .= chr(168);          // Ё
        } elseif ($new_i == 1105) {
            $out .= chr(184);          // ё
        } elseif ($new_i >= 1040 && $new_i <= 1103) {
            $out .= chr($new_i - 848); // А..я
        } else {
            $out .= '?';
        }
    }
    return $out;
}
?>
standov at cgu dot kiev dot ua
08-Dec-2004 10:05
08-Dec-2004 10:05
Here is an improved function to convert win1251->UTF8
<?php
/**
 * Convert Russian Windows-1251 text to UTF-8.
 *
 * А..я, Ё and ё are converted; every other byte is copied unchanged.
 *
 * @param string $s Windows-1251 encoded text.
 * @return string UTF-8 encoded text ("" for empty input).
 */
function win2utf($s){
    $c209 = chr(209); $c208 = chr(208);
    $t = '';
    for ($i = 0, $m = strlen($s); $i < $m; $i++) {
        $c = ord($s[$i]);
        if ($c >= 192 and $c <= 239) $t .= $c208.chr($c - 48);   // А..п
        elseif ($c > 239) $t .= $c209.chr($c - 112);             // р..я
        elseif ($c == 184) $t .= $c209.chr(145);                 // ё is D1 91
        elseif ($c == 168) $t .= $c208.chr(129);                 // Ё
        else $t .= $s[$i];
    }
    return $t;
}
?>
aeon
28-Dec-2003 05:20
28-Dec-2003 05:20
threed's function works great, but the replacement for the letter small io (ё) needs to be changed from
<?php
if ($c==184) { $t.=chr(209).chr(209); continue; };
?>
to
<?php
if ($c==184) { $t.=chr(209).chr(145); continue; };
?>
so, the final working result should look like this:
<?php
/**
 * Convert Russian Windows-1251 text to UTF-8.
 *
 * ASCII bytes are copied unchanged; А..я, Ё and ё are converted. Any other
 * byte above 127 is dropped.
 *
 * @param string $s Windows-1251 encoded text.
 * @return string UTF-8 encoded text ("" for empty input).
 */
function win3utf($s) {
    $t = '';
    for ($i = 0, $m = strlen($s); $i < $m; $i++) {
        $c = ord($s[$i]);
        if ($c <= 127) { $t .= chr($c); continue; }
        if ($c >= 192 && $c <= 239) { $t .= chr(208).chr($c - 48); continue; }  // А..п
        if ($c >= 240) { $t .= chr(209).chr($c - 112); continue; }              // р..я
        if ($c == 184) { $t .= chr(209).chr(145); continue; }                   // ё is D1 91
        if ($c == 168) { $t .= chr(208).chr(129); continue; }                   // Ё
    }
    return $t;
}
?>
almi at univ dot kiev dot ua
07-Dec-2003 12:43
07-Dec-2003 12:43
To convert a Cyrillic string to UTF-8 you can use the iconv() function. It does work!
<?php
// Convert the literal from CP1251 (Windows-1251) to UTF-8 with iconv() and print the result.
echo iconv ('CP1251','UTF-8','some cyr string');
?>
But you should compile your PHP with '--with-iconv=[DIR]'.
threed [at] koralsoft.com
28-Jul-2003 01:37
28-Jul-2003 01:37
i tried all functions here to convert from cp1251 to unicode, but they don't work. i think that this work :
<?php
/**
 * Convert Russian Windows-1251 text to UTF-8.
 *
 * ASCII bytes are copied unchanged; А..я, Ё and ё are converted. Any other
 * byte above 127 is dropped.
 *
 * @param string $s Windows-1251 encoded text.
 * @return string UTF-8 encoded text ("" for empty input).
 */
function win3utf($s) {
    $t = '';
    for ($i = 0, $m = strlen($s); $i < $m; $i++) {
        $c = ord($s[$i]);
        if ($c <= 127) { $t .= chr($c); continue; }
        if ($c >= 192 && $c <= 239) { $t .= chr(208).chr($c - 48); continue; }  // А..п
        if ($c >= 240) { $t .= chr(209).chr($c - 112); continue; }              // р..я
        if ($c == 184) { $t .= chr(209).chr(145); continue; }                   // ё is D1 91
        if ($c == 168) { $t .= chr(208).chr(129); continue; }                   // Ё
    }
    return $t;
}
?>
checat at chat dot ru
12-May-2003 01:59
12-May-2003 01:59
See also more general
http://www.php.net/manual/ref.iconv.php iconv functions (standart in last glibc) and
http://www.php.net/manual/ref.recode.php Recode functions
german at artexpert dot ee
04-May-2003 02:13
04-May-2003 02:13
previous bit of code (grmaxim's win_to_utf8 function) didn't work for me, so I wrote my own func to convert from win1251 to utf8:
<?php
/**
 * Convert Russian Windows-1251 text to UTF-8.
 *
 * А..я, Ё and ё are converted; every other byte is copied unchanged.
 *
 * @param string $s Windows-1251 encoded text.
 * @return string UTF-8 encoded text ("" for empty input).
 */
function win2utf($s) {
    $t = '';
    for ($i = 0, $m = strlen($s); $i < $m; $i++) {
        $c = ord($s[$i]);
        if ($c == 184) {
            $t .= chr(209).chr(145);        // small io (D1 91)
        } elseif ($c == 168) {
            $t .= chr(208).chr(129);        // capital io
        } elseif ($c > 239) {
            $t .= chr(209).chr($c - 112);   // р..я live under lead byte D1
        } elseif ($c >= 192) {
            $t .= chr(208).chr($c - 48);    // А..п live under lead byte D0
        } else {
            $t .= $s[$i];
        }
    }
    return $t;
}
?>
Hope this helps
grmaxim at givc dot ru
17-Apr-2003 05:21
17-Apr-2003 05:21
I hope it to you we shall help
<?php
/**
 * Convert a Windows-1251 string to UTF-8.
 *
 * Uses iconv() directly: utf8_encode() only understands ISO-8859-1 input, so
 * routing Cyrillic text through it yields Latin-1 characters, not Cyrillic.
 *
 * @param string $str Windows-1251 encoded text.
 * @return string|false UTF-8 encoded text, or false if iconv() fails.
 */
function win_to_utf8($str){
    return iconv('CP1251', 'UTF-8', $str);
}
/**
 * Convert a UTF-8 string to Windows-1251.
 *
 * Uses iconv() directly: utf8_decode() produces ISO-8859-1 and replaces every
 * Cyrillic letter with "?", so the original two-step conversion lost the text.
 *
 * @param string $str UTF-8 encoded text.
 * @return string|false Windows-1251 encoded text, or false if iconv() fails
 *                      (for example on characters Windows-1251 cannot represent).
 */
function utf8_to_win($str){
    return iconv('UTF-8', 'CP1251', $str);
}
?>
If there are questions - shall help. Good luck friends!!!
pavel_bashkatov at elkogroup dot com
17-Apr-2003 02:20
17-Apr-2003 02:20
To: mihailsbo at lycos dot ru
Transliteration could be done easier:
<?
/**
 * Transliterate Russian Cyrillic text to Latin letters.
 *
 * The Cyrillic letters are written as literals, so the input must use the
 * same encoding as this source file (UTF-8). Characters that are not in the
 * table are left unchanged.
 *
 * @param string $cyrstr Text containing Russian letters.
 * @return string Transliterated text.
 */
function transliterate($cyrstr)
{
    static $map = array(
        'А' => 'A',   'а' => 'a',
        'Б' => 'B',   'б' => 'b',
        'В' => 'V',   'в' => 'v',
        'Г' => 'G',   'г' => 'g',
        'Д' => 'D',   'д' => 'd',
        'Е' => 'E',   'е' => 'e',
        'Ё' => 'E',   'ё' => 'e',
        'Ж' => 'Zh',  'ж' => 'zh',
        'З' => 'Z',   'з' => 'z',
        'И' => 'I',   'и' => 'i',
        'Й' => 'J',   'й' => 'j',
        'К' => 'K',   'к' => 'k',
        'Л' => 'L',   'л' => 'l',
        'М' => 'M',   'м' => 'm',
        'Н' => 'N',   'н' => 'n',
        'О' => 'O',   'о' => 'o',
        'П' => 'P',   'п' => 'p',
        'Р' => 'R',   'р' => 'r',
        'С' => 'S',   'с' => 's',
        'Т' => 'T',   'т' => 't',
        'У' => 'U',   'у' => 'u',
        'Ф' => 'F',   'ф' => 'f',
        'Х' => 'H',   'х' => 'h',
        'Ц' => 'C',   'ц' => 'c',
        'Ч' => 'Ch',  'ч' => 'ch',
        'Ш' => 'Sh',  'ш' => 'sh',
        'Щ' => 'Sch', 'щ' => 'sch',
        'Ъ' => '\'',  'ъ' => '\'',
        'Ы' => 'Y',   'ы' => 'y',
        'Ь' => '\'',  'ь' => '\'',
        'Э' => 'E',   'э' => 'e',
        'Ю' => 'Ju',  'ю' => 'ju',
        'Я' => 'Ja',  'я' => 'ja',
    );
    // strtr() translates in one pass, so output letters are never re-matched.
    return strtr($cyrstr, $map);
}
?>
mihailsbo at lycos dot ru
03-Mar-2003 12:15
03-Mar-2003 12:15
<?php
// Here is a function that may be useful if you wish
// to convert cyrillic text (windows-1251) to english
// letters (e.g. for sending to a cell phone)
/**
 * Transliterate Cyrillic text (windows-1251) to English letters.
 *
 * NOTE(review): the Cyrillic literals and the column alignment of the Latin
 * table in this snippet were destroyed by an encoding conversion. The table
 * below is reconstructed from the surviving Latin letters and the order of
 * the switch results (Je/e, Jo/jo, Zh/zh, Ch/ch, Sh/sh, Sch/sch, Ju/ju,
 * Ja/ja) - confirm against the original post.
 *
 * Letters are addressed by byte value so the function does not depend on the
 * encoding this source file is saved in. Bytes that are not Russian letters
 * are copied unchanged.
 *
 * @param string $text Windows-1251 encoded text.
 * @return string Transliterated text.
 */
function transliterate( $text )
{
    static $map = null;
    if ( $map === null ) {
        // Latin equivalents for bytes 0xC0..0xDF (upper) and 0xE0..0xFF (lower).
        $upper = array( 'A', 'B', 'V', 'G', 'D', 'Je', 'Zh', 'Z', 'I', 'J', 'K',
                        'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'F', 'H',
                        'C', 'Ch', 'Sh', 'Sch', '`', 'Y', '`', 'E', 'Ju', 'Ja' );
        $lower = array( 'a', 'b', 'v', 'g', 'd', 'e', 'zh', 'z', 'i', 'j', 'k',
                        'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'f', 'h',
                        'c', 'ch', 'sh', 'sch', '`', 'y', '`', 'e', 'ju', 'ja' );
        $map = array();
        for ( $k = 0; $k < 32; $k++ ) {
            $map[ chr( 0xC0 + $k ) ] = $upper[ $k ];
            $map[ chr( 0xE0 + $k ) ] = $lower[ $k ];
        }
        $map[ chr( 0xA8 ) ] = 'Jo'; // capital io
        $map[ chr( 0xB8 ) ] = 'jo'; // small io
    }
    return strtr( $text, $map );
}
?>
// P.S. Thanks to PHP developers for rich and convenient
// programming language!
mitya at alesh dot ru
25-Sep-2002 11:21
25-Sep-2002 11:21
there is a little script that convert utf to cp1251
<?php
/**
 * Register one entry in the global $w8 table.
 *
 * The key is the two-byte string chr($h).chr($t); the value is $win.
 *
 * @param string $win Value stored for the key.
 * @param int    $h   Byte value of the first key byte.
 * @param int    $t   Byte value of the second key byte.
 */
function u8($win,$h,$t) {
    $key = chr($h) . chr($t);
    $GLOBALS['w8'][$key] = $win;
}
$c1 = chr(208);
$c2 = chr(209);
u8("",208,185); u8("",209,134); u8("",209,131);
u8("",208,186); u8("",208,181); u8("",208,189);
u8("",208,179); u8("",209,136); u8("",209,137);
u8("",208,183); u8("",209,133); u8("",209,138);
u8("",209,132); u8("",209,139); u8("",208,178);
u8("",208,176); u8("",208,191); u8("",209,128);
u8("",208,190); u8("",208,187); u8("",208,180);
u8("",208,182); u8("",209,141); u8("",209,143);
u8("",209,135); u8("",209,129); u8("",208,188);
u8("",208,184); u8("",209,130); u8("",209,140);
u8("",208,177); u8("",209,142); u8("",208,153);
u8("",208,166); u8("",208,163); u8("",208,154);
u8("",208,149); u8("",208,157); u8("",208,147);
u8("",208,168); u8("",208,169); u8("",208,151);
u8("",208,165); u8("",208,170); u8("",208,164);
u8("",208,171); u8("",208,146); u8("",208,144);
u8("",208,159); u8("",208,160); u8("",208,158);
u8("",208,155); u8(""