作者:佚名 | 来源:网络 | 添加时间:2006-06-16 20:05:47 | 人气:18378
在中文网站的开发中,无论你是否注意到,事实上系统都在使用某一种编码方式来处理中文,如GB2312,UTF-8等,前者是经常使用的代码页,而后者则能够兼容所有的客户访问端,是发展的方向。
在处理过程中常常能够见到的情形是在URL或者POST的数据中,中文被编码成为%E8%87%AA这样的形式,事实上,在.net中要将这些编码还原为UTF-8中文字符并不困难,原理与代码如下:
1.在UTF-8编码中,常用的中文字符占3个字节,URL编码后每个字节都表示为%加上两位16进制数字,因此%E8%87%AA是一个中文字符(自);其中E8,87,AA都是16进制的数字,每个数字的两位分别对应该字节的高4位和低4位
2.只要能够将每两位十六进制字符转换成为一个数字(高位×16+低位),并按顺序存入到byte数组中,然后将得到的Byte数组用UTF8解码器解码即可
/// <summary>
/// Helpers for turning percent-encoded UTF-8 byte sequences (for example
/// "%E8%87%AA") back into text.
/// </summary>
public class UTF8Operator
{
    /// <summary>
    /// Decodes a string made of percent-encoded bytes, such as "%E6%9D%B1",
    /// into the text those bytes represent when interpreted as UTF-8.
    /// </summary>
    /// <remarks>
    /// Despite its name, this method performs no GB2312 conversion; the name
    /// is kept so existing callers continue to compile.
    /// Only input that begins with '%' is processed. The input is split on
    /// '%', and every segment that is exactly two characters long (after
    /// trimming whitespace) is treated as one hexadecimal byte. Segments of
    /// any other length are skipped, and characters that are not hexadecimal
    /// digits count as 0.
    /// </remarks>
    /// <param name="theInput">The text to decode. May be null or empty.</param>
    /// <returns>
    /// The decoded text when <paramref name="theInput"/> starts with '%';
    /// otherwise <paramref name="theInput"/> unchanged (including null or empty).
    /// </returns>
    public static string ConvertGB2UTF8(string theInput)
    {
        // Only strings which start with % are handled; everything else
        // (including null and empty input) is passed through untouched.
        if (string.IsNullOrEmpty(theInput) || theInput[0] != '%')
        {
            return theInput;
        }

        string[] segments = theInput.Split('%');

        // Each segment yields at most one byte, so the segment count is a
        // safe upper bound for the buffer size.
        byte[] bytes = new byte[segments.Length];
        int count = 0;

        foreach (string segment in segments)
        {
            string hexPair = segment.Trim();
            if (hexPair.Length != 2)
            {
                continue;
            }

            // First digit is the high nibble, second digit is the low nibble.
            int high = ConvertHexToDec(hexPair.Substring(0, 1));
            int low = ConvertHexToDec(hexPair.Substring(1, 1));
            bytes[count] = (byte)(high * 16 + low);
            count++;
        }

        // Decode only the bytes actually written; decoding the whole buffer
        // would append a NUL character for every unused slot.
        return System.Text.Encoding.UTF8.GetString(bytes, 0, count);
    }

    /// <summary>
    /// Converts a single hexadecimal digit ("0"-"9", "A"-"F", "a"-"f") to its
    /// numeric value, e.g. "A" gives 10 and "F" gives 15.
    /// </summary>
    /// <param name="theValue">
    /// A string holding one hexadecimal digit; surrounding whitespace is ignored.
    /// </param>
    /// <returns>
    /// The value 0-15 of the digit, or 0 when <paramref name="theValue"/> is
    /// null or is not a single hexadecimal digit.
    /// </returns>
    public static int ConvertHexToDec(string theValue)
    {
        if (theValue == null)
        {
            return 0;
        }

        string digit = theValue.Trim();
        if (digit.Length != 1)
        {
            return 0;
        }

        // Plain character arithmetic avoids culture-sensitive case conversion.
        char c = digit[0];
        if (c >= '0' && c <= '9')
        {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F')
        {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f')
        {
            return c - 'a' + 10;
        }
        return 0;
    }
}