ifstream讀取Unicode(UTF-16LE)文本到string時,需要先過濾文本開頭的BOM(ff fe兩個字節),否則轉成gbk會失敗。
ifstream讀取utf8文本到string時,需要先過濾文本開頭的BOM(ef bb bf三個字節),否則轉成gbk會失敗。
下面是實現代碼:
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>

#include <iconv.h>

#ifdef _MSC_VER
// MSVC-only: ask the linker to pull in the iconv import library.
#pragma comment(lib, "libIconv.lib")
#endif

/// Converts a byte string from one character encoding to another using iconv.
///
/// @param source_charset  iconv name of the encoding sourceStr is in.
/// @param to_charset      iconv name of the encoding to produce.
/// @param sourceStr       Raw bytes in source_charset (may contain NUL bytes).
/// @return The converted bytes, or an empty string if the conversion
///         descriptor cannot be opened or the input cannot be converted.
std::string code_convert(const char* source_charset, const char* to_charset,
                         const std::string& sourceStr)
{
    // iconv_open reports failure as (iconv_t)-1, not as a null handle.
    const iconv_t cd = iconv_open(to_charset, source_charset);
    if (cd == (iconv_t)-1)
        return "";

    std::string result;
    // iconv takes a non-const char** but does not modify the input bytes.
    char* inbuf = const_cast<char*>(sourceStr.data());
    std::size_t inleft = sourceStr.size();
    bool ok = true;

    // Convert in fixed-size chunks so the output may be arbitrarily long.
    char chunk[256];
    while (inleft > 0) {
        char* out = chunk;
        std::size_t outleft = sizeof chunk;
        const std::size_t rc = iconv(cd, &inbuf, &inleft, &out, &outleft);
        const std::size_t produced = sizeof chunk - outleft;
        // Append by length: UCS-2 output legitimately contains NUL bytes.
        result.append(chunk, produced);
        if (rc == static_cast<std::size_t>(-1)) {
            // E2BIG only means "chunk is full"; anything else is a real error.
            // Zero progress on E2BIG would loop forever, so treat it as failure.
            if (errno != E2BIG || produced == 0) {
                ok = false;
                break;
            }
        }
    }

    if (ok) {
        // Flush any pending shift sequence for stateful target encodings.
        char* out = chunk;
        std::size_t outleft = sizeof chunk;
        if (iconv(cd, NULL, NULL, &out, &outleft) == static_cast<std::size_t>(-1))
            ok = false;
        else
            result.append(chunk, sizeof chunk - outleft);
    }

    // Always release the descriptor, including on the failure paths.
    iconv_close(cd);
    return ok ? result : std::string();
}

/// GBK -> UTF-8. strGbk holds GBK-encoded bytes.
/// Uses "GBK" rather than "gb2312" so characters outside the GB2312 subset
/// also convert.
std::string GbkToUtf8(const std::string& strGbk)
{
    return code_convert("GBK", "utf-8", strGbk);
}

/// UTF-8 -> GBK. strUtf8 holds UTF-8 bytes without a BOM.
std::string Utf8ToGbk(const std::string& strUtf8)
{
    return code_convert("utf-8", "GBK", strUtf8);
}

/// GBK -> "unicode" (UCS-2, little-endian). strGbk holds GBK-encoded bytes.
std::string GbkToUnicode(const std::string& strGbk)
{
    return code_convert("GBK", "UCS-2LE", strGbk);
}

/// "unicode" (UCS-2, little-endian) -> GBK.
/// Despite its name, strGbk holds the UCS-2LE input bytes without a BOM.
std::string UnicodeToGbk(const std::string& strGbk)
{
    return code_convert("UCS-2LE", "GBK", strGbk);
}

/// Removes trailing CR/LF code units from text.
/// unit is the code-unit width in bytes: 1 for GBK/UTF-8, 2 for UCS-2LE
/// (where a line break is the byte pair 0A 00 or 0D 00).
static void trim_trailing_newlines(std::string& text, std::size_t unit)
{
    while (unit > 0 && text.size() >= unit) {
        const std::size_t pos = text.size() - unit;
        bool is_newline = (text[pos] == '\n' || text[pos] == '\r');
        for (std::size_t i = 1; i < unit && is_newline; ++i) {
            if (text[pos + i] != '\0')
                is_newline = false;
        }
        if (!is_newline)
            break;
        text.erase(pos);
    }
}

/// Reads a whole file as raw bytes, drops the given byte-order mark if the
/// file starts with it, and strips trailing line breaks.
///
/// The file is opened in binary mode and read without operator>>, because
/// formatted extraction treats bytes such as 0x20/0x0A as whitespace and
/// would cut UTF-16 data in the middle of a character.
///
/// @param path  File to read.
/// @param bom   Byte-order mark to remove when present ("" for none).
/// @param unit  Code-unit width in bytes, used for trimming line breaks.
/// @return The file content, or an empty string if the file cannot be opened.
static std::string load_text(const char* path, const std::string& bom, std::size_t unit)
{
    std::ifstream in(path, std::ios::in | std::ios::binary);
    if (!in)
        return std::string();

    std::istreambuf_iterator<char> first(in);
    std::istreambuf_iterator<char> last;
    std::string text(first, last);

    // Only strip the BOM when it is really there; a file saved without one
    // must not lose its first character.
    if (!bom.empty() && text.compare(0, bom.size(), bom) == 0)
        text.erase(0, bom.size());

    trim_trailing_newlines(text, unit);
    return text;
}

/// Prints every byte of bytes as two lowercase hex digits, then a newline.
static void print_hex(const std::string& bytes)
{
    for (std::size_t i = 0; i < bytes.size(); ++i)
        std::printf("%02x", static_cast<unsigned char>(bytes[i]));
    std::printf("\n");
}

int main()
{
    // 1. "ANSI.txt" is already GBK: print it as-is and dump its bytes.
    //    For a file holding the single character U+6211 this prints "ced2",
    //    i.e. its GBK code is 0xCED2.
    const std::string strGbk = load_text("ANSI.txt", "", 1);
    std::cout << strGbk << '\n';
    print_hex(strGbk);

    // 2. "unicode.txt" is UTF-16LE with a leading FF FE byte-order mark.
    //    The same character dumps as "1162": little-endian storage of 0x6211.
    const std::string strUnicode = load_text("unicode.txt", "\xFF\xFE", 2);
    std::cout << UnicodeToGbk(strUnicode) << '\n';
    print_hex(strUnicode);

    // 3. "utf8.txt" is UTF-8 with a leading EF BB BF byte-order mark.
    //    The same character dumps as "e68891".
    const std::string strUtf8 = load_text("utf8.txt", "\xEF\xBB\xBF", 1);
    std::cout << Utf8ToGbk(strUtf8) << '\n';
    print_hex(strUtf8);

    return 0;
}
|
新聞熱點
疑難解答