为了让netsurf有复制粘贴功能, 使用CF_TEXT对英文没问题,对汉字就乱码了,因此一定要使用CF_UNICODETEXT。
这里就引出几个问题:
1. netsurf内部编码是utf-8的,如何换成utf-16给windows?
2. 操作剪贴板的例程?
3. 宽字符串的操作,比如取长度?
前两点容易搜索到,第3点有几个坑需要注意:
1. printf的参数是char, wprintf的参数是wchar_t,因此使用宽字符函数时一定要检查全部入口参数;
2. sprintf()没有maxlen, swprintf有:
int swprintf(wchar_t *wcs, size_t maxlen, const wchar_t *format, ...);
这个可把我折腾了一阵子。
贴上测试代码:
/*
wchar_t *wcs = L"xxx";
在Windows上,编译器自动把L"xxx" 从源文件编码(UTF-8)转换为UTF-16LE(旧称UCS-2LE);在glibc上则转换为UTF-32。
不同称呼:
STD C Win
char ansi mbcs
wchar wchar_t WCHAR
sizeof(wchar_t) 在win上是2,在unix glibc为4.
*/
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#ifdef _WIN32
#include <windows.h>
#define LC_STR "chs"
#else
#define LC_STR "zh_CN.UTF-8"
#endif
/*
 * Write `prefix` to stdout, followed by each of the first `size` bytes at
 * `data` as a space-separated, two-digit lowercase hex value, then a newline.
 * Nothing but the prefix and the newline is printed when `size` <= 0.
 */
void dump_bytes(char *prefix, void *data, int size)
{
    const unsigned char *bytes = data;
    int idx = 0;

    printf("%s", prefix);
    while (idx < size) {
        printf(" %02x", bytes[idx]);
        idx++;
    }
    putchar('\n');
}
#ifndef _WIN32
/*
 * Decode one UTF-8 sequence starting at src[0], with `avail` (>= 1) bytes
 * readable. Stores the code point in *cp and returns the number of bytes
 * consumed. A malformed, truncated, overlong or surrogate sequence yields
 * U+FFFD and consumes exactly one byte.
 */
static int utf8_decode_one(const unsigned char *src, int avail,
                           unsigned long *cp)
{
    /* Smallest code point that legitimately needs N bytes (index = N). */
    static const unsigned long min_cp[] = {0, 0, 0x80, 0x800, 0x10000};
    unsigned char lead = src[0];
    unsigned long value;
    int need;

    *cp = 0xFFFD;
    if (lead < 0x80) {
        *cp = lead;
        return 1;
    }
    if ((lead & 0xE0) == 0xC0) {
        need = 2;
        value = lead & 0x1Fu;
    } else if ((lead & 0xF0) == 0xE0) {
        need = 3;
        value = lead & 0x0Fu;
    } else if ((lead & 0xF8) == 0xF0) {
        need = 4;
        value = lead & 0x07u;
    } else {
        return 1; /* stray continuation byte or invalid lead byte */
    }
    if (need > avail) {
        return 1;
    }
    for (int i = 1; i < need; i++) {
        if ((src[i] & 0xC0) != 0x80) {
            return 1;
        }
        value = (value << 6) | (src[i] & 0x3Fu);
    }
    if (value < min_cp[need] || value > 0x10FFFF ||
        (value >= 0xD800 && value <= 0xDFFF)) {
        return 1;
    }
    *cp = value;
    return need;
}
#endif

/*
 * Convert UTF-8 text to a newly allocated, NUL-terminated wide string.
 *
 * mbcs    UTF-8 input; need not be NUL-terminated when length >= 0.
 * length  number of input bytes, or a negative value to convert a
 *         NUL-terminated string (the terminator is then converted and
 *         counted too, matching MultiByteToWideChar with -1).
 * len     optional out-parameter; receives the number of wide units
 *         produced (0 on failure).
 *
 * Returns a buffer the caller must free(), or NULL on allocation or
 * conversion failure. The buffer always carries a terminating L'\0' after
 * the converted units, so it is safe to pass to wcslen()/"%ls".
 * On Windows the output is UTF-16; elsewhere it is one wchar_t per code
 * point (surrogate pairs are emitted only if wchar_t is narrower than
 * 32 bits).
 */
wchar_t *char2wchar(char *mbcs, int length, int *len)
{
    wchar_t *wstring;
    int wlen = 0;

    if (len) {
        *len = 0;
    }
    if (mbcs == NULL) {
        return NULL;
    }

#ifdef _WIN32
    /* MultiByteToWideChar rejects a zero length, so treat it as "". */
    if (length != 0) {
        wlen = MultiByteToWideChar(CP_UTF8, 0, mbcs, length, NULL, 0);
        if (wlen <= 0) {
            return NULL;
        }
    }
    wstring = malloc(sizeof *wstring * ((size_t)wlen + 1));
    if (wstring == NULL) {
        return NULL;
    }
    if (wlen > 0 &&
        MultiByteToWideChar(CP_UTF8, 0, mbcs, length, wstring, wlen) != wlen) {
        free(wstring);
        return NULL;
    }
#else
    if (length < 0) {
        length = (int)strlen(mbcs) + 1;
    }
    /* Every input byte yields at most one output unit, so length + 1
     * units is always enough (including the terminator). */
    wstring = malloc(sizeof *wstring * ((size_t)length + 1));
    if (wstring == NULL) {
        return NULL;
    }
    for (int pos = 0; pos < length;) {
        unsigned long cp;

        pos += utf8_decode_one((const unsigned char *)mbcs + pos,
                               length - pos, &cp);
        if (cp > 0xFFFF && sizeof(wchar_t) < 4) {
            cp -= 0x10000;
            wstring[wlen++] = (wchar_t)(0xD800 + (cp >> 10));
            wstring[wlen++] = (wchar_t)(0xDC00 + (cp & 0x3FF));
        } else {
            wstring[wlen++] = (wchar_t)cp;
        }
    }
#endif

    /* The conversion itself never writes a terminator for counted input. */
    wstring[wlen] = L'\0';
    if (len) {
        *len = wlen;
    }
    return wstring;
}
/*
 * Demo driver: prints a wide string and its raw bytes, converts a UTF-8
 * literal with char2wchar(), and compares sprintf-style and swprintf-style
 * formatting of wide strings.
 *
 * All output goes through the byte-oriented printf family: a stream's
 * orientation is fixed by the first I/O call on it, so mixing printf() and
 * wprintf() on stdout is not portable. "%ls" converts wide strings using
 * the current locale, which is why setlocale() is called first.
 */
int main(int argc, char **argv)
{
    wchar_t *w2, *w1 = L"百度一下,你就知道";
    char *mbcs = "百度一下,你就知道";
    char buf[1024] = "";
    wchar_t wbuf[1024] = L"";
    int l1, l2 = 0;

    (void)argc;
    (void)argv;

    if (setlocale(LC_ALL, LC_STR) == NULL) {
        fprintf(stderr, "warning: locale \"%s\" unavailable; %%ls output may fail\n",
                LC_STR);
    }

    /* sizeof yields size_t; cast so the argument matches %d. */
    printf("sizeof(wchar_t) %d\n", (int)sizeof(wchar_t));

    l1 = (int)wcslen(w1);
    printf("wcs %ls len %d\n", w1, l1);

    /* A negative return means the wide-to-multibyte conversion failed. */
    if (snprintf(buf, sizeof buf, "%ls %d %ls %s\n", w1, 123, w1, "end") < 0) {
        buf[0] = '\0';
    }
    printf("%s", buf);
    /* Dump the whole literal regardless of the platform's wchar_t width. */
    dump_bytes("L", w1, l1 * (int)sizeof(wchar_t));

    w2 = char2wchar(mbcs, (int)strlen(mbcs), &l2);
    if (w2 != NULL) {
        printf("%d %ls %s\n", 123, w2, "end");
        dump_bytes("C", w2, l2 * (int)sizeof(wchar_t));
        free(w2);
    } else {
        fprintf(stderr, "char2wchar failed\n");
    }

    wchar_t w3[] = {0x0057, 0x0069, 0x006b, 0x0069, 0}; /* L"Wiki" */

    if (snprintf(buf, sizeof buf, "%ls==%ls", w3, w3) < 0) {
        buf[0] = '\0';
    }
    printf("%s\n", buf);
    dump_bytes("@", buf, (int)strlen(buf));

    /* Unlike sprintf, swprintf takes the destination capacity in wchar_t. */
    if (swprintf(wbuf, sizeof(wbuf) / sizeof(wbuf[0]), L"%ls==%ls", w3, w3) < 0) {
        wbuf[0] = L'\0';
    }
    printf("%ls\n", wbuf);
    dump_bytes("#", wbuf, (int)(wcslen(wbuf) * sizeof(wchar_t)));

    return 0;
}
/*set fileencoding=utf-8*/
最后贴上我对netsurf的修改:
/*
 * Fetch the clipboard's Unicode text as UTF-8.
 *
 * buffer  receives the converted string as produced by utf8_convert(), or
 *         NULL when the clipboard cannot be opened, holds no
 *         CF_UNICODETEXT data, or conversion fails.
 *         NOTE(review): presumably the caller frees it -- confirm against
 *         utf8_convert()'s allocation contract.
 * length  receives strlen() of the converted string, or 0 on failure.
 *
 * Both outputs are always written, so callers never see stale values.
 */
void gui_get_clipboard(char **buffer, size_t *length)
{
    HANDLE clipboard_handle;
    wchar_t *content;
    char *out = NULL;

    *buffer = NULL;
    *length = 0;

    if (!OpenClipboard(input_window->main)) {
        return;
    }

    clipboard_handle = GetClipboardData(CF_UNICODETEXT);
    if (clipboard_handle != NULL) {
        content = GlobalLock(clipboard_handle);
        if (content != NULL) {
            size_t wlen = wcslen(content);

            /* CF_UNICODETEXT is UTF-16LE; plain UCS-2 would reject or
             * mangle characters outside the BMP (surrogate pairs). */
            if (utf8_convert((char *)content, wlen * sizeof(wchar_t),
                             "UTF-16LE", "UTF-8", &out) == UTF8_CONVERT_OK &&
                out != NULL) {
                *buffer = out;
                *length = strlen(out);
            }
            GlobalUnlock(clipboard_handle);
        }
    }

    CloseClipboard();
}
/*
 * Append UTF-8 text to the clipboard's Unicode text.
 *
 * The existing CF_UNICODETEXT content (if any) is read, the converted
 * `buffer` is appended to it, and the result replaces the clipboard
 * content. `styles` and `n_styles` are ignored.
 *
 * buffer  UTF-8 text to add.
 * length  byte length of `buffer`, passed straight to utf8_convert().
 *
 * On any failure (clipboard busy, conversion error, out of memory) the
 * clipboard is left untouched.
 */
void gui_set_clipboard(const char *buffer, size_t length,
        nsclipboard_styles styles[], int n_styles)
{
    static const wchar_t empty[] = L"";
    HANDLE h, hnew;
    const wchar_t *orig = empty;
    wchar_t *merged;
    char *converted = NULL;
    size_t len, len2;

    (void)styles;
    (void)n_styles;

    if (buffer == NULL) {
        return;
    }
    if (!OpenClipboard(input_window->main)) {
        return;
    }

    /* The clipboard wants UTF-16LE; UCS-2 cannot represent non-BMP text. */
    if (utf8_convert(buffer, length, "UTF-8", "UTF-16LE", &converted)
            != UTF8_CONVERT_OK || converted == NULL) {
        goto out;
    }
    /* NOTE(review): relies on utf8_convert() terminating its output with
     * at least a two-byte NUL, as the original code did -- confirm. */
    len2 = wcslen((const wchar_t *)converted);

    h = GetClipboardData(CF_UNICODETEXT);
    if (h != NULL) {
        orig = GlobalLock(h);
        if (orig == NULL) {
            /* Could not lock the old text: treat it as empty. */
            orig = empty;
            h = NULL;
        }
    }
    len = wcslen(orig);

    /* GHND = movable + zero-initialised, as SetClipboardData requires. */
    hnew = GlobalAlloc(GHND, (len + len2 + 1) * sizeof(wchar_t));
    merged = (hnew != NULL) ? GlobalLock(hnew) : NULL;
    if (merged != NULL) {
        memcpy(merged, orig, len * sizeof(wchar_t));
        memcpy(merged + len, converted, len2 * sizeof(wchar_t));
        merged[len + len2] = L'\0';
        GlobalUnlock(hnew);
    }

    /* Finished reading the old text; it becomes invalid once emptied. */
    if (h != NULL) {
        GlobalUnlock(h);
    }

    if (merged == NULL) {
        if (hnew != NULL) {
            GlobalFree(hnew);
        }
        goto out;
    }

    /* EmptyClipboard() also makes this window the clipboard owner, which
     * SetClipboardData() needs even when no text was present before. */
    EmptyClipboard();
    if (SetClipboardData(CF_UNICODETEXT, hnew) == NULL) {
        /* Ownership passes to the system only on success. */
        GlobalFree(hnew);
    }

out:
    free(converted);
    CloseClipboard();
}