123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112 |
- #if defined(_WIN32)
- # include <stdio.h>
- # include <stdlib.h>
- # include <wchar.h>
- /*We need the following two to set stdin/stdout to binary.*/
- # include <io.h>
- # include <fcntl.h>
- # define WIN32_LEAN_AND_MEAN
- # define WIN32_EXTRA_LEAN
- # include <windows.h>
- # include "win32utf8.h"
/*Converts a NUL-terminated UTF-16 string into a newly allocated UTF-8 string.
  Valid surrogate pairs are combined into a single 4-byte sequence.
  Unpaired surrogates and 'not a character' values are deliberately encoded as
   3-byte sequences (technically invalid UTF-8), so that later error handling
   can detect them with a better chance of giving a useful error message.
  This assumes each element of _src is a 16-bit code unit (this file is only
   built when _WIN32 is defined).
  _src: The NUL-terminated string to convert.
        This may be NULL.
  Return: A NUL-terminated buffer obtained from malloc(), which the caller is
           responsible for passing to free(), or NULL if _src was NULL, the
           required buffer size cannot be represented in a size_t, or the
           allocation failed.*/
static char *utf16_to_utf8(const wchar_t *_src){
  char *dst;
  size_t len;
  size_t si;
  size_t di;
  /*wcslen() on a NULL pointer is undefined behavior.*/
  if(_src==NULL)return NULL;
  len=wcslen(_src);
  /*Every code unit expands to at most 3 bytes (a surrogate pair consumes two
     units to produce 4 bytes), plus one byte for the terminator.
    Refuse any length for which that size would wrap around.*/
  if(len>((size_t)-1-1)/3)return NULL;
  dst=malloc(3*len+1);
  if(dst==NULL)return NULL;
  for(di=si=0;si<len;si++){
    unsigned c0;
    c0=_src[si];
    if(c0<0x80){
      /*Can be represented by a 1-byte sequence.*/
      dst[di++]=(char)c0;
      continue;
    }
    else if(c0<0x800){
      /*Can be represented by a 2-byte sequence.*/
      dst[di++]=(char)(0xC0|(c0>>6));
      dst[di++]=(char)(0x80|(c0&0x3F));
      continue;
    }
    else if(c0>=0xD800&&c0<0xDC00){
      unsigned c1;
      /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/
      c1=_src[si+1];
      if(c1>=0xDC00&&c1<0xE000){
        unsigned w;
        /*Surrogate pair.*/
        w=(((c0&0x3FF)<<10)|(c1&0x3FF))+0x10000;
        /*Can be represented by a 4-byte sequence.*/
        dst[di++]=(char)(0xF0|(w>>18));
        dst[di++]=(char)(0x80|((w>>12)&0x3F));
        dst[di++]=(char)(0x80|((w>>6)&0x3F));
        dst[di++]=(char)(0x80|(w&0x3F));
        /*Skip the low surrogate we just consumed.*/
        si++;
        continue;
      }
    }
    /*Anything else is either a valid 3-byte sequence, an invalid surrogate
       pair, or 'not a character'.
      In the latter two cases, we just encode the value as a 3-byte
       sequence anyway (producing technically invalid UTF-8).
      Later error handling will detect the problem, with a better
       chance of giving a useful error message.*/
    dst[di++]=(char)(0xE0|(c0>>12));
    dst[di++]=(char)(0x80|((c0>>6)&0x3F));
    dst[di++]=(char)(0x80|(c0&0x3F));
  }
  dst[di]='\0';
  return dst;
}
- typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line,
- int *num_args);
- /*Make a best-effort attempt to support UTF-8 on Windows.*/
- void win32_utf8_setup(int *_argc,const char ***_argv){
- HMODULE hlib;
- /*We need to set stdin/stdout to binary mode.
- This is unrelated to UTF-8 support, but it's platform specific and we need
- to do it in the same places.*/
- _setmode(_fileno(stdin),_O_BINARY);
- _setmode(_fileno(stdout),_O_BINARY);
- hlib=LoadLibraryA("shell32.dll");
- if(hlib!=NULL){
- command_line_to_argv_w_func command_line_to_argv_w;
- /*This function is only available on Windows 2000 or later.*/
- command_line_to_argv_w=(command_line_to_argv_w_func)GetProcAddress(hlib,
- "CommandLineToArgvW");
- if(command_line_to_argv_w!=NULL){
- wchar_t **argvw;
- int argc;
- argvw=(*command_line_to_argv_w)(GetCommandLineW(),&argc);
- if(argvw!=NULL){
- int ai;
- /*Really, I don't see why argc would ever differ from *_argc, but let's
- be paranoid.*/
- if(argc>*_argc)argc=*_argc;
- for(ai=0;ai<argc;ai++){
- char *argv;
- argv=utf16_to_utf8(argvw[ai]);
- if(argv!=NULL)(*_argv)[ai]=argv;
- }
- *_argc=argc;
- LocalFree(argvw);
- }
- }
- FreeLibrary(hlib);
- }
- # if defined(CP_UTF8)
- /*This does not work correctly in all environments (it breaks output in
- mingw32 for me), and requires a Unicode font (e.g., when using the default
- Raster font, even characters that are available in the font's codepage
- won't display properly).*/
- /*SetConsoleOutputCP(CP_UTF8);*/
- # endif
- }
- #endif
|