1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
/**
* @file
* @brief Conversions between Unicode and local charsets, string
* manipulation functions that act on character types.
**/
#ifndef UNICODE_H
#define UNICODE_H
// NOTE(review): everything in this group is only declared here. The remarks
// below are inferred from names and signatures; confirm them against the
// definitions. `string` and `ucs_t` are not declared in the visible part of
// this header, so they must come from something included earlier.

// Presumably the display width of s. Overloaded for C strings and string.
int strwidth(const char *s);
int strwidth(const string &s);
// Presumably fits s to `width`; `spaces` defaults to true (likely "pad with
// spaces" -- TODO confirm). Overloaded for C strings and string.
string chop_string(const char *s, int width, bool spaces = true);
string chop_string(const string &s, int width, bool spaces = true);
// Presumably encodes the code point s as UTF-8 into d; the meaning of the int
// result (likely a byte count) is not visible here.
int wctoutf8(char *d, ucs_t s);
// Presumably decodes one UTF-8 sequence from s into *d; the meaning of the int
// result (likely bytes consumed) is not visible here.
int utf8towc(ucs_t *d, const char *s);
#ifdef TARGET_OS_WINDOWS
// Inside the TARGET_OS_WINDOWS branch: a UTF-16 code unit is wchar_t.
typedef wchar_t utf16_t;
// Conversions between UTF-8 and UTF-16 taking raw character pointers
// (presumably NUL-terminated; defined elsewhere -- confirm). Results are
// returned by value.
wstring utf8_to_16(const char *s);
string utf16_to_8(const wchar_t *s);
/**
 * Overload of utf8_to_16() for string arguments.
 *
 * Only the character buffer of s is forwarded to the const char * overload;
 * no length is passed along.
 */
static inline wstring utf8_to_16(const string &s)
{
    const char *const utf8 = s.c_str();
    return utf8_to_16(utf8);
}
/**
 * Overload of utf16_to_8() for wstring arguments.
 *
 * Only the character buffer of s is forwarded to the const wchar_t * overload;
 * no length is passed along.
 */
static inline string utf16_to_8(const wstring &s)
{
    const wchar_t *const utf16 = s.c_str();
    return utf16_to_8(utf16);
}
#else
// Without TARGET_OS_WINDOWS, a UTF-16 code unit is a plain uint16_t.
typedef uint16_t utf16_t;
#endif
// Conversions between UTF-8 and "mb" -- presumably the local multibyte
// charset mentioned in the file header (TODO confirm against the definitions).
// Both take a raw character pointer and return the result by value.
string utf8_to_mb(const char *s);
string mb_to_utf8(const char *s);
/**
 * Overload of utf8_to_mb() for string arguments.
 *
 * Only the character buffer of s is forwarded to the const char * overload;
 * no length is passed along.
 */
static inline string utf8_to_mb(const string &s)
{
    const char *const utf8 = s.c_str();
    return utf8_to_mb(utf8);
}
/**
 * Overload of mb_to_utf8() for string arguments.
 *
 * Only the character buffer of s is forwarded to the const char * overload;
 * no length is passed along.
 */
static inline string mb_to_utf8(const string &s)
{
    const char *const local = s.c_str();
    return mb_to_utf8(local);
}
// NOTE(review): declarations only; the remarks below are inferred from names
// and signatures and should be confirmed against the definitions.

// Presumably the length of the encoded form of code point c.
int wclen(ucs_t c);
// Declared only when UNIX is not defined -- presumably because those builds
// get wcwidth() from the system headers instead (confirm).
#ifndef UNIX
int wcwidth(ucs_t c);
#endif
// Presumably step backwards / forwards by one glyph within a character
// buffer; `start` looks like the lower bound for the backwards step.
char *prev_glyph(char *s, char *start);
char *next_glyph(char *s);
// Shorthand for converting x and taking the C-string pointer of the result.
//
// The conversion functions return their result by value, so the pointer
// refers to a temporary: use it only within the full expression that contains
// the macro (e.g. directly as a function argument) and never store it.
//
// OUTW relies on utf8_to_16(), which this header declares only when
// TARGET_OS_WINDOWS is defined.
#define OUTS(x) (utf8_to_mb(x).c_str())
#define OUTW(x) (utf8_to_16(x).c_str())
/**
 * Abstract interface for reading input one line at a time.
 *
 * Concrete sources must implement eof() and get_line(); error() may be
 * overridden by sources that can fail.
 */
class LineInput
{
public:
    virtual ~LineInput()
    {
    }

    /// End-of-input indicator; the exact meaning is set by the implementation.
    virtual bool eof() = 0;

    /// Failure indicator. The base version always reports "no error".
    virtual bool error()
    {
        return false;
    }

    /// Returns a line as a string; supplied by the implementation.
    virtual string get_line() = 0;
};
/**
 * LineInput implementation that holds a stdio FILE handle.
 *
 * The constructor, destructor and get_line() are defined outside this header.
 *
 * NOTE(review): the class holds a raw FILE * and declares a destructor, but
 * the copy operations are compiler-generated. If the destructor closes the
 * handle, copying an instance would presumably close it twice -- confirm and
 * consider making the class non-copyable.
 */
class FileLineInput : public LineInput
{
private:
    // Encoding tag; presumably chosen from the byte-order mark found in the
    // file (TODO confirm in the constructor).
    enum bom_type
    {
        BOM_NORMAL, // system locale
        BOM_UTF8,
        BOM_UTF16LE,
        BOM_UTF16BE,
        BOM_UTF32LE,
        BOM_UTF32BE,
    };

    FILE *f;        // null is treated as "failed / nothing to read" below
    bom_type bom;
    bool seen_eof;

public:
    // `name` is presumably the path of the file to read (confirm).
    FileLineInput(const char *name);
    ~FileLineInput();

    /// True when there is no file handle or the end-of-file flag is set.
    virtual bool eof()
    {
        if (!f)
            return true;
        return seen_eof;
    }

    /// True when there is no file handle.
    virtual bool error()
    {
        return !f;
    }

    virtual string get_line();
};
/**
 * LineInput implementation for files that are always UTF-8 without a BOM.
 *
 * The contents are read as-is and merely validated for a well-formed stream.
 * The constructor, destructor and get_line() are defined outside this header.
 *
 * NOTE(review): the class holds a raw FILE * and declares a destructor, but
 * the copy operations are compiler-generated. If the destructor closes the
 * handle, copying an instance would presumably close it twice -- confirm and
 * consider making the class non-copyable.
 */
class UTF8FileLineInput : public LineInput
{
private:
    FILE *f;        // null is treated as "failed / nothing to read" below
    bool seen_eof;

public:
    // `name` is presumably the path of the file to read (confirm).
    UTF8FileLineInput(const char *name);
    ~UTF8FileLineInput();

    /// True when there is no file handle or the end-of-file flag is set.
    virtual bool eof()
    {
        if (!f)
            return true;
        return seen_eof;
    }

    /// True when there is no file handle.
    virtual bool error()
    {
        return !f;
    }

    virtual string get_line();
};
// Lookup tables defined in another translation unit: 128 entries for VT100
// and 256 for CP437. Presumably each entry maps a charset position to a
// Unicode code point (TODO confirm against the definitions).
extern unsigned short charset_vt100[128];
extern unsigned short charset_cp437[256];
#endif
|