| |||
| Home > The C and C++ Libraries > Tailoring locale and CTYPE using C macros > _get_lc_ctype() | |||
The CTYPE implementation is selected at
link time to be one of the following:
- The C locale only. This is the default.
- The ISO8859-1 (Latin-1) locale.
- Shift-JIS.
- UTF-8.
You can define your own CTYPE attribute
table with the following characteristics:
- It must be read-only.
- It is a byte array with indexes ranging from -1 to 255 inclusive (257 bytes in total).
- Each byte is interpreted as eight attribute bits.
The values are defined in ctype.h as follows:
__S: whitespace characters
__P: punctuation characters
__B: printable space characters
__L: lowercase letters
__U: uppercase letters
__N: decimal digits
__C: control characters
__X: hexadecimal-digit letters A-F and a-f
__A: alphabetic but neither uppercase nor lowercase, such as Japanese katakana.
A printable space character is defined as any character where
the result of both isprint() and isspace() is
true.
The first element in the array, the element located at -1,
must be zero. A skeletal implementation of the functions that return CTYPE data
is shown in Example 2.10.
There are also macros that define multibyte LC_CTYPE locales,
for example, LC_MBCTYPE_DEF. See the file rt_locale.h for
more information.
Example 2.10. LC_CTYPE_DEF Table
/*
 * CTYPE attribute table for the "C" locale.
 *
 * The initializer lists one attribute byte per character code, in code
 * order: 128 entries for 0x00-0x7F followed by 128 zero entries for
 * 0x80-0xFF.  Each entry is a sum of the __S/__P/__B/__L/__U/__N/__C/__X
 * attribute bits from ctype.h.
 *
 * NOTE(review): the element at index -1 (which must be zero) is not listed
 * here; presumably the __LC_CTYPE_DEF macro supplies it -- confirm against
 * rt_locale.h.
 */
__LC_CTYPE_DEF(lcctype_c, "C")
{
__C, __C, __C, __C, __C, __C, __C, __C, __C, /* 0x00-0x08 */
__C+__S,__C+__S,__C+__S,__C+__S,__C+__S, /* 0x09-0x0D (HT,LF,VT,FF,CR) */
__C, __C, __C, __C, __C, __C, __C, __C, __C, /* 0x0E-0x16 */
__C, __C, __C, __C, __C, __C, __C, __C, __C, /* 0x17-0x1F */
__B+__S, /* space */
__P, __P, __P, __P, __P, __P, __P, __P, /* !"#$%&'( */
__P, __P, __P, __P, __P, __P, __P, /* )*+,-./ */
__N, __N, __N, __N, __N, __N, __N, __N, __N, __N, /* 0-9 */
__P, __P, __P, __P, __P, __P, __P, /* :;<=>?@ */
__U+__X, __U+__X, __U+__X, __U+__X, __U+__X, __U+__X, /* A-F */
__U, __U, __U, __U, __U, __U, __U, __U, __U, __U, /* G-P */
__U, __U, __U, __U, __U, __U, __U, __U, __U, __U, /* Q-Z */
__P, __P, __P, __P, __P, __P, /* [\]^_` */
__L+__X, __L+__X, __L+__X, __L+__X, __L+__X, __L+__X, /* a-f */
__L, __L, __L, __L, __L, __L, __L, __L, __L, __L, /* g-p */
__L, __L, __L, __L, __L, __L, __L, __L, __L, __L, /* q-z */
__P, __P, __P, __P, /* {|}~ */
__C, /* 0x7F */
/* the whole of the top half (0x80-0xFF) is illegal characters: no attributes */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
/*
 * CTYPE attribute table for the "ISO8859-1" (Latin-1) locale.
 *
 * The lower half (0x00-0x7F) carries the same attributes as the "C" table;
 * the upper half (0x80-0xFF) classifies the Latin-1 characters as described
 * in the comment preceding it.  One attribute byte per character code, in
 * code order, 256 entries in total.
 *
 * NOTE(review): the element at index -1 (which must be zero) is not listed
 * here; presumably the __LC_CTYPE_DEF macro supplies it -- confirm against
 * rt_locale.h.
 */
__LC_CTYPE_DEF(lcctype_iso8859_1, "ISO8859-1")
{
__C, __C, __C, __C, __C, __C, __C, __C, __C, /* 0x00-0x08 */
__C+__S,__C+__S,__C+__S,__C+__S,__C+__S, /* 0x09-0x0D (HT,LF,VT,FF,CR) */
__C, __C, __C, __C, __C, __C, __C, __C, __C, /* 0x0E-0x16 */
__C, __C, __C, __C, __C, __C, __C, __C, __C, /* 0x17-0x1F */
__B+__S, /* space */
__P, __P, __P, __P, __P, __P, __P, __P, /* !"#$%&'( */
__P, __P, __P, __P, __P, __P, __P, /* )*+,-./ */
__N, __N, __N, __N, __N, __N, __N, __N, __N, __N, /* 0-9 */
__P, __P, __P, __P, __P, __P, __P, /* :;<=>?@ */
__U+__X, __U+__X, __U+__X, __U+__X, __U+__X, __U+__X, /* A-F */
__U, __U, __U, __U, __U, __U, __U, __U, __U, __U, /* G-P */
__U, __U, __U, __U, __U, __U, __U, __U, __U, __U, /* Q-Z */
__P, __P, __P, __P, __P, __P, /* [\]^_` */
__L+__X, __L+__X, __L+__X, __L+__X, __L+__X, __L+__X, /* a-f */
__L, __L, __L, __L, __L, __L, __L, __L, __L, __L, /* g-p */
__L, __L, __L, __L, __L, __L, __L, __L, __L, __L, /* q-z */
__P, __P, __P, __P, /* {|}~ */
__C, /* 0x7F */
/* ISO8859-1 top half:
* - 0x80-0x9f are control chars
* - 0xa0 is nonbreaking space (whitespace)
* - 0xa1-0xbf are punctuation chars
* - 0xc0-0xdf are uppercase chars except times sign at 0xd7
* - 0xe0-0xff are lowercase chars except divide sign at 0xf7
*
* NOTE(review): 0xdf is LATIN SMALL LETTER SHARP S in ISO8859-1, i.e. a
* lowercase letter, yet it is classified __U below.  Left unchanged here
* because the case-mapping code that may rely on this layout is not
* visible -- confirm before changing. */
__C,__C,__C,__C,__C,__C,__C,__C, /* 0x80 - 0x87 */
__C,__C,__C,__C,__C,__C,__C,__C, /* 0x88 - 0x8f */
__C,__C,__C,__C,__C,__C,__C,__C, /* 0x90 - 0x97 */
__C,__C,__C,__C,__C,__C,__C,__C, /* 0x98 - 0x9f */
__B+__S,__P,__P,__P,__P,__P,__P,__P, /* 0xa0 - 0xa7 */
__P,__P,__P,__P,__P,__P,__P,__P, /* 0xa8 - 0xaf */
__P,__P,__P,__P,__P,__P,__P,__P, /* 0xb0 - 0xb7 */
__P,__P,__P,__P,__P,__P,__P,__P, /* 0xb8 - 0xbf */
__U,__U,__U,__U,__U,__U,__U,__U, /* 0xc0 - 0xc7 */
__U,__U,__U,__U,__U,__U,__U,__U, /* 0xc8 - 0xcf */
__U,__U,__U,__U,__U,__U,__U,__P, /* 0xd0 - 0xd7 (0xd7 is the times sign) */
__U,__U,__U,__U,__U,__U,__U,__U, /* 0xd8 - 0xdf */
__L,__L,__L,__L,__L,__L,__L,__L, /* 0xe0 - 0xe7 */
__L,__L,__L,__L,__L,__L,__L,__L, /* 0xe8 - 0xef */
__L,__L,__L,__L,__L,__L,__L,__P, /* 0xf0 - 0xf7 (0xf7 is the divide sign) */
__L,__L,__L,__L,__L,__L,__L,__L, /* 0xf8 - 0xff */
};
/* NOTE(review): presumably terminates the list of locale definitions above
 * that _findlocale() searches, starting from lcctype_c_index -- the macro is
 * defined outside this file; confirm against rt_locale.h. */
_LC_INDEX_END(lcctype_dummy)
/*
 * Return the LC_CTYPE data for the locale called `name`.
 *
 * null: not used by this implementation.
 * name: locale name to look up, e.g. "C" or "ISO8859-1".
 *
 * The lookup is delegated to _findlocale(), starting at lcctype_c_index;
 * whatever _findlocale() returns is passed straight back to the caller.
 * NOTE(review): lcctype_c_index is presumably generated by the
 * __LC_CTYPE_DEF(lcctype_c, ...) definition -- confirm in rt_locale.h.
 */
void const *_get_lc_ctype(void const *null, char const *name)
{
    (void)null; /* deliberately ignored */

    return _findlocale(&lcctype_c_index, name);
}
/*
 * Check that switching LC_CTYPE between "C" and "ISO8859-1" changes the
 * classification of A-acute (0xC1) while leaving ASCII unaffected.
 *
 * EQS/EQI are test helpers defined elsewhere; presumably they compare two
 * strings / two integers for equality -- confirm against their definition.
 *
 * The <ctype.h> functions require an argument representable as unsigned char
 * (or EOF).  A plain '\xc1' is negative wherever char is signed, so the
 * value is converted through unsigned char once and reused.
 */
void test_lc_ctype(void) {
    const int aacute = (unsigned char)'\xc1'; /* 0xC1 regardless of char signedness */

    EQS(setlocale(LC_CTYPE, NULL), "C"); /* verify starting point */
    EQI(!!isalpha('@'), 0); /* test off-by-one */
    EQI(!!isalpha('A'), 1);
    EQI(!!isalpha(aacute), 0); /* C locale: isalpha(Aacute)==0 */

    EQI(!setlocale(LC_CTYPE, "ISO8859-1"), 0);
    EQS(setlocale(LC_CTYPE, NULL), "ISO8859-1");
    EQI(!!isalpha('@'), 0); /* test off-by-one */
    EQI(!!isalpha('A'), 1);
    EQI(!!isalpha(aacute), 1); /* ISO8859 locale: isalpha(Aacute)!=0 */

    EQI(!setlocale(LC_CTYPE, "C"), 0);
    EQS(setlocale(LC_CTYPE, NULL), "C");
    EQI(!!isalpha('@'), 0); /* test off-by-one */
    EQI(!!isalpha('A'), 1);
    EQI(!!isalpha(aacute), 0); /* C locale: isalpha(Aacute)==0 */
}