Home > The C and C++ Libraries > Tailoring locale and CTYPE using C macros > _get_lc_collate()
_get_lc_collate() must return a pointer
to the 0th entry in an array of unsigned bytes whose indexes range
from 0 to 255 inclusive (256 bytes total).
Each element gives the position in the collation sequence
of the character represented by the index of the element. For example,
if you want strcoll() to sort strings beginning with Z in
between those beginning with A and those beginning
with B, you can set up the LC_COLLATE table
so that array['A'] < array['Z'] and array['Z']
< array['B'].
_get_lc_collate() must return a pointer
to a collate structure. Use the macros in Example 2.11 to create the structure.
Example 2.11. LC_COLLATE_DEF Table
/*
 * LC_COLLATE definitions for two locales: "C" (trivial table) and
 * "ISO8859-1" (explicit table below).
 *
 * The explicit table has 256 entries, listed in character-code order
 * (0x00 first, 0xff last). Each entry is the position of that character
 * in the collation sequence. Base letters leave gaps after them, and the
 * accented letters in the 0xc0 - 0xff range are assigned to those gaps so
 * that they sort immediately after their base letter.
 */
__LC_COLLATE_TRIVIAL_DEF(lccoll_c, "C")
__LC_COLLATE_DEF(lccoll_iso8859_1, "ISO8859-1")
{
/* Things preceding letters have normal ASCII ordering */
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
/* 0x40 - 0x7f: each letter is followed by a gap sized for its variants */
0x40, /* @ */ 0x41, /* A - then 7 A variants */
0x49, /* B */ 0x4a, /* C - then 1 C variant */
0x4c, /* D */ 0x4d, /* E - then 4 E variants */
0x52, /* F */ 0x53, /* G */
0x54, /* H */ 0x55, /* I - then 4 I variants */
0x5a, /* J */ 0x5b, /* K */
0x5c, /* L */ 0x5d, /* M */
0x5e, /* N - then 1 N variant */
0x60, /* O - then 6 O variants */
0x67, /* P */ 0x68, /* Q */
0x69, /* R */ 0x6a, /* S */
0x6b, /* T */ 0x6c, /* U - then 4 U variants */
0x71, /* V */ 0x72, /* W */
0x73, /* X */ 0x74, /* Y - then 1 Y variant */
0x76, /* Z - then capital Eth & Thorn */
0x79, /* [ */ 0x7a, /* \ */
0x7b, /* ] */ 0x7c, /* ^ */
0x7d, /* _ */ 0x7e, /* ` */
0x7f, /* a - then 7 a variants */
0x87, /* b */ 0x88, /* c - then 1 c variant */
0x8a, /* d */ 0x8b, /* e - then 4 e variants */
0x90, /* f */ 0x91, /* g */
0x92, /* h */ 0x93, /* i - then 4 i variants */
0x98, /* j */ 0x99, /* k */
0x9a, /* l */ 0x9b, /* m */
0x9c, /* n - then 1 n variant */
0x9e, /* o - then 6 o variants */
0xa5, /* p */ 0xa6, /* q */
0xa7, /* r */ 0xa8, /* s - then 1 s variant */
0xaa, /* t */ 0xab, /* u - then 4 u variants */
0xb0, /* v */ 0xb1, /* w */
0xb2, /* x */ 0xb3, /* y - then 2 y variants */
0xb6, /* z - then eth & thorn */
0xb9, /* { */ 0xba, /* | */
0xbb, /* } */ 0xbc, /* ~ */
0xbd, /* del */
/* top bit set control characters */
0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd,
0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5,
0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd,
/* other non_alpha */
0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5,
0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed,
0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5,
0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd,
/*
 * 0xc0 - 0xff: accented letters, placed in the gaps left after their
 * base letters above. The multiply and divide signs are not letters and
 * take the last two positions (0xfe, 0xff).
 */
0x42, /* A grave */ 0x43, /* A acute */
0x44, /* A circumflex */
0x45, /* A tilde */ 0x46, /* A umlaut */
0x47, /* A ring */ 0x48, /* AE */
0x4b, /* C cedilla */ 0x4e, /* E grave */
0x4f, /* E acute */ 0x50, /* E circumflex */
0x51, /* E umlaut */ 0x56, /* I grave */
0x57, /* I acute */ 0x58, /* I circumflex */
0x59, /* I umlaut */ 0x77, /* Eth */
0x5f, /* N tilde */ 0x61, /* O grave */
0x62, /* O acute */ 0x63, /* O circumflex */
0x64, /* O tilde */ 0x65, /* O umlaut */
0xfe, /* multiply */ 0x66, /* O with line */
0x6d, /* U grave */ 0x6e, /* U acute */
0x6f, /* U circumflex */ 0x70, /* U umlaut */
0x75, /* Y acute */ 0x78, /* Thorn */
0xa9, /* german sz */ 0x80, /* a grave */
0x81, /* a acute */ 0x82, /* a circumflex */
0x83, /* a tilde */ 0x84, /* a umlaut */
0x85, /* a ring */ 0x86, /* ae */
0x89, /* c cedilla */ 0x8c, /* e grave */
0x8d, /* e acute */ 0x8e, /* e circumflex */
0x8f, /* e umlaut */ 0x94, /* i grave */
0x95, /* i acute */ 0x96, /* i circumflex */
0x97, /* i umlaut */ 0xb7, /* eth */
0x9d, /* n tilde */ 0x9f, /* o grave */
0xa0, /* o acute */ 0xa1, /* o circumflex */
0xa2, /* o tilde */ 0xa3, /* o umlaut */
0xff, /* divide */ 0xa4, /* o with line */
0xac, /* u grave */ 0xad, /* u acute */
0xae, /* u circumflex */ 0xaf, /* u umlaut */
0xb4, /* y acute */ 0xb8, /* thorn */
0xb5 /* y umlaut */
};
/* NOTE(review): presumably closes the list of collate definitions that
 * starts with lccoll_c - confirm against the macro's definition. */
__LC_INDEX_END(lccollate_dummy)
/**
 * Return the LC_COLLATE definition selected by a locale name.
 *
 * The lookup is delegated to _findlocale(), which is handed the address of
 * lccoll_c_index (presumably produced by the lccoll_c definition - confirm
 * against the macro) together with the requested name.
 *
 * @param null  Not used by this implementation.
 * @param name  Locale name to look up, for example "C" or "ISO8859-1".
 * @return The result of _findlocale() for that name, passed through
 *         unchanged.
 */
void const *_get_lc_collate(void const *null, char const *name)
{
    (void)null; /* the first argument plays no part in the lookup */

    return _findlocale(&lccoll_c_index, name);
}
/*
 * Checks strxfrm() and strcoll() under the "C" and "ISO8859-1" LC_COLLATE
 * settings, then switches back to "C" and repeats the first set of checks.
 *
 * The probe is the single character 0xEF, compared against "j".
 * EQS and EQI are defined elsewhere; presumably they check string and
 * integer equality respectively - confirm against their definitions.
 */
void test_lc_collate(void) {
char buf[5];
/* test both strxfrm and strcoll here */
EQS(setlocale(LC_COLLATE, NULL), "C"); /* verify starting point */
/* "C": 0xEF transforms to itself and does not collate before "j" */
EQS((strxfrm(buf, "\xEF", 4), buf), "\xEF");
EQI(strcoll("\xEF", "j") < 0, 0);
/* switching to "ISO8859-1" must succeed (setlocale returns non-NULL) */
EQI(!setlocale(LC_COLLATE, "ISO8859-1"), 0);
EQS(setlocale(LC_COLLATE, NULL), "ISO8859-1");
/* "ISO8859-1": 0xEF transforms to 0x97 and now collates before "j" */
EQS((strxfrm(buf, "\xEF", 4), buf), "\x97");
EQI(strcoll("\xEF", "j") < 0, 1);
/* back to "C": the original results must be restored */
EQI(!setlocale(LC_COLLATE, "C"), 0);
EQS(setlocale(LC_COLLATE, NULL), "C");
EQS((strxfrm(buf, "\xEF", 4), buf), "\xEF");
EQI(strcoll("\xEF", "j") < 0, 0);
}
The __LC_COLLATE_TRIVIAL_DEF macro defines
an array in which the value of each element is equal to its index.
__LC_COLLATE_TRIVIAL_DEF(lccoll_c, "C") is equivalent to the code in Example 2.12.
Example 2.12. LC_COLLATE_DEF
/*
 * Explicit form of the trivial "C" table: every entry holds its own index.
 * The "..." line stands for the rows 0x10 - 0xef, omitted here for brevity.
 */
__LC_COLLATE_DEF(lccoll_c, "C")
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
...
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};