4.6.5. _get_lc_collate()

_get_lc_collate() must return a pointer to the 0th entry in an array of unsigned bytes whose indexes range from 0 to 255 inclusive (256 bytes total).

Each element gives the position in the collation sequence of the character represented by the index of the element. For example, if you want strcoll() to sort strings beginning with Z in between those beginning with A and those beginning with B, you can set up the LC_COLLATE table so that array['A'] < array['Z'] and array['Z'] < array['B'].

_get_lc_collate() must return a pointer to a collate structure. Use the macros in Example 4.4 to create the structure.

Example 4.4. LC_COLLATE_DEF Table

/*
 * LC_COLLATE tables for the "C" and "ISO8859-1" locales.
 *
 * Each table has 256 entries indexed by character code; the entry is the
 * position of that character in the collation sequence.  The "C" table is
 * the trivial one (entry value == index).  The ISO8859-1 table below gives
 * every accented letter a position directly after its unaccented base
 * letter, so the plain letters in 0x40-0x7f leave gaps that are filled by
 * the accented entries at indexes 0xc0-0xff.
 */
__LC_COLLATE_TRIVIAL_DEF(lccoll_c, "C")	
__LC_COLLATE_DEF(lccoll_iso8859_1, "ISO8859-1")	
{
    /* indexes 0x00-0x3f */
    /* Things preceding letters have normal ASCII ordering */	
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,	
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,	
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,	
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,	
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,	
    0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,	
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	
    0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,	
    /* indexes 0x40-0x5f: '@', upper-case letters and following punctuation.
       "then N variants" means the next N collation positions are reserved
       for accented forms defined at indexes 0xc0-0xde. */
    0x40,  /* @ */    0x41,  /* A - then 7 A variants */	
    0x49,  /* B */    0x4a,  /* C - then 1 C variant */	
    0x4c,  /* D */    0x4d,  /* E - then 4 E variants */	
    0x52,  /* F */    0x53,  /* G */	
    0x54,  /* H */    0x55,  /* I - then 4 I variants */	
    0x5a,  /* J */    0x5b,  /* K */	
    0x5c,  /* L */    0x5d,  /* M */	
    0x5e,  /* N - then 1 N variant */	
    0x60,  /* O - then 6 O variants */	
    0x67,  /* P */    0x68,  /* Q */	
    0x69,  /* R */    0x6a,  /* S */	
    0x6b,  /* T */    0x6c,  /* U - then 4 U variants */	
    0x71,  /* V */    0x72,  /* W */	
    0x73,  /* X */    0x74,  /* Y - then 1 Y variant */	
    0x76,  /* Z - then capital Eth & Thorn */	
    0x79,  /* [ */    0x7a,  /* \ */	
    0x7b,  /* ] */    0x7c,  /* ^ */	
    0x7d,  /* _ */    0x7e,  /* ` */	
    /* indexes 0x61-0x7f: lower-case letters, following punctuation and DEL;
       gaps are filled by the accented forms at indexes 0xdf-0xff. */
    0x7f,  /* a - then 7 a variants */	
    0x87,  /* b */    0x88,  /* c - then 1 c variant */	
    0x8a,  /* d */    0x8b,  /* e - then 4 e variants */	
    0x90,  /* f */    0x91,  /* g */	
    0x92,  /* h */    0x93,  /* i - then 4 i variants */	
    0x98,  /* j */    0x99,  /* k */	
    0x9a,  /* l */    0x9b,  /* m */	
    0x9c,  /* n - then 1 n variant */	
    0x9e,  /* o - then 6 o variants */	
    0xa5,  /* p */    0xa6,  /* q */	
    0xa7,  /* r */    0xa8,  /* s - then 1 s variant */	
    0xaa,  /* t */    0xab,  /* u - then 4 u variants */	
    0xb0,  /* v */    0xb1,  /* w */	
    0xb2,  /* x */    0xb3,  /* y - then 2 y variants */	
    0xb6,  /* z - then eth & thorn */	
    0xb9,  /* { */    0xba,  /* | */	
    0xbb,  /* } */    0xbc,  /* ~ */	
    0xbd,  /* del */	
    /* indexes 0x80-0x9f */
    /* top bit set control characters */ 	
    0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,	
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd,	
    0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5,	
    0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd,	
    /* indexes 0xa0-0xbf */
    /* other non_alpha */
    0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5,	
    0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed,	
    0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5,	
    0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd,	
    /* indexes 0xc0-0xde: upper-case accented letters, each placed in the gap
       after its base letter; multiply (index 0xd7) is not a letter and
       sorts near the end at 0xfe. */
    0x42,  /* A grave */    0x43,  /* A acute */	
    0x44,  /* A circumflex */	
    0x45,  /* A tilde */    0x46,  /* A umlaut */	
    0x47,  /* A ring */     0x48,  /* AE */	
    0x4b,  /* C cedilla */  0x4e,  /* E grave */	
    0x4f,  /* E acute */    0x50,  /* E circumflex */	
    0x51,  /* E umlaut */   0x56,  /* I grave */	
    0x57,  /* I acute */    0x58,  /* I circumflex */	
    0x59,  /* I umlaut */   0x77,  /* Eth */	
    0x5f,  /* N tilde */    0x61,  /* O grave */	
    0x62,  /* O acute */    0x63,  /* O circumflex */	
    0x64,  /* O tilde */    0x65,  /* O umlaut */	
    0xfe,  /* multiply */   0x66,  /* O with line */	
    0x6d,  /* U grave */    0x6e,  /* U acute */	
    0x6f,  /* U circumflex */  0x70,  /* U umlaut */	
    0x75,  /* Y acute */    0x78,  /* Thorn */	
    /* indexes 0xdf-0xff: german sz and lower-case accented letters; divide
       (index 0xf7) is not a letter and sorts last at 0xff. */
    0xa9,  /* german sz */  0x80,  /* a grave */	
    0x81,  /* a acute */    0x82,  /* a circumflex */		
    0x83,  /* a tilde */    0x84,  /* a umlaut */	
    0x85,  /* a ring */     0x86,  /* ae */	
    0x89,  /* c cedilla */  0x8c,  /* e grave */	
    0x8d,  /* e acute */    0x8e,  /* e circumflex */	
    0x8f,  /* e umlaut */   0x94,  /* i grave */	
    0x95,  /* i acute */    0x96,  /* i circumflex */	
    0x97,  /* i umlaut */   0xb7,  /* eth */	
    0x9d,  /* n tilde */    0x9f,  /* o grave */	
    0xa0,  /* o acute */    0xa1,  /* o circumflex */		
    0xa2,  /* o tilde */    0xa3,  /* o umlaut */	
    0xff,  /* divide  */    0xa4,  /* o with line */	
    0xac,  /* u grave */    0xad,  /* u acute */	
    0xae,  /* u circumflex */ 0xaf,  /* u umlaut */	
    0xb4,  /* y acute */    0xb8,  /* thorn */	
    0xb5   /* y umlaut */	
};
/* NOTE(review): presumably marks the end of the list of collate locale
   entries - confirm against the definition of __LC_INDEX_END. */
__LC_INDEX_END(lccollate_dummy)
/*
 * LC_COLLATE lookup hook.
 *
 * Passes the address of lccoll_c_index together with the requested locale
 * name to _findlocale() and hands its result back to the caller unchanged.
 *
 * null - not used by this implementation.
 * name - name of the locale to look up (the tables in this example are
 *        registered as "C" and "ISO8859-1").
 */
void const *_get_lc_collate(void const *null, char const *name)
{
    void const *const collate_entry = _findlocale(&lccoll_c_index, name);

    (void)null;    /* parameter is intentionally ignored */
    return collate_entry;
}
/*
 * Checks that switching LC_COLLATE between "C" and "ISO8859-1" changes the
 * results of strxfrm() and strcoll().
 *
 * The probe byte is 0xEF: in the ISO8859-1 table it is "i umlaut" with
 * collation value 0x97, which sorts just before 'j' (0x98).  In the "C"
 * locale the byte keeps its own value, so it does not sort before 'j'.
 *
 * NOTE(review): EQS and EQI are defined elsewhere; presumably they compare
 * two strings and two integers respectively - confirm against their
 * definitions.  The checks depend on the locale set by the preceding
 * setlocale() calls, so their order matters.
 */
void test_lc_collate(void) {
    char buf[5];
    /* test both strxfrm and strcoll here */	
    EQS(setlocale(LC_COLLATE, NULL), "C");         /* verify starting point */	
    EQS((strxfrm(buf, "\xEF", 4), buf), "\xEF");   /* "C": byte is left unchanged */
    EQI(strcoll("\xEF", "j") < 0, 0);              /* "C": 0xEF is not before 'j' */
    EQI(!setlocale(LC_COLLATE, "ISO8859-1"), 0);   /* setlocale should work */	
    EQS(setlocale(LC_COLLATE, NULL), "ISO8859-1");	
    EQS((strxfrm(buf, "\xEF", 4), buf), "\x97");   /* i umlaut maps to 0x97 */
    EQI(strcoll("\xEF", "j") < 0, 1);              /* now sorts before 'j' */
    EQI(!setlocale(LC_COLLATE, "C"), 0);           /* setlocale should work */	
    EQS(setlocale(LC_COLLATE, NULL), "C");	
    EQS((strxfrm(buf, "\xEF", 4), buf), "\xEF");   /* back to "C" behaviour */
    EQI(strcoll("\xEF", "j") < 0, 0);	
}

The __LC_COLLATE_TRIVIAL_DEF macro defines an array in which the value of each element is equal to its index. __LC_COLLATE_TRIVIAL_DEF(lccoll_c, "C") is therefore equivalent to the code in Example 4.5.

Example 4.5. LC_COLLATE_DEF

/* Identity collation table: the entry at index i holds the value i. */
__LC_COLLATE_DEF(lccoll_c, "C")	
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 	
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,	
/* entries 0x10 through 0xef are omitted from this listing */
...
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,	
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff	
};
Copyright © 1999-2001 ARM Limited. All rights reserved. ARM DUI 0067D
Non-Confidential