Clone of PhatAC @ https://github.com/floaterxk/PhatAC

m_ctype.h 33KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807
  1. /* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
  2. This program is free software; you can redistribute it and/or modify
  3. it under the terms of the GNU General Public License as published by
  4. the Free Software Foundation; version 2 of the License.
  5. This program is distributed in the hope that it will be useful,
  6. but WITHOUT ANY WARRANTY; without even the implied warranty of
  7. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  8. GNU General Public License for more details.
  9. You should have received a copy of the GNU General Public License
  10. along with this program; if not, write to the Free Software
  11. Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
  12. /*
  13. A better implementation of the UNIX ctype(3) library.
  14. */
  15. #ifndef _m_ctype_h
  16. #define _m_ctype_h
  17. #include "my_global.h" /* uint16, uchar */
  18. #ifdef __cplusplus
  19. extern "C" {
  20. #endif
  /* Size limits, the charset directory name and the wide-character type used by this header. */
  21. #define MY_CS_NAME_SIZE 32
  22. #define MY_CS_CTYPE_TABLE_SIZE 257 /* one more than the 256-entry tables; the my_is*() macros index ctype+1 */
  23. #define MY_CS_TO_LOWER_TABLE_SIZE 256
  24. #define MY_CS_TO_UPPER_TABLE_SIZE 256
  25. #define MY_CS_SORT_ORDER_TABLE_SIZE 256
  26. #define MY_CS_TO_UNI_TABLE_SIZE 256
  27. #define CHARSET_DIR "charsets/"
  28. #define my_wc_t ulong /* NOTE: a macro alias, not a typedef */
  29. #define MY_CS_REPLACEMENT_CHARACTER 0xFFFD /* U+FFFD */
  30. /*
  31. On i386 we store Unicode->CS conversion tables for
  32. some character sets using Big-endian order,
  33. to copy two bytes at once.
  34. This gives some performance improvement.
  35. */
  /*
    MB2(x)              - a two-byte code as it is kept in the conversion tables.
    MY_PUT_MB2(s, code) - store the two-byte value `code` at `s`.

    On i386 the table value is byte-swapped by MB2() so that a single native
    (little-endian) 16-bit store leaves the high byte first in memory.
    Everywhere else the value is kept as is and written one byte at a time,
    high byte first.

    Both macros parenthesize their arguments, so expressions such as
    `hi | lo` can be passed safely. `s` and `code` are still evaluated more
    than once in the portable branch: do not pass expressions with side
    effects.
  */
  #ifdef __i386__
  #define MB2(x) (((x) >> 8) + (((x) & 0xFF) << 8))
  #define MY_PUT_MB2(s, code) { *((uint16*)(s))= (code); }
  #else
  #define MB2(x) (x)
  #define MY_PUT_MB2(s, code) { (s)[0]= (code) >> 8; (s)[1]= (code) & 0xFF; }
  #endif
  /* Three 32-bit values kept per character; presumably the code points of its upper-case, lower-case and sort forms - confirm against the table generators. */
  43. typedef struct unicase_info_char_st
  44. {
  45. uint32 toupper;
  46. uint32 tolower;
  47. uint32 sort;
  48. } MY_UNICASE_CHARACTER;
  /* A case-mapping table: a maximum character value plus an array of pointers to MY_UNICASE_CHARACTER pages. */
  49. typedef struct unicase_info_st
  50. {
  51. my_wc_t maxchar; /* presumably the highest character covered by `page` - confirm */
  52. const MY_UNICASE_CHARACTER **page; /* array of pointers to pages of per-character records */
  53. } MY_UNICASE_INFO;
  /* Case-mapping tables defined outside this header. */
  54. extern MY_UNICASE_INFO my_unicase_default;
  55. extern MY_UNICASE_INFO my_unicase_turkish;
  56. extern MY_UNICASE_INFO my_unicase_mysql500;
  57. extern MY_UNICASE_INFO my_unicase_unicode520;
  58. #define MY_UCA_MAX_CONTRACTION 6 /* capacity of MY_CONTRACTION.ch */
  59. #define MY_UCA_MAX_WEIGHT_SIZE 8 /* capacity of MY_CONTRACTION.weight, terminator included */
  60. #define MY_UCA_WEIGHT_LEVELS 1 /* number of entries in MY_UCA_INFO.level */
  /* One contraction: a character sequence together with its weight string. */
  61. typedef struct my_contraction_t
  62. {
  63. my_wc_t ch[MY_UCA_MAX_CONTRACTION]; /* Character sequence */
  64. uint16 weight[MY_UCA_MAX_WEIGHT_SIZE];/* Its weight string, 0-terminated */
  65. my_bool with_context; /* NOTE(review): meaning not visible in this header - confirm */
  66. } MY_CONTRACTION;
  /* A list of contractions plus per-character flags. */
  67. typedef struct my_contraction_list_t
  68. {
  69. size_t nitems; /* Number of items in the list */
  70. MY_CONTRACTION *item; /* List of contractions */
  71. char *flags; /* Character flags, e.g. "is contraction head" */
  72. } MY_CONTRACTIONS;
  /* Queries on a contraction list; defined outside this header. */
  73. my_bool my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc);
  74. my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc);
  75. uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
  76. my_wc_t wc1, my_wc_t wc2);
  77. /* Collation weights on a single level (e.g. primary, secondary, tertiary) */
  78. typedef struct my_uca_level_info_st
  79. {
  80. my_wc_t maxchar;
  81. uchar *lengths; /* NOTE(review): presumably per-page weight lengths - confirm */
  82. uint16 **weights; /* array of pointers to weight tables */
  83. MY_CONTRACTIONS contractions; /* contractions that apply on this level */
  84. } MY_UCA_WEIGHT_LEVEL;
  /* UCA data for a collation: MY_UCA_WEIGHT_LEVELS weight levels followed by first/last markers for each logical position. */
  85. typedef struct uca_info_st
  86. {
  87. MY_UCA_WEIGHT_LEVEL level[MY_UCA_WEIGHT_LEVELS];
  88. /* Logical positions */
  89. my_wc_t first_non_ignorable;
  90. my_wc_t last_non_ignorable;
  91. my_wc_t first_primary_ignorable;
  92. my_wc_t last_primary_ignorable;
  93. my_wc_t first_secondary_ignorable;
  94. my_wc_t last_secondary_ignorable;
  95. my_wc_t first_tertiary_ignorable;
  96. my_wc_t last_tertiary_ignorable;
  97. my_wc_t first_trailing;
  98. my_wc_t last_trailing;
  99. my_wc_t first_variable;
  100. my_wc_t last_variable;
  101. } MY_UCA_INFO;
  102. extern MY_UCA_INFO my_uca_v400; /* defined outside this header */
  /* Character-type data: a single type byte plus an optional table of type bytes. */
  103. typedef struct uni_ctype_st
  104. {
  105. uchar pctype; /* NOTE(review): presumably the type shared by a whole page when `ctype` is not set - confirm */
  106. uchar *ctype; /* NOTE(review): presumably per-character types for the page - confirm */
  107. } MY_UNI_CTYPE;
  108. extern MY_UNI_CTYPE my_uni_ctype[256]; /* 256 entries; defined outside this header */
  109. /* mb_wc and wc_mb return codes */
  110. #define MY_CS_ILSEQ 0 /* Wrong byte sequence: mb_wc */
  111. #define MY_CS_ILUNI 0 /* Cannot encode Unicode to charset: wc_mb */
  112. #define MY_CS_TOOSMALL -101 /* Need at least one byte: wc_mb and mb_wc */
  113. #define MY_CS_TOOSMALL2 -102 /* Need at least two bytes: wc_mb and mb_wc */
  114. #define MY_CS_TOOSMALL3 -103 /* Need at least three bytes: wc_mb and mb_wc */
  115. /* These following three are currently not really used */
  116. #define MY_CS_TOOSMALL4 -104 /* Need at least 4 bytes: wc_mb and mb_wc */
  117. #define MY_CS_TOOSMALL5 -105 /* Need at least 5 bytes: wc_mb and mb_wc */
  118. #define MY_CS_TOOSMALL6 -106 /* Need at least 6 bytes: wc_mb and mb_wc */
  119. /* A helper macro for "need at least n bytes"; yields the MY_CS_TOOSMALL* values above for n = 1..6 */
  120. #define MY_CS_TOOSMALLN(n) (-100-(n))
  121. #define MY_SEQ_INTTAIL 1
  122. #define MY_SEQ_SPACES 2
  123. /* My charsets_list flags; single-bit values, tested against CHARSET_INFO.state by my_binary_compare() and use_strnxfrm() */
  124. #define MY_CS_COMPILED 1 /* compiled-in sets */
  125. #define MY_CS_CONFIG 2 /* sets that have a *.conf file */
  126. #define MY_CS_INDEX 4 /* sets listed in the Index file */
  127. #define MY_CS_LOADED 8 /* sets that are currently loaded */
  128. #define MY_CS_BINSORT 16 /* if binary sort order */
  129. #define MY_CS_PRIMARY 32 /* if primary collation */
  130. #define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */
  131. #define MY_CS_UNICODE 128 /* if a charset is BMP Unicode */
  132. #define MY_CS_READY 256 /* if a charset is initialized */
  133. #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded */
  134. #define MY_CS_CSSORT 1024 /* if case sensitive sort order */
  135. #define MY_CS_HIDDEN 2048 /* don't display in SHOW */
  136. #define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */
  137. #define MY_CS_NONASCII 8192 /* if not ASCII-compatible */
  138. #define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
  139. #define MY_CS_LOWER_SORT 32768 /* If use lower case as weight */
  140. #define MY_CHARSET_UNDEFINED 0
  141. /* Character repertoire flags */
  142. #define MY_REPERTOIRE_ASCII 1 /* Pure ASCII U+0000..U+007F */
  143. #define MY_REPERTOIRE_EXTENDED 2 /* Extended characters: U+0080..U+FFFF */
  144. #define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */
  /*
    Flags for strxfrm.

    Bits 0..5 select the weight levels. Each DESC flag is the matching LEVEL
    flag shifted left by MY_STRXFRM_DESC_SHIFT, and each REVERSE flag is the
    matching LEVEL flag shifted left by MY_STRXFRM_REVERSE_SHIFT, so that
    (MY_STRXFRM_DESC_LEVEL1 << n) and (MY_STRXFRM_REVERSE_LEVEL1 << n)
    address level n+1.

    MY_STRXFRM_DESC_LEVEL3 must therefore be 0x00000400; the value 0x00000300
    would be DESC_LEVEL1 | DESC_LEVEL2 and would not address level 3 at all.
  */
  #define MY_STRXFRM_LEVEL1 0x00000001 /* for primary weights */
  #define MY_STRXFRM_LEVEL2 0x00000002 /* for secondary weights */
  #define MY_STRXFRM_LEVEL3 0x00000004 /* for tertiary weights */
  #define MY_STRXFRM_LEVEL4 0x00000008 /* fourth level weights */
  #define MY_STRXFRM_LEVEL5 0x00000010 /* fifth level weights */
  #define MY_STRXFRM_LEVEL6 0x00000020 /* sixth level weights */
  #define MY_STRXFRM_LEVEL_ALL 0x0000003F /* Bit OR for the above six */
  #define MY_STRXFRM_NLEVELS 6 /* Number of possible levels*/
  #define MY_STRXFRM_PAD_WITH_SPACE 0x00000040 /* if pad result with spaces */
  #define MY_STRXFRM_PAD_TO_MAXLEN 0x00000080 /* if pad tail(for filesort) */
  #define MY_STRXFRM_DESC_LEVEL1 0x00000100 /* if desc order for level1 */
  #define MY_STRXFRM_DESC_LEVEL2 0x00000200 /* if desc order for level2 */
  #define MY_STRXFRM_DESC_LEVEL3 0x00000400 /* if desc order for level3 */
  #define MY_STRXFRM_DESC_LEVEL4 0x00000800 /* if desc order for level4 */
  #define MY_STRXFRM_DESC_LEVEL5 0x00001000 /* if desc order for level5 */
  #define MY_STRXFRM_DESC_LEVEL6 0x00002000 /* if desc order for level6 */
  #define MY_STRXFRM_DESC_SHIFT 8
  #define MY_STRXFRM_UNUSED_00004000 0x00004000 /* for future extensions */
  #define MY_STRXFRM_UNUSED_00008000 0x00008000 /* for future extensions */
  #define MY_STRXFRM_REVERSE_LEVEL1 0x00010000 /* if reverse order for level1 */
  #define MY_STRXFRM_REVERSE_LEVEL2 0x00020000 /* if reverse order for level2 */
  #define MY_STRXFRM_REVERSE_LEVEL3 0x00040000 /* if reverse order for level3 */
  #define MY_STRXFRM_REVERSE_LEVEL4 0x00080000 /* if reverse order for level4 */
  #define MY_STRXFRM_REVERSE_LEVEL5 0x00100000 /* if reverse order for level5 */
  #define MY_STRXFRM_REVERSE_LEVEL6 0x00200000 /* if reverse order for level6 */
  #define MY_STRXFRM_REVERSE_SHIFT 16
  /* One entry of a CHARSET_INFO.tab_from_uni list: a 16-bit range and a byte table. */
  172. typedef struct my_uni_idx_st
  173. {
  174. uint16 from; /* NOTE(review): presumably the first Unicode value covered by `tab` - confirm */
  175. uint16 to; /* NOTE(review): presumably the last Unicode value covered by `tab` - confirm */
  176. const uchar *tab;
  177. } MY_UNI_IDX;
  /* Match descriptor filled in through the `match` argument of the instr() collation hook. */
  178. typedef struct
  179. {
  180. uint beg; /* NOTE(review): presumably the start offset of the match - confirm */
  181. uint end; /* NOTE(review): presumably the end offset of the match - confirm */
  182. uint mb_len; /* NOTE(review): presumably the match length in characters - confirm */
  183. } my_match_t;
  184. struct charset_info_st; /* forward declaration; the full type is defined further down as CHARSET_INFO */
  /*
    Callback bundle handed to the init() hooks of MY_COLLATION_HANDLER and
    MY_CHARSET_HANDLER and to my_parse_charset_xml(): an error text buffer,
    memory management callbacks, a log reporter and a hook for adding a
    collation.
  */
  185. typedef struct my_charset_loader_st
  186. {
  187. char error[128]; /* error message buffer */
  188. void *(*once_alloc)(size_t);
  189. void *(*mem_malloc)(size_t);
  190. void *(*mem_realloc)(void *, size_t);
  191. void (*mem_free)(void *);
  192. void (*reporter)(enum loglevel, const char *format, ...); /* printf-style format plus variable arguments */
  193. int (*add_collation)(struct charset_info_st *cs);
  194. } MY_CHARSET_LOADER;
  195. extern int (*my_string_stack_guard)(int); /* function pointer variable defined outside this header */
  196. /* See strings/CHARSET_INFO.txt for information about this structure */
  /*
    Table of collation function pointers. A CHARSET_INFO points at one of
    these through its `coll` member, and the my_strnxfrm(), my_strnncoll(),
    my_like_range() and my_wildcmp() macros dispatch through it.
  */
  197. typedef struct my_collation_handler_st
  198. {
  199. my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *);
  200. /* Collation routines */
  /* The trailing my_bool is passed as 0 by the my_strnncoll() macro; my_strnncoll_mb_bin() names it t_is_prefix */
  201. int (*strnncoll)(const struct charset_info_st *,
  202. const uchar *, size_t, const uchar *, size_t, my_bool);
  203. int (*strnncollsp)(const struct charset_info_st *,
  204. const uchar *, size_t, const uchar *, size_t,
  205. my_bool diff_if_only_endspace_difference);
  /* The my_strnxfrm() macro passes the destination length as both dstlen and nweights */
  206. size_t (*strnxfrm)(const struct charset_info_st *,
  207. uchar *dst, size_t dstlen, uint nweights,
  208. const uchar *src, size_t srclen, uint flags);
  209. size_t (*strnxfrmlen)(const struct charset_info_st *, size_t);
  210. my_bool (*like_range)(const struct charset_info_st *,
  211. const char *s, size_t s_length,
  212. pchar w_prefix, pchar w_one, pchar w_many,
  213. size_t res_length,
  214. char *min_str, char *max_str,
  215. size_t *min_len, size_t *max_len);
  216. int (*wildcmp)(const struct charset_info_st *,
  217. const char *str,const char *str_end,
  218. const char *wildstr,const char *wildend,
  219. int escape,int w_one, int w_many);
  220. int (*strcasecmp)(const struct charset_info_st *, const char *,
  221. const char *);
  222. uint (*instr)(const struct charset_info_st *,
  223. const char *b, size_t b_length,
  224. const char *s, size_t s_length,
  225. my_match_t *match, uint nmatch);
  226. /* Hash calculation */
  227. void (*hash_sort)(const struct charset_info_st *cs, const uchar *key,
  228. size_t len, ulong *nr1, ulong *nr2);
  229. my_bool (*propagate)(const struct charset_info_st *cs, const uchar *str,
  230. size_t len);
  231. } MY_COLLATION_HANDLER;
  /* Collation handler tables defined outside this header. */
  232. extern MY_COLLATION_HANDLER my_collation_mb_bin_handler;
  233. extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler;
  234. extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
  235. extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
  236. /* Some typedefs to make it easy for C++ to make function pointers */
  /* Signature of the mb_wc member of MY_CHARSET_HANDLER; my_mb_wc_8bit() has this shape */
  237. typedef int (*my_charset_conv_mb_wc)(const struct charset_info_st *,
  238. my_wc_t *, const uchar *, const uchar *);
  /* Signature of the wc_mb member of MY_CHARSET_HANDLER; my_wc_mb_8bit() has this shape */
  239. typedef int (*my_charset_conv_wc_mb)(const struct charset_info_st *, my_wc_t,
  240. uchar *, uchar *);
  /* Signature of the caseup/casedn members; my_caseup_8bit() names the arguments src, srclen, dst, dstlen */
  241. typedef size_t (*my_charset_conv_case)(const struct charset_info_st *,
  242. char *, size_t, char *, size_t);
  243. /* See strings/CHARSET_INFO.txt about information on this structure */
  /*
    Table of character-set function pointers. A CHARSET_INFO points at one of
    these through its `cset` member.
  */
  244. typedef struct my_charset_handler_st
  245. {
  246. my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader);
  247. /* Multibyte routines */
  248. uint (*ismbchar)(const struct charset_info_st *, const char *,
  249. const char *);
  250. uint (*mbcharlen)(const struct charset_info_st *, uint c);
  251. size_t (*numchars)(const struct charset_info_st *, const char *b,
  252. const char *e);
  253. size_t (*charpos)(const struct charset_info_st *, const char *b,
  254. const char *e, size_t pos);
  255. size_t (*well_formed_len)(const struct charset_info_st *,
  256. const char *b,const char *e,
  257. size_t nchars, int *error);
  258. size_t (*lengthsp)(const struct charset_info_st *, const char *ptr,
  259. size_t length);
  260. size_t (*numcells)(const struct charset_info_st *, const char *b,
  261. const char *e);
  262. /* Unicode conversion */
  263. my_charset_conv_mb_wc mb_wc;
  264. my_charset_conv_wc_mb wc_mb;
  265. /* CTYPE scanner */
  266. int (*ctype)(const struct charset_info_st *cs, int *ctype,
  267. const uchar *s, const uchar *e);
  268. /* Functions for case and sort conversion */
  269. size_t (*caseup_str)(const struct charset_info_st *, char *);
  270. size_t (*casedn_str)(const struct charset_info_st *, char *);
  271. my_charset_conv_case caseup;
  272. my_charset_conv_case casedn;
  273. /* Charset dependent snprintf(); the format string is argument 4 and the variable arguments start at 5 */
  274. size_t (*snprintf)(const struct charset_info_st *, char *to, size_t n,
  275. const char *fmt,
  276. ...) MY_ATTRIBUTE((format(printf, 4, 5)));
  277. size_t (*long10_to_str)(const struct charset_info_st *, char *to, size_t n,
  278. int radix, long int val);
  279. size_t (*longlong10_to_str)(const struct charset_info_st *, char *to,
  280. size_t n, int radix, longlong val);
  281. void (*fill)(const struct charset_info_st *, char *to, size_t len,
  282. int fill);
  283. /* String-to-number conversion routines */
  284. long (*strntol)(const struct charset_info_st *, const char *s,
  285. size_t l, int base, char **e, int *err);
  286. ulong (*strntoul)(const struct charset_info_st *, const char *s,
  287. size_t l, int base, char **e, int *err);
  288. longlong (*strntoll)(const struct charset_info_st *, const char *s,
  289. size_t l, int base, char **e, int *err);
  290. ulonglong (*strntoull)(const struct charset_info_st *, const char *s,
  291. size_t l, int base, char **e, int *err);
  /* NOTE(review): unlike the other strnto*() hooks, this one takes a non-const input string */
  292. double (*strntod)(const struct charset_info_st *, char *s,
  293. size_t l, char **e, int *err);
  294. longlong (*strtoll10)(const struct charset_info_st *cs,
  295. const char *nptr, char **endptr, int *error);
  296. ulonglong (*strntoull10rnd)(const struct charset_info_st *cs,
  297. const char *str, size_t length,
  298. int unsigned_fl,
  299. char **endptr, int *error);
  300. size_t (*scan)(const struct charset_info_st *, const char *b,
  301. const char *e, int sq);
  302. } MY_CHARSET_HANDLER;
  /* Character-set handler tables defined outside this header. */
  303. extern MY_CHARSET_HANDLER my_charset_8bit_handler;
  304. extern MY_CHARSET_HANDLER my_charset_ascii_handler;
  305. extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
  306. /*
  307. We define this CHARSET_INFO_DEFINED here to prevent a repeat of the
  308. typedef in hash.c, which will cause a compiler error.
  309. */
  310. #define CHARSET_INFO_DEFINED
  311. /* See strings/CHARSET_INFO.txt about information on this structure */
  /*
    Descriptor of one character set / collation pair: identification numbers
    and names, lookup tables, length parameters and the two handler tables
    (`cset`, `coll`) that implement its operations.
  */
  312. typedef struct charset_info_st
  313. {
  314. uint number;
  315. uint primary_number;
  316. uint binary_number;
  317. uint state; /* tested against MY_CS_* flags, e.g. by my_binary_compare() and use_strnxfrm() */
  318. const char *csname;
  319. const char *name;
  320. const char *comment;
  321. const char *tailoring;
  322. const uchar *ctype; /* the my_is*() macros read ctype[1 + (uchar) c] */
  323. const uchar *to_lower; /* read by my_tolower(), indexed by (uchar) c */
  324. const uchar *to_upper; /* read by my_toupper(), indexed by (uchar) c */
  325. const uchar *sort_order;
  326. MY_UCA_INFO *uca; /* This can be changed in apply_one_rule() */
  327. const uint16 *tab_to_uni;
  328. const MY_UNI_IDX *tab_from_uni;
  329. const MY_UNICASE_INFO *caseinfo;
  330. const struct lex_state_maps_st *state_maps; /* parser internal data */
  331. const uchar *ident_map; /* parser internal data */
  332. uint strxfrm_multiply;
  333. uchar caseup_multiply;
  334. uchar casedn_multiply;
  335. uint mbminlen;
  336. uint mbmaxlen;
  337. uint mbmaxlenlen;
  338. my_wc_t min_sort_char;
  339. my_wc_t max_sort_char; /* For LIKE optimization */
  340. uchar pad_char;
  341. my_bool escape_with_backslash_is_dangerous;
  342. uchar levels_for_compare;
  343. uchar levels_for_order;
  344. MY_CHARSET_HANDLER *cset; /* character-set operations */
  345. MY_COLLATION_HANDLER *coll; /* collation operations; used by my_strnxfrm(), my_strnncoll(), my_like_range(), my_wildcmp() */
  346. } CHARSET_INFO;
  347. #define ILLEGAL_CHARSET_INFO_NUMBER (~0U) /* all bits set in an unsigned int */
  348. extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin;
  349. extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
  350. extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
  351. extern CHARSET_INFO my_charset_big5_chinese_ci;
  352. extern CHARSET_INFO my_charset_big5_bin;
  353. extern CHARSET_INFO my_charset_cp932_japanese_ci;
  354. extern CHARSET_INFO my_charset_cp932_bin;
  355. extern CHARSET_INFO my_charset_cp1250_czech_ci;
  356. extern CHARSET_INFO my_charset_eucjpms_japanese_ci;
  357. extern CHARSET_INFO my_charset_eucjpms_bin;
  358. extern CHARSET_INFO my_charset_euckr_korean_ci;
  359. extern CHARSET_INFO my_charset_euckr_bin;
  360. extern CHARSET_INFO my_charset_gb2312_chinese_ci;
  361. extern CHARSET_INFO my_charset_gb2312_bin;
  362. extern CHARSET_INFO my_charset_gbk_chinese_ci;
  363. extern CHARSET_INFO my_charset_gbk_bin;
  364. extern CHARSET_INFO my_charset_gb18030_chinese_ci;
  365. extern CHARSET_INFO my_charset_gb18030_bin;
  366. extern CHARSET_INFO my_charset_latin1_german2_ci;
  367. extern CHARSET_INFO my_charset_latin1_bin;
  368. extern CHARSET_INFO my_charset_latin2_czech_ci;
  369. extern CHARSET_INFO my_charset_sjis_japanese_ci;
  370. extern CHARSET_INFO my_charset_sjis_bin;
  371. extern CHARSET_INFO my_charset_tis620_thai_ci;
  372. extern CHARSET_INFO my_charset_tis620_bin;
  373. extern CHARSET_INFO my_charset_ucs2_general_ci;
  374. extern CHARSET_INFO my_charset_ucs2_bin;
  375. extern CHARSET_INFO my_charset_ucs2_unicode_ci;
  376. extern CHARSET_INFO my_charset_ucs2_general_mysql500_ci;
  377. extern CHARSET_INFO my_charset_ujis_japanese_ci;
  378. extern CHARSET_INFO my_charset_ujis_bin;
  379. extern CHARSET_INFO my_charset_utf16_bin;
  380. extern CHARSET_INFO my_charset_utf16_general_ci;
  381. extern CHARSET_INFO my_charset_utf16_unicode_ci;
  382. extern CHARSET_INFO my_charset_utf16le_bin;
  383. extern CHARSET_INFO my_charset_utf16le_general_ci;
  384. extern CHARSET_INFO my_charset_utf32_bin;
  385. extern CHARSET_INFO my_charset_utf32_general_ci;
  386. extern CHARSET_INFO my_charset_utf32_unicode_ci;
  387. extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_utf8_general_ci;
  388. extern CHARSET_INFO my_charset_utf8_tolower_ci;
  389. extern CHARSET_INFO my_charset_utf8_unicode_ci;
  390. extern CHARSET_INFO my_charset_utf8_bin;
  391. extern CHARSET_INFO my_charset_utf8_general_mysql500_ci;
  392. extern CHARSET_INFO my_charset_utf8mb4_bin;
  393. extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_utf8mb4_general_ci;
  394. extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
  /* Name strings; presumably the character set names of the 3-byte and 4-byte UTF-8 variants - confirm against the users of these macros. */
  395. #define MY_UTF8MB3 "utf8"
  396. #define MY_UTF8MB4 "utf8mb4"
  397. /* declarations for simple charsets */
  398. extern size_t my_strnxfrm_simple(const CHARSET_INFO *,
  399. uchar *dst, size_t dstlen, uint nweights,
  400. const uchar *src, size_t srclen, uint flags);
  401. size_t my_strnxfrmlen_simple(const CHARSET_INFO *, size_t);
  402. extern int my_strnncoll_simple(const CHARSET_INFO *, const uchar *, size_t,
  403. const uchar *, size_t, my_bool);
  404. extern int my_strnncollsp_simple(const CHARSET_INFO *, const uchar *, size_t,
  405. const uchar *, size_t,
  406. my_bool diff_if_only_endspace_difference);
  407. extern void my_hash_sort_simple(const CHARSET_INFO *cs,
  408. const uchar *key, size_t len,
  409. ulong *nr1, ulong *nr2);
  410. extern size_t my_lengthsp_8bit(const CHARSET_INFO *cs, const char *ptr,
  411. size_t length);
  412. extern uint my_instr_simple(const struct charset_info_st *,
  413. const char *b, size_t b_length,
  414. const char *s, size_t s_length,
  415. my_match_t *match, uint nmatch);
  416. /* Functions for 8bit */
  417. extern size_t my_caseup_str_8bit(const CHARSET_INFO *, char *);
  418. extern size_t my_casedn_str_8bit(const CHARSET_INFO *, char *);
  419. extern size_t my_caseup_8bit(const CHARSET_INFO *, char *src, size_t srclen,
  420. char *dst, size_t dstlen);
  421. extern size_t my_casedn_8bit(const CHARSET_INFO *, char *src, size_t srclen,
  422. char *dst, size_t dstlen);
  423. extern int my_strcasecmp_8bit(const CHARSET_INFO * cs, const char *,
  424. const char *);
  425. int my_mb_wc_8bit(const CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,
  426. const uchar *e);
  427. int my_wc_mb_8bit(const CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
  428. int my_mb_ctype_8bit(const CHARSET_INFO *,int *, const uchar *,const uchar *);
  429. int my_mb_ctype_mb(const CHARSET_INFO *,int *, const uchar *,const uchar *);
  430. size_t my_scan_8bit(const CHARSET_INFO *cs, const char *b, const char *e,
  431. int sq);
  432. size_t my_snprintf_8bit(const struct charset_info_st *, char *to, size_t n,
  433. const char *fmt, ...)
  434. MY_ATTRIBUTE((format(printf, 4, 5)));
  435. long my_strntol_8bit(const CHARSET_INFO *, const char *s, size_t l,
  436. int base, char **e, int *err);
  437. ulong my_strntoul_8bit(const CHARSET_INFO *, const char *s, size_t l,
  438. int base, char **e, int *err);
  439. longlong my_strntoll_8bit(const CHARSET_INFO *, const char *s, size_t l,
  440. int base, char **e, int *err);
  441. ulonglong my_strntoull_8bit(const CHARSET_INFO *, const char *s, size_t l,
  442. int base, char **e, int *err);
  443. double my_strntod_8bit(const CHARSET_INFO *, char *s, size_t l, char **e,
  444. int *err);
  445. size_t my_long10_to_str_8bit(const CHARSET_INFO *, char *to, size_t l,
  446. int radix, long int val);
  447. size_t my_longlong10_to_str_8bit(const CHARSET_INFO *, char *to, size_t l,
  448. int radix, longlong val);
  449. longlong my_strtoll10_8bit(const CHARSET_INFO *cs,
  450. const char *nptr, char **endptr, int *error);
  451. longlong my_strtoll10_ucs2(const CHARSET_INFO *cs,
  452. const char *nptr, char **endptr, int *error);
  453. ulonglong my_strntoull10rnd_8bit(const CHARSET_INFO *cs,
  454. const char *str, size_t length, int
  455. unsigned_fl, char **endptr, int *error);
  456. ulonglong my_strntoull10rnd_ucs2(const CHARSET_INFO *cs,
  457. const char *str, size_t length,
  458. int unsigned_fl, char **endptr, int *error);
  459. void my_fill_8bit(const CHARSET_INFO *cs, char* to, size_t l, int fill);
  460. /* For 8-bit character set */
  461. my_bool my_like_range_simple(const CHARSET_INFO *cs,
  462. const char *ptr, size_t ptr_length,
  463. pbool escape, pbool w_one, pbool w_many,
  464. size_t res_length,
  465. char *min_str, char *max_str,
  466. size_t *min_length, size_t *max_length);
  467. /* For ASCII-based multi-byte character sets with mbminlen=1 */
  468. my_bool my_like_range_mb(const CHARSET_INFO *cs,
  469. const char *ptr, size_t ptr_length,
  470. pbool escape, pbool w_one, pbool w_many,
  471. size_t res_length,
  472. char *min_str, char *max_str,
  473. size_t *min_length, size_t *max_length);
  474. /* For other character sets, with arbitrary mbminlen and mbmaxlen numbers */
  475. my_bool my_like_range_generic(const CHARSET_INFO *cs,
  476. const char *ptr, size_t ptr_length,
  477. pbool escape, pbool w_one, pbool w_many,
  478. size_t res_length,
  479. char *min_str, char *max_str,
  480. size_t *min_length, size_t *max_length);
  481. int my_wildcmp_8bit(const CHARSET_INFO *,
  482. const char *str,const char *str_end,
  483. const char *wildstr,const char *wildend,
  484. int escape, int w_one, int w_many);
  485. int my_wildcmp_bin(const CHARSET_INFO *,
  486. const char *str,const char *str_end,
  487. const char *wildstr,const char *wildend,
  488. int escape, int w_one, int w_many);
  489. size_t my_numchars_8bit(const CHARSET_INFO *, const char *b, const char *e);
  490. size_t my_numcells_8bit(const CHARSET_INFO *, const char *b, const char *e);
  491. size_t my_charpos_8bit(const CHARSET_INFO *, const char *b, const char *e,
  492. size_t pos);
  493. size_t my_well_formed_len_8bit(const CHARSET_INFO *, const char *b,
  494. const char *e, size_t pos, int *error);
  495. uint my_mbcharlen_8bit(const CHARSET_INFO *, uint c);
  496. /* Functions for multibyte charsets */
  497. extern size_t my_caseup_str_mb(const CHARSET_INFO *, char *);
  498. extern size_t my_casedn_str_mb(const CHARSET_INFO *, char *);
  499. extern size_t my_caseup_mb(const CHARSET_INFO *, char *src, size_t srclen,
  500. char *dst, size_t dstlen);
  501. extern size_t my_casedn_mb(const CHARSET_INFO *, char *src, size_t srclen,
  502. char *dst, size_t dstlen);
  503. extern size_t my_caseup_mb_varlen(const CHARSET_INFO *, char *src,
  504. size_t srclen, char *dst, size_t dstlen);
  505. extern size_t my_casedn_mb_varlen(const CHARSET_INFO *, char *src,
  506. size_t srclen, char *dst, size_t dstlen);
  507. extern size_t my_caseup_ujis(const CHARSET_INFO *, char *src, size_t srclen,
  508. char *dst, size_t dstlen);
  509. extern size_t my_casedn_ujis(const CHARSET_INFO *, char *src, size_t srclen,
  510. char *dst, size_t dstlen);
  511. extern int my_strcasecmp_mb(const CHARSET_INFO * cs,const char *,
  512. const char *);
  513. int my_wildcmp_mb(const CHARSET_INFO *,
  514. const char *str,const char *str_end,
  515. const char *wildstr,const char *wildend,
  516. int escape, int w_one, int w_many);
  517. size_t my_numchars_mb(const CHARSET_INFO *, const char *b, const char *e);
  518. size_t my_numcells_mb(const CHARSET_INFO *, const char *b, const char *e);
  519. size_t my_charpos_mb(const CHARSET_INFO *, const char *b, const char *e,
  520. size_t pos);
  521. size_t my_well_formed_len_mb(const CHARSET_INFO *, const char *b,
  522. const char *e, size_t pos, int *error);
  523. uint my_instr_mb(const struct charset_info_st *,
  524. const char *b, size_t b_length,
  525. const char *s, size_t s_length,
  526. my_match_t *match, uint nmatch);
  527. int my_strnncoll_mb_bin(const CHARSET_INFO * cs,
  528. const uchar *s, size_t slen,
  529. const uchar *t, size_t tlen,
  530. my_bool t_is_prefix);
  531. int my_strnncollsp_mb_bin(const CHARSET_INFO *cs,
  532. const uchar *a, size_t a_length,
  533. const uchar *b, size_t b_length,
  534. my_bool diff_if_only_endspace_difference);
  535. int my_wildcmp_mb_bin(const CHARSET_INFO *cs,
  536. const char *str,const char *str_end,
  537. const char *wildstr,const char *wildend,
  538. int escape, int w_one, int w_many);
  539. int my_strcasecmp_mb_bin(const CHARSET_INFO * cs MY_ATTRIBUTE((unused)),
  540. const char *s, const char *t);
  541. void my_hash_sort_mb_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
  542. const uchar *key, size_t len,ulong *nr1, ulong *nr2);
  543. size_t my_strnxfrm_mb(const CHARSET_INFO *,
  544. uchar *dst, size_t dstlen, uint nweights,
  545. const uchar *src, size_t srclen, uint flags);
  546. size_t my_strnxfrm_unicode(const CHARSET_INFO *,
  547. uchar *dst, size_t dstlen, uint nweights,
  548. const uchar *src, size_t srclen, uint flags);
  549. size_t my_strnxfrm_unicode_full_bin(const CHARSET_INFO *,
  550. uchar *dst, size_t dstlen, uint nweights,
  551. const uchar *src, size_t srclen, uint flags);
  552. size_t my_strnxfrmlen_unicode_full_bin(const CHARSET_INFO *, size_t);
  553. int my_wildcmp_unicode(const CHARSET_INFO *cs,
  554. const char *str, const char *str_end,
  555. const char *wildstr, const char *wildend,
  556. int escape, int w_one, int w_many,
  557. const MY_UNICASE_INFO *weights);
  558. extern my_bool my_parse_charset_xml(MY_CHARSET_LOADER *loader,
  559. const char *buf, size_t buflen);
  560. extern char *my_strchr(const CHARSET_INFO *cs, const char *str,
  561. const char *end, pchar c);
  562. extern size_t my_strcspn(const CHARSET_INFO *cs, const char *str,
  563. const char *end, const char *reject,
  564. size_t reject_length);
  565. my_bool my_propagate_simple(const CHARSET_INFO *cs, const uchar *str,
  566. size_t len);
  567. my_bool my_propagate_complex(const CHARSET_INFO *cs, const uchar *str,
  568. size_t len);
  569. uint my_string_repertoire(const CHARSET_INFO *cs, const char *str, size_t len);
  570. my_bool my_charset_is_ascii_based(const CHARSET_INFO *cs);
  571. my_bool my_charset_is_8bit_pure_ascii(const CHARSET_INFO *cs);
  572. uint my_charset_repertoire(const CHARSET_INFO *cs);
  573. uint my_strxfrm_flag_normalize(uint flags, uint nlevels);
  574. void my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
  575. uint flags, uint level);
  576. size_t my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO *cs,
  577. uchar *str, uchar *frmend, uchar *strend,
  578. uint nweights, uint flags, uint level);
  579. my_bool my_charset_is_ascii_compatible(const CHARSET_INFO *cs);
  580. const MY_CONTRACTIONS *my_charset_get_contractions(const CHARSET_INFO *cs,
  581. int level);
  582. extern size_t my_vsnprintf_ex(const CHARSET_INFO *cs, char *to, size_t n,
  583. const char* fmt, va_list ap);
  584. size_t my_convert(char *to, size_t to_length, const CHARSET_INFO *to_cs,
  585. const char *from, size_t from_length,
  586. const CHARSET_INFO *from_cs, uint *errors);
  587. uint my_mbcharlen_ptr(const CHARSET_INFO *cs, const char *s, const char *e);
  /*
    Character-class flag bits. Each class occupies one bit of a byte so
    several classes can be OR-ed together in a single table entry and
    tested with a mask (see the my_is*() macros).
  */
  #define _MY_U   0x01 /* Upper case */
  #define _MY_L   0x02 /* Lower case */
  #define _MY_NMR 0x04 /* Numeral (digit) */
  #define _MY_SPC 0x08 /* Spacing character */
  #define _MY_PNT 0x10 /* Punctuation */
  #define _MY_CTR 0x20 /* Control character */
  #define _MY_B   0x40 /* Blank */
  #define _MY_X   0x80 /* heXadecimal digit */
  /*
    Charset-independent helpers that work on the numeric value of c.
      my_isascii(c)  1 if no bit above the low seven is set, otherwise 0
      my_toascii(c)  c with everything above the low seven bits cleared
      my_tocntrl(c)  c reduced to its low five bits
      my_toprint(c)  c with bit 0x40 set
  */
  #define my_isascii(c) (((c) & ~0x7F) == 0)
  #define my_toascii(c) ((c) & 0x7F)
  #define my_tocntrl(c) ((c) & 0x1F)
  #define my_toprint(c) ((c) | 0x40)
  /*
    Case mapping through the charset's lookup tables.

    s  pointer to an object with to_upper / to_lower byte tables
    c  character to map; it is cast to uchar before indexing, so a negative
       char value selects the upper half of the table instead of indexing
       out of bounds

    The result is converted back to char. The whole expansion is
    parenthesized so the cast cannot be split off by a neighbouring
    operator (e.g. "sizeof my_toupper(s, c)").
  */
  #define my_toupper(s,c) ((char) ((s)->to_upper[(uchar) (c)]))
  #define my_tolower(s,c) ((char) ((s)->to_lower[(uchar) (c)]))
  /*
    Character classification against the charset's ctype table.

    The table is indexed at (uchar) c + 1, i.e. entry 0 of ctype is skipped
    and the byte value 0 maps to ctype[1]. Each macro yields the masked flag
    bits (non-zero when the character belongs to the class, 0 otherwise),
    not a normalized 0/1.
  */
  #define my_isalpha(s, c)  ((s)->ctype[(uchar) (c) + 1] & (_MY_U | _MY_L))
  #define my_isupper(s, c)  ((s)->ctype[(uchar) (c) + 1] & _MY_U)
  #define my_islower(s, c)  ((s)->ctype[(uchar) (c) + 1] & _MY_L)
  #define my_isdigit(s, c)  ((s)->ctype[(uchar) (c) + 1] & _MY_NMR)
  #define my_isxdigit(s, c) ((s)->ctype[(uchar) (c) + 1] & _MY_X)
  #define my_isalnum(s, c)  ((s)->ctype[(uchar) (c) + 1] & (_MY_U | _MY_L | _MY_NMR))
  #define my_isspace(s, c)  ((s)->ctype[(uchar) (c) + 1] & _MY_SPC)
  #define my_ispunct(s, c)  ((s)->ctype[(uchar) (c) + 1] & _MY_PNT)
  #define my_isprint(s, c)  ((s)->ctype[(uchar) (c) + 1] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B))
  #define my_isgraph(s, c)  ((s)->ctype[(uchar) (c) + 1] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR))
  #define my_iscntrl(s, c)  ((s)->ctype[(uchar) (c) + 1] & _MY_CTR)
  /*
    Identifier-character tests (flagged in the original as candidates for
    cleanup).
      my_isvar(s, c)        1 if c is alphanumeric in s or is '_', else 0
      my_isvar_start(s, c)  1 if c is alphabetic in s or is '_', else 0
    Note: c appears twice in each expansion, so it may be evaluated twice;
    do not pass an argument with side effects.
  */
  #define my_isvar(s,c)       (my_isalnum(s,c) ? 1 : ((c) == '_'))
  #define my_isvar_start(s,c) (my_isalpha(s,c) ? 1 : ((c) == '_'))
  /*
    Charset state-flag tests. Each yields the masked bit of cs->state
    (non-zero when the flag is set, 0 otherwise).
  */
  #define my_binary_compare(cs) ((cs)->state & MY_CS_BINSORT)
  #define use_strnxfrm(cs)      ((cs)->state & MY_CS_STRNXFRM)
  /*
    Thin wrappers that dispatch through the charset's collation handler
    (cs->coll). Arguments are forwarded unchanged, with the charset itself
    passed as the first argument of every call.

    my_strnxfrm   forwards dl for both the third and the fourth handler
                  argument (so dl is evaluated twice) and always passes
                  MY_STRXFRM_PAD_WITH_SPACE as the last argument.
    my_strnncoll  always passes 0 as the last handler argument.
  */
  #define my_strnxfrm(cs, d, dl, s, sl) \
    ((cs)->coll->strnxfrm((cs), (d), (dl), (dl), (s), (sl), \
                          MY_STRXFRM_PAD_WITH_SPACE))
  #define my_strnncoll(cs, a, b, c, d) \
    ((cs)->coll->strnncoll((cs), (a), (b), (c), (d), 0))
  #define my_like_range(cs, a, b, c, d, e, f, g, h, i, j) \
    ((cs)->coll->like_range((cs), (a), (b), (c), (d), (e), \
                            (f), (g), (h), (i), (j)))
  #define my_wildcmp(cs, s, se, w, we, e, o, m) \
    ((cs)->coll->wildcmp((cs), (s), (se), (w), (we), (e), (o), (m)))
  #define my_strcasecmp(cs, a, b) \
    ((cs)->coll->strcasecmp((cs), (a), (b)))
  /*
    Dispatch to the charset handler's charpos() for the range [b, e) and
    the count num. b and e are cast to const char * so callers may pass
    other pointer types.
    NOTE(review): the meaning of the returned value is defined by the
    handler, which is not visible here.

    The expansion is wrapped in parentheses like every other dispatch macro
    in this header, so it stays a single operand in any surrounding
    expression.
  */
  #define my_charpos(cs, b, e, num) \
    ((cs)->cset->charpos((cs), (const char *) (b), (const char *) (e), (num)))
  /*
    Multi-byte support probes, dispatched through the charset handler
    (cs->cset).
      use_mb(cs)            1 if the handler provides ismbchar, else 0
      my_ismbchar(cs, a, b) forwards to cs->cset->ismbchar(cs, a, b)
      my_mbcharlen(cs, a)   forwards to cs->cset->mbcharlen(cs, a)
    my_ismbchar must only be used when use_mb(cs) is true, since it calls
    through the ismbchar pointer without checking it.
  */
  #define use_mb(cs)            ((cs)->cset->ismbchar != NULL)
  #define my_ismbchar(cs, a, b) ((cs)->cset->ismbchar((cs), (a), (b)))
  #define my_mbcharlen(cs, a)   ((cs)->cset->mbcharlen((cs), (a)))
  /**
    Get the length of a gb18030 code from its two leading bytes.

    The two bytes are packed into one value (first byte in bits 8..15,
    second byte in bits 0..7) and passed to the handler's mbcharlen().

    @param[in] s  charset_info
    @param[in] a  first byte of the gb18030 code
    @param[in] b  second byte of the gb18030 code
    @return the length of the gb18030 code starting with the given two bytes:
            2 or 4 for a valid gb18030 code, 0 for an invalid one
  */
  #define my_mbcharlen_2(s, a, b) \
    ((s)->cset->mbcharlen((s), ((((a) & 0xFF) << 8) | ((b) & 0xFF))))
  /**
    Get the maximum number of leading bytes needed to determine the length
    of a multi-byte character in the given charset.

    @param[in] cs  charset_info
    @return number of leading bytes needed: 2 for gb18030,
            1 for all other charsets
  */
  #define my_mbmaxlenlen(cs) ((cs)->mbmaxlenlen)
  /**
    Judge whether the given byte is a possible leading byte of a multi-byte
    character in a charset.

    For gb18030, whose mbmaxlenlen is 2, the length of a multi-byte
    character cannot be determined from the first byte alone, so a byte for
    which my_mbcharlen() reports 0 is also accepted there.

    Note: i is expanded twice and my_mbcharlen() may be called twice.

    @param[in] s  charset_info
    @param[in] i  possible leading byte
    @return true if it is, otherwise false
  */
  #define my_ismb1st(s, i) \
    (my_mbcharlen((s), (i)) > 1 \
       ? 1 \
       : (my_mbmaxlenlen((s)) == 2 && my_mbcharlen((s), (i)) == 0))
  /*
    Thin wrappers that dispatch through the charset handler (cs->cset):
    in-place style case conversion of a string and string-to-number
    conversions. Arguments are forwarded unchanged, with the charset itself
    passed as the first argument of every call.
    NOTE(review): the meaning of the individual arguments is defined by the
    handler functions, which are not visible here.
  */
  #define my_caseup_str(cs, a) ((cs)->cset->caseup_str((cs), (a)))
  #define my_casedn_str(cs, a) ((cs)->cset->casedn_str((cs), (a)))

  #define my_strntol(cs, a, b, c, d, e) \
    ((cs)->cset->strntol((cs), (a), (b), (c), (d), (e)))
  #define my_strntoul(cs, a, b, c, d, e) \
    ((cs)->cset->strntoul((cs), (a), (b), (c), (d), (e)))
  #define my_strntoll(cs, a, b, c, d, e) \
    ((cs)->cset->strntoll((cs), (a), (b), (c), (d), (e)))
  #define my_strntoull(cs, a, b, c, d, e) \
    ((cs)->cset->strntoull((cs), (a), (b), (c), (d), (e)))
  #define my_strntod(cs, a, b, c, d) \
    ((cs)->cset->strntod((cs), (a), (b), (c), (d)))
  665. #ifdef __cplusplus
  666. }
  667. #endif
  668. #endif /* _m_ctype_h */