utf.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00087
#ifndef __UTF_H__
00088
#define __UTF_H__
00089
00090
00091
00092
00093
00094
#include <stddef.h>
00095
#include "unicode/umachine.h"
00096
00097
00098
00099
/**
 * Width selector for the default string code unit.
 * A build may predefine UTF_SIZE; when it does not, 16 is used.
 * Later in this header only the value 16 is accepted without an #error.
 */
#ifndef UTF_SIZE
#   define UTF_SIZE 16
#endif

/** UTF_SIZE divided by 8 (2 when UTF_SIZE is 16). */
#define U_SIZEOF_UCHAR (UTF_SIZE>>3)
00106
00111
/** Defaults to 1 unless the build configuration already defined it. */
#ifndef U_HAVE_WCHAR_H
#   define U_HAVE_WCHAR_H 1
#endif
00114
00115
00116
/**
 * Fallback for the wchar_t size setting.
 * An undefined macro evaluates as 0 inside #if, so this branch is taken both
 * when U_SIZEOF_WCHAR_T is explicitly 0 and when it was never defined; in
 * either case it is (re)defined to 4.
 */
#if U_SIZEOF_WCHAR_T==0
#   undef U_SIZEOF_WCHAR_T
#   define U_SIZEOF_WCHAR_T 4
#endif
00120
00129
/**
 * Decide whether wchar_t strings can be treated as UTF-16 or UTF-32.
 * Nothing is done if the build already defined U_WCHAR_IS_UTF16 or
 * U_WCHAR_IS_UTF32. Otherwise at most one of them is defined, based on
 * compiler/platform macros combined with U_SIZEOF_WCHAR_T:
 *  - __STDC_ISO_10646__: UTF-16 for a 2-byte wchar_t, UTF-32 for 4 bytes;
 *  - __UCS2__: UTF-16 only on OS/390 or OS/400 with a 2-byte wchar_t;
 *  - __UCS4__: UTF-32 with a 4-byte wchar_t;
 *  - any of the Windows macros: UTF-16.
 */
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
#   ifdef __STDC_ISO_10646__
#       if (U_SIZEOF_WCHAR_T==2)
#           define U_WCHAR_IS_UTF16
#       elif (U_SIZEOF_WCHAR_T==4)
#           define U_WCHAR_IS_UTF32
#       endif
#   elif defined __UCS2__
#       if (__OS390__ || __OS400__) && (U_SIZEOF_WCHAR_T==2)
#           define U_WCHAR_IS_UTF16
#       endif
#   elif defined __UCS4__
#       if (U_SIZEOF_WCHAR_T==4)
#           define U_WCHAR_IS_UTF32
#       endif
#   elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#       define U_WCHAR_IS_UTF16
#   endif
#endif
00148
00154
/**
 * UChar32: the type this header uses for a single code point value.
 * It is wchar_t when U_SIZEOF_WCHAR_T is 4, and uint32_t otherwise.
 */
#if U_SIZEOF_WCHAR_T==4
typedef wchar_t UChar32;
#else
typedef uint32_t UChar32;
#endif
00159
/** Signed 32-bit integer type; by its name, intended for offsets into text. */
typedef int32_t UTextOffset;
00168
00169
00170
/**
 * Default macro flavor: if the build selected none of UTF_SAFE, UTF_STRICT
 * or UTF_UNSAFE, choose UTF_SAFE.
 */
#if !defined(UTF_SAFE) && !defined(UTF_STRICT) && !defined(UTF_UNSAFE)
#   define UTF_SAFE
#endif
00177
00178
00179
/**
 * Sentinel values that UTF_IS_ERROR() and UTF_IS_VALID() compare against.
 * The UTF8_ names suggest they come from UTF-8 decoding -- confirm in utf8.h.
 */
#define UTF8_ERROR_VALUE_1 0x15
#define UTF8_ERROR_VALUE_2 0x9f

/** Generic error value; 0xffff also satisfies the 0xfffe mask test in UTF_IS_ERROR(). */
#define UTF_ERROR_VALUE 0xffff
00200
00201
00202
/**
 * Non-zero when the value lies in 0xd800..0xdfff: every bit above the low
 * eleven (within the 32-bit mask) must match 0xd800.
 */
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
00205
/**
 * Non-zero when c is a noncharacter code point: either its low 16 bits are
 * 0xfffe or 0xffff, or it lies in 0xfdd0..0xfdef -- and it is no greater
 * than 0x10ffff.
 *
 * The upper-bound comparison is done on (uint32_t)(c) so that a negative
 * signed argument (for example -2, whose low bits are 0xfffe) is rejected
 * instead of being reported as a noncharacter.
 * The argument is evaluated more than once; do not pass expressions with
 * side effects.
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    (((((c) & 0xfffe) == 0xfffe) || ((c) >= 0xfdd0 && (c) <= 0xfdef)) && \
     ((uint32_t)(c) <= 0x10ffff))
00212
/**
 * Non-zero when c, viewed as uint32_t, is below 0xd800, or is above 0xdfff,
 * at most 0x10ffff, and not flagged by UTF_IS_UNICODE_NONCHAR().
 * The argument is evaluated more than once.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))
00231
/**
 * Non-zero when c has 0xfffe or 0xffff in its low 16 bits (which includes
 * UTF_ERROR_VALUE), or equals UTF8_ERROR_VALUE_1 or UTF8_ERROR_VALUE_2.
 * The argument is evaluated more than once.
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
00238
/**
 * Non-zero when c passes UTF_IS_UNICODE_CHAR() and is neither
 * UTF8_ERROR_VALUE_1 nor UTF8_ERROR_VALUE_2.
 * The argument is evaluated more than once.
 */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
00243
00244
00245
00246
#include "unicode/utf8.h"
00247
#include "unicode/utf16.h"
00248
#include "unicode/utf32.h"
00249
00250
00251
00258
/**
 * Select the code unit type UChar and the generic UTF_... macro family
 * according to UTF_SIZE.
 *
 * Only UTF_SIZE==16 is implemented: there UChar is wchar_t when
 * U_SIZEOF_WCHAR_T is 2 and uint16_t otherwise, and every UTF_ macro
 * forwards to the UTF16_ macro of the same name. UTF_SIZE values of 8 and 32
 * are rejected with #error, as is any other value.
 */
#if UTF_SIZE==8

#   error UTF-8 is not implemented, undefine UTF_SIZE or define it to 16

/* The #error above already rejects this configuration; the typedefs below
   only record what UChar would be for an 8-bit code unit. */
#   include <limits.h>

#   if CHAR_MAX>=255
        typedef char UChar;
#   else
        typedef uint8_t UChar;
#   endif

#elif UTF_SIZE==16

#   if U_SIZEOF_WCHAR_T==2
        typedef wchar_t UChar;
#   else
        typedef uint16_t UChar;
#   endif

/* Code unit classification. */
#   define UTF_IS_SINGLE(uchar) UTF16_IS_SINGLE(uchar)
#   define UTF_IS_LEAD(uchar) UTF16_IS_LEAD(uchar)
#   define UTF_IS_TRAIL(uchar) UTF16_IS_TRAIL(uchar)

/* Encoded length and array sizing. */
#   define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
#   define UTF_CHAR_LENGTH(c) UTF16_CHAR_LENGTH(c)
#   define UTF_MAX_CHAR_LENGTH UTF16_MAX_CHAR_LENGTH
#   define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

/* Random access read. */
#   define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
#   define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)

/* Forward read. */
#   define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
#   define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)

/* Append. */
#   define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
#   define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/* Forward skipping. */
#   define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
#   define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)

#   define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
#   define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)

/* Index adjustment to a character start. */
#   define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
#   define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)

/* Backward read. */
#   define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
#   define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)

/* Backward skipping. */
#   define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
#   define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)

#   define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
#   define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)

/* Index adjustment to a character limit. */
#   define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
#   define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)

#elif UTF_SIZE==32

#   error UTF-32 is not implemented, undefine UTF_SIZE or define it to 16

    typedef UChar32 UChar;

#else
#   error UTF_SIZE must be undefined or one of { 8, 16, 32 } - only 16 is implemented
#endif
00359
00360
00361
00469
/**
 * Default (unsuffixed) UTF_ macros. Each one maps to a _SAFE or _UNSAFE
 * macro of the same name depending on the selected flavor:
 *  - UTF_SAFE:   the _SAFE variants, passing FALSE for the strict argument;
 *  - UTF_STRICT: the _SAFE variants, passing TRUE for the strict argument;
 *  - otherwise:  the _UNSAFE variants, which take neither start nor length,
 *                so those arguments are accepted here and discarded.
 * The macros without a strict argument are identical under UTF_SAFE and
 * UTF_STRICT.
 */
#ifdef UTF_SAFE

#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)

#   define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
#   define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
#   define UTF_FWD_1(s, i, length) UTF_FWD_1_SAFE(s, i, length)
#   define UTF_FWD_N(s, i, length, n) UTF_FWD_N_SAFE(s, i, length, n)
#   define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_SAFE(s, start, i)

#   define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
#   define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i)
#   define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n)
#   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)

#elif defined(UTF_STRICT)

#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE)

#   define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE)
#   define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
#   define UTF_FWD_1(s, i, length) UTF_FWD_1_SAFE(s, i, length)
#   define UTF_FWD_N(s, i, length, n) UTF_FWD_N_SAFE(s, i, length, n)
#   define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_SAFE(s, start, i)

#   define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE)
#   define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i)
#   define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n)
#   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)

#else

#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_UNSAFE(s, i, c)

#   define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_UNSAFE(s, i, c)
#   define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_UNSAFE(s, i, c)
#   define UTF_FWD_1(s, i, length) UTF_FWD_1_UNSAFE(s, i)
#   define UTF_FWD_N(s, i, length, n) UTF_FWD_N_UNSAFE(s, i, n)
#   define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_UNSAFE(s, i)

#   define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_UNSAFE(s, i, c)
#   define UTF_BACK_1(s, start, i) UTF_BACK_1_UNSAFE(s, i)
#   define UTF_BACK_N(s, start, i, n) UTF_BACK_N_UNSAFE(s, i, n)
#   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_UNSAFE(s, i)

#endif
00515
00516
#endif
Generated on Wed Jul 28 05:45:13 2004 for ICU 2.1 by Doxygen 1.3.7