golden hour
/opt/alt/libicu65/usr/include/unicode
⬆️ Go Up
Upload
File/Folder
Size
Actions
alphaindex.h
26.48 KB
Del
OK
appendable.h
8.49 KB
Del
OK
basictz.h
9.15 KB
Del
OK
brkiter.h
27.8 KB
Del
OK
bytestream.h
9.6 KB
Del
OK
bytestrie.h
20.77 KB
Del
OK
bytestriebuilder.h
7.08 KB
Del
OK
calendar.h
105.74 KB
Del
OK
caniter.h
7.43 KB
Del
OK
casemap.h
25.33 KB
Del
OK
char16ptr.h
7.22 KB
Del
OK
chariter.h
24.05 KB
Del
OK
choicfmt.h
23.91 KB
Del
OK
coleitr.h
13.76 KB
Del
OK
coll.h
56.23 KB
Del
OK
compactdecimalformat.h
6.88 KB
Del
OK
curramt.h
3.76 KB
Del
OK
currpinf.h
7.3 KB
Del
OK
currunit.h
4.05 KB
Del
OK
datefmt.h
40.67 KB
Del
OK
dbbi.h
1.19 KB
Del
OK
dcfmtsym.h
20.13 KB
Del
OK
decimfmt.h
87.38 KB
Del
OK
docmain.h
6.97 KB
Del
OK
dtfmtsym.h
37.7 KB
Del
OK
dtintrv.h
3.84 KB
Del
OK
dtitvfmt.h
46.63 KB
Del
OK
dtitvinf.h
18.51 KB
Del
OK
dtptngen.h
25.08 KB
Del
OK
dtrule.h
8.68 KB
Del
OK
edits.h
20.74 KB
Del
OK
enumset.h
2.08 KB
Del
OK
errorcode.h
4.84 KB
Del
OK
fieldpos.h
8.69 KB
Del
OK
filteredbrk.h
5.37 KB
Del
OK
fmtable.h
24.42 KB
Del
OK
format.h
12.5 KB
Del
OK
formattedvalue.h
10.27 KB
Del
OK
fpositer.h
3.04 KB
Del
OK
gender.h
3.33 KB
Del
OK
gregocal.h
31.71 KB
Del
OK
icudataver.h
1.03 KB
Del
OK
icuplug.h
11.88 KB
Del
OK
idna.h
12.7 KB
Del
OK
listformatter.h
9.47 KB
Del
OK
localebuilder.h
11.27 KB
Del
OK
localematcher.h
22.5 KB
Del
OK
localpointer.h
19.69 KB
Del
OK
locdspnm.h
7.12 KB
Del
OK
locid.h
47.4 KB
Del
OK
measfmt.h
11.33 KB
Del
OK
measunit.h
93.31 KB
Del
OK
measure.h
4.32 KB
Del
OK
messagepattern.h
33.71 KB
Del
OK
msgfmt.h
44.11 KB
Del
OK
normalizer2.h
34.03 KB
Del
OK
normlzr.h
30.94 KB
Del
OK
nounit.h
2.69 KB
Del
OK
numberformatter.h
86.31 KB
Del
OK
numberrangeformatter.h
30.14 KB
Del
OK
numfmt.h
49.81 KB
Del
OK
numsys.h
7.19 KB
Del
OK
parseerr.h
3.08 KB
Del
OK
parsepos.h
5.56 KB
Del
OK
platform.h
28.08 KB
Del
OK
plurfmt.h
25.2 KB
Del
OK
plurrule.h
18.39 KB
Del
OK
ptypes.h
3.49 KB
Del
OK
putil.h
6.33 KB
Del
OK
rbbi.h
26.58 KB
Del
OK
rbnf.h
48.73 KB
Del
OK
rbtz.h
15.6 KB
Del
OK
regex.h
84.36 KB
Del
OK
region.h
9.18 KB
Del
OK
reldatefmt.h
22.62 KB
Del
OK
rep.h
9.37 KB
Del
OK
resbund.h
18.07 KB
Del
OK
schriter.h
6.32 KB
Del
OK
scientificnumberformatter.h
6.4 KB
Del
OK
search.h
22.22 KB
Del
OK
selfmt.h
14.3 KB
Del
OK
simpleformatter.h
12.59 KB
Del
OK
simpletz.h
45.44 KB
Del
OK
smpdtfmt.h
70.97 KB
Del
OK
sortkey.h
11.18 KB
Del
OK
std_string.h
1.05 KB
Del
OK
strenum.h
9.92 KB
Del
OK
stringoptions.h
5.79 KB
Del
OK
stringpiece.h
7.38 KB
Del
OK
stringtriebuilder.h
15.33 KB
Del
OK
stsearch.h
21.3 KB
Del
OK
symtable.h
4.27 KB
Del
OK
tblcoll.h
36.61 KB
Del
OK
timezone.h
41.02 KB
Del
OK
tmunit.h
3.38 KB
Del
OK
tmutamt.h
4.9 KB
Del
OK
tmutfmt.h
7.85 KB
Del
OK
translit.h
65.82 KB
Del
OK
tzfmt.h
42.89 KB
Del
OK
tznames.h
16.85 KB
Del
OK
tzrule.h
35.37 KB
Del
OK
tztrans.h
6.12 KB
Del
OK
ubidi.h
89.56 KB
Del
OK
ubiditransform.h
12.65 KB
Del
OK
ubrk.h
23.97 KB
Del
OK
ucal.h
56.9 KB
Del
OK
ucasemap.h
15.18 KB
Del
OK
ucat.h
5.36 KB
Del
OK
uchar.h
140.56 KB
Del
OK
ucharstrie.h
22.58 KB
Del
OK
ucharstriebuilder.h
7.21 KB
Del
OK
uchriter.h
13.2 KB
Del
OK
uclean.h
11.21 KB
Del
OK
ucnv.h
83.09 KB
Del
OK
ucnv_cb.h
6.59 KB
Del
OK
ucnv_err.h
20.99 KB
Del
OK
ucnvsel.h
6.14 KB
Del
OK
ucol.h
61.46 KB
Del
OK
ucoleitr.h
9.46 KB
Del
OK
uconfig.h
12.07 KB
Del
OK
ucpmap.h
5.53 KB
Del
OK
ucptrie.h
22.46 KB
Del
OK
ucsdet.h
14.67 KB
Del
OK
ucurr.h
16.12 KB
Del
OK
udat.h
60.88 KB
Del
OK
udata.h
15.56 KB
Del
OK
udateintervalformat.h
10.03 KB
Del
OK
udatpg.h
26.01 KB
Del
OK
udisplaycontext.h
5.89 KB
Del
OK
uenum.h
7.78 KB
Del
OK
ufieldpositer.h
4.36 KB
Del
OK
uformattable.h
10.94 KB
Del
OK
uformattedvalue.h
12.14 KB
Del
OK
ugender.h
2 KB
Del
OK
uidna.h
33.37 KB
Del
OK
uiter.h
22.77 KB
Del
OK
uldnames.h
10.45 KB
Del
OK
ulistformatter.h
8.83 KB
Del
OK
uloc.h
52.54 KB
Del
OK
ulocdata.h
11.26 KB
Del
OK
umachine.h
14.53 KB
Del
OK
umisc.h
1.33 KB
Del
OK
umsg.h
24.23 KB
Del
OK
umutablecptrie.h
8.24 KB
Del
OK
unifilt.h
3.96 KB
Del
OK
unifunct.h
4.04 KB
Del
OK
unimatch.h
6.1 KB
Del
OK
unirepl.h
3.38 KB
Del
OK
uniset.h
64.9 KB
Del
OK
unistr.h
170.43 KB
Del
OK
unorm.h
20.52 KB
Del
OK
unorm2.h
24.66 KB
Del
OK
unum.h
53.62 KB
Del
OK
unumberformatter.h
25.36 KB
Del
OK
unumsys.h
7.21 KB
Del
OK
uobject.h
10.68 KB
Del
OK
upluralrules.h
7.88 KB
Del
OK
uregex.h
72.05 KB
Del
OK
uregion.h
9.84 KB
Del
OK
ureldatefmt.h
17.26 KB
Del
OK
urename.h
130.97 KB
Del
OK
urep.h
5.38 KB
Del
OK
ures.h
36.54 KB
Del
OK
uscript.h
26.87 KB
Del
OK
usearch.h
38.12 KB
Del
OK
uset.h
40 KB
Del
OK
usetiter.h
9.55 KB
Del
OK
ushape.h
18 KB
Del
OK
uspoof.h
65.9 KB
Del
OK
usprep.h
8.14 KB
Del
OK
ustdio.h
38.54 KB
Del
OK
ustream.h
1.89 KB
Del
OK
ustring.h
72.47 KB
Del
OK
ustringtrie.h
3.15 KB
Del
OK
utext.h
58.13 KB
Del
OK
utf.h
7.86 KB
Del
OK
utf16.h
23.32 KB
Del
OK
utf32.h
763 B
Del
OK
utf8.h
30.96 KB
Del
OK
utf_old.h
45.83 KB
Del
OK
utmscale.h
13.78 KB
Del
OK
utrace.h
15.73 KB
Del
OK
utrans.h
25.52 KB
Del
OK
utypes.h
30.74 KB
Del
OK
uvernum.h
6.67 KB
Del
OK
uversion.h
6 KB
Del
OK
vtzone.h
20.3 KB
Del
OK
Edit: utf16.h
// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 1999-2012, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: utf16.h * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 1999sep09 * created by: Markus W. Scherer */ /** * \file * \brief C API: 16-bit Unicode handling macros * * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. * * For more information see utf.h and the ICU User Guide Strings chapter * (http://userguide.icu-project.org/strings). * * <em>Usage:</em> * ICU coding guidelines for if() statements should be followed when using these macros. * Compound statements (curly braces {}) must be used for if-else-while... * bodies and all macro statements should be terminated with semicolon. */ #ifndef __UTF16_H__ #define __UTF16_H__ #include "unicode/umachine.h" #ifndef __UTF_H__ # include "unicode/utf.h" #endif /* single-code point definitions -------------------------------------------- */ /** * Does this code unit alone encode a code point (BMP, not a surrogate)? * @param c 16-bit code unit * @return TRUE or FALSE * @stable ICU 2.4 */ #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) /** * Is this code unit a lead surrogate (U+d800..U+dbff)? * @param c 16-bit code unit * @return TRUE or FALSE * @stable ICU 2.4 */ #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) /** * Is this code unit a trail surrogate (U+dc00..U+dfff)? * @param c 16-bit code unit * @return TRUE or FALSE * @stable ICU 2.4 */ #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) /** * Is this code unit a surrogate (U+d800..U+dfff)? * @param c 16-bit code unit * @return TRUE or FALSE * @stable ICU 2.4 */ #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) /** * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), * is it a lead surrogate? * @param c 16-bit code unit * @return TRUE or FALSE * @stable ICU 2.4 */ #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) /** * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), * is it a trail surrogate? * @param c 16-bit code unit * @return TRUE or FALSE * @stable ICU 4.2 */ #define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) /** * Helper constant for U16_GET_SUPPLEMENTARY. * @internal */ #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) /** * Get a supplementary code point value (U+10000..U+10ffff) * from its lead and trail surrogates. * The result is undefined if the input values are not * lead and trail surrogates. * * @param lead lead surrogate (U+d800..U+dbff) * @param trail trail surrogate (U+dc00..U+dfff) * @return supplementary code point (U+10000..U+10ffff) * @stable ICU 2.4 */ #define U16_GET_SUPPLEMENTARY(lead, trail) \ (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) /** * Get the lead surrogate (0xd800..0xdbff) for a * supplementary code point (0x10000..0x10ffff). * @param supplementary 32-bit code point (U+10000..U+10ffff) * @return lead surrogate (U+d800..U+dbff) for supplementary * @stable ICU 2.4 */ #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) /** * Get the trail surrogate (0xdc00..0xdfff) for a * supplementary code point (0x10000..0x10ffff). * @param supplementary 32-bit code point (U+10000..U+10ffff) * @return trail surrogate (U+dc00..U+dfff) for supplementary * @stable ICU 2.4 */ #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) /** * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). * @param c 32-bit code point * @return 1 or 2 * @stable ICU 2.4 */ #define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) /** * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). * @return 2 * @stable ICU 2.4 */ #define U16_MAX_LENGTH 2 /** * Get a code point from a string at a random-access offset, * without changing the offset. * "Unsafe" macro, assumes well-formed UTF-16. * * The offset may point to either the lead or trail surrogate unit * for a supplementary code point, in which case the macro will read * the adjacent matching surrogate as well. * The result is undefined if the offset points to a single, unpaired surrogate. * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. * * @param s const UChar * string * @param i string offset * @param c output UChar32 variable * @see U16_GET * @stable ICU 2.4 */ #define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[i]; \ if(U16_IS_SURROGATE(c)) { \ if(U16_IS_SURROGATE_LEAD(c)) { \ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ } else { \ (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ } \ } \ } UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a random-access offset, * without changing the offset. * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * The offset may point to either the lead or trail surrogate unit * for a supplementary code point, in which case the macro will read * the adjacent matching surrogate as well. * * The length can be negative for a NUL-terminated string. * * If the offset points to a single, unpaired surrogate, then * c is set to that unpaired surrogate. * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. * * @param s const UChar * string * @param start starting string offset (usually 0) * @param i string offset, must be start<=i<length * @param length string length * @param c output UChar32 variable * @see U16_GET_UNSAFE * @stable ICU 2.4 */ #define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[i]; \ if(U16_IS_SURROGATE(c)) { \ uint16_t __c2; \ if(U16_IS_SURROGATE_LEAD(c)) { \ if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ } \ } else { \ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ } \ } \ } \ } UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a random-access offset, * without changing the offset. * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * The offset may point to either the lead or trail surrogate unit * for a supplementary code point, in which case the macro will read * the adjacent matching surrogate as well. * * The length can be negative for a NUL-terminated string. * * If the offset points to a single, unpaired surrogate, then * c is set to U+FFFD. * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD. * * @param s const UChar * string * @param start starting string offset (usually 0) * @param i string offset, must be start<=i<length * @param length string length * @param c output UChar32 variable * @see U16_GET_UNSAFE * @stable ICU 60 */ #define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[i]; \ if(U16_IS_SURROGATE(c)) { \ uint16_t __c2; \ if(U16_IS_SURROGATE_LEAD(c)) { \ if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ } else { \ (c)=0xfffd; \ } \ } else { \ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ } else { \ (c)=0xfffd; \ } \ } \ } \ } UPRV_BLOCK_MACRO_END /* definitions with forward iteration --------------------------------------- */ /** * Get a code point from a string at a code point boundary offset, * and advance the offset to the next code point boundary. * (Post-incrementing forward iteration.) * "Unsafe" macro, assumes well-formed UTF-16. * * The offset may point to the lead surrogate unit * for a supplementary code point, in which case the macro will read * the following trail surrogate as well. * If the offset points to a trail surrogate, then that itself * will be returned as the code point. * The result is undefined if the offset points to a single, unpaired lead surrogate. * * @param s const UChar * string * @param i string offset * @param c output UChar32 variable * @see U16_NEXT * @stable ICU 2.4 */ #define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if(U16_IS_LEAD(c)) { \ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ } \ } UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a code point boundary offset, * and advance the offset to the next code point boundary. * (Post-incrementing forward iteration.) * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * The length can be negative for a NUL-terminated string. * * The offset may point to the lead surrogate unit * for a supplementary code point, in which case the macro will read * the following trail surrogate as well. * If the offset points to a trail surrogate or * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate. * * @param s const UChar * string * @param i string offset, must be i<length * @param length string length * @param c output UChar32 variable * @see U16_NEXT_UNSAFE * @stable ICU 2.4 */ #define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if(U16_IS_LEAD(c)) { \ uint16_t __c2; \ if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ ++(i); \ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ } \ } \ } UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a code point boundary offset, * and advance the offset to the next code point boundary. * (Post-incrementing forward iteration.) * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * The length can be negative for a NUL-terminated string. * * The offset may point to the lead surrogate unit * for a supplementary code point, in which case the macro will read * the following trail surrogate as well. * If the offset points to a trail surrogate or * to a single, unpaired lead surrogate, then c is set to U+FFFD. * * @param s const UChar * string * @param i string offset, must be i<length * @param length string length * @param c output UChar32 variable * @see U16_NEXT_UNSAFE * @stable ICU 60 */ #define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if(U16_IS_SURROGATE(c)) { \ uint16_t __c2; \ if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ ++(i); \ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ } else { \ (c)=0xfffd; \ } \ } \ } UPRV_BLOCK_MACRO_END /** * Append a code point to a string, overwriting 1 or 2 code units. * The offset points to the current end of the string contents * and is advanced (post-increment). * "Unsafe" macro, assumes a valid code point and sufficient space in the string. * Otherwise, the result is undefined. * * @param s const UChar * string buffer * @param i string offset * @param c code point to append * @see U16_APPEND * @stable ICU 2.4 */ #define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else { \ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ } \ } UPRV_BLOCK_MACRO_END /** * Append a code point to a string, overwriting 1 or 2 code units. * The offset points to the current end of the string contents * and is advanced (post-increment). * "Safe" macro, checks for a valid code point. * If a surrogate pair is written, checks for sufficient space in the string. * If the code point is not valid or a trail surrogate does not fit, * then isError is set to TRUE. * * @param s const UChar * string buffer * @param i string offset, must be i<capacity * @param capacity size of the string buffer * @param c code point to append * @param isError output UBool set to TRUE if an error occurs, otherwise not modified * @see U16_APPEND_UNSAFE * @stable ICU 2.4 */ #define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ } else /* c>0x10ffff or not enough space */ { \ (isError)=TRUE; \ } \ } UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the next. * (Post-incrementing iteration.) * "Unsafe" macro, assumes well-formed UTF-16. * * @param s const UChar * string * @param i string offset * @see U16_FWD_1 * @stable ICU 2.4 */ #define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_LEAD((s)[(i)++])) { \ ++(i); \ } \ } UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the next. * (Post-incrementing iteration.) * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * The length can be negative for a NUL-terminated string. * * @param s const UChar * string * @param i string offset, must be i<length * @param length string length * @see U16_FWD_1_UNSAFE * @stable ICU 2.4 */ #define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \ ++(i); \ } \ } UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the n-th next one, * i.e., move forward by n code points. * (Post-incrementing iteration.) * "Unsafe" macro, assumes well-formed UTF-16. * * @param s const UChar * string * @param i string offset * @param n number of code points to skip * @see U16_FWD_N * @stable ICU 2.4 */ #define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ U16_FWD_1_UNSAFE(s, i); \ --__N; \ } \ } UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the n-th next one, * i.e., move forward by n code points. * (Post-incrementing iteration.) * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * The length can be negative for a NUL-terminated string. * * @param s const UChar * string * @param i int32_t string offset, must be i<length * @param length int32_t string length * @param n number of code points to skip * @see U16_FWD_N_UNSAFE * @stable ICU 2.4 */ #define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ U16_FWD_1(s, i, length); \ --__N; \ } \ } UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary * at the start of a code point. * If the offset points to the trail surrogate of a surrogate pair, * then the offset is decremented. * Otherwise, it is not modified. * "Unsafe" macro, assumes well-formed UTF-16. * * @param s const UChar * string * @param i string offset * @see U16_SET_CP_START * @stable ICU 2.4 */ #define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[i])) { \ --(i); \ } \ } UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary * at the start of a code point. * If the offset points to the trail surrogate of a surrogate pair, * then the offset is decremented. * Otherwise, it is not modified. * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * @param s const UChar * string * @param start starting string offset (usually 0) * @param i string offset, must be start<=i * @see U16_SET_CP_START_UNSAFE * @stable ICU 2.4 */ #define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ --(i); \ } \ } UPRV_BLOCK_MACRO_END /* definitions with backward iteration -------------------------------------- */ /** * Move the string offset from one code point boundary to the previous one * and get the code point between them. * (Pre-decrementing backward iteration.) * "Unsafe" macro, assumes well-formed UTF-16. * * The input offset may be the same as the string length. * If the offset is behind a trail surrogate unit * for a supplementary code point, then the macro will read * the preceding lead surrogate as well. * If the offset is behind a lead surrogate, then that itself * will be returned as the code point. * The result is undefined if the offset is behind a single, unpaired trail surrogate. * * @param s const UChar * string * @param i string offset * @param c output UChar32 variable * @see U16_PREV * @stable ICU 2.4 */ #define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(U16_IS_TRAIL(c)) { \ (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ } \ } UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one * and get the code point between them. * (Pre-decrementing backward iteration.) * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * The input offset may be the same as the string length. * If the offset is behind a trail surrogate unit * for a supplementary code point, then the macro will read * the preceding lead surrogate as well. * If the offset is behind a lead surrogate or behind a single, unpaired * trail surrogate, then c is set to that unpaired surrogate. * * @param s const UChar * string * @param start starting string offset (usually 0) * @param i string offset, must be start<i * @param c output UChar32 variable * @see U16_PREV_UNSAFE * @stable ICU 2.4 */ #define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(U16_IS_TRAIL(c)) { \ uint16_t __c2; \ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ --(i); \ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ } \ } \ } UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one * and get the code point between them. * (Pre-decrementing backward iteration.) * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * The input offset may be the same as the string length. * If the offset is behind a trail surrogate unit * for a supplementary code point, then the macro will read * the preceding lead surrogate as well. * If the offset is behind a lead surrogate or behind a single, unpaired * trail surrogate, then c is set to U+FFFD. * * @param s const UChar * string * @param start starting string offset (usually 0) * @param i string offset, must be start<i * @param c output UChar32 variable * @see U16_PREV_UNSAFE * @stable ICU 60 */ #define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(U16_IS_SURROGATE(c)) { \ uint16_t __c2; \ if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ --(i); \ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ } else { \ (c)=0xfffd; \ } \ } \ } UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one. * (Pre-decrementing backward iteration.) * The input offset may be the same as the string length. * "Unsafe" macro, assumes well-formed UTF-16. * * @param s const UChar * string * @param i string offset * @see U16_BACK_1 * @stable ICU 2.4 */ #define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[--(i)])) { \ --(i); \ } \ } UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one. * (Pre-decrementing backward iteration.) * The input offset may be the same as the string length. * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * @param s const UChar * string * @param start starting string offset (usually 0) * @param i string offset, must be start<i * @see U16_BACK_1_UNSAFE * @stable ICU 2.4 */ #define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ --(i); \ } \ } UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the n-th one before it, * i.e., move backward by n code points. * (Pre-decrementing backward iteration.) * The input offset may be the same as the string length. * "Unsafe" macro, assumes well-formed UTF-16. * * @param s const UChar * string * @param i string offset * @param n number of code points to skip * @see U16_BACK_N * @stable ICU 2.4 */ #define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ U16_BACK_1_UNSAFE(s, i); \ --__N; \ } \ } UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the n-th one before it, * i.e., move backward by n code points. * (Pre-decrementing backward iteration.) * The input offset may be the same as the string length. * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * @param s const UChar * string * @param start start of string * @param i string offset, must be start<i * @param n number of code points to skip * @see U16_BACK_N_UNSAFE * @stable ICU 2.4 */ #define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0 && (i)>(start)) { \ U16_BACK_1(s, start, i); \ --__N; \ } \ } UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary after a code point. * If the offset is behind the lead surrogate of a surrogate pair, * then the offset is incremented. * Otherwise, it is not modified. * The input offset may be the same as the string length. * "Unsafe" macro, assumes well-formed UTF-16. * * @param s const UChar * string * @param i string offset * @see U16_SET_CP_LIMIT * @stable ICU 2.4 */ #define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_LEAD((s)[(i)-1])) { \ ++(i); \ } \ } UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary after a code point. * If the offset is behind the lead surrogate of a surrogate pair, * then the offset is incremented. * Otherwise, it is not modified. * The input offset may be the same as the string length. * "Safe" macro, handles unpaired surrogates and checks for string boundaries. * * The length can be negative for a NUL-terminated string. * * @param s const UChar * string * @param start int32_t starting string offset (usually 0) * @param i int32_t string offset, start<=i<=length * @param length int32_t string length * @see U16_SET_CP_LIMIT_UNSAFE * @stable ICU 2.4 */ #define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ ++(i); \ } \ } UPRV_BLOCK_MACRO_END #endif
Save