UnicodeBlockObjects.h revision 9b6b158b
1/*
2 * Copyright 2001-2016, Haiku, Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 */
5#ifndef _UNICODEBLOCKOBJECTS_H
6#define _UNICODEBLOCKOBJECTS_H
7
8
9#include <Font.h>
10
11
12// Unicode block list with their unicode encoding range
13//
14// Original BeOS-compatible blocks
15const unicode_block B_BASIC_LATIN_BLOCK(					/* 0000 - 007F			*/	0x0000000000000000LL, 0x0000000000000001LL);
16const unicode_block B_LATIN1_SUPPLEMENT_BLOCK(				/* 0080 - 00FF			*/	0x0000000000000000LL, 0x0000000000000002LL);
17const unicode_block B_LATIN_EXTENDED_A_BLOCK(				/* 0100 - 017F			*/	0x0000000000000000LL, 0x0000000000000004LL);
18const unicode_block B_LATIN_EXTENDED_B_BLOCK(				/* 0180 - 024F			*/	0x0000000000000000LL, 0x0000000000000008LL);
19const unicode_block B_IPA_EXTENSIONS_BLOCK(					/* 0250 - 02AF			*/	0x0000000000000000LL, 0x0000000000000010LL);
20const unicode_block B_SPACING_MODIFIER_LETTERS_BLOCK(		/* 02B0 - 02FF			*/	0x0000000000000000LL, 0x0000000000000020LL);
21const unicode_block B_COMBINING_DIACRITICAL_MARKS_BLOCK(	/* 0300 - 036F			*/	0x0000000000000000LL, 0x0000000000000040LL);
22const unicode_block B_BASIC_GREEK_BLOCK(					/* 0370 - 03CF			*/	0x0000000000000000LL, 0x0000000000000080LL);
23const unicode_block B_GREEK_SYMBOLS_AND_COPTIC_BLOCK(		/* 03D0 - 03FF			*/	0x0000000000000000LL, 0x0000000000000100LL);
24const unicode_block B_CYRILLIC_BLOCK(						/* 0400 - 04FF			*/	0x0000000000000000LL, 0x0000000000000200LL);
25const unicode_block B_ARMENIAN_BLOCK(						/* 0530 - 058F			*/	0x0000000000000000LL, 0x0000000000000400LL);
26const unicode_block B_BASIC_HEBREW_BLOCK(					/* 0590 - 05CF			*/	0x0000000000000000LL, 0x0000000000000800LL);
27const unicode_block B_HEBREW_EXTENDED_BLOCK(				/* 05D0 - 05FF			*/	0x0000000000000000LL, 0x0000000000001000LL);
28const unicode_block B_BASIC_ARABIC_BLOCK(					/* 0600 - 0670			*/	0x0000000000000000LL, 0x0000000000002000LL);
29const unicode_block B_ARABIC_EXTENDED_BLOCK(				/* 0671 - 06FF			*/	0x0000000000000000LL, 0x0000000000004000LL);
30const unicode_block B_DEVANAGARI_BLOCK(						/* 0900 - 097F			*/	0x0000000000000000LL, 0x0000000000008000LL);
31const unicode_block B_BENGALI_BLOCK(						/* 0980 - 09FF			*/	0x0000000000000000LL, 0x0000000000010000LL);
32const unicode_block B_GURMUKHI_BLOCK(						/* 0A00 - 0A7F			*/	0x0000000000000000LL, 0x0000000000020000LL);
33const unicode_block B_GUJARATI_BLOCK(						/* 0A80 - 0AFF			*/	0x0000000000000000LL, 0x0000000000040000LL);
34const unicode_block B_ORIYA_BLOCK(							/* 0B00 - 0B7F			*/	0x0000000000000000LL, 0x0000000000080000LL);
35const unicode_block B_TAMIL_BLOCK(							/* 0B80 - 0BFF			*/	0x0000000000000000LL, 0x0000000000100000LL);
36const unicode_block B_TELUGU_BLOCK(							/* 0C00 - 0C7F			*/	0x0000000000000000LL, 0x0000000000200000LL);
37const unicode_block B_KANNADA_BLOCK(						/* 0C80 - 0CFF			*/	0x0000000000000000LL, 0x0000000000400000LL);
38const unicode_block B_MALAYALAM_BLOCK(						/* 0D00 - 0D7F			*/	0x0000000000000000LL, 0x0000000000800000LL);
39const unicode_block B_THAI_BLOCK(							/* 0E00 - 0E7F			*/	0x0000000000000000LL, 0x0000000001000000LL);
40const unicode_block B_LAO_BLOCK(							/* 0E80 - 0EFF			*/	0x0000000000000000LL, 0x0000000002000000LL);
41const unicode_block B_BASIC_GEORGIAN_BLOCK(					/* 10A0 - 10CF			*/	0x0000000000000000LL, 0x0000000004000000LL);
42const unicode_block B_GEORGIAN_EXTENDED_BLOCK(				/* 10D0 - 10FF			*/	0x0000000000000000LL, 0x0000000008000000LL);
43const unicode_block B_HANGUL_JAMO_BLOCK(					/* 1100 - 11FF			*/	0x0000000000000000LL, 0x0000000010000000LL);
44const unicode_block B_LATIN_EXTENDED_ADDITIONAL_BLOCK(		/* 1E00 - 1EFF			*/	0x0000000000000000LL, 0x0000000020000000LL);
45const unicode_block B_GREEK_EXTENDED_BLOCK(					/* 1F00 - 1FFF			*/	0x0000000000000000LL, 0x0000000040000000LL);
46const unicode_block B_GENERAL_PUNCTUATION_BLOCK(			/* 2000 - 206F			*/	0x0000000000000000LL, 0x0000000080000000LL);
47const unicode_block B_SUPERSCRIPTS_AND_SUBSCRIPTS_BLOCK(	/* 2070 - 209F			*/	0x0000000000000000LL, 0x0000000100000000LL);
48const unicode_block B_CURRENCY_SYMBOLS_BLOCK(				/* 20A0 - 20CF			*/	0x0000000000000000LL, 0x0000000200000000LL);
49const unicode_block B_COMBINING_MARKS_FOR_SYMBOLS_BLOCK(	/* 20D0 - 20FF			*/	0x0000000000000000LL, 0x0000000400000000LL);
50const unicode_block B_LETTERLIKE_SYMBOLS_BLOCK(				/* 2100 - 214F			*/	0x0000000000000000LL, 0x0000000800000000LL);
51const unicode_block B_NUMBER_FORMS_BLOCK(					/* 2150 - 218F			*/	0x0000000000000000LL, 0x0000001000000000LL);
52const unicode_block B_ARROWS_BLOCK(							/* 2190 - 21FF			*/	0x0000000000000000LL, 0x0000002000000000LL);
53const unicode_block B_MATHEMATICAL_OPERATORS_BLOCK(			/* 2200 - 22FF			*/	0x0000000000000000LL, 0x0000004000000000LL);
54const unicode_block B_MISCELLANEOUS_TECHNICAL_BLOCK(		/* 2300 - 23FF			*/	0x0000000000000000LL, 0x0000008000000000LL);
55const unicode_block B_CONTROL_PICTURES_BLOCK(				/* 2400 - 243F			*/	0x0000000000000000LL, 0x0000010000000000LL);
56const unicode_block B_OPTICAL_CHARACTER_RECOGNITION_BLOCK(	/* 2440 - 245F			*/	0x0000000000000000LL, 0x0000020000000000LL);
57const unicode_block B_ENCLOSED_ALPHANUMERICS_BLOCK(			/* 2460 - 24FF			*/	0x0000000000000000LL, 0x0000040000000000LL);
58const unicode_block B_BOX_DRAWING_BLOCK(					/* 2500 - 257F			*/	0x0000000000000000LL, 0x0000080000000000LL);
59const unicode_block B_BLOCK_ELEMENTS_BLOCK(					/* 2580 - 259F			*/	0x0000000000000000LL, 0x0000100000000000LL);
60const unicode_block B_GEOMETRIC_SHAPES_BLOCK(				/* 25A0 - 25FF			*/	0x0000000000000000LL, 0x0000200000000000LL);
61const unicode_block B_MISCELLANEOUS_SYMBOLS_BLOCK(			/* 2600 - 26FF			*/	0x0000000000000000LL, 0x0000400000000000LL);
62const unicode_block B_DINGBATS_BLOCK(						/* 2700 - 27BF			*/	0x0000000000000000LL, 0x0000800000000000LL);
63const unicode_block B_CJK_SYMBOLS_AND_PUNCTUATION_BLOCK(	/* 3000 - 303F			*/	0x0000000000000000LL, 0x0001000000000000LL);
64const unicode_block B_HIRAGANA_BLOCK(						/* 3040 - 309F			*/	0x0000000000000000LL, 0x0002000000000000LL);
65const unicode_block B_KATAKANA_BLOCK(						/* 30A0 - 30FF			*/	0x0000000000000000LL, 0x0004000000000000LL);
66const unicode_block B_BOPOMOFO_BLOCK(						/* 3100 - 312F			*/	0x0000000000000000LL, 0x0008000000000000LL);
67const unicode_block B_HANGUL_COMPATIBILITY_JAMO_BLOCK(		/* 3130 - 318F			*/	0x0000000000000000LL, 0x0010000000000000LL);
68const unicode_block B_CJK_MISCELLANEOUS_BLOCK(				/* 3190 - 319F			*/	0x0000000000000000LL, 0x0020000000000000LL);
69const unicode_block B_ENCLOSED_CJK_LETTERS_AND_MONTHS_BLOCK(/* 3200 - 32FF			*/	0x0000000000000000LL, 0x0040000000000000LL);
70const unicode_block B_CJK_COMPATIBILITY_BLOCK(				/* 3300 - 33FF			*/	0x0000000000000000LL, 0x0080000000000000LL);
71const unicode_block B_HANGUL_BLOCK(							/* AC00 - D7AF			*/	0x0000000000000000LL, 0x0100000000000000LL);
72const unicode_block B_HIGH_SURROGATES_BLOCK(				/* D800 - DBFF			*/	0x0000000000000000LL, 0x0200000000000000LL);
73const unicode_block B_LOW_SURROGATES_BLOCK(					/* DC00 - DFFF			*/	0x0000000000000000LL, 0x0400000000000000LL);
74const unicode_block B_CJK_UNIFIED_IDEOGRAPHS_BLOCK(			/* 4E00 - 9FFF			*/	0x0000000000000000LL, 0x0800000000000000LL);
75const unicode_block B_PRIVATE_USE_AREA_BLOCK(				/* E000 - F8FF			*/	0x0000000000000000LL, 0x1000000000000000LL);
76const unicode_block B_CJK_COMPATIBILITY_IDEOGRAPHS_BLOCK(	/* F900 - FAFF			*/	0x0000000000000000LL, 0x2000000000000000LL);
77const unicode_block B_ALPHABETIC_PRESENTATION_FORMS_BLOCK(	/* FB00 - FB4F			*/	0x0000000000000000LL, 0x4000000000000000LL);
78const unicode_block B_ARABIC_PRESENTATION_FORMS_A_BLOCK(	/* FB50 - FDFF			*/	0x0000000000000000LL, 0x8000000000000000LL);
79const unicode_block B_COMBINING_HALF_MARKS_BLOCK(			/* FE20 - FE2F			*/	0x0000000000000001LL, 0x0000000000000000LL);
80const unicode_block B_CJK_COMPATIBILITY_FORMS_BLOCK(		/* FE30 - FE4F			*/	0x0000000000000002LL, 0x0000000000000000LL);
81const unicode_block B_SMALL_FORM_VARIANTS_BLOCK(			/* FE50 - FE6F			*/	0x0000000000000004LL, 0x0000000000000000LL);
82const unicode_block B_ARABIC_PRESENTATION_FORMS_B_BLOCK(	/* FE70 - FEFE			*/	0x0000000000000008LL, 0x0000000000000000LL);
83const unicode_block B_HALFWIDTH_AND_FULLWIDTH_FORMS_BLOCK(	/* FF00 - FFEF			*/	0x0000000000000010LL, 0x0000000000000000LL);
84const unicode_block B_SPECIALS_BLOCK(						/* FEFF and FFF0 - FFFF	*/	0x0000000000000020LL, 0x0000000000000000LL);
85const unicode_block B_TIBETAN_BLOCK(						/* 0F00 - 0FBF			*/	0x0000000000000040LL, 0x0000000000000000LL);
86
87
88const unicode_block_range kUnicodeBlockMap[] = {
89	{0x0000, 0x007f, B_BASIC_LATIN_BLOCK },
90	{0x0080, 0x00ff, B_LATIN1_SUPPLEMENT_BLOCK },
91	{0x0100, 0x017f, B_LATIN_EXTENDED_A_BLOCK },
92	{0x0180, 0x024f, B_LATIN_EXTENDED_B_BLOCK },
93	{0x0250, 0x02af, B_IPA_EXTENSIONS_BLOCK },
94	{0x02b0, 0x02ff, B_SPACING_MODIFIER_LETTERS_BLOCK },
95	{0x0300, 0x036f, B_COMBINING_DIACRITICAL_MARKS_BLOCK },
96	{0x0370, 0x03cf, B_BASIC_GREEK_BLOCK },
97	{0x03d0, 0x03ff, B_GREEK_SYMBOLS_AND_COPTIC_BLOCK },
98	{0x0400, 0x04ff, B_CYRILLIC_BLOCK },
99	{0x0530, 0x058f, B_ARMENIAN_BLOCK },
100	{0x0590, 0x05cf, B_BASIC_HEBREW_BLOCK },
101	{0x05d0, 0x05ff, B_HEBREW_EXTENDED_BLOCK },
102	{0x0600, 0x0670, B_BASIC_ARABIC_BLOCK },
103	{0x0671, 0x06ff, B_ARABIC_EXTENDED_BLOCK },
104	{0x0900, 0x097f, B_DEVANAGARI_BLOCK },
105	{0x0980, 0x09ff, B_BENGALI_BLOCK },
106	{0x0a00, 0x0a7f, B_GURMUKHI_BLOCK },
107	{0x0a80, 0x0aff, B_GUJARATI_BLOCK },
108	{0x0b00, 0x0b7f, B_ORIYA_BLOCK },
109	{0x0b80, 0x0bff, B_TAMIL_BLOCK },
110	{0x0c00, 0x0c7f, B_TELUGU_BLOCK },
111	{0x0c80, 0x0cff, B_KANNADA_BLOCK},
112	{0x0d00, 0x0d7f, B_MALAYALAM_BLOCK},
113	{0x0e00, 0x0e7f, B_THAI_BLOCK},
114	{0x0e80, 0x0eff, B_LAO_BLOCK},
115	{0x0f00, 0x0fff, B_TIBETAN_BLOCK},
116	{0x10a0, 0x10ff, B_BASIC_GEORGIAN_BLOCK},
117	{0x1100, 0x11ff, B_HANGUL_JAMO_BLOCK},
118	{0x1e00, 0x1eff, B_LATIN_EXTENDED_ADDITIONAL_BLOCK},
119	{0x1f00, 0x1fff, B_GREEK_EXTENDED_BLOCK},
120	{0x2000, 0x206f, B_GENERAL_PUNCTUATION_BLOCK},
121	{0x2070, 0x209f, B_SUPERSCRIPTS_AND_SUBSCRIPTS_BLOCK},
122	{0x20a0, 0x20cf, B_CURRENCY_SYMBOLS_BLOCK},
123	{0x20d0, 0x20ff, B_COMBINING_MARKS_FOR_SYMBOLS_BLOCK},
124	{0x2100, 0x214f, B_LETTERLIKE_SYMBOLS_BLOCK},
125	{0x2150, 0x218f, B_NUMBER_FORMS_BLOCK},
126	{0x2190, 0x21ff, B_ARROWS_BLOCK},
127	{0x2200, 0x22ff, B_MATHEMATICAL_OPERATORS_BLOCK},
128	{0x2300, 0x23ff, B_MISCELLANEOUS_TECHNICAL_BLOCK},
129	{0x2400, 0x243f, B_CONTROL_PICTURES_BLOCK},
130	{0x2440, 0x245f, B_OPTICAL_CHARACTER_RECOGNITION_BLOCK},
131	{0x2460, 0x24ff, B_ENCLOSED_ALPHANUMERICS_BLOCK},
132	{0x2500, 0x257f, B_BOX_DRAWING_BLOCK},
133	{0x2580, 0x259f, B_BLOCK_ELEMENTS_BLOCK},
134	{0x25a0, 0x25ff, B_GEOMETRIC_SHAPES_BLOCK},
135	{0x2600, 0x26ff, B_MISCELLANEOUS_SYMBOLS_BLOCK},
136	{0x2700, 0x27bf, B_DINGBATS_BLOCK},
137	{0x3000, 0x303f, B_CJK_SYMBOLS_AND_PUNCTUATION_BLOCK},
138	{0x3040, 0x309f, B_HIRAGANA_BLOCK},
139	{0x30a0, 0x30ff, B_KATAKANA_BLOCK},
140	{0x3100, 0x312f, B_BOPOMOFO_BLOCK},
141	{0x3130, 0x318f, B_HANGUL_COMPATIBILITY_JAMO_BLOCK},
142	{0x3190, 0x319f, B_CJK_MISCELLANEOUS_BLOCK},
143	{0x3200, 0x32ff, B_ENCLOSED_CJK_LETTERS_AND_MONTHS_BLOCK},
144	{0x3300, 0x33ff, B_CJK_COMPATIBILITY_BLOCK},
145	{0x4e00, 0x9fff, B_CJK_UNIFIED_IDEOGRAPHS_BLOCK},
146	{0xd800, 0xdb7f, B_HIGH_SURROGATES_BLOCK},
147	{0xdc00, 0xdfff, B_LOW_SURROGATES_BLOCK},
148	{0xe000, 0xf8ff, B_PRIVATE_USE_AREA_BLOCK},
149	{0xf900, 0xfaff, B_CJK_COMPATIBILITY_IDEOGRAPHS_BLOCK},
150	{0xfb00, 0xfb4f, B_ALPHABETIC_PRESENTATION_FORMS_BLOCK},
151	{0xfb50, 0xfdff, B_ARABIC_PRESENTATION_FORMS_A_BLOCK},
152	{0xfe20, 0xfe2f, B_COMBINING_HALF_MARKS_BLOCK},
153	{0xfe30, 0xfe4f, B_CJK_COMPATIBILITY_FORMS_BLOCK},
154	{0xfe50, 0xfe6f, B_SMALL_FORM_VARIANTS_BLOCK},
155	{0xfe70, 0xfeff, B_ARABIC_PRESENTATION_FORMS_B_BLOCK},
156	{0xff00, 0xffef, B_HALFWIDTH_AND_FULLWIDTH_FORMS_BLOCK},
157	{0xfff0, 0xffff, B_SPECIALS_BLOCK}
158};
159
160const uint32 kNumUnicodeBlockRanges
161	= sizeof(kUnicodeBlockMap) / sizeof(kUnicodeBlockMap[0]);
162
163#endif	// _UNICODEBLOCKOBJECTS_H
164