mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 00:28:52 +02:00 
			
		
		
		
	unicode: Add utf8-data module
utf8data.h contains a large database table which is an auto-generated decoding trie for the Unicode normalization functions. Allow building it as a separate module. Based on a patch from Shreeya Patel <shreeya.patel@collabora.com>. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
This commit is contained in:
		
							parent
							
								
									6ca99ce756
								
							
						
					
					
						commit
						2b3d047870
					
				
					 9 changed files with 126 additions and 91 deletions
				
			
		|  | @ -8,7 +8,16 @@ config UNICODE | ||||||
| 	  Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding | 	  Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding | ||||||
| 	  support. | 	  support. | ||||||
| 
 | 
 | ||||||
|  | config UNICODE_UTF8_DATA | ||||||
|  | 	tristate "UTF-8 normalization and casefolding tables" | ||||||
|  | 	depends on UNICODE | ||||||
|  | 	default UNICODE | ||||||
|  | 	help | ||||||
|  | 	  This contains a large table of case foldings, which can be loaded as | ||||||
|  | 	  a separate module if you say M here.  To be on the safe side, stick | ||||||
|  | 	  to the default of Y.  Saying N here makes no sense; if you do not want | ||||||
|  | 	  utf8 casefolding support, disable CONFIG_UNICODE instead. | ||||||
|  | 
 | ||||||
| config UNICODE_NORMALIZATION_SELFTEST | config UNICODE_NORMALIZATION_SELFTEST | ||||||
| 	tristate "Test UTF-8 normalization support" | 	tristate "Test UTF-8 normalization support" | ||||||
| 	depends on UNICODE | 	depends on UNICODE_UTF8_DATA | ||||||
| 	default n |  | ||||||
|  |  | ||||||
|  | @ -2,14 +2,15 @@ | ||||||
| 
 | 
 | ||||||
| obj-$(CONFIG_UNICODE) += unicode.o | obj-$(CONFIG_UNICODE) += unicode.o | ||||||
| obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o | obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o | ||||||
|  | obj-$(CONFIG_UNICODE_UTF8_DATA) += utf8data.o | ||||||
| 
 | 
 | ||||||
| unicode-y := utf8-norm.o utf8-core.o | unicode-y := utf8-norm.o utf8-core.o | ||||||
| 
 | 
 | ||||||
| $(obj)/utf8-norm.o: $(obj)/utf8data.h | $(obj)/utf8-data.o: $(obj)/utf8data.c | ||||||
| 
 | 
 | ||||||
| # In the normal build, the checked-in utf8data.h is just shipped.
 | # In the normal build, the checked-in utf8data.c is just shipped.
 | ||||||
| #
 | #
 | ||||||
| # To generate utf8data.h from UCD, put *.txt files in this directory
 | # To generate utf8data.c from UCD, put *.txt files in this directory
 | ||||||
| # and pass REGENERATE_UTF8DATA=1 from the command line.
 | # and pass REGENERATE_UTF8DATA=1 from the command line.
 | ||||||
| ifdef REGENERATE_UTF8DATA | ifdef REGENERATE_UTF8DATA | ||||||
| 
 | 
 | ||||||
|  | @ -24,15 +25,15 @@ quiet_cmd_utf8data = GEN     $@ | ||||||
| 		-t $(srctree)/$(src)/NormalizationTest.txt \
 | 		-t $(srctree)/$(src)/NormalizationTest.txt \
 | ||||||
| 		-o $@ | 		-o $@ | ||||||
| 
 | 
 | ||||||
| $(obj)/utf8data.h: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE | $(obj)/utf8data.c: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE | ||||||
| 	$(call if_changed,utf8data) | 	$(call if_changed,utf8data) | ||||||
| 
 | 
 | ||||||
| else | else | ||||||
| 
 | 
 | ||||||
| $(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE | $(obj)/utf8data.c: $(src)/utf8data.c_shipped FORCE | ||||||
| 	$(call if_changed,shipped) | 	$(call if_changed,shipped) | ||||||
| 
 | 
 | ||||||
| endif | endif | ||||||
| 
 | 
 | ||||||
| targets += utf8data.h | targets += utf8data.c | ||||||
| hostprogs += mkutf8data | hostprogs += mkutf8data | ||||||
|  |  | ||||||
|  | @ -3287,12 +3287,10 @@ static void write_file(void) | ||||||
| 		open_fail(utf8_name, errno); | 		open_fail(utf8_name, errno); | ||||||
| 
 | 
 | ||||||
| 	fprintf(file, "/* This file is generated code, do not edit. */\n"); | 	fprintf(file, "/* This file is generated code, do not edit. */\n"); | ||||||
| 	fprintf(file, "#ifndef __INCLUDED_FROM_UTF8NORM_C__\n"); |  | ||||||
| 	fprintf(file, "#error Only nls_utf8-norm.c should include this file.\n"); |  | ||||||
| 	fprintf(file, "#endif\n"); |  | ||||||
| 	fprintf(file, "\n"); | 	fprintf(file, "\n"); | ||||||
| 	fprintf(file, "static const unsigned int utf8vers = %#x;\n", | 	fprintf(file, "#include <linux/module.h>\n"); | ||||||
| 		unicode_maxage); | 	fprintf(file, "#include <linux/kernel.h>\n"); | ||||||
|  | 	fprintf(file, "#include \"utf8n.h\"\n"); | ||||||
| 	fprintf(file, "\n"); | 	fprintf(file, "\n"); | ||||||
| 	fprintf(file, "static const unsigned int utf8agetab[] = {\n"); | 	fprintf(file, "static const unsigned int utf8agetab[] = {\n"); | ||||||
| 	for (i = 0; i != ages_count; i++) | 	for (i = 0; i != ages_count; i++) | ||||||
|  | @ -3339,6 +3337,22 @@ static void write_file(void) | ||||||
| 		fprintf(file, "\n"); | 		fprintf(file, "\n"); | ||||||
| 	} | 	} | ||||||
| 	fprintf(file, "};\n"); | 	fprintf(file, "};\n"); | ||||||
|  | 	fprintf(file, "\n"); | ||||||
|  | 	fprintf(file, "struct utf8data_table utf8_data_table = {\n"); | ||||||
|  | 	fprintf(file, "\t.utf8agetab = utf8agetab,\n"); | ||||||
|  | 	fprintf(file, "\t.utf8agetab_size = ARRAY_SIZE(utf8agetab),\n"); | ||||||
|  | 	fprintf(file, "\n"); | ||||||
|  | 	fprintf(file, "\t.utf8nfdicfdata = utf8nfdicfdata,\n"); | ||||||
|  | 	fprintf(file, "\t.utf8nfdicfdata_size = ARRAY_SIZE(utf8nfdicfdata),\n"); | ||||||
|  | 	fprintf(file, "\n"); | ||||||
|  | 	fprintf(file, "\t.utf8nfdidata = utf8nfdidata,\n"); | ||||||
|  | 	fprintf(file, "\t.utf8nfdidata_size = ARRAY_SIZE(utf8nfdidata),\n"); | ||||||
|  | 	fprintf(file, "\n"); | ||||||
|  | 	fprintf(file, "\t.utf8data = utf8data,\n"); | ||||||
|  | 	fprintf(file, "};\n"); | ||||||
|  | 	fprintf(file, "EXPORT_SYMBOL_GPL(utf8_data_table);"); | ||||||
|  | 	fprintf(file, "\n"); | ||||||
|  | 	fprintf(file, "MODULE_LICENSE(\"GPL v2\");\n"); | ||||||
| 	fclose(file); | 	fclose(file); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -160,25 +160,45 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str, | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(utf8_normalize); | EXPORT_SYMBOL(utf8_normalize); | ||||||
| 
 | 
 | ||||||
|  | static const struct utf8data *find_table_version(const struct utf8data *table, | ||||||
|  | 		size_t nr_entries, unsigned int version) | ||||||
|  | { | ||||||
|  | 	size_t i = nr_entries - 1; | ||||||
|  | 
 | ||||||
|  | 	while (version < table[i].maxage) | ||||||
|  | 		i--; | ||||||
|  | 	if (version > table[i].maxage) | ||||||
|  | 		return NULL; | ||||||
|  | 	return &table[i]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| struct unicode_map *utf8_load(unsigned int version) | struct unicode_map *utf8_load(unsigned int version) | ||||||
| { | { | ||||||
| 	struct unicode_map *um; | 	struct unicode_map *um; | ||||||
| 
 | 
 | ||||||
| 	if (!utf8version_is_supported(version)) |  | ||||||
| 		return ERR_PTR(-EINVAL); |  | ||||||
| 
 |  | ||||||
| 	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL); | 	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL); | ||||||
| 	if (!um) | 	if (!um) | ||||||
| 		return ERR_PTR(-ENOMEM); | 		return ERR_PTR(-ENOMEM); | ||||||
| 	um->version = version; | 	um->version = version; | ||||||
| 	um->ntab[UTF8_NFDI] = utf8nfdi(version); | 
 | ||||||
|  | 	um->tables = symbol_request(utf8_data_table); | ||||||
|  | 	if (!um->tables) | ||||||
|  | 		goto out_free_um; | ||||||
|  | 
 | ||||||
|  | 	if (!utf8version_is_supported(um, version)) | ||||||
|  | 		goto out_symbol_put; | ||||||
|  | 	um->ntab[UTF8_NFDI] = find_table_version(um->tables->utf8nfdidata, | ||||||
|  | 			um->tables->utf8nfdidata_size, um->version); | ||||||
| 	if (!um->ntab[UTF8_NFDI]) | 	if (!um->ntab[UTF8_NFDI]) | ||||||
| 		goto out_free_um; | 		goto out_symbol_put; | ||||||
| 	um->ntab[UTF8_NFDICF] = utf8nfdicf(version); | 	um->ntab[UTF8_NFDICF] = find_table_version(um->tables->utf8nfdicfdata, | ||||||
|  | 			um->tables->utf8nfdicfdata_size, um->version); | ||||||
| 	if (!um->ntab[UTF8_NFDICF]) | 	if (!um->ntab[UTF8_NFDICF]) | ||||||
| 		goto out_free_um; | 		goto out_symbol_put; | ||||||
| 	return um; | 	return um; | ||||||
| 
 | 
 | ||||||
|  | out_symbol_put: | ||||||
|  | 	symbol_put(um->tables); | ||||||
| out_free_um: | out_free_um: | ||||||
| 	kfree(um); | 	kfree(um); | ||||||
| 	return ERR_PTR(-EINVAL); | 	return ERR_PTR(-EINVAL); | ||||||
|  | @ -187,7 +207,10 @@ EXPORT_SYMBOL(utf8_load); | ||||||
| 
 | 
 | ||||||
| void utf8_unload(struct unicode_map *um) | void utf8_unload(struct unicode_map *um) | ||||||
| { | { | ||||||
| 	kfree(um); | 	if (um) { | ||||||
|  | 		symbol_put(utf8_data_table); | ||||||
|  | 		kfree(um); | ||||||
|  | 	} | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(utf8_unload); | EXPORT_SYMBOL(utf8_unload); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -6,21 +6,12 @@ | ||||||
| 
 | 
 | ||||||
| #include "utf8n.h" | #include "utf8n.h" | ||||||
| 
 | 
 | ||||||
| struct utf8data { | int utf8version_is_supported(const struct unicode_map *um, unsigned int version) | ||||||
| 	unsigned int maxage; |  | ||||||
| 	unsigned int offset; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| #define __INCLUDED_FROM_UTF8NORM_C__ |  | ||||||
| #include "utf8data.h" |  | ||||||
| #undef __INCLUDED_FROM_UTF8NORM_C__ |  | ||||||
| 
 |  | ||||||
| int utf8version_is_supported(unsigned int version) |  | ||||||
| { | { | ||||||
| 	int i = ARRAY_SIZE(utf8agetab) - 1; | 	int i = um->tables->utf8agetab_size - 1; | ||||||
| 
 | 
 | ||||||
| 	while (i >= 0 && utf8agetab[i] != 0) { | 	while (i >= 0 && um->tables->utf8agetab[i] != 0) { | ||||||
| 		if (version == utf8agetab[i]) | 		if (version == um->tables->utf8agetab[i]) | ||||||
| 			return 1; | 			return 1; | ||||||
| 		i--; | 		i--; | ||||||
| 	} | 	} | ||||||
|  | @ -161,7 +152,7 @@ typedef const unsigned char utf8trie_t; | ||||||
|  * underlying datatype: unsigned char. |  * underlying datatype: unsigned char. | ||||||
|  * |  * | ||||||
|  * leaf[0]: The unicode version, stored as a generation number that is |  * leaf[0]: The unicode version, stored as a generation number that is | ||||||
|  *          an index into utf8agetab[].  With this we can filter code |  *          an index into ->utf8agetab[].  With this we can filter code | ||||||
|  *          points based on the unicode version in which they were |  *          points based on the unicode version in which they were | ||||||
|  *          defined.  The CCC of a non-defined code point is 0. |  *          defined.  The CCC of a non-defined code point is 0. | ||||||
|  * leaf[1]: Canonical Combining Class. During normalization, we need |  * leaf[1]: Canonical Combining Class. During normalization, we need | ||||||
|  | @ -313,7 +304,7 @@ static utf8leaf_t *utf8nlookup(const struct unicode_map *um, | ||||||
| 		enum utf8_normalization n, unsigned char *hangul, const char *s, | 		enum utf8_normalization n, unsigned char *hangul, const char *s, | ||||||
| 		size_t len) | 		size_t len) | ||||||
| { | { | ||||||
| 	utf8trie_t	*trie = utf8data + um->ntab[n]->offset; | 	utf8trie_t	*trie = um->tables->utf8data + um->ntab[n]->offset; | ||||||
| 	int		offlen; | 	int		offlen; | ||||||
| 	int		offset; | 	int		offset; | ||||||
| 	int		mask; | 	int		mask; | ||||||
|  | @ -404,7 +395,8 @@ ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n, | ||||||
| 		leaf = utf8nlookup(um, n, hangul, s, len); | 		leaf = utf8nlookup(um, n, hangul, s, len); | ||||||
| 		if (!leaf) | 		if (!leaf) | ||||||
| 			return -1; | 			return -1; | ||||||
| 		if (utf8agetab[LEAF_GEN(leaf)] > um->ntab[n]->maxage) | 		if (um->tables->utf8agetab[LEAF_GEN(leaf)] > | ||||||
|  | 		    um->ntab[n]->maxage) | ||||||
| 			ret += utf8clen(s); | 			ret += utf8clen(s); | ||||||
| 		else if (LEAF_CCC(leaf) == DECOMPOSE) | 		else if (LEAF_CCC(leaf) == DECOMPOSE) | ||||||
| 			ret += strlen(LEAF_STR(leaf)); | 			ret += strlen(LEAF_STR(leaf)); | ||||||
|  | @ -520,7 +512,7 @@ int utf8byte(struct utf8cursor *u8c) | ||||||
| 
 | 
 | ||||||
| 		ccc = LEAF_CCC(leaf); | 		ccc = LEAF_CCC(leaf); | ||||||
| 		/* Characters that are too new have CCC 0. */ | 		/* Characters that are too new have CCC 0. */ | ||||||
| 		if (utf8agetab[LEAF_GEN(leaf)] > | 		if (u8c->um->tables->utf8agetab[LEAF_GEN(leaf)] > | ||||||
| 		    u8c->um->ntab[u8c->n]->maxage) { | 		    u8c->um->ntab[u8c->n]->maxage) { | ||||||
| 			ccc = STOPPER; | 			ccc = STOPPER; | ||||||
| 		} else if (ccc == DECOMPOSE) { | 		} else if (ccc == DECOMPOSE) { | ||||||
|  | @ -597,25 +589,3 @@ int utf8byte(struct utf8cursor *u8c) | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(utf8byte); | EXPORT_SYMBOL(utf8byte); | ||||||
| 
 |  | ||||||
| const struct utf8data *utf8nfdi(unsigned int maxage) |  | ||||||
| { |  | ||||||
| 	int i = ARRAY_SIZE(utf8nfdidata) - 1; |  | ||||||
| 
 |  | ||||||
| 	while (maxage < utf8nfdidata[i].maxage) |  | ||||||
| 		i--; |  | ||||||
| 	if (maxage > utf8nfdidata[i].maxage) |  | ||||||
| 		return NULL; |  | ||||||
| 	return &utf8nfdidata[i]; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| const struct utf8data *utf8nfdicf(unsigned int maxage) |  | ||||||
| { |  | ||||||
| 	int i = ARRAY_SIZE(utf8nfdicfdata) - 1; |  | ||||||
| 
 |  | ||||||
| 	while (maxage < utf8nfdicfdata[i].maxage) |  | ||||||
| 		i--; |  | ||||||
| 	if (maxage > utf8nfdicfdata[i].maxage) |  | ||||||
| 		return NULL; |  | ||||||
| 	return &utf8nfdicfdata[i]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  | @ -255,21 +255,21 @@ static void check_utf8_comparisons(struct unicode_map *table) | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void check_supported_versions(void) | static void check_supported_versions(struct unicode_map *um) | ||||||
| { | { | ||||||
| 	/* Unicode 7.0.0 should be supported. */ | 	/* Unicode 7.0.0 should be supported. */ | ||||||
| 	test(utf8version_is_supported(UNICODE_AGE(7, 0, 0))); | 	test(utf8version_is_supported(um, UNICODE_AGE(7, 0, 0))); | ||||||
| 
 | 
 | ||||||
| 	/* Unicode 9.0.0 should be supported. */ | 	/* Unicode 9.0.0 should be supported. */ | ||||||
| 	test(utf8version_is_supported(UNICODE_AGE(9, 0, 0))); | 	test(utf8version_is_supported(um, UNICODE_AGE(9, 0, 0))); | ||||||
| 
 | 
 | ||||||
| 	/* Unicode 1x.0.0 (the latest version) should be supported. */ | 	/* Unicode 1x.0.0 (the latest version) should be supported. */ | ||||||
| 	test(utf8version_is_supported(UTF8_LATEST)); | 	test(utf8version_is_supported(um, UTF8_LATEST)); | ||||||
| 
 | 
 | ||||||
| 	/* Next versions don't exist. */ | 	/* Next versions don't exist. */ | ||||||
| 	test(!utf8version_is_supported(UNICODE_AGE(13, 0, 0))); | 	test(!utf8version_is_supported(um, UNICODE_AGE(13, 0, 0))); | ||||||
| 	test(!utf8version_is_supported(UNICODE_AGE(0, 0, 0))); | 	test(!utf8version_is_supported(um, UNICODE_AGE(0, 0, 0))); | ||||||
| 	test(!utf8version_is_supported(UNICODE_AGE(-1, -1, -1))); | 	test(!utf8version_is_supported(um, UNICODE_AGE(-1, -1, -1))); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int __init init_test_ucd(void) | static int __init init_test_ucd(void) | ||||||
|  | @ -285,7 +285,7 @@ static int __init init_test_ucd(void) | ||||||
| 		return PTR_ERR(um); | 		return PTR_ERR(um); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	check_supported_versions(); | 	check_supported_versions(um); | ||||||
| 	check_utf8_nfdi(um); | 	check_utf8_nfdi(um); | ||||||
| 	check_utf8_nfdicf(um); | 	check_utf8_nfdicf(um); | ||||||
| 	check_utf8_comparisons(um); | 	check_utf8_comparisons(um); | ||||||
|  |  | ||||||
|  | @ -1,9 +1,8 @@ | ||||||
| /* This file is generated code, do not edit. */ | /* This file is generated code, do not edit. */ | ||||||
| #ifndef __INCLUDED_FROM_UTF8NORM_C__ |  | ||||||
| #error Only nls_utf8-norm.c should include this file. |  | ||||||
| #endif |  | ||||||
| 
 | 
 | ||||||
| static const unsigned int utf8vers = 0xc0100; | #include <linux/module.h> | ||||||
|  | #include <linux/kernel.h> | ||||||
|  | #include "utf8n.h" | ||||||
| 
 | 
 | ||||||
| static const unsigned int utf8agetab[] = { | static const unsigned int utf8agetab[] = { | ||||||
| 	0, | 	0, | ||||||
|  | @ -4107,3 +4106,18 @@ static const unsigned char utf8data[64256] = { | ||||||
| 	0x52,0x04,0x00,0x00,0x11,0x04,0x00,0x00,0x02,0x00,0xcf,0x86,0xcf,0x06,0x02,0x00, | 	0x52,0x04,0x00,0x00,0x11,0x04,0x00,0x00,0x02,0x00,0xcf,0x86,0xcf,0x06,0x02,0x00, | ||||||
| 	0x81,0x80,0xcf,0x86,0x85,0x84,0xcf,0x86,0xcf,0x06,0x02,0x00,0x00,0x00,0x00,0x00 | 	0x81,0x80,0xcf,0x86,0x85,0x84,0xcf,0x86,0xcf,0x06,0x02,0x00,0x00,0x00,0x00,0x00 | ||||||
| }; | }; | ||||||
|  | 
 | ||||||
|  | struct utf8data_table utf8_data_table = { | ||||||
|  | 	.utf8agetab = utf8agetab, | ||||||
|  | 	.utf8agetab_size = ARRAY_SIZE(utf8agetab), | ||||||
|  | 
 | ||||||
|  | 	.utf8nfdicfdata = utf8nfdicfdata, | ||||||
|  | 	.utf8nfdicfdata_size = ARRAY_SIZE(utf8nfdicfdata), | ||||||
|  | 
 | ||||||
|  | 	.utf8nfdidata = utf8nfdidata, | ||||||
|  | 	.utf8nfdidata_size = ARRAY_SIZE(utf8nfdidata), | ||||||
|  | 
 | ||||||
|  | 	.utf8data = utf8data, | ||||||
|  | }; | ||||||
|  | EXPORT_SYMBOL_GPL(utf8_data_table); | ||||||
|  | MODULE_LICENSE("GPL v2"); | ||||||
|  | @ -13,25 +13,7 @@ | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| #include <linux/unicode.h> | #include <linux/unicode.h> | ||||||
| 
 | 
 | ||||||
| int utf8version_is_supported(unsigned int version); | int utf8version_is_supported(const struct unicode_map *um, unsigned int version); | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Look for the correct const struct utf8data for a unicode version. |  | ||||||
|  * Returns NULL if the version requested is too new. |  | ||||||
|  * |  | ||||||
|  * Two normalization forms are supported: nfdi and nfdicf. |  | ||||||
|  * |  | ||||||
|  * nfdi: |  | ||||||
|  *  - Apply unicode normalization form NFD. |  | ||||||
|  *  - Remove any Default_Ignorable_Code_Point. |  | ||||||
|  * |  | ||||||
|  * nfdicf: |  | ||||||
|  *  - Apply unicode normalization form NFD. |  | ||||||
|  *  - Remove any Default_Ignorable_Code_Point. |  | ||||||
|  *  - Apply a full casefold (C + F). |  | ||||||
|  */ |  | ||||||
| extern const struct utf8data *utf8nfdi(unsigned int maxage); |  | ||||||
| extern const struct utf8data *utf8nfdicf(unsigned int maxage); |  | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Determine the length of the normalized form of the string, |  * Determine the length of the normalized form of the string, | ||||||
|  | @ -78,4 +60,24 @@ int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um, | ||||||
|  */ |  */ | ||||||
| extern int utf8byte(struct utf8cursor *u8c); | extern int utf8byte(struct utf8cursor *u8c); | ||||||
| 
 | 
 | ||||||
|  | struct utf8data { | ||||||
|  | 	unsigned int maxage; | ||||||
|  | 	unsigned int offset; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct utf8data_table { | ||||||
|  | 	const unsigned int *utf8agetab; | ||||||
|  | 	int utf8agetab_size; | ||||||
|  | 
 | ||||||
|  | 	const struct utf8data *utf8nfdicfdata; | ||||||
|  | 	int utf8nfdicfdata_size; | ||||||
|  | 
 | ||||||
|  | 	const struct utf8data *utf8nfdidata; | ||||||
|  | 	int utf8nfdidata_size; | ||||||
|  | 
 | ||||||
|  | 	const unsigned char *utf8data; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | extern struct utf8data_table utf8_data_table; | ||||||
|  | 
 | ||||||
| #endif /* UTF8NORM_H */ | #endif /* UTF8NORM_H */ | ||||||
|  |  | ||||||
|  | @ -6,6 +6,7 @@ | ||||||
| #include <linux/dcache.h> | #include <linux/dcache.h> | ||||||
| 
 | 
 | ||||||
| struct utf8data; | struct utf8data; | ||||||
|  | struct utf8data_table; | ||||||
| 
 | 
 | ||||||
| #define UNICODE_MAJ_SHIFT		16 | #define UNICODE_MAJ_SHIFT		16 | ||||||
| #define UNICODE_MIN_SHIFT		8 | #define UNICODE_MIN_SHIFT		8 | ||||||
|  | @ -49,6 +50,7 @@ enum utf8_normalization { | ||||||
| struct unicode_map { | struct unicode_map { | ||||||
| 	unsigned int version; | 	unsigned int version; | ||||||
| 	const struct utf8data *ntab[UTF8_NMAX]; | 	const struct utf8data *ntab[UTF8_NMAX]; | ||||||
|  | 	const struct utf8data_table *tables; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| int utf8_validate(const struct unicode_map *um, const struct qstr *str); | int utf8_validate(const struct unicode_map *um, const struct qstr *str); | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Christoph Hellwig
						Christoph Hellwig