summaryrefslogtreecommitdiffstats
path: root/perl/perl-text-unaccent/ligatures.diff
blob: be5e834e2081a1e19d3c71d5985216e2a0128eb3 (plain)
diff -Naur Text-Unaccent-1.08/unac.c Text-Unaccent-1.08.patched/unac.c
--- Text-Unaccent-1.08/unac.c	2004-10-17 15:00:36.000000000 -0400
+++ Text-Unaccent-1.08.patched/unac.c	2021-08-22 15:27:29.207153493 -0400
@@ -21,14 +21,6 @@
 #endif /* HAVE_CONFIG_H */
 
 /*
- * Perl config.h defines HAS_VPRINTF if printf variants are 
- * available
- */
-#ifdef HAS_VPRINTF
-#define HAVE_VSNPRINTF
-#endif /* HAS_VPRINTF */
-
-/*
  * If configure.in has not defined this symbol, assume const. It
  * does not harm much: a warning will be issued during compilation.
  */
@@ -140,6 +132,9 @@
  * 	0055 LATIN CAPITAL LETTER U
  * 00DD LATIN CAPITAL LETTER Y WITH ACUTE
  * 	0059 LATIN CAPITAL LETTER Y
+ * 00DF LATIN SMALL LETTER SHARP S
+ * 	0073 LATIN SMALL LETTER S
+ * 	0073 LATIN SMALL LETTER S
  * 00E0 LATIN SMALL LETTER A WITH GRAVE
  * 	0061 LATIN SMALL LETTER A
  * 00E1 LATIN SMALL LETTER A WITH ACUTE
@@ -360,6 +355,12 @@
  * 	004F LATIN CAPITAL LETTER O
  * 0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE
  * 	006F LATIN SMALL LETTER O
+ * 0152 LATIN CAPITAL LIGATURE OE
+ * 	004F LATIN CAPITAL LETTER O
+ * 	0045 LATIN CAPITAL LETTER E
+ * 0153 LATIN SMALL LIGATURE OE
+ * 	006F LATIN SMALL LETTER O
+ * 	0065 LATIN SMALL LETTER E
  * 0154 LATIN CAPITAL LETTER R WITH ACUTE
  * 	0052 LATIN CAPITAL LETTER R
  * 0155 LATIN SMALL LETTER R WITH ACUTE
@@ -472,6 +473,9 @@
  * 	0066 LATIN SMALL LETTER F
  * 0193 LATIN CAPITAL LETTER G WITH HOOK
  * 	0047 LATIN CAPITAL LETTER G
+ * 0195 LATIN SMALL LETTER HV
+ * 	0068 LATIN SMALL LETTER H
+ * 	0076 LATIN SMALL LETTER V
  * 0197 LATIN CAPITAL LETTER I WITH STROKE
  * 	0049 LATIN CAPITAL LETTER I
  * 0198 LATIN CAPITAL LETTER K WITH HOOK
@@ -12929,13 +12933,13 @@
 unsigned char unac_positions[UNAC_BLOCK_COUNT][UNAC_BLOCK_SIZE + 1] = {
 /* 0 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 },
 /* 1 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 34, 37, 38 },
-/* 2 */ { 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 },
+/* 2 */ { 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34 },
 /* 3 */ { 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 },
 /* 4 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 },
 /* 5 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35 },
-/* 6 */ { 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34 },
+/* 6 */ { 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 },
 /* 7 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 },
-/* 8 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 },
+/* 8 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 },
 /* 9 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 },
 /* 10 */ { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41 },
 /* 11 */ { 0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 37, 38, 39 },
@@ -13145,13 +13149,13 @@
 
 unsigned short unac_data0[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF };
 unsigned short unac_data1[] = { 0x0020, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0020, 0xFFFF, 0x0061, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0020, 0xFFFF, 0xFFFF, 0x0032, 0x0033, 0x0020, 0x03BC, 0xFFFF, 0xFFFF, 0x0020, 0x0031, 0x006F, 0xFFFF, 0x0031, 0x2044, 0x0034, 0x0031, 0x2044, 0x0032, 0x0033, 0x2044, 0x0034, 0xFFFF };
-unsigned short unac_data2[] = { 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0xFFFF, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0xFFFF, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0xFFFF, 0xFFFF };
+unsigned short unac_data2[] = { 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0xFFFF, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0xFFFF, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0xFFFF, 0x0073, 0x0073 };
 unsigned short unac_data3[] = { 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0065, 0x0063, 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 0xFFFF, 0x006E, 0x006F, 0x006F, 0x006F, 0x006F, 0x006F, 0xFFFF, 0x006F, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0xFFFF, 0x0079 };
 unsigned short unac_data4[] = { 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, 0x0044, 0x0064, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067 };
 unsigned short unac_data5[] = { 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0048, 0x0068, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0xFFFF, 0x0049, 0x004A, 0x0069, 0x006A, 0x004A, 0x006A, 0x004B, 0x006B, 0xFFFF, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x00B7 };
-unsigned short unac_data6[] = { 0x006C, 0x00B7, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E, 0x006E, 0x02BC, 0x006E, 0xFFFF, 0xFFFF, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, 0xFFFF, 0xFFFF, 0x0052, 0x0072, 0x0052, 0x0072, 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073 };
+unsigned short unac_data6[] = { 0x006C, 0x00B7, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E, 0x006E, 0x02BC, 0x006E, 0xFFFF, 0xFFFF, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x0045, 0x006F, 0x0065, 0x0052, 0x0072, 0x0052, 0x0072, 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073 };
 unsigned short unac_data7[] = { 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0054, 0x0074, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, 0x0059, 0x005A, 0x007A, 0x005A, 0x007A, 0x005A, 0x007A, 0x0073 };
-unsigned short unac_data8[] = { 0x0062, 0x0042, 0x0042, 0x0062, 0xFFFF, 0xFFFF, 0xFFFF, 0x0043, 0x0063, 0xFFFF, 0x0044, 0x0044, 0x0064, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0046, 0x0066, 0x0047, 0xFFFF, 0xFFFF, 0xFFFF, 0x0049, 0x004B, 0x006B, 0x006C, 0xFFFF, 0xFFFF, 0x004E, 0x006E, 0x004F };
+unsigned short unac_data8[] = { 0x0062, 0x0042, 0x0042, 0x0062, 0xFFFF, 0xFFFF, 0xFFFF, 0x0043, 0x0063, 0xFFFF, 0x0044, 0x0044, 0x0064, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0046, 0x0066, 0x0047, 0xFFFF, 0x0068, 0x0076, 0xFFFF, 0x0049, 0x004B, 0x006B, 0x006C, 0xFFFF, 0xFFFF, 0x004E, 0x006E, 0x004F };
 unsigned short unac_data9[] = { 0x004F, 0x006F, 0xFFFF, 0xFFFF, 0x0050, 0x0070, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0074, 0x0054, 0x0074, 0x0054, 0x0055, 0x0075, 0xFFFF, 0x0056, 0x0059, 0x0079, 0x005A, 0x007A, 0xFFFF, 0xFFFF, 0xFFFF, 0x0292, 0xFFFF, 0xFFFF, 0xFFFF, 0x0296, 0xFFFF };
 unsigned short unac_data10[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0044, 0x005A, 0x0044, 0x007A, 0x0064, 0x007A, 0x004C, 0x004A, 0x004C, 0x006A, 0x006C, 0x006A, 0x004E, 0x004A, 0x004E, 0x006A, 0x006E, 0x006A, 0x0041, 0x0061, 0x0049, 0x0069, 0x004F, 0x006F, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0xFFFF, 0x0041, 0x0061 };
 unsigned short unac_data11[] = { 0x0041, 0x0061, 0x0041, 0x0045, 0x0061, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, 0x004B, 0x006B, 0x004F, 0x006F, 0x004F, 0x006F, 0x01B7, 0x0292, 0x006A, 0x0044, 0x005A, 0x0044, 0x007A, 0x0064, 0x007A, 0x0047, 0x0067, 0xFFFF, 0xFFFF, 0x004E, 0x006E, 0x0041, 0x0061, 0x0041, 0x0045, 0x0061, 0x0065, 0x004F, 0x006F };
@@ -13881,9 +13885,9 @@
     *out_lengthp = 0;
   } else {
     char* utf16 = 0;
-    int utf16_length = 0;
+    size_t utf16_length = 0;
     char* utf16_unaccented = 0;
-    int utf16_unaccented_length = 0;
+    size_t utf16_unaccented_length = 0;
   
     if(convert(charset, utf16be(), in, in_length, &utf16, &utf16_length) < 0) {
       return -1;
@@ -13901,6 +13905,7 @@
   return 0;
 }
 
+
 #define UNAC_VERSION "1.8.0"
 
 const char* unac_version(void)