summaryrefslogtreecommitdiffstats
path: root/system/noto-emoji/patches/1e8488167c26ac1e780e374961d08f1fb1d1e880.patch
blob: 35bdd3d6616cc8205395121cf62d2693f6d65bfc (plain)
From 1e8488167c26ac1e780e374961d08f1fb1d1e880 Mon Sep 17 00:00:00 2001
From: Andrew Clemons <andrew.clemons@gmail.com>
Date: Thu, 9 Mar 2017 21:48:18 +1300
Subject: [PATCH] Add support for "narrow" python to emoji_builder.py

The code currently only works with wide builds since it does not take
UTF-16 surrogate pairs into account.
---
 third_party/color_emoji/emoji_builder.py | 41 ++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/third_party/color_emoji/emoji_builder.py b/third_party/color_emoji/emoji_builder.py
index c118e83..3d3e0c8 100644
--- a/third_party/color_emoji/emoji_builder.py
+++ b/third_party/color_emoji/emoji_builder.py
@@ -25,10 +25,36 @@
 
 from nototools import font_data
 
+def myunichr(cp):
+	if sys.maxunicode < 0x10FFFF and cp > 0xFFFF:
+		return ("\\U" + hex(cp)[2:].zfill(8)).decode("unicode-escape")
+	return unichr(cp)
+
+def myord(high, low):
+	return (ord(high) - 0xD800) * 0x400 + (ord(low) - 0xDC00) + 0x10000
+
+def begins_with_surrogate(string):
+	return sys.maxunicode < 0x10FFFF and len(string) > 1 and (0xD800 <= ord(string[0]) <= 0xDBFF) and (0xDC00 <= ord(string[1]) <= 0xDFFF)
+
 def get_glyph_name_from_gsub (string, font, cmap_dict):
 	ligatures = font['GSUB'].table.LookupList.Lookup[0].SubTable[0].ligatures
-	first_glyph = cmap_dict[ord (string[0])]
-	rest_of_glyphs = [cmap_dict[ord (ch)] for ch in string[1:]]
+
+	if begins_with_surrogate(string):
+			first_glyph = cmap_dict[myord(string[0], string[1])]
+			string = string[2:]
+	else:
+			first_glyph = cmap_dict[ord (string[0])]
+			string = string[1:]
+
+	rest_of_glyphs = []
+	while (len(string) > 0):
+			if begins_with_surrogate(string):
+					rest_of_glyphs.append(cmap_dict[myord(string[0], string[1])])
+					string = string[2:]
+			else:
+					rest_of_glyphs.append(cmap_dict[ord (string[0])])
+					string = string[1:]
+
 	for ligature in ligatures[first_glyph]:
 		if ligature.Component == rest_of_glyphs:
 			return ligature.LigGlyph
@@ -462,13 +488,13 @@ def is_vs(cp):
 			if "_" in codes:
 				pieces = codes.split ("_")
 				cps = [int(code, 16) for code in pieces]
-				uchars = "".join ([unichr(cp) for cp in cps if not is_vs(cp)])
+				uchars = "".join ([myunichr(cp) for cp in cps if not is_vs(cp)])
 			else:
 				cp = int(codes, 16)
 				if is_vs(cp):
 					print "ignoring unexpected vs input %04x" % cp
 					continue
-				uchars = unichr(cp)
+				uchars = myunichr(cp)
 			img_files[uchars] = img_file
 		if not img_files:
 			raise Exception ("No image files found in '%s'." % glb)
@@ -483,6 +509,13 @@ def is_vs(cp):
 				except:
 					print "no cmap entry for %x" % ord(uchars)
 					raise ValueError("%x" % ord(uchars))
+			elif len (uchars) == 2 and begins_with_surrogate(uchars):
+				cp = myord(uchars[0], uchars[1])
+				try:
+					glyph_name = unicode_cmap.cmap[cp]
+				except:
+					print "no cmap entry for %x" % cp
+					raise ValueError("%x" % ord(uchars))
 			else:
 				glyph_name = get_glyph_name_from_gsub (uchars, font, unicode_cmap.cmap)
 			glyph_id = font.getGlyphID (glyph_name)