From 097cf525738e1db8c8c9f46c7192907d2533f903 Mon Sep 17 00:00:00 2001 From: Claudine Peyrat <88194877+claudinepeyrat06@users.noreply.github.com> Date: Wed, 12 Feb 2025 19:16:57 +0100 Subject: allow some arabic voyels (#3501) * Update font_variant.py start allowing voyel use category to recognise punctuation * Update font_variant.py * Update font_variant.py make it cleaner --- lib/lettering/font_variant.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/lib/lettering/font_variant.py b/lib/lettering/font_variant.py index 50c4543f..d7dc0f91 100644 --- a/lib/lettering/font_variant.py +++ b/lib/lettering/font_variant.py @@ -5,7 +5,7 @@ import os from collections import defaultdict -from unicodedata import normalize +from unicodedata import normalize, category import inkex @@ -139,18 +139,22 @@ class FontVariant(object): glyph_selection = [glyph_name for glyph_name, glyph_layer in self.glyphs.items() if glyph_name.startswith(character)] return sorted(glyph_selection, key=lambda glyph: (len(glyph.split('.')[0]), len(glyph)), reverse=True) - def isbinding(self, character): + def is_binding(self, character): # after a non binding letter a letter can only be in isol or fina shape. # binding glyph only have two shapes, isol and fina + non_binding_char = ['ا', 'أ', 'ﺇ', 'آ', 'ٱ', 'د', 'ذ', 'ر', 'ز', 'و'] normalized_non_binding_char = [normalize('NFKC', letter) for letter in non_binding_char] return not (character in normalized_non_binding_char) - def ispunctuation(self, character): - # punctuation sign are not considered as part of the word. They onnly have one shape - punctuation_signs = ['؟', '،', '.', ',', ';', '.', '!', ':', '؛'] - normalized_punctuation_signs = [normalize('NFKC', letter) for letter in punctuation_signs] - return (character in normalized_punctuation_signs) + def is_mark(self, character): + # this category includes all the combining diacritics. + + return (category(character)[0] == 'M') + + def is_letter(self, character): + + return (category(character)[0] == 'L') def get_glyph(self, character, word): """ @@ -169,26 +173,27 @@ class FontVariant(object): # in arabic each letter (or ligature) may have up to 4 different shapes, hence 4 glyphs # this computes the shape of the glyph that represents word[starting:ending+1] - # punctuation is not really part of the word + # punctuation or a combining accent is not really part of the word # they may appear at begining or end of words # computes where the actual word begins and ends up last_char_index = len(word)-1 first_char_index = 0 - while self.ispunctuation(word[last_char_index]): + while not self.is_letter(word[last_char_index]): last_char_index = last_char_index - 1 - while self.ispunctuation(word[first_char_index]): + + while not self.is_letter(word[first_char_index]): first_char_index = first_char_index + 1 - # first glyph is eithher isol or init depending wether it is also the last glyph of the actual word + # first glyph is either isol or init depending if it is also the last glyph of the actual word if starting == first_char_index: - if not self.isbinding(word[ending]) or len(word) == 1: + if not self.is_binding(word[ending]) or len(word) == 1: shape = 'isol' else: shape = 'init' # last glyph is final if previous is binding, isol otherwise # a non binding glyph behaves like the last glyph - elif ending == last_char_index or not self.isbinding(word[ending]): + elif ending == last_char_index or not self.is_binding(word[ending]): if previous_is_binding: shape = 'fina' else: @@ -211,14 +216,17 @@ class FontVariant(object): for glyph in glyph_selection: glyph_name = glyph.split('.') if len(glyph_name) == 2 and glyph_name[1] in ['isol', 'init', 'medi', 'fina']: - is_binding = self.isbinding(glyph_name[0][-1]) + is_binding = self.is_binding(glyph_name[0][-1]) if len(word) < i + len(glyph_name[0]): continue shape = self.get_next_glyph_shape(word, i, i + len(glyph_name[0]) - 1, previous_is_binding) if glyph_name[1] == shape and word[i:].startswith(glyph_name[0]): return self.glyphs[glyph], len(glyph_name[0]), is_binding elif word[i:].startswith(glyph): - return self.glyphs[glyph], len(glyph), True + if self.is_mark(word[i]): + return self.glyphs[glyph], len(glyph), previous_is_binding + else: + return self.glyphs[glyph], len(glyph), True # nothing was found return self.glyphs.get(self.default_glyph, None), 1, True -- cgit v1.2.3