#!/usr/bin/env python3
"""
Convert the X11 locale.alias file into a mapping dictionary suitable
for locale.py.

Written by Marc-Andre Lemburg, 2004-12-10.
"""
import locale
import sys

# Second reference to the locale module, used by parse_glibc_supported().
_locale = locale

# Location of the X11 alias file.
LOCALE_ALIAS = '/usr/share/X11/locale/locale.alias'
# Location of the glibc SUPPORTED locales file.
SUPPORTED = '/usr/share/i18n/SUPPORTED'


def _normalize_key_encoding(name):
    """Return *name* with '-' and '_' removed from its encoding part.

    The encoding part is the text between the first and the second '.';
    anything after a second '.' is dropped.  Names without a '.' are
    returned unchanged.
    """
    if '.' not in name:
        return name
    lang, encoding = name.split('.')[:2]
    encoding = encoding.replace('-', '').replace('_', '')
    return lang + '.' + encoding


def parse(filename):
    """Parse an X11 locale.alias file into a ``{name: alias}`` dict.

    Keys are lower-cased locale names with a normalized encoding part;
    values are the alias locale names, with a misplaced encoding moved in
    front of the '@modifier'.

    Blank lines, '#' comment lines and lines containing mojibake are
    skipped.  Every remaining line must consist of exactly two
    whitespace-separated words, otherwise ValueError is raised.
    """
    with open(filename, encoding='latin1') as f:
        lines = list(f)
    # Remove mojibake in /usr/share/X11/locale/locale.alias.
    # b'\xef\xbf\xbd' == '\ufffd'.encode('utf-8')
    lines = [line for line in lines if '\xef\xbf\xbd' not in line]

    data = {}
    for line in lines:
        line = line.strip()
        if not line or line[:1] == '#':
            continue
        name, alias = line.split()

        # Fix non-standard locale names, e.g. ks_IN@devanagari.UTF-8
        if '@' in alias:
            alias_lang, _, alias_mod = alias.partition('@')
            if '.' in alias_mod:
                alias_mod, _, alias_enc = alias_mod.partition('.')
                alias = alias_lang + '.' + alias_enc + '@' + alias_mod

        # Strip ':'
        if name[-1] == ':':
            name = name[:-1]
        # Lower-case locale
        name = name.lower()
        # Ignore one letter locale mappings (except for 'c')
        if len(name) == 1 and name != 'c':
            continue

        if '@' in name and '@' not in alias:
            # Do not simply remove the "@euro" modifier.
            # Glibc generates separate locales with the "@euro" modifier,
            # and does not always generate a locale without it with the
            # same encoding.  It can also affect collation.
            if name.endswith('@euro') and not name.endswith('.utf-8@euro'):
                alias += '@euro'

        # Normalize encoding, if given
        data[_normalize_key_encoding(name)] = alias

    # Conflict with glibc.
    data.pop('el_gr@euro', None)
    data.pop('uz_uz@cyrillic', None)
    data.pop('uz_uz.utf8@cyrillic', None)
    return data


def parse_glibc_supported(filename):
    """Parse a glibc SUPPORTED file into a ``{name: alias}`` dict.

    Each usable line has the form ``<locale>/<encoding>``, optionally
    followed by a trailing backslash.  Blank lines, '#' comment lines and
    lines that do not split into exactly two words are skipped.

    Keys are lower-cased locale names with a normalized encoding part;
    values are the locale name with its encoding replaced through
    ``locale._replace_encoding()`` and the '@modifier', if any, re-appended.
    """
    with open(filename, encoding='latin1') as f:
        lines = list(f)

    data = {}
    for line in lines:
        line = line.strip()
        if not line or line[:1] == '#':
            continue
        # "xx_YY.ENC/ENC \" -> "xx_YY.ENC ENC"
        line = line.replace('/', ' ').strip()
        line = line.rstrip('\\').rstrip()
        words = line.split()
        if len(words) != 2:
            continue
        alias, alias_encoding = words

        # Lower-case locale and normalize encoding, if given
        name = _normalize_key_encoding(alias.lower())

        # Add an encoding to alias
        alias, _, modifier = alias.partition('@')
        alias = _locale._replace_encoding(alias, alias_encoding)
        if modifier:
            alias += '@' + modifier
        data[name] = alias
    return data


def pprint(data):
    """Print *data* as the body lines of a dict literal, sorted by key.

    Each line holds ``repr(key)`` followed by ':' (padded to 40 columns),
    then ``repr(value)`` and a trailing comma, so that the lines form a
    valid dict literal when placed between '{' and '}'.
    """
    for k, v in sorted(data.items()):
        # The colon must be part of the padded field: writing
        # f"{k!a:<40}" would consume ':' as the format-spec separator and
        # emit no key/value separator at all.
        key = f'{k!a}:'
        print(f" {key:<40}{v!a},")


def print_differences(data, olddata):
    """Print a comment line for every *olddata* entry removed or changed.

    Entries that exist only in *data* (additions) are not mentioned.
    """
    for k, v in sorted(olddata.items()):
        if k not in data:
            print(f'# removed {k!a}')
        elif v != data[k]:
            print(f'# updated {k!a} -> {v!a} to {data[k]!a}')
        # Additions are not mentioned


def optimize(data):
    """Return a copy of *data* without the entries normalize() can derive.

    ``locale.locale_alias`` is temporarily replaced by a working copy of
    *data*.  Each entry is removed from that copy and put back only if
    ``locale.normalize()`` no longer yields the expected alias without it.
    The reduced table is then verified with check(); if any mapping is
    wrong, the process exits with status 1.

    The original ``locale.locale_alias`` is always restored, even if an
    exception is raised along the way.
    """
    saved_alias = locale.locale_alias
    locale.locale_alias = data.copy()
    try:
        for k, v in data.items():
            del locale.locale_alias[k]
            if locale.normalize(k) != v:
                locale.locale_alias[k] = v
        newdata = locale.locale_alias
        errors = check(data)
    finally:
        locale.locale_alias = saved_alias
    if errors:
        sys.exit(1)
    return newdata


def check(data):
    """Return the number of entries that normalize() maps incorrectly.

    Check that all alias definitions in *data* are actually mapped to the
    correct alias locales by ``locale.normalize()``.  Every mismatch is
    reported on stderr.
    """
    errors = 0
    for k, v in data.items():
        normalized = locale.normalize(k)
        if normalized != v:
            print(f'ERROR: {k!a} -> {normalized!a} != {v!a}',
                  file=sys.stderr)
            errors += 1
    return errors


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--locale-alias', default=LOCALE_ALIAS,
                        help='location of the X11 alias file '
                             f'(default: {LOCALE_ALIAS})')
    parser.add_argument('--glibc-supported', default=SUPPORTED,
                        help='location of the glibc SUPPORTED locales file '
                             f'(default: {SUPPORTED})')
    args = parser.parse_args()

    # Start from the current table; the X11 aliases are applied last and
    # therefore take precedence over the glibc ones.
    data = locale.locale_alias.copy()
    data.update(parse_glibc_supported(args.glibc_supported))
    data.update(parse(args.locale_alias))
    # Hardcode 'c.utf8' -> 'C.UTF-8' because 'en_US.UTF-8' does not exist
    # on all platforms.
    data['c.utf8'] = 'C.UTF-8'
    while True:
        # Repeat optimization while the size is decreased.
        n = len(data)
        data = optimize(data)
        if len(data) == n:
            break
    print_differences(data, locale.locale_alias)
    print()
    print('locale_alias = {')
    pprint(data)
    print('}')