Files
cpython/Tools/i18n/makelocalealias.py
Stan Ulbrych 78acd8e95e gh-138286: Run `ruff on Tools/i18n` (#138287)
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
2025-08-31 20:29:02 +00:00

168 lines
5.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Convert the X11 locale.alias file into a mapping dictionary suitable
for locale.py.
Written by Marc-Andre Lemburg <mal@genix.com>, 2004-12-10.
"""
import locale
import sys
_locale = locale
# Location of the X11 alias file.
LOCALE_ALIAS = '/usr/share/X11/locale/locale.alias'
# Location of the glibc SUPPORTED locales file.
SUPPORTED = '/usr/share/i18n/SUPPORTED'
def parse(filename):
with open(filename, encoding='latin1') as f:
lines = list(f)
# Remove mojibake in /usr/share/X11/locale/locale.alias.
# b'\xef\xbf\xbd' == '\ufffd'.encode('utf-8')
lines = [line for line in lines if '\xef\xbf\xbd' not in line]
data = {}
for line in lines:
line = line.strip()
if not line:
continue
if line[:1] == '#':
continue
locale, alias = line.split()
# Fix non-standard locale names, e.g. ks_IN@devanagari.UTF-8
if '@' in alias:
alias_lang, _, alias_mod = alias.partition('@')
if '.' in alias_mod:
alias_mod, _, alias_enc = alias_mod.partition('.')
alias = alias_lang + '.' + alias_enc + '@' + alias_mod
# Strip ':'
if locale[-1] == ':':
locale = locale[:-1]
# Lower-case locale
locale = locale.lower()
# Ignore one letter locale mappings (except for 'c')
if len(locale) == 1 and locale != 'c':
continue
if '@' in locale and '@' not in alias:
# Do not simply remove the "@euro" modifier.
# Glibc generates separate locales with the "@euro" modifier, and
# not always generates a locale without it with the same encoding.
# It can also affect collation.
if locale.endswith('@euro') and not locale.endswith('.utf-8@euro'):
alias += '@euro'
# Normalize encoding, if given
if '.' in locale:
lang, encoding = locale.split('.')[:2]
encoding = encoding.replace('-', '')
encoding = encoding.replace('_', '')
locale = lang + '.' + encoding
data[locale] = alias
# Conflict with glibc.
data.pop('el_gr@euro', None)
data.pop('uz_uz@cyrillic', None)
data.pop('uz_uz.utf8@cyrillic', None)
return data
def parse_glibc_supported(filename):
with open(filename, encoding='latin1') as f:
lines = list(f)
data = {}
for line in lines:
line = line.strip()
if not line:
continue
if line[:1] == '#':
continue
line = line.replace('/', ' ').strip()
line = line.rstrip('\\').rstrip()
words = line.split()
if len(words) != 2:
continue
alias, alias_encoding = words
# Lower-case locale
locale = alias.lower()
# Normalize encoding, if given
if '.' in locale:
lang, encoding = locale.split('.')[:2]
encoding = encoding.replace('-', '')
encoding = encoding.replace('_', '')
locale = lang + '.' + encoding
# Add an encoding to alias
alias, _, modifier = alias.partition('@')
alias = _locale._replace_encoding(alias, alias_encoding)
if modifier:
alias += '@' + modifier
data[locale] = alias
return data
def pprint(data):
items = sorted(data.items())
for k, v in items:
print(f" {k!a:<40}{v!a},")
def print_differences(data, olddata):
items = sorted(olddata.items())
for k, v in items:
if k not in data:
print(f'# removed {k!a}')
elif olddata[k] != data[k]:
print(f'# updated {k!a} -> {olddata[k]!a} to {data[k]!a}')
# Additions are not mentioned
def optimize(data):
locale_alias = locale.locale_alias
locale.locale_alias = data.copy()
for k, v in data.items():
del locale.locale_alias[k]
if locale.normalize(k) != v:
locale.locale_alias[k] = v
newdata = locale.locale_alias
errors = check(data)
locale.locale_alias = locale_alias
if errors:
sys.exit(1)
return newdata
def check(data):
# Check that all alias definitions from the X11 file
# are actually mapped to the correct alias locales.
errors = 0
for k, v in data.items():
if locale.normalize(k) != v:
print(f'ERROR: {k!a} -> {locale.normalize(k)!a} != {v!a}',
file=sys.stderr)
errors += 1
return errors
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--locale-alias', default=LOCALE_ALIAS,
help='location of the X11 alias file '
f'(default: {LOCALE_ALIAS})')
parser.add_argument('--glibc-supported', default=SUPPORTED,
help='location of the glibc SUPPORTED locales file '
f'(default: {SUPPORTED})')
args = parser.parse_args()
data = locale.locale_alias.copy()
data.update(parse_glibc_supported(args.glibc_supported))
data.update(parse(args.locale_alias))
# Hardcode 'c.utf8' -> 'C.UTF-8' because 'en_US.UTF-8' does not exist
# on all platforms.
data['c.utf8'] = 'C.UTF-8'
while True:
# Repeat optimization while the size is decreased.
n = len(data)
data = optimize(data)
if len(data) == n:
break
print_differences(data, locale.locale_alias)
print()
print('locale_alias = {')
pprint(data)
print('}')