From 2f530f89e0ee5ed5253125d0e9319b07103173f0 Mon Sep 17 00:00:00 2001 From: Justin Lu Date: Fri, 23 May 2025 17:44:39 +0000 Subject: [PATCH] 8357281: sun.util.Locale.LanguageTag should be immutable Reviewed-by: naoto, liach --- .../share/classes/java/util/Locale.java | 12 +- .../util/locale/InternalLocaleBuilder.java | 16 +- .../classes/sun/util/locale/LanguageTag.java | 252 +++++++----------- 3 files changed, 111 insertions(+), 169 deletions(-) diff --git a/src/java.base/share/classes/java/util/Locale.java b/src/java.base/share/classes/java/util/Locale.java index 14f3fe13918..fa13afcc310 100644 --- a/src/java.base/share/classes/java/util/Locale.java +++ b/src/java.base/share/classes/java/util/Locale.java @@ -1691,37 +1691,37 @@ public final class Locale implements Cloneable, Serializable { LanguageTag tag = LanguageTag.parseLocale(baseLocale, localeExtensions); StringBuilder buf = new StringBuilder(); - String subtag = tag.getLanguage(); + String subtag = tag.language(); if (!subtag.isEmpty()) { buf.append(LanguageTag.canonicalizeLanguage(subtag)); } - subtag = tag.getScript(); + subtag = tag.script(); if (!subtag.isEmpty()) { buf.append(LanguageTag.SEP); buf.append(LanguageTag.canonicalizeScript(subtag)); } - subtag = tag.getRegion(); + subtag = tag.region(); if (!subtag.isEmpty()) { buf.append(LanguageTag.SEP); buf.append(LanguageTag.canonicalizeRegion(subtag)); } - List<String>subtags = tag.getVariants(); + List<String>subtags = tag.variants(); for (String s : subtags) { buf.append(LanguageTag.SEP); // preserve casing buf.append(s); } - subtags = tag.getExtensions(); + subtags = tag.extensions(); for (String s : subtags) { buf.append(LanguageTag.SEP); buf.append(LanguageTag.canonicalizeExtension(s)); } - subtag = tag.getPrivateuse(); + subtag = tag.privateuse(); if (!subtag.isEmpty()) { if (buf.length() > 0) { buf.append(LanguageTag.SEP); diff --git a/src/java.base/share/classes/sun/util/locale/InternalLocaleBuilder.java 
b/src/java.base/share/classes/sun/util/locale/InternalLocaleBuilder.java index 894a49e8efe..5da725d59c8 100644 --- a/src/java.base/share/classes/sun/util/locale/InternalLocaleBuilder.java +++ b/src/java.base/share/classes/sun/util/locale/InternalLocaleBuilder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -347,18 +347,18 @@ public final class InternalLocaleBuilder { */ public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) { clear(); - if (!langtag.getExtlangs().isEmpty()) { - language = langtag.getExtlangs().get(0); + if (!langtag.extlangs().isEmpty()) { + language = langtag.extlangs().get(0); } else { - String lang = langtag.getLanguage(); + String lang = langtag.language(); if (!lang.equals(LanguageTag.UNDETERMINED)) { language = lang; } } - script = langtag.getScript(); - region = langtag.getRegion(); + script = langtag.script(); + region = langtag.region(); - List<String> bcpVariants = langtag.getVariants(); + List<String> bcpVariants = langtag.variants(); if (!bcpVariants.isEmpty()) { StringBuilder var = new StringBuilder(bcpVariants.get(0)); int size = bcpVariants.size(); @@ -368,7 +368,7 @@ public final class InternalLocaleBuilder { variant = var.toString(); } - setExtensions(langtag.getExtensions(), langtag.getPrivateuse()); + setExtensions(langtag.extensions(), langtag.privateuse()); return this; } diff --git a/src/java.base/share/classes/sun/util/locale/LanguageTag.java b/src/java.base/share/classes/sun/util/locale/LanguageTag.java index 5c905372c5a..a88937b7538 100644 --- a/src/java.base/share/classes/sun/util/locale/LanguageTag.java +++ b/src/java.base/share/classes/sun/util/locale/LanguageTag.java @@ -42,30 +42,21 @@ import java.util.Map; import java.util.Set; import 
java.util.StringJoiner; -public class LanguageTag { - // - // static fields - // +// List fields are unmodifiable +public record LanguageTag(String language, String script, String region, String privateuse, + List<String> extlangs, List<String> variants, List<String> extensions) { + public static final String SEP = "-"; public static final String PRIVATEUSE = "x"; public static final String UNDETERMINED = "und"; public static final String PRIVUSE_VARIANT_PREFIX = "lvariant"; + private static final String EMPTY_SUBTAG = ""; + private static final List<String> EMPTY_SUBTAGS = Collections.emptyList(); - // - // Language subtag fields - // - private String language = ""; // language subtag - private String script = ""; // script subtag - private String region = ""; // region subtag - private String privateuse = ""; // privateuse - - private List<String> extlangs = Collections.emptyList(); // extlang subtags - private List<String> variants = Collections.emptyList(); // variant subtags - private List<String> extensions = Collections.emptyList(); // extensions // Map contains legacy language tags and its preferred mappings from // http://www.ietf.org/rfc/rfc5646.txt // Keys are lower-case strings. 
- private static final Map<String, String[]> LEGACY = new HashMap<>(); + private static final Map<String, String[]> LEGACY; static { // grandfathered = irregular ; non-redundant tags registered @@ -128,14 +119,12 @@ public class LanguageTag { {"zh-min-nan", "nan"}, {"zh-xiang", "hsn"}, }; + LEGACY = HashMap.newHashMap(entries.length); for (String[] e : entries) { LEGACY.put(LocaleUtils.toLowerString(e[0]), e); } } - private LanguageTag() { - } - /* * BNF in RFC5646 * @@ -195,17 +184,27 @@ public class LanguageTag { itr = new StringTokenIterator(languageTag, SEP); } - LanguageTag tag = new LanguageTag(); - + String language = parseLanguage(itr, pp); + List<String> extlangs; + String script; + String region; + List<String> variants; + List<String> extensions; // langtag must start with either language or privateuse - if (tag.parseLanguage(itr, pp)) { - tag.parseExtlangs(itr, pp); - tag.parseScript(itr, pp); - tag.parseRegion(itr, pp); - tag.parseVariants(itr, pp); - tag.parseExtensions(itr, pp, errorMsg); + if (!language.isEmpty()) { + extlangs = parseExtlangs(itr, pp); + script = parseScript(itr, pp); + region = parseRegion(itr, pp); + variants = parseVariants(itr, pp); + extensions = parseExtensions(itr, pp, errorMsg); + } else { + extlangs = EMPTY_SUBTAGS; + script = EMPTY_SUBTAG; + region = EMPTY_SUBTAG; + variants = EMPTY_SUBTAGS; + extensions = EMPTY_SUBTAGS; } - tag.parsePrivateuse(itr, pp, errorMsg); + String privateuse = parsePrivateuse(itr, pp, errorMsg); if (!itr.isDone() && pp.getErrorIndex() == -1) { String s = itr.current(); @@ -221,110 +220,94 @@ public class LanguageTag { throw new IllformedLocaleException(errorMsg.toString(), pp.getErrorIndex()); } - return tag; + return new LanguageTag(language, script, region, privateuse, extlangs, variants, extensions); } // // Language subtag parsers // - private boolean parseLanguage(StringTokenIterator itr, ParsePosition pp) { + private static String parseLanguage(StringTokenIterator itr, ParsePosition pp) { if (itr.isDone() || pp.getErrorIndex() != -1) { - return 
false; + return EMPTY_SUBTAG; } - boolean found = false; - String s = itr.current(); if (isLanguage(s)) { - found = true; - language = s; pp.setIndex(itr.currentEnd()); itr.next(); + return s; } - return found; + return EMPTY_SUBTAG; } - private boolean parseExtlangs(StringTokenIterator itr, ParsePosition pp) { + private static List<String> parseExtlangs(StringTokenIterator itr, ParsePosition pp) { if (itr.isDone() || pp.getErrorIndex() != -1) { - return false; + return EMPTY_SUBTAGS; } - - boolean found = false; - + List<String> extlangs = null; while (!itr.isDone()) { String s = itr.current(); if (!isExtlang(s)) { break; } - found = true; - if (extlangs.isEmpty()) { + if (extlangs == null) { extlangs = new ArrayList<>(3); } extlangs.add(s); pp.setIndex(itr.currentEnd()); itr.next(); - if (extlangs.size() == 3) { // Maximum 3 extlangs break; } } - - return found; + return extlangs == null ? EMPTY_SUBTAGS : + Collections.unmodifiableList(extlangs); } - private boolean parseScript(StringTokenIterator itr, ParsePosition pp) { + private static String parseScript(StringTokenIterator itr, ParsePosition pp) { if (itr.isDone() || pp.getErrorIndex() != -1) { - return false; + return EMPTY_SUBTAG; } - boolean found = false; - String s = itr.current(); if (isScript(s)) { - found = true; - script = s; pp.setIndex(itr.currentEnd()); itr.next(); + return s; } - return found; + return EMPTY_SUBTAG; } - private boolean parseRegion(StringTokenIterator itr, ParsePosition pp) { + private static String parseRegion(StringTokenIterator itr, ParsePosition pp) { if (itr.isDone() || pp.getErrorIndex() != -1) { - return false; + return EMPTY_SUBTAG; } - - boolean found = false; - String s = itr.current(); if (isRegion(s)) { - found = true; - region = s; pp.setIndex(itr.currentEnd()); itr.next(); + return s; } - return found; + return EMPTY_SUBTAG; } - private boolean parseVariants(StringTokenIterator itr, ParsePosition pp) { + private static List<String> parseVariants(StringTokenIterator itr, ParsePosition pp) { 
if (itr.isDone() || pp.getErrorIndex() != -1) { - return false; + return EMPTY_SUBTAGS; } - - boolean found = false; + List<String> variants = null; while (!itr.isDone()) { String s = itr.current(); if (!isVariant(s)) { break; } - found = true; - if (variants.isEmpty()) { + if (variants == null) { variants = new ArrayList<>(3); } variants.add(s); @@ -332,16 +315,16 @@ public class LanguageTag { itr.next(); } - return found; + return variants == null ? EMPTY_SUBTAGS : + Collections.unmodifiableList(variants); } - private boolean parseExtensions(StringTokenIterator itr, ParsePosition pp, + private static List<String> parseExtensions(StringTokenIterator itr, ParsePosition pp, StringBuilder err) { if (itr.isDone() || pp.getErrorIndex() != -1) { - return false; + return EMPTY_SUBTAGS; } - - boolean found = false; + List<String> extensions = null; while (!itr.isDone()) { String s = itr.current(); @@ -368,26 +351,24 @@ public class LanguageTag { break; } - if (extensions.isEmpty()) { + if (extensions == null) { extensions = new ArrayList<>(4); } extensions.add(sb.toString()); - found = true; } else { break; } } - return found; + return extensions == null ? 
EMPTY_SUBTAGS : + Collections.unmodifiableList(extensions); } - private boolean parsePrivateuse(StringTokenIterator itr, ParsePosition pp, + private static String parsePrivateuse(StringTokenIterator itr, ParsePosition pp, StringBuilder err) { if (itr.isDone() || pp.getErrorIndex() != -1) { - return false; + return EMPTY_SUBTAG; } - boolean found = false; - String s = itr.current(); if (isPrivateusePrefix(s)) { int start = itr.currentStart(); @@ -410,12 +391,11 @@ public class LanguageTag { pp.setErrorIndex(start); err.append("Incomplete privateuse"); } else { - privateuse = sb.toString(); - found = true; + return sb.toString(); } } - return found; + return EMPTY_SUBTAG; } public static String caseFoldTag(String tag) { @@ -462,48 +442,50 @@ public class LanguageTag { } public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) { - LanguageTag tag = new LanguageTag(); - String language = baseLocale.getLanguage(); - String script = baseLocale.getScript(); - String region = baseLocale.getRegion(); - String variant = baseLocale.getVariant(); + String language = EMPTY_SUBTAG; + String script = EMPTY_SUBTAG; + String region = EMPTY_SUBTAG; + + String baseLanguage = baseLocale.getLanguage(); + String baseScript = baseLocale.getScript(); + String baseRegion = baseLocale.getRegion(); + String baseVariant = baseLocale.getVariant(); boolean hasSubtag = false; String privuseVar = null; // store ill-formed variant subtags - if (isLanguage(language)) { + if (isLanguage(baseLanguage)) { // Convert a deprecated language code to its new code - if (language.equals("iw")) { - language = "he"; - } else if (language.equals("ji")) { - language = "yi"; - } else if (language.equals("in")) { - language = "id"; - } - tag.language = language; + baseLanguage = switch (baseLanguage) { + case "iw" -> "he"; + case "ji" -> "yi"; + case "in" -> "id"; + default -> baseLanguage; + }; + language = baseLanguage; } - if (isScript(script)) { - tag.script = 
canonicalizeScript(script); + if (isScript(baseScript)) { + script = canonicalizeScript(baseScript); hasSubtag = true; } - if (isRegion(region)) { - tag.region = canonicalizeRegion(region); + if (isRegion(baseRegion)) { + region = canonicalizeRegion(baseRegion); hasSubtag = true; } // Special handling for no_NO_NY - use nn_NO for language tag - if (tag.language.equals("no") && tag.region.equals("NO") && variant.equals("NY")) { - tag.language = "nn"; - variant = ""; + if (language.equals("no") && region.equals("NO") && baseVariant.equals("NY")) { + language = "nn"; + baseVariant = EMPTY_SUBTAG; } - if (!variant.isEmpty()) { - List<String> variants = null; - StringTokenIterator varitr = new StringTokenIterator(variant, BaseLocale.SEP); + List<String> variants = null; + if (!baseVariant.isEmpty()) { + StringTokenIterator varitr = new StringTokenIterator(baseVariant, BaseLocale.SEP); while (!varitr.isDone()) { String var = varitr.current(); if (!isVariant(var)) { @@ -516,7 +498,6 @@ public class LanguageTag { varitr.next(); } if (variants != null) { - tag.variants = variants; hasSubtag = true; } if (!varitr.isDone()) { @@ -556,7 +537,6 @@ public class LanguageTag { } if (extensions != null) { - tag.extensions = extensions; hasSubtag = true; } @@ -570,59 +550,21 @@ public class LanguageTag { } } - if (privateuse != null) { - tag.privateuse = privateuse; - } - - if (tag.language.isEmpty() && (hasSubtag || privateuse == null)) { + if (language.isEmpty() && (hasSubtag || privateuse == null)) { // use lang "und" when 1) no language is available AND // 2) any of other subtags other than private use are available or // no private use tag is available - tag.language = UNDETERMINED; + language = UNDETERMINED; } - return tag; - } + privateuse = privateuse == null ? EMPTY_SUBTAG : privateuse; + extensions = extensions == null ? EMPTY_SUBTAGS : + Collections.unmodifiableList(extensions); + variants = variants == null ? 
EMPTY_SUBTAGS : + Collections.unmodifiableList(variants); - // - // Getter methods for language subtag fields - // - - public String getLanguage() { - return language; - } - - public List<String> getExtlangs() { - if (extlangs.isEmpty()) { - return Collections.emptyList(); - } - return Collections.unmodifiableList(extlangs); - } - - public String getScript() { - return script; - } - - public String getRegion() { - return region; - } - - public List<String> getVariants() { - if (variants.isEmpty()) { - return Collections.emptyList(); - } - return Collections.unmodifiableList(variants); - } - - public List<String> getExtensions() { - if (extensions.isEmpty()) { - return Collections.emptyList(); - } - return Collections.unmodifiableList(extensions); - } - - public String getPrivateuse() { - return privateuse; + // extlangs always empty for locale parse + return new LanguageTag(language, script, region, privateuse, EMPTY_SUBTAGS, variants, extensions); } //