Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17459 Add units for grammar #3682

Merged
merged 2 commits into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/maven.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: run CLDR console check
run: java -DCLDR_GITHUB_ANNOTATIONS=true -DCLDR_DIR=$(pwd) -Xmx6g -jar tools/cldr-code/target/cldr-code.jar check -S common,seed -e -z FINAL_TESTING
run: java -DCLDR_GITHUB_ANNOTATIONS=true -DCLDR_DIR=$(pwd) -Xmx6g -jar tools/cldr-code/target/cldr-code.jar check -S common,seed -e -z BUILD
deploy:
# don't run deploy on manual builds!
if: github.repository == 'unicode-org/cldr' && github.event_name == 'push' && github.ref == 'refs/heads/main' && github.event.inputs.git-ref == ''
Expand Down
2 changes: 1 addition & 1 deletion common/supplemental/units.xml
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ For terms of use, see http://www.unicode.org/copyright.html
<convertUnit source='pascal' baseUnit='kilogram-per-meter-square-second' systems="si metric prefixable"/>
<convertUnit source='bar' baseUnit='kilogram-per-meter-square-second' factor='100000' systems="si_acceptable metric prefixable"/>
<convertUnit source='atmosphere' baseUnit='kilogram-per-meter-square-second' factor='101325' systems="ussystem uksystem "/>
<convertUnit source='gasoline-energy-density' baseUnit='kilogram-per-meter-square-second' factor='33.705 * 3600 * 1000/gal_to_m3' systems="metric_adjacent ussystem uksystem" description="Constructed so that 1 gallon-gasoline-energy-density = 33.705 kWh as per https://www3.epa.gov/otaq/gvg/learn-more-technology.htm"/>
<convertUnit source='gasoline-energy-density' baseUnit='kilogram-per-meter-square-second' factor='33.705 * 3600 * 1000/gal_to_m3' systems="ussystem uksystem" description="Constructed so that 1 gallon-gasoline-energy-density = 33.705 kWh as per https://www3.epa.gov/otaq/gvg/learn-more-technology.htm"/>

<!-- pressure-per-length -->
<convertUnit source='ofhg' baseUnit='kilogram-per-square-meter-square-second' factor='13595.1*gravity' systems="metric_adjacent uksystem ussystem"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -745,21 +745,19 @@ public static Set<String> getGrammarLocales() {
"month",
"year");

// To see a list of the results for double-checking, run TestUnits with TestUnitsToTranslate -v
static final Set<String> EXCLUDE_GRAMMAR =
Set.of(
"point",
"dunam",
"dot",
"astronomical-unit",
"nautical-mile",
"knot",
"dalton",
"dot", // fallback is pixel
"dot-per-centimeter", // fallback is pixel
"dunam", // language-specific
"astronomical-unit", // specialized
"nautical-mile", // US/UK specific
"knot", // US/UK specific
"dalton", // specialized
"electronvolt", // specialized
"kilocalorie",
"electronvolt",
// The following may be reinstated after 45.
"dot-per-centimeter",
"millimeter-ofhg",
"milligram-ofglucose-per-deciliter");
"point");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If these were alphabetized, this might be less confusing.


public static Set<String> getSpecialsToTranslate() {
return INCLUDE_OTHER;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.impl.Row.R4;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.number.UnlocalizedNumberFormatter;
import com.ibm.icu.text.PluralRules;
Expand All @@ -29,6 +31,7 @@
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
Expand All @@ -48,7 +51,9 @@
import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
import org.unicode.cldr.util.Rational.FormatStyle;
import org.unicode.cldr.util.Rational.RationalParser;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.Validity.Status;

public class UnitConverter implements Freezable<UnitConverter> {
public static boolean DEBUG = false;
Expand Down Expand Up @@ -80,7 +85,7 @@ public class UnitConverter implements Freezable<UnitConverter> {
private Multimap<String, UnitSystem> sourceToSystems = TreeMultimap.create();
private Set<String> baseUnits;
private Multimap<String, Continuation> continuations = TreeMultimap.create();
private Comparator<String> quantityComparator;
private MapComparator<String> quantityComparator;

private Map<String, String> fixDenormalized;
private ImmutableMap<String, UnitId> idToUnitId;
Expand All @@ -92,6 +97,17 @@ public class UnitConverter implements Freezable<UnitConverter> {

public TargetInfoComparator targetInfoComparator;

private final MapComparator<String> LongUnitIdOrder = new MapComparator<>();
private final MapComparator<String> ShortUnitIdOrder = new MapComparator<>();

/**
 * Returns the comparator held in {@code LongUnitIdOrder}, which orders long unit IDs.
 *
 * <p>The ordering is filled in while this converter is being frozen: each short unit ID is
 * mapped through {@code getLongId} and added in the iteration order of a sorted set of keys
 * (quantity order, grouped unit system, conversion factor, short unit ID).
 *
 * <p>NOTE(review): {@code setErrorOnMissing(true)} is applied to the underlying comparator, so
 * comparing an ID that was never added presumably fails rather than sorting last; confirm
 * against {@code MapComparator}.
 *
 * @return the comparator for long unit IDs
 */
public Comparator<String> getLongUnitIdComparator() {
    return this.LongUnitIdOrder;
}

/**
 * Returns the comparator held in {@code ShortUnitIdOrder}, which orders short unit IDs.
 *
 * <p>The ordering is filled in while this converter is being frozen: short unit IDs are added
 * in the iteration order of a sorted set of keys (quantity order, grouped unit system,
 * conversion factor, short unit ID).
 *
 * <p>NOTE(review): {@code setErrorOnMissing(true)} is applied to the underlying comparator, so
 * comparing an ID that was never added presumably fails rather than sorting last; confirm
 * against {@code MapComparator}.
 *
 * @return the comparator for short unit IDs
 */
public Comparator<String> getShortUnitIdComparator() {
    return this.ShortUnitIdOrder;
}

/** Warning: ordering is important; determines the normalized output */
public static final Set<String> BASE_UNITS =
ImmutableSet.of(
Expand Down Expand Up @@ -198,6 +214,74 @@ public UnitConverter freeze() {
}
}
idToUnitId = ImmutableMap.copyOf(_idToUnitId);

// build the map comparators

Set<R4<Integer, UnitSystem, Rational, String>> all = new TreeSet<>();
Set<String> baseSeen = new HashSet<>();
for (String longUnit :
Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular)) {
Output<String> base = new Output<>();
String shortUnit = getShortId(longUnit);
ConversionInfo conversionInfo = parseUnitId(shortUnit, base, false);
if (conversionInfo == null) {
if (longUnit.equals("temperature-generic")) {
conversionInfo = parseUnitId("kelvin", base, false);
}
}
String quantity = getQuantityFromUnit(base.value, false);
Integer quantityNumericOrder = quantityComparator.getNumericOrder(quantity);
if (quantityNumericOrder == null) { // try the inverse
if (base.value.equals("meter-per-cubic-meter")) { // HACK
quantityNumericOrder = quantityComparator.getNumericOrder("consumption");
}
if (quantityNumericOrder == null) {
throw new IllegalArgumentException(
"Missing quantity for: " + base.value + ", " + shortUnit);
}
}

final EnumSet<UnitSystem> systems = EnumSet.copyOf(getSystemsEnum(shortUnit));

// to sort the right items together, put together a sort key
UnitSystem sortingSystem = systems.iterator().next();
switch (sortingSystem) {
case metric:
case si:
case si_acceptable:
case astronomical:
case metric_adjacent:
case person_age:
sortingSystem = UnitSystem.metric;
break;
// country specific
case other:
case ussystem:
case uksystem:
case jpsystem:
sortingSystem = UnitSystem.other;
break;
default:
throw new IllegalArgumentException(
"Add new unitSystem to a grouping: " + sortingSystem);
}
R4<Integer, UnitSystem, Rational, String> sortKey =
Row.of(
quantityNumericOrder,
sortingSystem,
conversionInfo.factor,
shortUnit);
all.add(sortKey);
}
LongUnitIdOrder.setErrorOnMissing(true);
ShortUnitIdOrder.setErrorOnMissing(true);
for (R4<Integer, UnitSystem, Rational, String> item : all) {
String shortId = item.get3();
ShortUnitIdOrder.add(shortId);
LongUnitIdOrder.add(getLongId(shortId));
}
LongUnitIdOrder.freeze();
ShortUnitIdOrder.freeze();
}
return this;
}
Expand Down Expand Up @@ -649,7 +733,7 @@ private void addToSourceToTarget(
}
}

private Comparator<String> getQuantityComparator(
private MapComparator<String> getQuantityComparator(
Map<String, String> baseUnitToQuantity2, Map<String, String> baseUnitToStatus2) {
// We want to sort all the quantities so that we have a natural ordering within compound
// units. So kilowatt-hour, not hour-kilowatt.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3544,7 +3544,22 @@ enum TranslationStatus {
has_grammar_X,
add_grammar,
skip_grammar,
skip_trans
skip_trans("\t— specific langs poss.)");

/** Creates a constant whose output name is exactly its enum constant name. */
private TranslationStatus() {
    this.outName = name();
}

private final String outName;

/**
 * Creates a constant whose output name is its enum constant name followed by extra text.
 *
 * @param extra text appended verbatim after the constant name
 */
private TranslationStatus(String extra) {
    this.outName = name() + extra;
}

/**
 * Returns the output name assigned in the constructor, which may carry extra text after the
 * enum constant name.
 *
 * @return the stored output name
 */
@Override
public String toString() {
    return this.outName;
}
}

/**
Expand All @@ -3555,7 +3570,8 @@ public void TestUnitsToTranslate() {
Set<String> toTranslate = GrammarInfo.getUnitsToAddGrammar();
final CLDRConfig config = CLDRConfig.getInstance();
final UnitConverter converter = config.getSupplementalDataInfo().getUnitConverter();
Map<String, TranslationStatus> shortUnitToTranslationStatus40 = new TreeMap<>();
Map<String, TranslationStatus> shortUnitToTranslationStatus40 =
new TreeMap<>(converter.getShortUnitIdComparator());
for (String longUnit :
Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular)) {
String shortUnit = converter.getShortId(longUnit);
Expand Down Expand Up @@ -3588,9 +3604,9 @@ public void TestUnitsToTranslate() {
TranslationStatus status40 = entry.getValue();
if (isVerbose())
System.out.println(
shortUnit
converter.getQuantityFromUnit(shortUnit, false)
+ "\t"
+ converter.getQuantityFromUnit(shortUnit, false)
+ shortUnit
+ "\t"
+ converter.getSystemsEnum(shortUnit)
+ "\t"
Expand Down
Loading