Skip to content

Commit

Permalink
build: i18n: Autodownload ICU, Add a test.
Browse files Browse the repository at this point in the history
This is to implement
nodejs#7676 (comment)

* make `--with-intl=none` the default
 * Download, verify (md5), unpack ICU's zip if not there
 * update docs
* add a test

There's a "list" of URLs being used, but right now only the
first is picked up. The logic works something like this:

* if there is no directory `deps/icu`,
 * if no zip file (currently `icu4c-54_1-src.zip`),
  * download zip file (icu-project.org -> sf.net)
 * verify the MD5 sum of the zipfile
  * if bad, print error and exit
 * unpack the zipfile into `deps/icu`
* if `deps/icu` now exists, use it, else fail with help text

Also:
* refactor some code into tools/configure.d/nodedownload.py
* add `intl-none` option for `vcbuild.bat`

To rebuild `deps/icu-small` - (not currently checked in)
```
bash tools/icu/prepare-icu-source.sh
```

Reduces space by about 1MB with ICU 54 (compared to building without this patch).
Also trims a few other source files, but only when the exact ICU
version matches. This is for future-proofing: a file that is unneeded now may
be needed by a future ICU.
  • Loading branch information
srl295 committed Dec 11, 2014
1 parent 8708c7a commit 8e93274
Show file tree
Hide file tree
Showing 10 changed files with 415 additions and 19 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ ipch/
email.md
deps/v8-*
deps/icu
deps/icu*.zip
deps/icu*.tgz
./node_modules
.svn/

Expand Down
64 changes: 60 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,20 +83,76 @@ make doc
man doc/node.1
```

### To build `Intl` (ECMA-402) support:
### `Intl` (ECMA-402) support:

*Note:* more docs, including how to reduce disk footprint, are on
[Intl](https://github.com/joyent/node/wiki/Intl) support is not
enabled by default.

#### "small" (English only) support

This option will build with "small" (English only) support, but
the full `Intl` (ECMA-402) APIs. It will download the ICU library
as needed.

Unix/Macintosh:

```sh
./configure --with-intl=small-icu
```

Windows:

```sh
vcbuild small-icu
```

The `small-icu` mode builds
with English-only data. You can add full data at runtime.

*Note:* more docs are on
[the wiki](https://github.com/joyent/node/wiki/Intl).

#### Build with full ICU support (all locales supported by ICU):

*Note:* this may download ICU if you don't already have a copy in `deps/icu`.

Unix/Macintosh:

```sh
./configure --with-intl=full-icu
```

Windows:

```sh
vcbuild full-icu
```

#### Build with no Intl support `:-(`

The `Intl` object will not be available.

Unix/Macintosh:

```sh
./configure --with-intl=none
```

Windows:

```sh
vcbuild intl-none
```

#### Use existing installed ICU (Unix/Macintosh only):

```sh
pkg-config --modversion icu-i18n && ./configure --with-intl=system-icu
```

#### Build ICU from source:
#### Build with a specific ICU:

First: Unpack latest ICU
First: Unpack latest ICU to `deps/icu`
[icu4c-**##.#**-src.tgz](http://icu-project.org/download) (or `.zip`)
as `deps/icu` (You'll have: `deps/icu/source/...`)

Expand Down
73 changes: 66 additions & 7 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@ import re
import shlex
import subprocess
import sys
import shutil

CC = os.environ.get('CC', 'cc')

root_dir = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(root_dir, 'tools', 'gyp', 'pylib'))
from gyp.common import GetFlavor

# imports in tools/configure.d
sys.path.insert(0, os.path.join(root_dir, 'tools', 'configure.d'))
import nodedownload

# parse our options
parser = optparse.OptionParser()

Expand Down Expand Up @@ -712,6 +717,34 @@ def glob_to_var(dir_base, dir_sub):
return list

def configure_intl(o):
icus = [
{
'url': 'http://download.icu-project.org/files/icu4c/54.1/icu4c-54_1-src.zip',
# from https://ssl.icu-project.org/files/icu4c/54.1/icu4c-src-54_1.md5:
'md5': '6b89d60e2f0e140898ae4d7f72323bca',
},
]
def icu_download(path):
    """Locate, or fetch, an ICU source archive whose MD5 sum checks out.

    Walks the entries of 'icus' in order. For each one the archive is
    expected at deps/<last component of the URL> under root_dir; it is
    downloaded with nodedownload.retrievefile when no such file exists.
    Returns the archive path as soon as one matches its expected MD5,
    or None when no entry does.

    The 'path' argument is accepted but not used.
    """
    for candidate in icus:
        source_url, expected_md5 = candidate['url'], candidate['md5']
        archive_name = source_url.rsplit('/', 1)[-1]
        archive = os.path.join(root_dir, 'deps', archive_name)

        if os.path.isfile(archive):
            print(' Re-using existing %s' % archive)
        else:
            nodedownload.retrievefile(source_url, archive)

        # Nothing to verify if the archive still is not there.
        if not os.path.isfile(archive):
            continue

        sys.stdout.write(' Checking file integrity with MD5:\r')
        actual_md5 = nodedownload.md5sum(archive)
        print(' MD5: %s %s' % (actual_md5, archive))
        if actual_md5 == expected_md5:
            return archive

        # Bad checksum: report it and move on to the next entry, if any.
        print(' Expected: %s *MISMATCH*' % expected_md5)
        print('\n ** Corrupted ZIP? Delete %s to retry download.\n' % archive)
    return None
icu_config = {
'variables': {}
}
Expand All @@ -723,7 +756,6 @@ def configure_intl(o):
write(icu_config_name, do_not_edit +
pprint.pformat(icu_config, indent=2) + '\n')

# small ICU is off by default.
# always set icu_small, node.gyp depends on it being defined.
o['variables']['icu_small'] = b(False)

Expand All @@ -739,6 +771,8 @@ def configure_intl(o):
o['variables']['icu_gyp_path'] = options.with_icu_path
return
# --with-intl=<with_intl>
if with_intl is None:
with_intl = 'none' # The default mode of Intl
if with_intl == 'none' or with_intl is None:
o['variables']['v8_enable_i18n_support'] = 0
return # no Intl
Expand Down Expand Up @@ -769,20 +803,45 @@ def configure_intl(o):
# Note: non-ICU implementations could use other 'with_intl'
# values.

icu_parent_path = os.path.join(root_dir, 'deps')
icu_full_path = os.path.join(icu_parent_path, 'icu')
icu_small_path = os.path.join(icu_parent_path, 'icu-small')
icu_small_tag = os.path.join(icu_full_path, 'is-small-icu.txt')

## Use (or not) an embedded small-icu.
if with_intl == 'small-icu':
if not os.path.isdir(icu_full_path) and os.path.isdir(icu_small_path):
# deps/small-icu -> deps/icu
print 'Copying small ICU %s to %s' % (icu_small_path, icu_full_path)
shutil.copytree(icu_small_path, icu_full_path)
#else:
# print 'Not copying %s to %s' % (icu_small_path, icu_full_path)
elif os.path.isfile(icu_small_tag):
print 'deleting small-icu %s for --with-intl=%s' % (icu_full_path, with_intl)
shutil.rmtree(icu_full_path)

# ICU mode. (icu-generic.gyp)
byteorder = sys.byteorder
o['variables']['icu_gyp_path'] = 'tools/icu/icu-generic.gyp'
# ICU source dir relative to root
icu_full_path = os.path.join(root_dir, 'deps/icu')
o['variables']['icu_path'] = icu_full_path
if not os.path.isdir(icu_full_path):
print 'Error: ICU path is not a directory: %s' % (icu_full_path)
print '* ECMA-402 (Intl) support didn\'t find ICU in %s..' % (icu_full_path)
# can we download (or find) a zipfile?
localzip = icu_download(icu_full_path)
if localzip:
nodedownload.unpack(localzip, icu_parent_path)
if not os.path.isdir(icu_full_path):
print ' Cannot build Intl without ICU in %s.' % (icu_full_path)
print ' (Fix, or disable with "--with-intl=none" )'
sys.exit(1)
else:
print '* Using ICU in %s' % (icu_full_path)
# Now, what version of ICU is it? We just need the "major", such as 54.
# uvernum.h contains it as a #define.
uvernum_h = os.path.join(icu_full_path, 'source/common/unicode/uvernum.h')
if not os.path.isfile(uvernum_h):
print 'Error: could not load %s - is ICU installed?' % uvernum_h
print ' Error: could not load %s - is ICU installed?' % uvernum_h
sys.exit(1)
icu_ver_major = None
matchVerExp = r'^\s*#define\s+U_ICU_VERSION_SHORT\s+"([^"]*)".*'
Expand All @@ -792,7 +851,7 @@ def configure_intl(o):
if m:
icu_ver_major = m.group(1)
if not icu_ver_major:
print 'Could not read U_ICU_VERSION_SHORT version from %s' % uvernum_h
print ' Could not read U_ICU_VERSION_SHORT version from %s' % uvernum_h
sys.exit(1)
icu_endianness = sys.byteorder[0]; # TODO(srl295): EBCDIC should be 'e'
o['variables']['icu_ver_major'] = icu_ver_major
Expand All @@ -819,8 +878,8 @@ def configure_intl(o):
# this is the icudt*.dat file which node will be using (platform endianness)
o['variables']['icu_data_file'] = icu_data_file
if not os.path.isfile(icu_data_path):
print 'Error: ICU prebuilt data file %s does not exist.' % icu_data_path
print 'See the README.md.'
print ' Error: ICU prebuilt data file %s does not exist.' % icu_data_path
print ' See the README.md.'
# .. and we're not about to build it from .gyp!
sys.exit(1)
# map from variable name to subdirs
Expand Down
62 changes: 62 additions & 0 deletions test/simple/test-intl.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright Joyent, Inc. and other Node contributors.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit
// persons to whom the Software is furnished to do so, subject to the
// following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.

var common = require('../common');
var assert = require('assert');

// The build configuration stores this flag as a number rather than a boolean
// (configure writes 0 for --with-intl=none), and it may be absent altogether.
var enablei18n = process.config.variables.v8_enable_i18n_support;
if (enablei18n === undefined) {
  enablei18n = false;
}

var haveIntl = (global.Intl != undefined);

if (!haveIntl) {
  // assert.equal compares loosely on purpose, so that 0 matches false.
  assert.equal(enablei18n, false,
               '"Intl" object is NOT present but v8_enable_i18n_support is ' +
               enablei18n);
  console.log('Skipping Intl tests because Intl object not present.');
} else {
  assert.equal(enablei18n, true,
               '"Intl" object is present but v8_enable_i18n_support is ' +
               enablei18n + '. Is this test out of date?');

  // Check with toLocaleString
  var date0 = new Date(0);
  var GMT = 'Etc/GMT';
  var optsGMT = {timeZone: GMT};
  var localeString0 = date0.toLocaleString(['en'], optsGMT);
  var expectString0 = '1/1/1970, 12:00:00 AM'; // epoch
  assert.equal(localeString0, expectString0);

  // check with a Formatter
  var dtf = new Intl.DateTimeFormat(['en'],
                                    {timeZone: GMT,
                                     month: 'short',
                                     year: '2-digit'});
  var localeString1 = dtf.format(date0);
  assert.equal(localeString1, 'Jan 70');

  // number format
  assert.equal(new Intl.NumberFormat(['en']).format(12345.67890), '12,345.679');

  var coll = new Intl.Collator(['en'],
                               {sensitivity: 'base', ignorePunctuation: true});

  assert.equal(coll.compare('blackbird', 'black-bird'), 0,
               'ignore punctuation failed');

  // compare() only promises a negative, zero or positive number, so check the
  // sign of the result rather than requiring exactly -1 or 1.
  assert.ok(coll.compare('blackbird', 'red-bird') < 0, 'compare less failed');
  assert.ok(coll.compare('bluebird', 'blackbird') > 0,
            'compare greater failed');
  assert.equal(coll.compare('Bluebird', 'bluebird'), 0, 'ignore case failed');
  assert.equal(coll.compare('\ufb03', 'ffi'), 0,
               'ffi ligature (contraction) failed');
}
57 changes: 57 additions & 0 deletions tools/configure.d/nodedownload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python
# Moved some utilities here from ../../configure

import urllib
import hashlib
import sys
import zipfile

def formatSize(amt):
    """Format a byte count as a string in MB, with one decimal place.

    One MB is taken as 1024 * 1024 bytes. The result carries no unit
    suffix; callers append "MB" themselves.
    """
    # The float divisor forces true division under Python 2 as well.
    return "{:.1f}".format(amt / (1024.0 * 1024.0))

def spin(c):
    """Return the spinner character to show for tick number 'c'.

    The characters repeat in a fixed cycle, so successive values of 'c'
    produce a simple text animation.
    """
    frames = ".:|'"
    position = c % len(frames)
    return frames[position]

class ConfigOpener(urllib.FancyURLopener):
"""fancy opener used by retrievefile. Set a UA"""
# append to existing version (UA)
version = '%s node.js/configure' % urllib.URLopener.version

def reporthook(count, size, total):
    """Progress callback passed to the opener's retrieve() by retrievefile.

    count: number of blocks transferred so far.
    size:  size of one block, in bytes.
    total: total size of the download, in bytes. urllib documents that
           this may be -1 when the server does not report a size.

    Rewrites a single status line on stdout; returns nothing.
    """
    downloaded = count * size
    if total > 0:
        # The final block is normally only partly filled, so count * size
        # can overshoot; never claim more than the whole file.
        downloaded = min(downloaded, total)
        total_text = formatSize(total)
    else:
        # Size unknown: show a placeholder instead of a negative size.
        total_text = '?'
    sys.stdout.write(' Fetch: %c %sMB total, %sMB downloaded \r' %
                     (spin(count), total_text, formatSize(downloaded)))
    # The line ends in '\r', not '\n', so a line-buffered stdout would
    # otherwise hold the update back.
    sys.stdout.flush()

def _discard_partial(path):
    """Best-effort removal of an incomplete download at 'path'."""
    # Kept in its own function so that handling OSError here cannot
    # disturb the exception the caller is about to re-raise (Python 2
    # restores the caller's exception state when this frame exits).
    import os  # this module has no file-level 'import os'
    try:
        os.remove(path)
    except OSError:
        pass  # nothing was written, or it cannot be removed; not fatal


def retrievefile(url, targetfile):
    """Fetch 'url' and store it as the local file 'targetfile'.

    Returns targetfile on success. On any failure an error naming the
    URL is printed, whatever was written to targetfile so far is removed,
    and the original exception is re-raised.
    """
    try:
        sys.stdout.write(' <%s>\nConnecting...\r' % url)
        sys.stdout.flush()
        ConfigOpener().retrieve(url, targetfile, reporthook=reporthook)
        print('')  # clear the line
        return targetfile
    except BaseException:
        # Deliberately broad (includes Ctrl-C): the exception is always
        # re-raised, this handler only reports and cleans up.
        print(' ** Error occurred while downloading\n <%s>' % url)
        # Do not leave a truncated file behind; a later run would find
        # it and treat it as an already-downloaded archive.
        _discard_partial(targetfile)
        raise

def md5sum(targetfile):
    """Return the MD5 hex digest of the file at path 'targetfile'.

    The file is read in binary mode, one fixed-size chunk at a time, so
    large files are never loaded into memory whole. Errors from opening
    or reading the file propagate to the caller.
    """
    chunk_size = 64 * 1024
    digest = hashlib.md5()
    with open(targetfile, 'rb') as f:
        chunk = f.read(chunk_size)
        # Test truthiness rather than comparing with "": at end of file
        # read() returns an empty str on Python 2 but empty bytes on
        # Python 3, and b"" != "" would never end the loop.
        while chunk:
            digest.update(chunk)
            chunk = f.read(chunk_size)
    return digest.hexdigest()

def unpack(packedfile, parent_path):
    """Extract all members of the archive 'packedfile' under 'parent_path'.

    Only the zip format is handled.
    """
    archive = zipfile.ZipFile(packedfile, 'r')
    try:
        print(' Extracting source zip: %s' % packedfile)
        archive.extractall(parent_path)
    finally:
        # Always release the file handle, even if extraction fails.
        archive.close()
Loading

0 comments on commit 8e93274

Please sign in to comment.