Skip to content

Commit

Permalink
Add spec data for features w/ Specifications table
Browse files Browse the repository at this point in the history
This change adds spec data in the `*.json` sources for all features that
have an `mdn_url` for an MDN article with a **Specification(s)** table —
modulo the following exceptions:

* no spec data is added for deprecated features (those whose `"status"` object has `"deprecated": true`)

* no spec data is added for any cases where a URL found in an MDN
  **Specification(s)** table has no fragment-ID part

Here’s an example of the data it adds for a particular feature, the
`api.History` feature:

```
"specs": [
  {
    "name": "HTML WHATWG",
    "url": "https://html.spec.whatwg.org/multipage/browsers.html#the-history-interface"
  },
  {
    "name": "HTML5 W3C",
    "url": "https://www.w3.org/TR/html50/browsers.html#the-history-interface"
  },
  {
    "name": "Custom Scroll Restoration",
    "url": "https://majido.github.io/scroll-restoration-proposal/history-based-api.html#web-idl"
  }
]
```

The change also includes an `add-specs.py` script that can be used to
(re)generate all the spec data and update all the `*.json` sources.
(The script works by scraping MDN **Specification(s)** tables.)
  • Loading branch information
sideshowbarker committed Oct 14, 2018
1 parent c6d80e5 commit 1e76365
Show file tree
Hide file tree
Showing 1,409 changed files with 45,573 additions and 4,885 deletions.
165 changes: 165 additions & 0 deletions add-specs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#!/usr/bin/env python2
import certifi
import io
import json
import os.path
import sys
import time
import urllib3
from collections import OrderedDict
from lxml.html import parse
from termcolor import cprint
from urlparse import urlparse


def alarm(message):
    """Print *message* as a bold red line prefixed with ``Alarm: ``."""
    text = 'Alarm: %s' % message
    cprint(text, 'red', attrs=['bold'])


def _fetchSection(url, http):
    """Fetch *url* with *http*, retrying once after a pause on a 5xx status.

    Returns the response object, or None when the page does not exist (404)
    or the server still answers with a 5xx status after the retry.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Trying %s' % url)
    response = http.request('GET', url)
    if response.status > 499:
        sys.stderr.write('50x for %s. Will retry after 60s...\n' % url)
        time.sleep(61)
        print('Retrying %s' % url)
        response = http.request('GET', url)
        if response.status > 499:
            sys.stderr.write('50x for %s. Giving up.\n' % url)
            return None
    if response.status == 404:
        return None
    return response


def getSpecsArray(mdn_url, sectionname, spec_urls, http):
    """Scrape the spec links from one section of an MDN article.

    The path of *mdn_url* is requested from developer.mozilla.org with
    ``?raw&macros&section=<sectionname>``. For every table row that has
    ``<td>`` cells, the first link in the first cell is taken as the spec
    URL. URLs without a fragment or without a hostname are reported through
    ``alarm`` and skipped.

    Parameters:
        mdn_url: URL of the MDN article; only its path component is used.
        sectionname: name of the article section to request.
        spec_urls: dict mapping a spec base URL to a dict with a ``'name'``
            key.
        http: object with a ``request(method, url)`` method whose result has
            ``status`` and ``data`` attributes (main passes a
            ``urllib3.PoolManager``).

    Returns:
        A list of ``OrderedDict`` objects with ``'name'`` and ``'url'`` keys,
        or ``[]`` when the page is missing, the server keeps failing, the
        body is empty, or anything goes wrong while decoding or parsing.
    """
    url = 'https://developer.mozilla.org' + urlparse(mdn_url).path + \
        '?raw&macros&section=' + sectionname
    response = _fetchSection(url, http)
    if response is None:
        return []
    try:
        # Decoding happens inside the try so that one badly encoded page is
        # reported and skipped instead of aborting the whole run.
        html = response.data.decode('utf-8')
        if html == '':
            return []
        doc = parse(io.StringIO(html))
        specs = []
        for row in doc.xpath('//table[1]//tr[td]'):
            hrefs = row.xpath('td[1]/a/@href')
            if not hrefs:
                continue
            spec_url = hrefs[0]
            parsed = urlparse(spec_url)
            if not parsed.fragment:
                alarm(mdn_url + ' has spec URL with no fragment: ' + spec_url)
                continue
            if not parsed.hostname:
                alarm(mdn_url + ' has spec URL with no hostname: ' + spec_url)
                continue
            # Several base URLs can be prefixes of the same spec URL; pick
            # the longest one so the result does not depend on the iteration
            # order of spec_urls.
            matches = [base for base in spec_urls
                       if spec_url.startswith(base)]
            spec_name = ''
            if matches:
                spec_name = spec_urls[max(matches, key=len)]['name']
            cprint('Adding %s (%s)' % (spec_url, spec_name), 'green')
            spec = OrderedDict()
            spec['name'] = spec_name
            spec['url'] = spec_url
            specs.append(spec)
        return specs
    except Exception as e:
        # Best effort: a page that cannot be parsed yields no spec data.
        sys.stderr.write('Something went wrong: %s\n' % str(e))
        return []


def walkBaseData(basedata, filename, spec_urls, http, basename, sectionname,
                 bcd_data):
    """Run processTarget over the two levels of entries below *basedata*.

    Every value in *basedata*, and every value one level beneath it, is
    passed to processTarget together with its dotted path; each result is
    stored back under ``bcd_data[sectionname][basename]``.
    """
    prefix = '%s.%s' % (sectionname, basename)
    for feature_key in basedata:
        feature = basedata[feature_key]
        feature_path = '%s.%s' % (prefix, feature_key)
        bcd_data[sectionname][basename][feature_key] = processTarget(
            feature, filename, spec_urls, http, feature_path)
        for child_key in feature:
            child = feature[child_key]
            child_path = '%s.%s' % (feature_path, child_key)
            bcd_data[sectionname][basename][feature_key][child_key] = \
                processTarget(child, filename, spec_urls, http, child_path)


def processTarget(target, filename, spec_urls, http, path):
    """Add scraped spec data to *target* when it is an eligible feature.

    *target* is returned unchanged when any of these holds:

    * it is not a dict, or has no ``'__compat'`` dict;
    * its ``__compat`` has no ``'mdn_url'`` (reported through ``alarm``
      unless *path* contains an underscore);
    * its ``__compat`` status marks it as deprecated;
    * it already has ``'specs'`` and the script was not started with the
      ``fullupdate`` argument;
    * no spec data could be scraped.

    Otherwise the list returned by getSpecsArray (section
    ``Specifications``, falling back to ``Specification``) is stored in
    ``target['__compat']['specs']``.

    Parameters:
        target: the candidate value; any type is accepted.
        filename: name of the file being processed, used in alarm messages.
        spec_urls: mapping passed through to getSpecsArray.
        http: HTTP client passed through to getSpecsArray.
        path: dotted path of *target*, used in alarm messages.

    Returns:
        *target* itself, possibly updated in place.
    """
    # Explicit type checks replace a blanket ``except TypeError: pass`` that
    # also hid TypeErrors raised while fetching and parsing.
    if not isinstance(target, dict) or '__compat' not in target:
        return target
    target_data = target['__compat']
    if not isinstance(target_data, dict):
        return target
    if 'mdn_url' not in target_data:
        if '_' not in path:
            alarm('%s in %s has no mdn_url' % (path, filename))
        return target
    # A missing status block is treated as "not deprecated" rather than
    # raising KeyError.
    status = target_data.get('status')
    if isinstance(status, dict) and status.get('deprecated'):
        return target
    full_update = len(sys.argv) > 1 and sys.argv[1] == 'fullupdate'
    if 'specs' in target_data and not full_update:
        return target
    mdn_url = target_data['mdn_url']
    specs = getSpecsArray(mdn_url, 'Specifications', spec_urls, http)
    if not specs:
        specs = getSpecsArray(mdn_url, 'Specification', spec_urls, http)
    if specs:
        target_data['specs'] = specs
    return target


def main():
    """Regenerate spec data in every ``*.json`` file of the data directories.

    Downloads SpecData.json from the mdn/kumascript repository and builds a
    mapping from spec base URL to spec name. Then, for each ``.json`` file
    found under the listed directories (relative to the current working
    directory), it loads the file, runs processTarget and walkBaseData over
    its contents, and rewrites the file in place with two-space indentation
    and a trailing newline.
    """
    http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED',
                               ca_certs=certifi.where())
    response = http.request('GET', 'https://raw.githubusercontent.com/mdn/' +
                            'kumascript/master/macros/SpecData.json')
    if response.status != 200:
        # Fail with a clear message instead of a JSON parse traceback.
        sys.exit('Could not fetch SpecData.json (HTTP status %s)'
                 % response.status)
    spec_data = json.loads(response.data, object_pairs_hook=OrderedDict)
    spec_urls = {}
    for spec_name in spec_data:
        spec_urls[spec_data[spec_name]['url']] = {'name': spec_name}
    dirnames = [
        'api',
        'css',
        'html',
        'http',
        'javascript',
        'mathml',
        'svg',
        'webdriver',
        'xpath',
        'xslt',
    ]
    for dirname in dirnames:
        # Only regular files are collected; a directory whose name happens
        # to end in ".json" must not be opened as a data file.
        filenames = sorted(os.path.join(dirpath, name)
                           for (dirpath, _dirs, names) in os.walk(dirname)
                           for name in names
                           if os.path.splitext(name)[1] == '.json')
        for filename in filenames:
            # The with-block closes the file even when processing raises;
            # in that case nothing has been written and the file is intact.
            with io.open(filename, 'r+', encoding='utf-8') as f:
                bcd_data = json.load(f, object_pairs_hook=OrderedDict)
                for sectionname in bcd_data:
                    for basename in bcd_data[sectionname]:
                        basedata = bcd_data[sectionname][basename]
                        path = '%s.%s' % (sectionname, basename)
                        bcd_data[sectionname][basename] = \
                            processTarget(basedata, filename, spec_urls,
                                          http, path)
                        if basedata:
                            walkBaseData(basedata, filename, spec_urls, http,
                                         basename, sectionname, bcd_data)
                f.seek(0)
                # On Python 2, json.dumps may return a byte string, while a
                # file opened with io.open in text mode only accepts unicode.
                f.write(unicode(json.dumps(bcd_data, indent=2,
                                           separators=(',', ': '),
                                           ensure_ascii=False) + '\n'))
                f.truncate()


# Run the update only when executed as a script, so that importing this
# module (for example to reuse its functions) has no side effects.
if __name__ == '__main__':
    main()
32 changes: 28 additions & 4 deletions api/AbortController.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@
"experimental": true,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "DOM WHATWG",
"url": "https://dom.spec.whatwg.org/#interface-abortcontroller"
}
]
},
"AbortController": {
"__compat": {
Expand Down Expand Up @@ -99,7 +105,13 @@
"experimental": true,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "DOM WHATWG",
"url": "https://dom.spec.whatwg.org/#dom-abortcontroller-abortcontroller"
}
]
}
},
"signal": {
Expand Down Expand Up @@ -150,7 +162,13 @@
"experimental": true,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "DOM WHATWG",
"url": "https://dom.spec.whatwg.org/#dom-abortcontroller-signal"
}
]
}
},
"abort": {
Expand Down Expand Up @@ -201,7 +219,13 @@
"experimental": true,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "DOM WHATWG",
"url": "https://dom.spec.whatwg.org/#dom-abortcontroller-abort"
}
]
}
}
}
Expand Down
24 changes: 21 additions & 3 deletions api/AbortSignal.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@
"experimental": true,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "DOM WHATWG",
"url": "https://dom.spec.whatwg.org/#interface-AbortSignal"
}
]
},
"aborted": {
"__compat": {
Expand Down Expand Up @@ -98,7 +104,13 @@
"experimental": true,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "DOM WHATWG",
"url": "https://dom.spec.whatwg.org/#dom-abortsignal-aborted"
}
]
}
},
"onabort": {
Expand Down Expand Up @@ -149,7 +161,13 @@
"experimental": true,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "DOM WHATWG",
"url": "https://dom.spec.whatwg.org/#dom-abortsignal-onabort"
}
]
}
}
}
Expand Down
16 changes: 14 additions & 2 deletions api/AbsoluteOrientationSensor.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,13 @@
"experimental": false,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "Orientation Sensor",
"url": "https://www.w3.org/TR/orientation-sensor/#absoluteorientationsensor-interface"
}
]
},
"AbsoluteOrientationSensor": {
"__compat": {
Expand Down Expand Up @@ -81,7 +87,13 @@
"experimental": false,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "Orientation Sensor",
"url": "https://www.w3.org/TR/orientation-sensor/#dom-absoluteorientationsensor-absoluteorientationsensor"
}
]
}
}
}
Expand Down
16 changes: 14 additions & 2 deletions api/AbstractWorker.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@
"experimental": false,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "HTML WHATWG",
"url": "https://html.spec.whatwg.org/multipage/#abstractworker"
}
]
},
"onerror": {
"__compat": {
Expand Down Expand Up @@ -98,7 +104,13 @@
"experimental": false,
"standard_track": true,
"deprecated": false
}
},
"specs": [
{
"name": "HTML WHATWG",
"url": "https://html.spec.whatwg.org/multipage/#handler-abstractworker-onerror"
}
]
}
}
}
Expand Down
Loading

0 comments on commit 1e76365

Please sign in to comment.