You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
48 lines
1.4 KiB
48 lines
1.4 KiB
const fs = require('fs');
|
|
const request = require('request');
|
|
const classes = require('./classes');
|
|
const UnicodeTrieBuilder = require('unicode-trie/builder');
|
|
|
|
// This downloads the Unicode LineBreak.txt data file, merges consecutive
// entries that share a line-break class into ranges, and writes the
// resulting trie to classes.trie.
|
|
// Fetch LineBreak.txt for Unicode 13.0.0, fold its entries into ranges, and
// serialize them as a trie next to this script (classes.trie).
//
// The callback throws on any download or parse failure so that a bad fetch
// aborts the script instead of producing a bogus or empty trie file.
request('http://www.unicode.org/Public/13.0.0/ucd/LineBreak.txt', function (err, res, data) {
  // Transport-level failure (DNS, connection reset, ...): `data` is unusable.
  if (err) {
    throw err;
  }

  // An error page would otherwise be parsed as if it were the data file.
  if (res.statusCode !== 200) {
    throw new Error(`Failed to download LineBreak.txt: HTTP ${res.statusCode}`);
  }

  // Each match looks like "XXXX;CC" or "XXXX..YYYY;CC" (hex code points
  // followed by a two- or three-character class name).
  const matches = data.match(/^[0-9A-F]+(\.\.[0-9A-F]+)?;[A-Z][A-Z0-9]([A-Z])?/gm);

  // String.prototype.match returns null (not []) when nothing matches.
  if (matches == null) {
    throw new Error('No line-break entries found in LineBreak.txt');
  }

  // State of the range currently being accumulated (hex strings / class name).
  let start = null;
  let end = null;
  let type = null;

  // classes.XX is handed to the builder as its initial value.
  const trie = new UnicodeTrieBuilder(classes.XX);

  // Collect entries in the line-breaking table into ranges to keep things
  // smaller. Consecutive entries with the same class are merged; note that
  // contiguity of their code points is not checked.
  for (const match of matches) {
    const fields = match.split(/;|\.\./);
    const rangeStart = fields[0];
    let rangeEnd;
    let rangeType;

    if (fields.length === 3) {
      // "start..end;class"
      rangeEnd = fields[1];
      rangeType = fields[2];
    } else {
      // "codepoint;class" -- a single code point is a one-element range.
      rangeEnd = rangeStart;
      rangeType = fields[1];
    }

    // The class changed: flush the accumulated range before starting anew.
    // NOTE(review): the trailing `true` is presumably setRange's overwrite
    // flag -- confirm against the unicode-trie builder API.
    if ((type != null) && (rangeType !== type)) {
      trie.setRange(parseInt(start, 16), parseInt(end, 16), classes[type], true);
      type = null;
    }

    // Begin a new accumulated range.
    if (type == null) {
      start = rangeStart;
      type = rangeType;
    }

    end = rangeEnd;
  }

  // Flush the final range, which the loop never gets a chance to emit.
  trie.setRange(parseInt(start, 16), parseInt(end, 16), classes[type], true);

  // Write the trie to a file.
  fs.writeFileSync(`${__dirname}/classes.trie`, trie.toBuffer());
});
|