You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
70 lines
2.0 KiB
70 lines
2.0 KiB
var Token = require("./Token");
|
|
var StringSource = require("./StringSource");
|
|
|
|
// Public entry point of this module. RegexTokeniser is a function declaration
// (defined further down), so it is hoisted and already available at this point.
exports.RegexTokeniser = RegexTokeniser;
|
|
|
|
/**
 * Creates a tokeniser driven by an ordered list of regex rules.
 *
 * At every position of the input the rules are tried in order. The first rule
 * whose regex matches a non-empty string starting exactly at that position
 * produces a token: its type is the rule's name and its value is the regex's
 * first capture group (undefined when the pattern has no group 1 or the group
 * did not take part in the match). When no rule matches, a one-character
 * "unrecognisedCharacter" token is produced instead, so tokenising never
 * stalls. The resulting list always finishes with an "end" token whose value
 * is null.
 *
 * Each rule's regex is recompiled as a global regex so that matching can be
 * started at an arbitrary index via lastIndex. The flags that affect what the
 * pattern matches (i, m, s, u, v) are carried over from the original regex;
 * the caller's regex objects themselves are never mutated.
 *
 * @param {Array<{name: string, regex: RegExp}>} rules Ordered token rules.
 * @returns {{tokenise: function(string, *): Array}} Object exposing
 *     tokenise(input, description). The description is handed unchanged to
 *     StringSource together with the input.
 */
function RegexTokeniser(rules) {
    // RegExp properties whose flags change what a pattern matches, paired with
    // the flag character. Properties an engine does not know about are
    // undefined and therefore skipped.
    var preservedFlags = [
        ["ignoreCase", "i"],
        ["multiline", "m"],
        ["dotAll", "s"],
        ["unicode", "u"],
        ["unicodeSets", "v"]
    ];

    // Recompiles a regex with the global flag plus the matching-relevant flags
    // of the original.
    function compileRegex(regex) {
        var flags = "g";
        preservedFlags.forEach(function(flag) {
            if (regex[flag[0]]) {
                flags += flag[1];
            }
        });
        return new RegExp(regex.source, flags);
    }

    var compiledRules = rules.map(function(rule) {
        return {
            name: rule.name,
            regex: compileRegex(rule.regex)
        };
    });

    // Splits the input into tokens and appends the terminating "end" token.
    function tokenise(input, description) {
        var source = new StringSource(input, description);
        var index = 0;
        var tokens = [];

        while (index < input.length) {
            var result = readNextToken(input, index, source);
            index = result.endIndex;
            tokens.push(result.token);
        }

        tokens.push(endToken(input, source));
        return tokens;
    }

    // Reads exactly one token starting at startIndex. The returned endIndex is
    // always greater than startIndex.
    function readNextToken(string, startIndex, source) {
        for (var i = 0; i < compiledRules.length; i++) {
            var rule = compiledRules[i];
            // The compiled regexes are shared between calls, so the search
            // position has to be reset before every exec.
            rule.regex.lastIndex = startIndex;
            var match = rule.regex.exec(string);

            // A global regex may find a match further along the string; only a
            // non-empty match anchored at startIndex counts.
            if (match && match.index === startIndex && match[0].length > 0) {
                var matchEnd = startIndex + match[0].length;
                return {
                    token: new Token(
                        rule.name,
                        match[1],
                        source.range(startIndex, matchEnd)
                    ),
                    endIndex: matchEnd
                };
            }
        }

        // No rule applies here: consume a single character so the caller
        // still makes progress.
        var fallbackEnd = startIndex + 1;
        return {
            token: new Token(
                "unrecognisedCharacter",
                string.substring(startIndex, fallbackEnd),
                source.range(startIndex, fallbackEnd)
            ),
            endIndex: fallbackEnd
        };
    }

    // Builds the zero-width token that marks the end of the input.
    function endToken(input, source) {
        return new Token(
            "end",
            null,
            source.range(input.length, input.length)
        );
    }

    return {
        tokenise: tokenise
    };
}
|
|
|
|
|
|
|