/*jshint node:true */
/*

  The MIT License (MIT)

  Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

'use strict';

var InputScanner = require('../core/inputscanner').InputScanner;
var Token = require('../core/token').Token;
var TokenStream = require('../core/tokenstream').TokenStream;
var WhitespacePattern = require('./whitespacepattern').WhitespacePattern;

var TOKEN = {
  START: 'TK_START',
  RAW: 'TK_RAW',
  EOF: 'TK_EOF'
};
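
// Base Tokenizer: scans an input string into a linked stream of tokens.
// Language-specific tokenizers build on this by overriding the hook
// methods defined below.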
var Tokenizer = function(input_string, options) {
  this._input = new InputScanner(input_string);
  this._options = options || {};
  this.__tokens = null;

  this._patterns = {};
  this._patterns.whitespace = new WhitespacePattern(this._input);
};
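
// Tokenizes the entire input and returns the resulting TokenStream.
// Comments are collected and attached to the following non-comment token,
// and opening/closing pairs are cross-linked via a stack of open tokens.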
Tokenizer.prototype.tokenize = function() {
  this._input.restart();
  this.__tokens = new TokenStream();

  this._reset();

  var current;
  var previous = new Token(TOKEN.START, '');
  var open_token = null;
  var open_stack = [];
  var comments = new TokenStream();

  while (previous.type !== TOKEN.EOF) {
    current = this._get_next_token(previous, open_token);

    // Collect any run of comment tokens; they are attached to the next
    // non-comment token instead of being emitted on their own.
    while (this._is_comment(current)) {
      comments.add(current);
      current = this._get_next_token(previous, open_token);
    }

    if (!comments.isEmpty()) {
      current.comments_before = comments;
      comments = new TokenStream();
    }

    current.parent = open_token;

    // Track nesting: opening tokens are pushed on a stack, and a matching
    // closing token links the pair and restores the enclosing parent.
    if (this._is_opening(current)) {
      open_stack.push(open_token);
      open_token = current;
    } else if (open_token && this._is_closing(current, open_token)) {
      current.opened = open_token;
      open_token.closed = current;
      open_token = open_stack.pop();
      current.parent = open_token;
    }

    // Chain tokens into a doubly linked list.
    current.previous = previous;
    previous.next = current;

    this.__tokens.add(current);
    previous = current;
  }

  return this.__tokens;
};
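
// True until the first token has been added to the output stream.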
Tokenizer.prototype._is_first_token = function() {
  return this.__tokens.isEmpty();
};
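
// Hook: reset subclass state before tokenizing. No-op in the base class.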
Tokenizer.prototype._reset = function() {};
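
// Hook: produce the next token. The base implementation skips whitespace,
// then returns the rest of the current line as a RAW token, or EOF when
// the input is exhausted.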
Tokenizer.prototype._get_next_token = function(previous_token, open_token) { // jshint unused:false
  this._readWhitespace();
  var resulting_string = this._input.read(/.+/g);
  if (resulting_string) {
    return this._create_token(TOKEN.RAW, resulting_string);
  } else {
    return this._create_token(TOKEN.EOF, '');
  }
};
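
// Hook: whether current_token is a comment. Always false in the base class.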
Tokenizer.prototype._is_comment = function(current_token) { // jshint unused:false
  return false;
};
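
// Hook: whether current_token opens a paired region (e.g. a brace or tag).
// Always false in the base class.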
Tokenizer.prototype._is_opening = function(current_token) { // jshint unused:false
  return false;
};
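
// Hook: whether current_token closes the given open_token. Always false in
// the base class.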
Tokenizer.prototype._is_closing = function(current_token, open_token) { // jshint unused:false
  return false;
};
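
// Builds a token, capturing the newline count and preceding whitespace
// recorded by the whitespace pattern.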
Tokenizer.prototype._create_token = function(type, text) {
  var token = new Token(type, text,
    this._patterns.whitespace.newline_count,
    this._patterns.whitespace.whitespace_before_token);
  return token;
};
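
// Consumes whitespace at the current position, tracking newlines and the
// whitespace that will precede the next token.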
Tokenizer.prototype._readWhitespace = function() {
  return this._patterns.whitespace.read();
};
module.exports.Tokenizer = Tokenizer;
module.exports.TOKEN = TOKEN;
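
// Usage sketch (hypothetical, for illustration only): a language-specific
// tokenizer subclasses Tokenizer and overrides the hooks above. The names
// MyTokenizer and TK_COMMENT below are invented for this example.
//
//   var BaseTokenizer = require('./tokenizer').Tokenizer;
//
//   function MyTokenizer(input_string, options) {
//     BaseTokenizer.call(this, input_string, options);
//   }
//   MyTokenizer.prototype = new BaseTokenizer();
//
//   MyTokenizer.prototype._is_comment = function(current_token) {
//     return current_token.type === 'TK_COMMENT';
//   };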