// seologs-alpine
// Version:
// Alpine - the Apache Log Parser
// 329 lines (278 loc) • 9.16 kB
// JavaScript
/**
*
* Alpine, the Apache Log Parser
*
* Created by blarsen on 02.10.14.
*/
var Buffer = require('./buffer');
var byline = require('byline');
var _ = require("underscore.string");
var through2 = require('through2');
/**
 * Constructor for a log parser instance.
 *
 * Attaches the module-level parser functions to the new instance under their
 * own names, then installs the log format.
 *
 * @param {*} [logformat] - Format passed to `setLogFormat`. Any falsy value
 *   (including an empty string) selects `Alpine.LOGFORMATS.COMBINED`.
 */
var Alpine = function (logformat) {
    Object.assign(this, {
        isJson,
        setLogFormat,
        getLogFormat,
        setStopOnError,
        getStopOnError,
        parseLine,
        getObjectStream,
        getStringStream,
        parseReadStream
    });

    this.setLogFormat(logformat ? logformat : Alpine.LOGFORMATS.COMBINED);
};
/**
 * Reports whether a log line looks like a JSON object.
 *
 * Only the first and last characters are inspected (`{` and `}`); the line is
 * not parsed, so a `true` result does not guarantee valid JSON.
 *
 * @param {string} firstLogLine - The line to inspect.
 * @returns {boolean} `true` when the line starts with `{` and ends with `}`.
 */
function isJson(firstLogLine) {
    // No logging here: this runs once per parsed line, so debug output would
    // flood stdout.
    return firstLogLine.charAt(0) === '{'
        && firstLogLine.charAt(firstLogLine.length - 1) === '}';
}
/**
 * Builds a `through2.obj` transform that runs every incoming chunk through
 * this parser's `parseLine` and pushes the resulting object downstream.
 *
 * @returns {*} Whatever `through2.obj` returns for the transform.
 */
function getObjectStream() {
    const parser = this;

    // Must stay a regular function: `this.push` relies on the `this` that the
    // transform is invoked with, not on the parser instance.
    function emitParsed(chunk, enc, callback) {
        this.push(parser.parseLine(chunk));
        callback();
    }

    return through2.obj(emitParsed);
}
/**
 * Builds a `through2.obj` transform that runs every incoming chunk through
 * this parser's `parseLine` and pushes the result downstream as a JSON string.
 *
 * @returns {*} Whatever `through2.obj` returns for the transform.
 */
function getStringStream() {
    const parser = this;

    // Must stay a regular function: `this.push` relies on the `this` that the
    // transform is invoked with, not on the parser instance.
    function emitSerialized(chunk, enc, callback) {
        const parsed = parser.parseLine(chunk);
        this.push(JSON.stringify(parsed));
        callback();
    }

    return through2.obj(emitSerialized);
}
/**
 * Wraps a readable stream with `byline.createStream`, parses each chunk it
 * emits with this parser's `parseLine`, and hands every result to `callback`.
 *
 * @param {*} stream - Source stream passed to `byline.createStream`.
 * @param {function(Object): void} callback - Called once per parsed line with
 *   the object returned by `parseLine`.
 */
function parseReadStream(stream, callback) {
    const parser = this;
    const lineStream = byline.createStream(stream);

    function handleLine(chunk, enc, done) {
        // Chunks are converted to strings before parsing.
        callback(parser.parseLine(chunk.toString()));
        done();
    }

    lineStream.pipe(through2.obj(handleLine));
}
/**
 * Returns the log format most recently stored on this instance.
 *
 * @returns {*} The value of `this.logformat`.
 */
function getLogFormat() {
    const { logformat } = this;
    return logformat;
}
/**
 * Stores the log format on this instance and caches its parsed field list.
 *
 * The raw format is assigned before parsing, so it remains set even when
 * `parseLogFormat` throws.
 *
 * @param {*} logformat - Format to store and pass to `parseLogFormat`.
 */
function setLogFormat(logformat) {
    const parser = this;
    parser.logformat = logformat;

    const parsedFields = parseLogFormat(logformat);
    parser.formatfields = parsedFields;
}
/**
 * Returns the stop-on-error setting stored on this instance.
 *
 * @returns {*} The value of `this.stopOnError` (`undefined` until it is set).
 */
function getStopOnError() {
    const { stopOnError } = this;
    return stopOnError;
}
/**
 * Stores the stop-on-error setting on this instance. `parseLine` reads it to
 * decide whether a missing quote or bracket delimiter raises an error.
 *
 * @param {*} stopOnError - Value to store as `this.stopOnError`.
 */
function setStopOnError(stopOnError) {
    const parser = this;
    parser.stopOnError = stopOnError;
}
/**
 * Parses one log line into a plain object.
 *
 * Two input shapes are handled:
 *
 * - JSON lines (as decided by `this.isJson`): the configured log format is
 *   read as a map from an Apache directive to the JSON property that carries
 *   its value, e.g. `{ '%v': 'vhost' }`. Each directive is resolved to its
 *   field name through `parseLogFormat`, and properties missing from the line
 *   are skipped.
 * - Everything else: the line is consumed field by field, in the order of
 *   `this.formatfields`, using the project's `Buffer` cursor.
 *
 * @param {string} line - The raw log line.
 * @returns {Object} An object holding `originalLine` plus one property per
 *   parsed field, keyed by field name.
 * @throws {SyntaxError} When a line passes `isJson` but is not valid JSON.
 * @throws {Error} When `stopOnError` is truthy and a quoted or date field does
 *   not start with its opening delimiter.
 */
function parseLine(line) {
    var result = {
        originalLine: line
    };

    if (this.isJson(line)) {
        var logFormat = this.getLogFormat();
        const jsonObj = JSON.parse(line);

        for (const [directive, jsonKey] of Object.entries(logFormat)) {
            // Shape of formatfield[0], e.g. for '%v':
            // { field: 'v', name: 'canonicalServerName', isQuoted: false,
            //   isDate: false, hasColon: false }
            const formatfield = parseLogFormat(directive);

            // Go through Object.prototype: the parsed line is external data
            // and may carry its own "hasOwnProperty" key, which would shadow
            // the method and make a direct call throw.
            if (Object.prototype.hasOwnProperty.call(jsonObj, jsonKey)) {
                result[formatfield[0]['name']] = jsonObj[jsonKey];
            }
        }
        return result;
    }

    var buf = new Buffer(line, 0);
    var stopOnError = this.stopOnError;

    this.formatfields.forEach(function (field) {
        buf.skipSpaces();
        var val;

        if (field.isQuoted) {
            if (buf.lookingAt() !== '"' && stopOnError) {
                throw new Error("Field defined as quoted was not quoted");
            }
            // Without stopOnError the cursor still advances one position, even
            // when the opening quote is missing.
            buf.skip();
            val = buf.getUpto('"');
            buf.skip();
        } else if (field.isDate) {
            if (buf.lookingAt() !== '[' && stopOnError) {
                throw new Error("Time field is not enclosed in brackets");
            }
            buf.skip();
            val = buf.getUpto(']');
            buf.skip();
        } else if (field.hasColon) {
            // The field is joined to the next one by ':' in the log format.
            val = buf.getUpto(':');
            buf.skip();
        } else {
            val = buf.getUpto(' ');
        }

        result[field.name] = val;
    });

    return result;
}
/**
 * Splits a log format string into an ordered list of field descriptors.
 *
 * The format is read as space-separated tokens. A token may join several
 * directives with ':' (for example `%h:%p`); each directive becomes its own
 * descriptor, and every one that is followed by a ':' is flagged with
 * `hasColon: true` so the line parser knows to stop at the colon rather than
 * at the next space.
 *
 * @param {string} logformat - The log format definition.
 * @returns {Array<Object>} Descriptors produced by `addField`, each extended
 *   with a boolean `hasColon` property.
 * @throws {Error} Propagated from `addField` for malformed or unknown
 *   directives.
 */
function parseLogFormat(logformat) {
    var fields = [];
    var buf = new Buffer(logformat, 0);

    while (buf.hasMore()) {
        buf.skipSpaces();
        var token = buf.getUpto(" ");

        // hasColon() yields the colon-separated parts, or false when the token
        // contains no colon; treat the latter as a single-part token.
        var parts = hasColon(token) || [token];
        var lastIndex = parts.length - 1;

        parts.forEach(function (part, index) {
            var fieldObject = addField(part);
            // Every part except the last is terminated by ':' in the log
            // line. Flagging only the first part would mis-parse chains of
            // three or more directives.
            fieldObject.hasColon = index < lastIndex;
            fields.push(fieldObject);
        });
    }

    return fields;
}
/**
 * Removes one pair of enclosing delimiters from a token: either double quotes
 * (`"..."`) or square brackets (`[...]`). Tokens that are not wrapped in a
 * matching pair are returned unchanged.
 *
 * @param {string} text - Token to unwrap.
 * @returns {string} The token without its first and last character when it
 *   was wrapped, otherwise the token itself.
 */
function stripQuotes(text) {
    var delimiterPairs = [
        ['"', '"'],
        ['[', ']']
    ];

    var isWrapped = delimiterPairs.some(function (pair) {
        return _.startsWith(text, pair[0]) && _.endsWith(text, pair[1]);
    });

    return isWrapped ? text.slice(1, -1) : text;
}
/**
 * Splits a token on ':' when it contains at least one colon.
 *
 * @param {string} str - Token to examine.
 * @returns {Array<string>|false} The colon-separated parts, or `false` when
 *   the token contains no colon.
 */
function hasColon(str) {
    return str.indexOf(':') === -1 ? false : str.split(':');
}
/**
 * Turns one log format directive (e.g. `%h`, `%>s`, `"%{Referer}i"`) into a
 * field descriptor.
 *
 * Processing steps: note whether the token is quoted, strip the enclosing
 * quotes/brackets, drop the leading `%`, drop status-code modifiers and the
 * `<` / `>` redirection markers, then resolve the directive against `FIELDS`
 * (and `PARAMFIELDS` for the `%{param}x` form).
 *
 * @param {string} field - A single directive token from the log format.
 * @returns {{field: string, name: string, isQuoted: boolean, isDate: boolean}}
 *   `field` is the bare directive, `name` the property name used in parsed
 *   results (for parameterized directives: the `PARAMFIELDS` label, a space,
 *   and the parameter), `isQuoted` whether the token started with `"`, and
 *   `isDate` whether the directive is `t`.
 * @throws {Error} When the token does not start with `%`, has an unterminated
 *   `{`, parameterizes a directive that takes no parameter, or names an
 *   unknown directive.
 */
function addField(field) {
    var isQuoted = field[0] === '"';
    var directive = stripQuotes(field);

    // Check that this is a field definition (starting with %) and remove the prefix
    if (directive[0] !== "%") {
        throw new Error("Field does not start with %: " + directive);
    }
    directive = directive.substring(1);

    // Remove modifiers: an optional "!" and a comma-separated status list in
    // front of the directive (e.g. "%!200,304,302{Referer}i"), plus the
    // "<" / ">" markers (e.g. "%>s").
    directive = directive.replace(/^[0-9!,]+/, "");
    directive = directive.replace(/[<>]/g, "");

    var fieldName = FIELDS[directive];

    // Handle parameterized fields
    if (directive.indexOf('{') >= 0) {
        var matches = (/\{(.*)\}(.*)/).exec(directive);
        if (!matches) {
            // "{" without a closing "}" - fail with a message instead of
            // dereferencing a null match.
            throw new Error("Malformed parameterized field: " + directive);
        }
        var value = matches[1];
        directive = matches[2];
        if (!PARAMFIELDS[directive]) {
            throw new Error("The field " + directive + " should not be parameterized");
        }
        fieldName = PARAMFIELDS[directive] + ' ' + value;
    }

    if (!FIELDS[directive]) {
        throw new Error("Unknown log format field " + directive);
    }

    return {
        field: directive,
        name: fieldName,
        isQuoted: isQuoted,
        isDate: directive === 't'
    };
}
/**
 * Predefined log format strings exposed on the constructor. COMBINED is the
 * format the constructor falls back to when none is supplied.
 *
 * COMBINED and CLF_VHOST are the CLF format with extra directives appended or
 * prepended respectively.
 */
Alpine.LOGFORMATS = (function () {
    var clf = '%h %l %u %t "%r" %>s %b';

    return {
        COMBINED: clf + ' "%{Referer}i" "%{User-agent}i"',
        CLF: clf,
        CLF_VHOST: '%v ' + clf
    };
}());
/**
 * Maps each supported log format directive (the part after `%`) to the
 * property name used for it in parsed results.
 *
 * Every key appears exactly once. The table previously listed 'p' and 'P'
 * twice ('port' / 'pid' first, 'formatPort' / 'pidFormat' later); in an object
 * literal the later entry wins, so the earlier names were never used. The
 * effective names are kept here so existing output does not change.
 * NOTE(review): 'port' / 'pid' may have been the intended names for the plain
 * `%p` / `%P` directives - confirm before renaming.
 *
 * 'D' and 'T' both map to 'serveTime', so a format containing both stores
 * them under the same result property.
 */
var FIELDS = {
    'a': 'remoteIP',
    'A': 'localIP',
    'B': 'size',
    'b': 'sizeCLF',
    'D': 'serveTime',
    'f': 'filename',
    'h': 'remoteHost',
    'H': 'requestProtocol',
    'k': 'keepaliveRequests',
    'l': 'logname',
    'm': 'requestMethod',
    'p': 'formatPort',
    'P': 'pidFormat',
    'q': 'queryString',
    'r': 'request',
    'R': 'responseHandler',
    's': 'status',
    't': 'time',
    'T': 'serveTime',
    'u': 'remoteUser',
    'U': 'urlPath',
    'v': 'canonicalServerName',
    'V': 'serverName',
    'X': 'connectionStatus',
    'I': 'bytesReceived',
    'O': 'bytesSent',
    'C': 'cookie',
    'e': 'environment',
    'i': 'requestHeader',
    'n': 'note',
    'o': 'responseHeader',
    '^ti': 'requestTrailerLine',
    '^to': 'responseTrailerLine'
};
/**
 * Directives that accept a `{parameter}` (as in `%{Referer}i`), mapped to the
 * label that prefixes the parameter in the parsed result's property name
 * (e.g. 'RequestHeader Referer').
 *
 * Declared with `var` so the table stays module-local; a bare assignment
 * creates an implicit global and throws in strict mode.
 */
var PARAMFIELDS = {
    // Apache's cookie directive is the upper-case `%{VARNAME}C`, matching the
    // 'C' entry in FIELDS. The lower-case key is kept for compatibility.
    "C": "Cookie",
    "c": "Cookie",
    "e": "Environment",
    "i": "RequestHeader",
    "n": "Note",
    "o": "ResponseHeader",
    "p": "Port",
    "P": "PID",
    "t": "Time",
    '^ti': 'RequestTrailerLine',
    '^to': 'ResponseTrailerLine'
};
// The module's sole export is the Alpine constructor.
module.exports = Alpine;