需求说明

  1. 客户端日志包含多个项目
  2. 不同项目request_body分为kv和json格式

nginx日志格式

# log_format for projects whose request_body is key=value data (a=1&b=2).
# Usage example:
# access_log  /var/log/nginx/projecta.zaza.com.access.json.log request_body_kv;
# NOTE(review): the Filebeat input reads /data/log/nginx/*.json.log - confirm
# that the access_log path used in production is covered by that glob.
#
# escape=json is required because the line is parsed as JSON downstream:
# nginx's default escaping writes '"', '\' and bytes above 126 as \xXX, which
# is not a valid JSON escape and makes the whole log line unparseable.
# This format deliberately has no "request_body_type" field; its absence is
# what routes these events to the kv parser in Logstash.
log_format request_body_kv escape=json '{"@timestamp":"$time_iso8601",'
             '"host":"$server_addr",'
             '"clientip":"$remote_addr",'
             '"size":$body_bytes_sent,'
             '"responsetime":$request_time,'
             '"upstreamtime":"$upstream_response_time",'
             '"upstreamhost":"$upstream_addr",'
             '"http_host":"$host",'
             '"url":"$uri",'
             '"request_uri":"$request_uri",'
             '"request_method":"$request_method",'
             '"request_body":"$request_body",'
             '"xff":"$http_x_forwarded_for",'
             '"referer":"$http_referer",'
             '"agent":"$http_user_agent",'
             '"status":"$status"}';
             
# log_format for projects whose request_body is a JSON document.
# Usage example:
# access_log  /var/log/nginx/projectb.zaza.access.json.log request_body_json;
#
# escape=json keeps the embedded body (and every other variable) a valid JSON
# string, so the line as a whole can be parsed as JSON.
log_format request_body_json escape=json
    '{"@timestamp":"$time_iso8601",'
    '"host":"$server_addr",'
    '"clientip":"$remote_addr",'
    '"size":$body_bytes_sent,'
    '"responsetime":$request_time,'
    '"upstreamtime":"$upstream_response_time",'
    '"upstreamhost":"$upstream_addr",'
    '"http_host":"$host",'
    '"url":"$uri",'
    '"request_uri":"$request_uri",'
    '"request_method":"$request_method",'
    '"request_body":"$request_body",'
    '"xff":"$http_x_forwarded_for",'
    '"referer":"$http_referer",'
    '"agent":"$http_user_agent",'
    # Marker field: lets downstream processing tell JSON bodies from kv bodies.
    '"request_body_type":"json",'
    '"status":"$status"}';

filebeat配置

# ============================= Filebeat inputs ================================
filebeat.inputs:
  - type: log
    enabled: true
    paths:
      # Pick up the JSON-formatted nginx access logs.
      - /data/log/nginx/*.json.log
    # Skip lines matching these patterns (clb-healthcheck agent, favicon.ico).
    exclude_lines:
      - '"agent":"clb-healthcheck"'
      - 'favicon.ico'
    fields:
      # This value becomes part of the index name, so it must follow the
      # index naming rules.
      document_type: client

# ----------------------------- Logstash output --------------------------------
output.logstash:
  hosts:
    - "127.0.0.1:5026"

logstash配置

# Receive events shipped by Filebeat over the Beats protocol.
input {
    beats {
        # Listen on all interfaces; the port matches output.logstash in Filebeat.
        host => "0.0.0.0"
        port => "5026"
    }
}

# Turn one nginx JSON access-log line into a structured event and expand the
# request body (JSON or key=value) into top-level fields.
filter {
    # logtype is the project short name, taken from the log file name.
    # Filebeat 7 stores the file path in [log][file][path].
    # Example: /data/log/nginx/projecta.zaza.com.access.json.log -> "projecta"
    # (".*/" runs up to the last slash; the lazy capture stops at the first dot).
    grok {
        match => { "[log][file][path]" => ".*/(?<logtype>.*?)\..*" }
    }

    # "message" holds the raw nginx log line, which is itself a JSON object.
    json {
        source => "message"
    }

    # request_body may be a JSON document or a key=value string.
    # The nginx "request_body_json" format adds "request_body_type":"json" so
    # the two cases can be told apart; kv-format logs do not carry the field.
    if [request_body_type] == "json" {
        # Requests without a body are logged with an empty request_body under
        # escape=json. Parsing an empty string would only tag the event with
        # _jsonparsefailure, so the body is parsed only when it has content.
        if [request_body] and [request_body] != "" {
            json {
                source => "request_body"
            }
        }
    } else {
        kv {
            source => "request_body"
            field_split => "&"
            value_split => "="
        }
    }

    # URL-decode every field of the event.
    urldecode {
        all_fields => true
    }

    mutate {
        # nginx logs status as a quoted string; store it as a number.
        convert => {
            "status" => "integer"
        }
        # The raw line and raw body are no longer needed once parsed.
        remove_field => [ "message", "request_body" ]
    }
}

# Ship the parsed events to Elasticsearch, one monthly index per project.
output {
    elasticsearch {
        hosts => "127.0.0.1:9200"
        # NOTE(review): credentials are stored inline in this file - consider
        # moving them to the Logstash keystore or an environment variable.
        user => elastic
        password => "password"
        manage_template => false
        # Resulting index name, e.g. projecta-client-2021-11
        index => "%{logtype}-%{[fields][document_type]}-%{+YYYY-MM}"
    }

    # Debugging aids (uncomment as needed).
    # Print only the events of one document type:
    #if [fields][document_type] == "projecta" {
    #  stdout{codec => rubydebug}
    #}
    # Print every event; to inspect the raw Filebeat data, also comment out
    # the filter section:
    #stdout{codec => rubydebug}
}