Pages

Monday, 12 February 2018

JSON Logging with MDC using Log4j2 in Clojure

This grew out of necessity and illustrates JSON logging with MDC in Clojure. Also, it is generally understood that log4j2's async performance is better than that of other logging libraries available at this point in time.

The problem statement: logs from the application and from the included libraries must be output as JSON, in a form that can be fed directly to a Logstash endpoint. So the output format should be compatible with the defined Elasticsearch format. In my case there are some mandatory fields defined, without which Elasticsearch will discard the logs. The approach is simple. Use a pattern layout to log as JSON, and define additional pattern-converter keys as needed so that the necessary data objects can be marshalled into the logs.
Below is the log4j2.xml.
<?xml version="1.0" encoding="UTF-8"?>
<!-- NOTE: `packages` must name the Java package that contains the @Plugin
     converter class (com.example.logger.HeadersDataConverter); otherwise
     log4j2 cannot resolve the custom %hd pattern key. -->
<Configuration status="debug" xmlns="http://logging.apache.org/log4j/2.0/config" packages="com.example.logger">
  <Properties>
    <!-- get from env instead -->
    <Property name="application">appName</Property>
    <Property name="app-version">0.0.1</Property>
    <Property name="host">localhost</Property>
    <Property name="env">localhost</Property>
  </Properties>
  <Appenders>
    <Console name="console" target="SYSTEM_OUT">
      <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
    </Console>
    <RollingRandomAccessFile name="plain-log" fileName="logs/app_plain.log" filePattern="app_plain.log.%i" append="false" immediateFlush="true" bufferSize="262144">
        <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg %ex%n"/>
        <Policies>
          <SizeBasedTriggeringPolicy size="1GB"/>
        </Policies>
        <DefaultRolloverStrategy fileIndex="max" min="1" max="100" compressionLevel="3"/>
    </RollingRandomAccessFile>
    <!-- JSON layout via PatternLayout. All string-valued fields, including
         the ${env} property substitution, must be wrapped in double quotes
         or the emitted line is not valid JSON and Logstash will reject it. -->
    <RollingRandomAccessFile name="json-log" fileName="logs/app.log" filePattern="app.log.%i" append="true" immediateFlush="true" bufferSize="262144">
        <PatternLayout pattern='{"@timestamp":"%d{ISO8601}","thread":"%t","level":"%p","logger":"%c","description":"%m %ex","correlation_id":"%mdc{correlationid}","headers_data":%hd,"endpoint":"%mdc{endpoint}","environment":"${env}","application":"${application}","application_version":"${app-version}","type":"log","host":"${host}","data_version":2}%n'/>
        <Policies>
          <SizeBasedTriggeringPolicy size="1GB"/>
        </Policies>
        <DefaultRolloverStrategy fileIndex="max" min="1" max="100" compressionLevel="3"/>
    </RollingRandomAccessFile>
  </Appenders>
  <Loggers>
    <!-- additivity="false" stops events reaching the Root logger twice. -->
    <Logger name="com.example.core" level="debug" additivity="false">
      <AppenderRef ref="console" level="info"/>
      <AppenderRef ref="json-log"/>
      <AppenderRef ref="plain-log"/>
    </Logger>
    <Root level="info">
      <AppenderRef ref="console"/>
      <AppenderRef ref="json-log"/>
      <AppenderRef ref="plain-log"/>
    </Root>
  </Loggers>
</Configuration>
RollingRandomAccessFile has PatternLayout specified in JSON format with the necessary keys. Here headers_data is a key with a custom converter pattern %hd. This pattern is defined in a class HeadersDataConverter.java as follows.
package com.example.logger;

import org.apache.logging.log4j.core.LogEvent;
import org.apache.logging.log4j.core.config.plugins.Plugin;
import org.apache.logging.log4j.core.pattern.ConverterKeys;
import org.apache.logging.log4j.core.pattern.LogEventPatternConverter;
import org.apache.logging.log4j.util.ReadOnlyStringMap;

import com.example.logger.bean.RequestHeaderData;

/**
 * Pattern converter backing the custom {@code %hd} / {@code %headersData}
 * keys used by the JSON PatternLayout in log4j2.xml. For each log event it
 * copies well-known HTTP header values out of the event's context data
 * (the MDC / ThreadContext) into a {@link RequestHeaderData} bean, whose
 * {@code toString()} renders it as a JSON object.
 */
@Plugin(name="HeadersDataConverter", category="Converter")
@ConverterKeys({"hd", "headersData"})
public class HeadersDataConverter extends LogEventPatternConverter {

    protected HeadersDataConverter(String name, String style) {
        super(name, style);
    }

    /**
     * Factory method required by the log4j2 plugin system.
     *
     * @param options converter options from the pattern (unused)
     * @return a new converter instance
     */
    public static HeadersDataConverter newInstance(String[] options) {
        // Fixed name/style pair. The previous code passed
        // Thread.currentThread().getName() as the style, which captured the
        // plugin-initialization thread's name -- meaningless at format time.
        return new HeadersDataConverter("headersData", "headersData");
    }

    /** Builds a RequestHeaderData bean from the event's MDC entries. */
    private RequestHeaderData setHeaderData(LogEvent event) {
        ReadOnlyStringMap ctx = event.getContextData();
        RequestHeaderData hd = new RequestHeaderData();

        // Missing keys yield null, which serializes as JSON null.
        hd.setAccept(ctx.getValue("accept"));
        hd.setAcceptEncoding(ctx.getValue("accept-encoding"));
        hd.setAcceptLanguage(ctx.getValue("accept-language"));
        // ...
        hd.setxPoweredBy(ctx.getValue("x-powered-by"));
        return hd;
    }

    @Override
    public void format(LogEvent event, StringBuilder toAppendTo) {
        // RequestHeaderData.toString() marshals the bean to a JSON string.
        toAppendTo.append(setHeaderData(event));
    }
}
The RequestHeaderData is a Java bean which can be serialized, with an overridden toString() method that marshals the object to a string using ObjectMapper
package com.example.logger.bean;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import com.fasterxml.jackson.databind.annotation.JsonNaming;

import java.io.Serializable;

/**
 * headers_data bean: holds well-known HTTP header values copied from the
 * log4j2 ThreadContext; {@link #toString()} renders it as a JSON object
 * with snake_case keys (via SnakeCaseStrategy).
 */
@JsonIgnoreProperties(ignoreUnknown = true)
@JsonNaming(PropertyNamingStrategy.SnakeCaseStrategy.class)
public class RequestHeaderData implements Serializable {

    private static final long serialVersionUID = 3559298447657197997L;

    // Shared, static mapper: ObjectMapper is thread-safe once configured and
    // expensive to construct. The previous per-instance final field also made
    // the (non-serializable) mapper part of this Serializable bean's state.
    private static final ObjectMapper MAPPER = new ObjectMapper();

    private String accept;
    private String acceptEncoding;
    private String acceptLanguage;
    // ...
    private String xPoweredBy;

    public RequestHeaderData() {}

    // Generate getters and setters. Eclipse or any other IDE can do that for us.

    /**
     * Marshals this bean to a JSON string. On failure it returns "{}" so the
     * surrounding pattern-layout log line remains valid JSON (returning ""
     * would leave an empty value after the "headers_data": key).
     */
    @Override
    public String toString() {
        try {
            return MAPPER.writeValueAsString(this);
        } catch (Exception ignored) {
            // Deliberately best-effort: a logging bean must never throw.
            return "{}";
        }
    }
}
SnakeCaseStrategy is the conversion strategy used which automatically converts all camelcase words to underscore ones. Overrides can be specified using @JsonProperty("override_string_here"). That is all there is. Specifying packages="com.example.logger" in the log4j2.xml will allow us to use the HeadersDataConverter plugin registered with hd as one of the pattern converter keys.

Now we have logs in the format
{"@timestamp":"2018-02-08T18:40:07,793","thread":"main","level":"INFO","logger":"com.example.web","description":"Service started. ","correlation_id":"","headers_data":{"accept":null,"accept_encoding":null,"accept_language":null,"cache_control":null,"client_ip":null,"correlationid":null,"connection":null,"content_length":null,"content_type":null,"dnt":null,"host":null,"remote_addr":null,"request_method":null,"path_info":null,"pragma":null,"query_string":null,"true_client_ip":null,"url":null,"upgrade_insecure_requests":null,"user_agent":null,"via":null,"x_forwarded_for":null,"x_forwarded_host":null,"x_forwarded_port":null,"x_forwarded_proto":null,"x_orig_host":null,"x_powered_by":null},"endpoint":"","environment":localhost,"application":"appName","application_version":"0.0.1","type":"log","host":"localhost","data_version":2}

The project.clj should contain the following dependencies.
; ...
; logging
[org.clojure/tools.logging "0.4.0"]
[org.apache.logging.log4j/log4j-core "2.9.0"]
[org.apache.logging.log4j/log4j-api "2.9.0"]
[org.apache.logging.log4j/log4j-slf4j-impl "2.9.0"]
; custom json logging
[com.fasterxml.jackson.core/jackson-core "2.9.2"]
[com.fasterxml.jackson.core/jackson-annotations "2.9.2"]
[com.fasterxml.jackson.core/jackson-databind "2.9.2"]
[org.slf4j/slf4j-api "1.7.24"]
; ....
:source-paths ["src"]
:test-paths ["test"]
:java-source-paths ["src-java"]
:javac-options ["-target" "1.8" "-source" "1.8" "-Xlint:unchecked" "-Xlint:deprecation"]
; ...
tools.logging is the Clojure library which provides macros that delegate logging to the underlying logging library used (log4j2). The slf4j-api is an interface that can work with different logging libraries, and most well-known libraries implement it. So any third-party library that uses a different logging library, like Logback, will work. But we need a bridge, log4j-slf4j-impl, which routes all logs that go through SLF4J to log4j2. And since we defined a custom pattern, it works for all the logs. Simple it is.

The only caveat here is that the custom pattern converter requires a well-defined class. If the object is not known at compile time — as in, if we are logging arbitrary JSON — then it is easier to extend the Layout instead.

ThreadContext (MDC)
ThreadContext is the thread-local data that can be added to a particular thread in log4j2. SLF4J calls this MDC (Mapped Diagnostic Context). The point is, when a server gets a request which is handled by a thread or handed over to subsequent threads, any logs that happen during the execution of that request can be identified with some unique identifier, so that we can easily correlate all the logs for that particular request. Furthermore, if we have multiple services, we can correlate them using a unique correlationId, if set. This can be done by setting appropriate values in the thread-local context map.

Let's see how to do this with Aleph server in Clojure.
(ns com.example.web
  "Web Layer"
  (:require [aleph.http :as http]
            [manifold.stream :as stream]
            [compojure.core :as compojure :refer [GET POST defroutes]]
            [compojure.response :refer [Renderable]]
            [ring.middleware.params :refer [wrap-params]]
            [ring.middleware.keyword-params :refer [wrap-keyword-params]]
            [clojure.core.async :as async]
            [clojure.java.io :as io]
            [clojure.tools.logging :as log])
  (:import [org.apache.logging.log4j ThreadContext]))

;; Teach Compojure how to render a Manifold deferred: return it untouched
;; so Aleph can resolve it asynchronously instead of coercing it to a body.
(extend-protocol Renderable
  manifold.deferred.IDeferred
  (render [d _] d))

(defn say-hi
  "Minimal Ring handler: ignores the request and returns a canned 200."
  [req]
  (let [response-body "hi"]
    {:status 200
     :body   response-body}))

;; Installs ctx-coll (a java.util.Map of context data captured on another
;; thread) into the *current* thread's log4j2 ThreadContext, then runs body.
;; NOTE(review): the context is not removed afterwards; it is cleared later
;; by the wrap-http-response middleware.
(defmacro with-thread-context [ctx-coll & body]
  `(do
    (ThreadContext/putAll ~ctx-coll)  ;Set thread context
    ~@body))

(defn wrap-logging-context
  "Ring middleware that copies request info (endpoint, remote address,
  query string) plus every request header into the log4j2 ThreadContext,
  so all log lines emitted while handling this request carry that data."
  [handler]
  (fn [request]
    (let [request-info {"endpoint"     (:uri request)
                        "remote-addr"  (:remote-addr request)
                        "query-string" (:query-string request)}
          ctx          (merge request-info (:headers request))]
      (ThreadContext/putAll ctx))
    (handler request)))

;; Response post-processor: wipes this thread's log4j2 ThreadContext so
;; stale request data cannot leak into logs for the next request handled
;; by the same (pooled) thread. `options` is accepted but unused.
(defn http-response [response options]
  (ThreadContext/clearAll)  ;Clears thread context
  response)

(defn wrap-http-response
  "Ring middleware supporting both the sync and async handler arities.
  Every outgoing response is passed through http-response, which clears
  the logging ThreadContext populated earlier in the middleware chain."
  {:arglists '([handler] [handler options])}
  [handler & [{:as options}]]
  (letfn [(finish [response] (http-response response options))]
    (fn
      ([request]
       (finish (handler request)))
      ([request respond raise]
       (handler request (comp respond finish) raise)))))

;; Async handler: capture the calling thread's logging context, wait inside
;; a core.async go block, then re-install the context (via
;; with-thread-context) on whichever thread resumes the go block so that
;; logs emitted there still carry this request's MDC data.
;; NOTE(review): `stream/->` looks truncated -- manifold has no `->` fn;
;; this is presumably `stream/->source` (coercing the go-block channel to a
;; manifold source). Verify against the original code.
(defn say-hi-handler [req]
  (let [ctx (ThreadContext/getContext)]  ;Get current thread context
    (stream/take!
      (stream/->
        (async/go
          (let [_ (async/<! (async/timeout 1000))]
            (with-thread-context ctx
              (say-hi req))))))))

;; Route table: a single POST endpoint, handled asynchronously.
(defroutes app-routes
  (POST ["/hi/"] {} say-hi-handler))

;; Middleware is applied innermost-first: the logging context is installed
;; closest to the routes, and wrap-http-response (outermost) clears it on
;; the way out. Equivalent to threading app-routes through ->.
(def app
  (wrap-http-response
    (wrap-params
      (wrap-keyword-params
        (wrap-logging-context app-routes)))))

(defn -main []
  ;; #'app (the var, not its value) is passed so the handler can be
  ;; redefined at the REPL without restarting the server.
  (http/start-server #'app {:port 8080})
  (log/info "Service started."))
Here we wrap the Ring handler with the wrap-logging-context middleware, which sets the request map on the server thread handling the particular request. Since Aleph uses async threads for each Compojure route, we need to pass the context on to those threads. For that we get the context ctx in say-hi-handler and use the with-thread-context macro to do the job. That's all there is to logging with thread context.

Sidenote: Getting log4j2 to read the config is a big pile of mess when building a standalone jar because of Clojure, Java interop and compilation nuances. Makes me hate everything in this universe.