Pages

Monday, 12 February 2018

JSON Logging with MDC using Log4j2 in Clojure

This grew out of necessity and illustrates JSON logging with MDC in Clojure. It is also generally understood that log4j2's async performance is better than that of other logging libraries at this point in time.

The problem statement is that the application and included-library logs must be output as JSON-formatted logs which can be fed directly to a Logstash endpoint. So the output format should be compatible with the defined Elasticsearch format. In my case there are some mandatory fields defined without which Elasticsearch will discard the logs. The approach is simple: use a pattern layout to log as JSON and define additional pattern converter keys as needed so that the necessary data object can be marshalled into the logs.
Below is the log4j2.xml.
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="debug" xmlns="http://logging.apache.org/log4j/2.0/config" packages="com.example.core.logger">
  <Properties>
    <!-- get from env instead -->
    <Property name="application">appName</Property>
    <Property name="app-version">0.0.1</Property>
    <Property name="host">localhost</Property>
    <Property name="env">localhost</Property>
  </Properties>
  <Appenders>
    <Console name="console" target="SYSTEM_OUT">
      <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
    </Console>
    <RollingRandomAccessFile name="plain-log" fileName="logs/app_plain.log" filePattern="app_plain.log.%i" append="false" immediateFlush="true" bufferSize="262144">
        <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg %ex%n"/>
        <Policies>
          <SizeBasedTriggeringPolicy size="1GB"/>
        </Policies>
        <DefaultRolloverStrategy fileIndex="max" min="1" max="100" compressionLevel="3"/>
    </RollingRandomAccessFile>
    <RollingRandomAccessFile name="json-log" fileName="logs/app.log" filePattern="app.log.%i" append="true" immediateFlush="true" bufferSize="262144">
        <PatternLayout pattern='{"@timestamp":"%d{ISO8601}","thread":"%t","level":"%p","logger":"%c","description":"%m %ex","correlation_id":"%mdc{correlationid}","headers_data":%hd,"endpoint":"%mdc{endpoint}","environment":${env},"application":"${application}","application_version":"${app-version}","type":"log","host":"${host}","data_version":2}%n'/>
        <Policies>
          <SizeBasedTriggeringPolicy size="1GB"/>
        </Policies>
        <DefaultRolloverStrategy fileIndex="max" min="1" max="100" compressionLevel="3"/>
    </RollingRandomAccessFile>
  </Appenders>
  <Loggers>
    <Logger name="com.example.core" level="debug" additivity="false">
      <AppenderRef ref="console" level="info"/>
      <AppenderRef ref="json-log"/>
      <AppenderRef ref="plain-log"/>
    </Logger>
    <Root level="info">
      <AppenderRef ref="console"/>
      <AppenderRef ref="json-log"/>
      <AppenderRef ref="plain-log"/>
    </Root>
  </Loggers>
</Configuration>
RollingRandomAccessFile has PatternLayout specified in JSON format with the necessary keys. Here headers_data is a key with a custom converter pattern %hd. This pattern is defined in a class HeadersDataConverter.java as follows.
package com.example.logger;

import org.apache.logging.log4j.core.LogEvent;
import org.apache.logging.log4j.core.config.plugins.Plugin;
import org.apache.logging.log4j.core.pattern.ConverterKeys;
import org.apache.logging.log4j.core.pattern.LogEventPatternConverter;
import org.apache.logging.log4j.util.ReadOnlyStringMap;

import com.example.logger.bean.RequestHeaderData;

/**
 * Pattern converter that renders request headers stored in the log4j2
 * ThreadContext (MDC) as a JSON object. Registered for the {@code %hd} /
 * {@code %headersData} pattern keys used by the "headers_data" field of the
 * JSON PatternLayout.
 */
@Plugin(name="HeadersDataConverter", category="Converter")
@ConverterKeys({"hd", "headersData"})
public class HeadersDataConverter extends LogEventPatternConverter {

    protected HeadersDataConverter(String name, String style) {
        super(name, style);
    }

    /**
     * Factory method required by the log4j2 plugin system.
     *
     * @param options converter options from the pattern; none are supported
     * @return a new converter instance
     */
    public static HeadersDataConverter newInstance(String[] options) {
        // Fix: previously Thread.currentThread().getName() was passed as the
        // "style" argument. The style is a constant used by HTML layouts and
        // must not depend on whichever thread happened to build the layout.
        return new HeadersDataConverter("headersData", "headersData");
    }

    /** Builds a RequestHeaderData bean from the event's context data (MDC). */
    private RequestHeaderData setHeaderData(LogEvent event) {
        ReadOnlyStringMap ctx = event.getContextData();
        RequestHeaderData hd = new RequestHeaderData();

        hd.setAccept(ctx.getValue("accept"));
        hd.setAcceptEncoding(ctx.getValue("accept-encoding"));
        hd.setAcceptLanguage(ctx.getValue("accept-language"));
        // ...
        hd.setxPoweredBy(ctx.getValue("x-powered-by"));
        return hd;
    }

    @Override
    public void format(LogEvent event, StringBuilder toAppendTo) {
        // RequestHeaderData.toString() marshals the bean to a JSON string,
        // so appending the bean emits JSON into the layout.
        toAppendTo.append(setHeaderData(event));
    }
}
The RequestHeaderData is a Java bean which can be serialized with an overridden toString() method that marshals the object to a string using ObjectMapper.
package com.example.logger.bean;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import com.fasterxml.jackson.databind.annotation.JsonNaming;

import java.io.Serializable;

/**
 * headers_data bean whose {@link #toString()} marshals the object to a JSON
 * string. Field names are serialized in snake_case via the naming strategy.
 */
@JsonIgnoreProperties(ignoreUnknown = true)
@JsonNaming(PropertyNamingStrategy.SnakeCaseStrategy.class)
public class RequestHeaderData implements Serializable {

    private static final long serialVersionUID = 3559298447657197997L;

    // ObjectMapper is thread-safe and expensive to construct: share a single
    // instance instead of allocating one per bean (the old per-instance,
    // non-transient field was also dragged into Java serialization).
    private static final ObjectMapper MAPPER = new ObjectMapper();

    private String accept;
    private String acceptEncoding;
    private String acceptLanguage;
    // ...
    private String xPoweredBy;

    public RequestHeaderData() {}

    // Generate getters and setters. Eclipse or any other IDE can do that for us.

    /**
     * Serializes this bean as JSON.
     *
     * @return the JSON representation, or {@code "{}"} if marshalling fails —
     *         an empty string would corrupt the surrounding JSON log line.
     */
    @Override
    public String toString() {
        try {
            return MAPPER.writeValueAsString(this);
        } catch (Exception ex) {
            return "{}";
        }
    }
}
SnakeCaseStrategy is the conversion strategy used which automatically converts all camelcase words to underscore ones. Overrides can be specified using @JsonProperty("override_string_here"). That is all there is. Specifying packages="com.example.logger" in the log4j2.xml will allow us to use the HeadersDataConverter plugin registered with hd as one of the pattern converter keys.

Now we have logs in the format
{"@timestamp":"2018-02-08T18:40:07,793","thread":"main","level":"INFO","logger":"com.example.web","description":"Service started. ","correlation_id":"","headers_data":{"accept":null,"accept_encoding":null,"accept_language":null,"cache_control":null,"client_ip":null,"correlationid":null,"connection":null,"content_length":null,"content_type":null,"dnt":null,"host":null,"remote_addr":null,"request_method":null,"path_info":null,"pragma":null,"query_string":null,"true_client_ip":null,"url":null,"upgrade_insecure_requests":null,"user_agent":null,"via":null,"x_forwarded_for":null,"x_forwarded_host":null,"x_forwarded_port":null,"x_forwarded_proto":null,"x_orig_host":null,"x_powered_by":null},"endpoint":"","environment":localhost,"application":"appName","application_version":"0.0.1","type":"log","host":"localhost","data_version":2}

The project.clj should contain the following dependencies.
; ...
; logging
[org.clojure/tools.logging "0.4.0"]
[org.apache.logging.log4j/log4j-core "2.9.0"]
[org.apache.logging.log4j/log4j-api "2.9.0"]
[org.apache.logging.log4j/log4j-slf4j-impl "2.9.0"]
; custom json logging
[com.fasterxml.jackson.core/jackson-core "2.9.2"]
[com.fasterxml.jackson.core/jackson-annotations "2.9.2"]
[com.fasterxml.jackson.core/jackson-databind "2.9.2"]
[org.slf4j/slf4j-api "1.7.24"]
; ....
:source-paths ["src"]
:test-paths ["test"]
:java-source-paths ["src-java"]
:javac-options ["-target" "1.8" "-source" "1.8" "-Xlint:unchecked" "-Xlint:deprecation"]
; ...
tools.logging is the Clojure library which provides macros that delegates logging to the underlying logging library used (log4j2). The slf4j-api is an interface that can work with different logging libraries and most well known libraries implement this. So any third-party library that uses a different logging library like logback will work. But we need a converter log4j-slf4j-impl which will capture all logs that works with SLF4J to be routed to log4j2. And since we defined a custom pattern, it works for all the logs. Simple it is.

The only caveat here is that the custom pattern converter requires a well-defined class. If the object is not known at compile time, as in if we are logging arbitrary JSON, then it is easier to extend the Layout instead.

ThreadContext (MDC)
ThreadContext is the local data that can be added to a particular thread in log4j2. SLF4J calls this MDC (Mapped Diagnostic Context). The point is, when a server gets a request which is handled by a thread or handed over to subsequent threads, any logs that happen during the execution of that request can be identified with some unique identifier so that we can easily correlate all the logs for that particular request. Furthermore, if we have multiple services, we can correlate them using a unique correlationId if set. This can be done by setting appropriate values in the thread-local context map.

Let's see how to do this with Aleph server in Clojure.
(ns com.example.web
  "Web Layer"
  (:require [aleph.http :as http]
            [manifold.stream :as stream]
            [compojure.core :as compojure :refer [GET POST defroutes]]
            [compojure.response :refer [Renderable]]
            [ring.middleware.params :refer [wrap-params]]
            [ring.middleware.keyword-params :refer [wrap-keyword-params]]
            [clojure.core.async :as async]
            [clojure.java.io :as io]
            [clojure.tools.logging :as log])
  (:import [org.apache.logging.log4j ThreadContext]))

;; Let Compojure render Manifold deferreds directly as responses.
(extend-protocol Renderable
  manifold.deferred.IDeferred
  (render [d _] d))

;; Minimal ring handler returning a plain 200 response.
(defn say-hi [req]
  {:status 200
   :body "hi"})

;; Runs body after copying ctx-coll (a string->string map) into the current
;; thread's log4j2 ThreadContext; used to propagate the MDC onto worker
;; threads. NOTE(review): nothing here clears the context afterwards — the
;; clearing is done by http-response below.
(defmacro with-thread-context [ctx-coll & body]
  `(do
    (ThreadContext/putAll ~ctx-coll)  ;Set thread context
    ~@body))

;; Ring middleware: stores request info plus all request headers in the
;; ThreadContext so the JSON layout can log them per request.
(defn wrap-logging-context [handler]
  (fn [request]     
    ;; Set request map and other info in the current thread context
    (ThreadContext/putAll (merge {"endpoint" (:uri request)
                                  "remote-addr" (:remote-addr request)
                                  "query-string" (:query-string request)}
                                  (:headers request)))
    (handler request)))

;; Clears the ThreadContext once the response is ready; options is unused.
(defn http-response [response options]
  (ThreadContext/clearAll)  ;Clears thread context
  response)

;; Wraps both the 1-arity (sync) and 3-arity (async) ring handler forms so
;; the ThreadContext is cleared on every response path.
(defn wrap-http-response
  {:arglists '([handler] [handler options])}
  [handler & [{:as options}]]
  (fn 
    ([request]
      (http-response (handler request) options))
    ([request respond raise]
      (handler request (fn [response] (respond (http-response response options))) raise))))

;; Captures the current ThreadContext and re-applies it inside the go block,
;; since the delayed work runs on a different thread than the request.
(defn say-hi-handler [req]
  (let [ctx (ThreadContext/getContext)]  ;Get current thread context
    (stream/take!
      (stream/->
        (async/go
          (let [_ (async/<! (async/timeout 1000))]
            (with-thread-context ctx
              (say-hi req))))))))

(defroutes app-routes
  (POST ["/hi/"] {} say-hi-handler))

;; Threading applies wrap-logging-context closest to the routes and
;; wrap-http-response as the outermost wrapper.
(def app
  (-> app-routes
      (wrap-logging-context)
      (wrap-keyword-params)
      (wrap-params)
      (wrap-http-response)))

;; Starts the aleph server on port 8080. #'app (the var) allows handler
;; redefinition at the REPL without restarting the server.
(defn -main []
  (http/start-server #'app {:port 8080})
  (log/info "Service started."))
Here we wrap the ring handler with wrap-logging-context middleware which will set the request map to the server thread handling the particular request. Since we use aleph async threads for each Compojure routes, we need to pass the context to these threads. For that we get the context ctx in the say-hi-handler and use with-thread-context macro to do the job. That's all there is to logging with thread context.

Sidenote: Getting log4j2 to read the config is a big pile of mess when building a standalone jar because of Clojure, Java interop and compilation nuances. Makes me hate everything in this universe.

Thursday, 18 January 2018

Rapid Prototyping in Clojure with boot-clj

I find boot-clj to be great in doing rapid prototypes. It can be considered analogous to GroovyConsole. We can dynamically add dependencies, write, modify code, run, experiment all within a single file without having to create a project as with default lein setup. Create a folder for experiments and add boot.properties in it.
#https://github.com/boot-clj/boot
BOOT_CLOJURE_NAME=org.clojure/clojure
BOOT_VERSION=2.7.1
BOOT_CLOJURE_VERSION=1.8.0
Then we can create our prototype files, say pilot.clj with the below example template.
#!/usr/bin/env boot

;; To add some repository
(merge-env! :repositories [["clojars" {:url "https://clojars.org/repo/" :snapshots true}]])

(defn deps
  ;; Fix: the docstring must come before the argument vector; placed after
  ;; it, the string was just a discarded expression, not a docstring.
  "Add dependencies to the namespace."
  [new-deps]
  (merge-env! :dependencies new-deps))

;; Add clojure
(deps '[[org.clojure/clojure "1.8.0"]])

;; Require
(require '[clojure.string :as str])
;; Import
(import '[javax.crypto.spec SecretKeySpec])

(println (str/upper-case "hi"))  ;; HI 
For faster startup of the boot-clj, add the following to the shell profile (.zshrc). Tune according to the machine.
# boot-clj faster startup
export BOOT_JVM_OPTIONS='
  -client
  -XX:+TieredCompilation
  -XX:TieredStopAtLevel=1
  -Xmx2g
  -XX:+UseConcMarkSweepGC
  -XX:+CMSClassUnloadingEnabled
  -Xverify:none'

Friday, 8 December 2017

Sublime Text for Clojure

Sublime Text actually changed the text editor landscape. Before it was all TextMate which was stealing the thunder. But that is somehow a begone era. In this post we will have a sneak peak on using Sublime Text 3 for Clojure development. At this time there is no Emacs to cider kind of integration right from within Sublime Text, but it is possible to write one if required, however, connecting to nrepl from command line would just do fine. Also Clojure is not Java development where you would require to add breakpoint and step code execution as things are fairly straight forward to understand as opposed to having bazillion class files when it comes typical enterprise Java projects. However if one needs, you can resort to Eclipse with Counterclockwise plugin which works just fine.

1. Have Package Control installed.
2. In the beginning there were no parenthesis. It all started with a Big Bang, after which the amount of parenthesis started expanding. Since Clojure has the eternal brackets from the beginning of universe, we need a better bracket highlighter.
Add the following config to the Sublime user settings, so that the bracket highlighter works properly and does not conflict with the editor's own matching.
{
    "auto_match_enabled": false,
    "match_brackets": false,
    "match_brackets_angle": false,
    "match_brackets_braces": false,
    "match_brackets_content": false,
    "match_brackets_square": false,
    "match_tags": false
}
This package is pretty cool as it will show the matched brackets in the gutter.
3. To keep the sanity in own pattern matching of brackets and since most Lisp programmers are borderline hippie, to commemorate that let us use Rainbowth. Once installed add the following palette in the path ~/Library/Application Support/Sublime Text 3/Packages/User/Rainbowth.sublime-settings which is taken from Emacs's package to make the highlighting easier on eyes when used in conjunction with Solarized Dark theme.
{
  "palettes": {
    "default": ["#707183", "#7388d6", "#909183", "#709870", "#907373", "#6276ba", "#858580", "#80a880", "#887070"]
  }
}
But Rainbowth needs to update the colour scheme so we need to do some extraction of the desired scheme. Let's say we are using Solarized Dark theme, then do the following.
$ cd "/Applications/Sublime Text.app"
$ mkdir -p ~/temp/subl
# Solarized theme is inside the legacy package 
$ cp "Contents/MacOS/Packages/Color Scheme - Legacy.sublime-package" ~/temp/subl
# For some other themes which are not in the above, use
# $ cp "Contents/MacOS/Packages/Color Scheme - Default.sublime-package" ~/temp/subl
$ cd ~/temp/subl && unzip "Color Scheme - Legacy.sublime-package"
$ cp "Solarized (Dark).tmTheme" "~/Library/Application Support/Sublime Text 3/Packages/"
4. Add Clojure.sublime-settings in the same directory with the below contents so that by default, the source uses two spaces, which is the standard as per style guide and since we use `-` to separate words instead of camel case or `_`, the word separator helps us to easily select symbols with double click. Else, doing a double click on a word will select up to the separator.
{
    "tab_size": 2,
    "word_separators": "./\\()\"':,.;<>~!@#$%^&*|+=[]{}`~?"
}
That's all there is specific to Clojure development.

Theming
Next we can look into few enhancements and theming to make it easy on eyes. I generally prefer dark themes as it looks cool and tends towards Solarized Dark. While generally most articles say to use a lighter background with a darker text, it depends on various other conditions like the room lighting, and of course personal preference. Refer the Dark UI section of the post for citations.

1. Install Ayu colour scheme. We will be using Ayu Mirage with Solarized Dark theme. Add the following config under user settings.
{
    "color_scheme": "Packages/Solarized (Dark).tmTheme",
    "theme": "ayu-mirage.sublime-theme",
    "ui_big_tabs": false,
    "ui_fix_tab_labels": true,
    "ui_font_size_small": true,
    "ui_font_source_code_pro": true,
    "ui_separator": true,
    "ui_wide_scrollbars": true
}
2. Additional UI settings follows. Since I got used to Monaco font from early days of OS X, I tend to stick with it instead of newer ones like Menlo.
{
    "font_face": "Monaco",
    "font_options": [
        "subpixel_antialias"
    ],
    "font_size": 16,
    "gpu_window_buffer": true,
    "highlight_modified_tabs": true,
    "scroll_past_end": true,
    "show_full_path": true,
    "translate_tabs_to_spaces": true,
    "word_wrap": false
}

Other Enhancements
1. GitGutter to see source control changes.
2. SideBarEnhancements plugin which adds more options to the side bar right click menu.
3. EDN for Clojure EDN syntax highlighting.

Once done, the editor will look elegant as in the screenshot.



Dark UI
[1] Negative contrast computer displays (dark letters on a light background) generally provide a more legible image than positive contrast displays (light letters on a dark background). Either black characters on a white background or white characters on a black background have been found to be more visible than green, yellow-orange, blue or red characters.20

[2] The greatest difference among groups is on the black & white pair: while the majority of people without dyslexia prefer it (32.67%), only 13.64% of the participants with dyslexia chose black text on white background.
Participants without dyslexia tend to prefer color pairs with a higher color and brightness contrast while people with dyslexia read faster when color pairs have lower contrasts. For instance, the color pair which was the fastest to read by the participants with dyslexia was black & creme (mean of 0.214 for the fixation duration) while black text over yellow background presents the largest fixation duration mean (0.239 seconds). Although the pair off-black & off-white is the one recommended in Web design for dyslexics (Bradford, 2011), none of the users with dyslexia selected it. This pair presents the highest fixation duration mean for the participants without dyslexia (0.193 seconds).

[3] Maureen did mention two situations when a dark background makes sense.

However, if you are using a display in a dark environment, it’s better to use a dark background as it lets you keep your eyes dark adapted. That’s why controls for airplanes and GPS units for cars switch to a dark background at night. Usually, however, the results don’t look like symbols and text floating in the darkness of space…there’s still a sense of there being a dark surface to ground the view. So the concerns above are somewhat mitigated.

I’ve seen recommendations that white on black is better for aging eyes, and for people with low vision because it reduces the amount of light scattering and distorting the image.

[4] Chief among the reasons many of you have expressed for preferring dark backgrounds is the reduced strain placed on the eyes when staring at the screen for many hours. Many developers state that light text on a dark background is easier to read over longer periods of time than dark text on a light background.

Thursday, 7 December 2017

Integration Tests with pytest

pytest is a unit testing library and coupled with requests package can be used for writing integration tests. With integration tests, couple of things we would need is a way to distinguish between environments like local, staging, production, different datacenters and run tests in all or some specific tests per env and such.

1. We need a way to pass env during test runs. We will use pytest-variables for that which makes it easy to get variables passed in from command line available to the tests. Another way is to write conftest. So our requirements.txt file will now have pytest-variables[hjson]==1.5.1 apart from other packages.
2. We will name our pytest file so that it starts with test, something like test_api.py
3. Now on writing the tests. I would recommend that the python code be modular and well organised instead of spaghetti coding. Read Modular Programming with Python by Erik Westra if you are interested. We could separate parts into different file or have things organised under class and keep it under the same file. The point is separation of concerns, readability and maintainability (especially by other programmers). A thing with OOP is that go for composition as opposed to inheritance when possible.
4. We will go with how to write pytests in an object oriented fashion rather than just writing test_ functions. Let's call the below module as test_api.py
import base64
import json
import logging
import time

import pytest
import requests

__version__ = "0.0.42"  # corresponds to app version
logging.basicConfig(level=logging.DEBUG)
__timeout = 90  # seconds

def get_headers(props):
    """Return header map built from the env config values."""
    # ...

def http_post(params, data, headers=None):
    """Send an HTTP POST request to params['server'] + params['endpoint']."""
    return requests.post(params['server'] + params['endpoint'], data=data, headers=headers)

class FooAPI:
    """Sample Foo API helper class."""

    def __init__(self):
        pass

@pytest.mark.timeout(__timeout)
class TestFooAPI:
    """Test Foo API service."""

    # Envs for which every test in this class runs; names correspond to the
    # config file names passed via --variables / -m on the command line.
    pytestmark = [pytest.mark.local, pytest.mark.ny1, pytest.mark.ams1, pytest.mark.ny1_green, pytest.mark.stg1]

    @pytest.fixture()
    def foo(self):
        """Return Foo object."""
        return FooAPI()

    def gen_name():
        """Standalone helper; not collected by pytest (no test_ prefix)."""
        pass

    def test_add_user(self, variables, foo):
        """Test adding of user."""
        log = logging.getLogger('test_add_user')
        # Fix: the request payload was built into `resp` but an undefined
        # `data` was posted; use the built payload as the POST body.
        data = foo.create_request(variables)
        headers = get_headers(variables)
        http_resp = http_post(variables, data, headers)
        assert http_resp.status_code == 200

    @pytest.mark.ams1_beta
    def test_beta_endpoint(self, variables, foo):
        """Test beta endpoint."""
        log = logging.getLogger("test_beta_endpoint")
        # b64encode requires bytes, not str.
        data = {'users': base64.b64encode(b'something'), 'bar': 'baz'}
        start = time.time()
        headers = get_headers(variables)
        log.info("\nheaders: %s", headers)
        http_resp = http_post(variables, data, headers)
        rtt = time.time() - start
        log.debug("\nrtt: %s\n", rtt)
        body = json.loads(http_resp.content)
        assert body['status'] is True
        assert body['code'] == 0
        assert http_resp.status_code == 200, 'Expecting 200 OK status code.'

class TestBarAPI:
    # ....
    pass

Any class that starts with Test and any function that starts with test will be executed by pytest by default. However this behaviour can be configured. Fixtures are variables that are passed to each test as arguments before it runs. There are various pytest markers. If all the pytest functions within a pytest class need to be executed in many different environments, we can specify them as pytestmark = [pytest.mark.env1, pytest.mark.env2, ...]. The env1, env2 correspond to env config file names. And if we need to execute specific tests against a specific env, exclude them from the above list and annotate that particular function with @pytest.mark.ams1_beta, which means, if the env passed via command line is ams1_beta, only then will this test execute; and since the env is not in the common pytestmark list, other tests will be skipped. An example use case is running sanity tests (a subset of tests) against production at a specific interval, where we do not have to run the full testsuite. The variables argument contains the config values defined in the config json which will be available to the test methods as a python dictionary.
5. The configuration files are under config directory. Example local.json is below. The name of the file corresponds to the env which we will be passing to the pytest via command line.
{
    "server": "http://localhost:8080",
    "endpoint": "/foo/test/",
    "key": "local_key.pem",
    "cert": "local_cert.pem",
    "env": "local",
    "json": true
}
6. Tying it all together using shell script. It is better to use virtualenv. So below script does a bit of housekeeping before running the tests.
#!/bin/bash

# Integration test runner with pytest: prepares a virtualenv, runs the suite
# against the requested env(s), and leaves the suite's exit status in
# $return_code for the final `exit`.

TEST_DIR=test_api
VENV_DIR=venv
CONFIG_DIR=config

function activateVirtualEnv {
    echo "Activating venv .."
    if ! hash virtualenv 2>/dev/null; then
        echo "virtualenv not found. Installing .."
        pip install virtualenv
    fi
    virtualenv "$VENV_DIR"
    source "$VENV_DIR/bin/activate"
}

function installDeps {
    echo "Installing dependencies .."
    pip install -r ../requirements.txt
}

function prepareForTest {
    echo "Running integration test .."
    activateVirtualEnv
    installDeps
}

function deactivateVirtualEnv {
    echo "Deactivating venv .."
    deactivate
}

function displayhelp {
    echo "Usage: $0 options"
    echo "where options include:"
    echo "  local    run test in local env"
    echo "  ams1     run test in Amsterdam production datacenter 1"
    echo "  stg1     run test in staging environment 1"
}

# $1 result xml file name
# $2 env variable name
function run {
    echo "Running test against $2"
    # -n xdist flag (parallel workers)
    # -s capture output
    # -v verbose
    # --variables The config file
    # -m marker
    py.test -n 4 -s -v --junitxml="$1" --variables "$CONFIG_DIR/$2.json" -m "$2" test_api.py
}

# $1 results.xml
# $2 env : local, rqa2, etc. corresponds to the filename in config
# To run against all machines in staging and production, use env as stg and prod respectively
# For integration test
# ./runtests.sh results.xml local
function main {
    result="$1"
    env="$2"
    echo "result: $result"
    echo "env: $env"
    echo "config: $CONFIG_DIR"

    cd "$TEST_DIR" || exit 1
    prepareForTest
    # Capture the suite's status immediately after each branch: previously
    # return_code=$? ran after deactivateVirtualEnv's echo and captured that
    # instead of the test result.
    if [[ $env == "stg" ]]; then
        run stg1.xml stg1
        run stg2.xml stg2
        python merge_junit_results.py stg1.xml stg2.xml > "$result"
        return_code=$?
    elif [[ $env == "prod" ]]; then
        run ny1.xml ny1
        run ams1.xml ams1
        # Fix: merge the file that was actually produced (ams1.xml, not ams2.xml).
        python merge_junit_results.py ny1.xml ams1.xml > "$result"
        return_code=$?
    else
        # individual env run
        run "$result" "$env"
        return_code=$?
    fi
    deactivateVirtualEnv
}

main "$@"

exit $return_code
The merge_junit_results.py can be obtained from the cgoldber gist.
To run the test we run the command ./runtests.sh ${result}.xml ${env-config-file-name}, which will check for virtualenv and such and run the test. To run the tests in parallel use xdist.
7. A sample requirements.txt file would look like
pytest==3.0.7
pytest-variables[hjson]==1.5.1
pytest-xdist==1.15.0
requests==2.13.0
pytest-timeout==1.2.0
8. To integrate the results with jenkins, it needs to know the final result of the testsuite which will be present in the results.xml file. To parse that use the code below.
#!/bin/python

"""Extract test result failure, error values for Jenkins job."""

import xml.etree.ElementTree as ET
import sys


def parse_result(file):
    """Parse the given results.xml file."""
    # NOTE(review): assumes the junit XML root element itself carries the
    # "failures" and "errors" attributes (a flat <testsuite> root) -- confirm
    # against the merged output produced by merge_junit_results.py.
    root = ET.parse(file).getroot()
    if root.attrib["failures"] == "0" and root.attrib["errors"] == "0":
        print "ALLTESTSUCCESS"
    else:
        # Non-zero exit status tells the Jenkins job to mark the build failed.
        print "ERROR"
        sys.exit(1)


if __name__ == '__main__':
    print "junit filename: ", sys.argv[1]
    parse_result(sys.argv[1])

Now we have a full blown integration testsuite which can be integrated with CI/CD pipeline.

For clarity, sample folder structure will look like below.
test
├── requirements.txt
├── runtests.sh
└── test_api
    ├── __init__.py
    ├── local.xml
    ├── config
    │   ├── ams1.json
    │   ├── ams1_beta.json
    │   ├── local.json
    │   ├── local_beta.json
    │   ├── ny1.json
    │   ├── ny1_beta.json
    │   ├── ny1_green.json
    │   └── vm.json
    ├── merge_junit_results.py
    ├── parse_xml.py
    ├── result.xml
    ├── test_api.py
    └── venv

Wednesday, 6 December 2017

Passcode Lock iPhone even with TouchID Enabled

I found a way to force lock iPhone that has TouchID enabled. If done as below, then the phone requires passcode to unlock even when TouchID is configured to unlock the device.
1. Enable AssistiveTouch from Accessibility.
2. And under AssistiveTouch->Custom Actions, say for Long Press set it to SOS.

Now when we need to passphrase lock, long press the assistive touch button, which will dial the emergency SOS number. Cancel the call immediately and the phone gets locked. The only problem is that it makes the emergency sound when dialling.

Or second option is to press the unlock button and tap the wrong finger five times and the phone will lock itself. Whichever is easy works. Now how would the second option work with FaceID?

Tuesday, 28 November 2017

Dynamic Binary Analysis Using Hopper Disassembler on OS X

Dynamic code analysis in reverse engineering binaries is where we execute the binary for analysis, adding breakpoints, inspecting processor state and such. This is very useful to understand the program's algorithm and values much easier than static binary analysis. However when we analyse malware, running them is dangerous. But we can have an isolated machine where the program can be run and analysed remotely. This is an explanation of these techniques as well as a tutorial on how to reverse OS X crackme challenge (1-Sandwitch.zip | mirror).
This is a small app (by HAWKE, 2009) which requires us to find the correct serial key. The goal is to produce a keygen as opposed to patching. Patching is just as easy as changing one instruction.

1. Load the Sandwich.app in Hopper Disassembler, choose the default and proceed to disassembly screen.
2. Instead of remote server, we will connect to the current system using Hopper Debugger Server. Download, install and run the app which will launch the debug server locally.

3. Now in the Hopper, navigate to Debug->Select Debugger... and the server will be listed under local servers section. Double click to launch the server window.
4. The path will be auto filled to something like /Users/.../Sandwich.app/Contents/MacOS/Sandwich. Click the Play icon under controls which will run the Sandwich.app under the debugger.
5. Check the left pane for the available methods. We can see -[SandwichAppDelegate validateSerial:] method. Check the pseudo code to get an idea of the algorithm.

6. Now is the time to add breakpoints so that we can step and check register values as the program runs. So add to the initial address 00001b2d 55 push ebp under that section.
7. Go back to the running program and enter some serial number and click validate, which will make the debugger stop at the breakpoint.
8. The first conditional check is cmp eax, 0x13 which checks if the serial number is 19 decimals long.

9. Second check is mov dword [esp+0x28+var_20], 0x2034 ; @"-" which then checks the number of components separated by - and compares it with cmp eax, 0x4, which means, the serial key has four dashes in it. So the format is xxxx-xxxx-xxxx-xxxxx

10. If all the above passes, it gets each block (chunk separated by dash) and check if length is four.

11. Next it takes the first block 0x0 gets the int value and place it into esi register mov esi, eax.

12. Then it takes the second block 0x1, get the int value and adds it to the block 1 esi value from above step and place it into esi register itself.

13. There is no check for third block which means it can be any random characters of length four.
14. The last block is constructed by computing shift arithmetic right by two of the current esi sar esi, 0x2 from step 12 and then subtracting it from 0x19c5 the HEX for 6597 loaded into edx and then comparing the obtained value to the last block from the serial key loaded into eax

15. If it match, the serial key is correct and a success dialog will be shown. Now that we know the algorithm, we can write the keygen.


The keygen code
#!/usr/bin/python

"""Sandwich OS X crackme keygen by kadaj.

App: https://reverse.put.as/wp-content/uploads/2010/05/1-Sandwich.zip
Tue 28 Nov 17
"""

import random

class SandwichKeygen:
    """Sandwich key generator.

    Keys have the form xxxx-xxxx-xxxx-xxxx: four dash-separated 4-digit
    blocks. The third block is never checked by the app; the last block
    must equal 6597 - ((block0 + block1) >> 2).
    """

    __block_3_mn = 6597  # 0x19c5, the magic number the last block is derived from

    def _gen_random(self):
        """Return a random 4-digit number (1000-9999 inclusive)."""
        # randrange's upper bound is exclusive, so 10000 is needed for
        # 9999 to be a possible block value.
        return random.randrange(1000, 10000)

    def _get_block(self):
        """Get a block containing 4 digits."""
        return str(self._gen_random())

    def _get_block_3(self, block_1, block_2):
        """Generate the last block from the first two blocks."""
        sar_2 = (int(block_1) + int(block_2)) >> 2  # mirrors `sar esi, 0x2`
        return str(self.__block_3_mn - sar_2)

    def generate(self):
        """Generate a serial key of the form xxxx-xxxx-xxxx-xxxx."""
        block_0 = self._get_block()
        block_1 = self._get_block()
        return '-'.join([block_0, block_1, str(self._gen_random()), self._get_block_3(block_0, block_1)])

if __name__ == '__main__':
    # print() as a call works in both Python 2 and Python 3; the bare
    # `print x` statement form is a syntax error under Python 3.
    keygen = SandwichKeygen()
    print(keygen.generate())

# Few keys
1111-2222-0000-5764
2681-8944-2259-3691
6773-1497-8343-4530
5903-2519-1731-4492
9613-1094-3343-3921

Tuesday, 14 November 2017

Cherry Blossoms with CherryPy

CherryPy is a python web framework which I really like. It is minimalistic in nature, is very pleasant to work with, and still we can accomplish much. Below I will detail a basic web app setup with CherryPy demonstrating basic usage which one can expand further.
# blossoms.py
"""A blossoms server with CherryPy."""

import cherrypy
import os

class CherryBlossom(object):
    """The server route handler methods."""

    def __init__(self):
        pass

    @cherrypy.expose
    def index(self):
        """The root url, when not mapped to static response, this action will be invoked."""
        return self.about()

    @cherrypy.expose
    def about(self):
        """Return a plain-text description of the service."""
        return 'Enjoy cherry blossoms.'

    @cherrypy.expose(['greet'])  # exposed under the alias /greet
    @cherrypy.tools.json_out()
    def get_greeting_message(self, name):
        """Return greetings to the given name in JSON format."""
        # Fixed: the original had a stray quote after `name`
        # ('Greetings %s' % name') which is a syntax error.
        return {'msg': 'Greetings %s' % name}

    @cherrypy.tools.register('before_finalize', priority=60)
    def secureheaders():
        """Update server response headers."""
        headers = cherrypy.response.headers
        headers['X-Frame-Options'] = 'DENY'
        headers['X-XSS-Protection'] = '1; mode=block'
        headers['X-Powered-By'] = 'Cherry Blossoms'
        headers['Server'] = 'Sakura'

class Blossoms(object):
    """Server configuration and initialisation."""

    def __init__(self):
        pass

    def start(self):
        """Start the service."""
        handler = CherryBlossom()
        # Explicit REST-style routes mapped onto the handler's actions.
        router = cherrypy.dispatch.RoutesDispatcher()
        router.explicit = False
        router.connect('home', '/', handler.index)
        router.connect('about', '/about/', controller=handler, action='about', conditions=dict(method=['GET']))
        router.connect('greetings', '/greet/{name}/', controller=handler, action='greet', conditions=dict(method=['GET']))
        # Load the on-disk global config, then merge per-path settings.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        conf_file = base_dir + '/blossoms.conf'
        cherrypy.config.update(conf_file)
        path_conf = {
            '/': {
                'request.dispatch' : router,
                'tools.staticdir.dir': base_dir
            }
        }
        app = cherrypy.tree.mount(None, '/', conf_file)
        app.merge(path_conf)
        # Subscribe platform shutdown hooks when the engine provides them.
        if hasattr(cherrypy.engine, "signal_handler"):
            cherrypy.engine.signal_handler.subscribe()
        if hasattr(cherrypy.engine, "console_control_handler"):
            cherrypy.engine.console_control_handler.subscribe()
        cherrypy.engine.start()
        cherrypy.engine.block()
# pyserver.py
# Entry point: construct the server wrapper and block on its main loop.
from blossoms import Blossoms

server = Blossoms()
server.start()
The configuration is present in the blossoms.conf file.
[global]
server.socket_host: '127.0.0.1'
server.socket_port: 1729
server.thread_pool: 8

[/]
tools.gzip.on: True
tools.staticdir.on: True
tools.staticdir.index: 'index.html'
tools.staticdir.debug: True
tools.secureheaders.on: True
tools.response_headers.on: True
tools.response.stream: True
tools.sessions.on = True
tools.sessions.secure = True
tools.sessions.httponly = True
log.screen: True
The root path /, i.e. the FQDN alone, is configured to serve static content. The GET endpoints are /greet/{name} and /about/. The RoutesDispatcher uses the routes package to do the dispatch. The requirements.txt is below.
cherrypy==11.1.0
routes==2.4.1
We load the config and merge it with a few updated settings from the code before starting the server. We have a server that handles static content, does REST based routing, custom response headers, JSON and HTML responses, with configurable settings. Enjoy cherry blossoms.