Oniguruma

Regular expressions engine

History

Unclear.

FAQ

What is it?

A regular expression library written in C, with support for many character encodings.

How cool is it?

Rather.

Competitors?

RE2, Boost.Regex

When to use it?

When you work with encodings or named groups.

Is it dead?

No, it seems to be used in Ruby.

Where is it?

https://web.archive.org/web/20150807014439/http://www.geocities.jp/kosako3/oniguruma/

How to use it

The documentation is fragmented but the examples are rather good.

Download

file: 1_download.sh

#!/bin/bash
# Fetch (when missing) and build a static Oniguruma 5.9.6 under ./tmp.
# Stop at the first failing command so that configure/make never run in the
# wrong directory after a failed cd, download or unpack.
set -euo pipefail

version=onig-5.9.6
url=https://web.archive.org/web/20150807014439/http://www.geocities.jp/kosako3/oniguruma/archive/${version}.tar.gz

mkdir -p tmp
cd ./tmp

# Only hit the network / unpack when the artefact is not already present,
# so the script can be re-run cheaply.
if [ ! -d "${version}" ]; then
    if [ ! -f "${version}.tar.gz" ]; then
        wget -q "${url}"
    fi
    tar -xaf "${version}.tar.gz"
fi

cd "${version}"
./configure --enable-static --disable-shared
make

Init

The great thing is that you can use named groups directly. Here is one of the examples, albeit slightly modified.

file: main.c

#include <stdio.h>
#include <string.h>
#include "oniguruma.h"

/*
 * Per-name callback handed to onig_foreach_name().
 *
 * For each of the ngroup_num group numbers in group_nums, writes one line to
 * stderr: the name, the group number, the begin/end offsets stored for that
 * group in the region, and a "*" when the group number equals the value
 * returned by onig_name_to_backref_number() for this name.
 *
 * name, name_end  start / end pointers of the group name
 * ngroup_num      number of entries in group_nums
 * group_nums      group numbers associated with the name
 * reg             regex object the name belongs to
 * arg             user data; cast to the OnigRegion holding the offsets
 *
 * Always returns 0 (continue iterating).
 */
static int name_callback(const UChar* name, const UChar* name_end,
    int ngroup_num, int* group_nums, regex_t* reg, void* arg) {
    OnigRegion *match = (OnigRegion* )arg;
    int idx = 0;

    while (idx < ngroup_num) {
        int group = group_nums[idx];
        int backref = onig_name_to_backref_number(reg, name, name_end, match);
        const char *marker = "";

        if (backref == group) {
            marker = "*";
        }

        fprintf(stderr, "%s (%d): ", name, group);
        fprintf(stderr, "(%d-%d) %s\n",
            match->beg[group], match->end[group], marker);

        idx++;
    }

    return 0;  /* 0: continue */
}

/*
 * Compiles a date/time pattern with named groups, searches a fixed sample
 * string and prints every named group with its match offsets to stderr.
 *
 * Returns 0 when the search ran (match or mismatch), -1 when the pattern
 * does not compile, the region cannot be allocated, or the search itself
 * reports an error. The region and regex are released on every path that
 * created them.
 */
int main(int argc, char* argv[]) {
    int r;
    int status = 0;
    const UChar *start, *range, *end;
    regex_t* reg;
    OnigErrorInfo einfo;
    OnigRegion *region;

    /* The year is exactly four digits: \d{4}. A further '*' after the
       {4} would put a second quantifier on top of the first one. */
    static const UChar* pattern = (const UChar* )"(?<yyyy>\\d{4})-(?<mm>\\d{1,2})-(?<dd>\\d{1,2}) (?<HH>\\d{1,2}):(?<MM>\\d{1,2}):(?<SS>\\d{1,2}) (?<tz>.*)";
    static const UChar* str = (const UChar* )"2117-31-12 13:11:11 CET";

    r = onig_new(&reg, pattern, pattern + strlen((const char* )pattern),
    ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
    if (r != ONIG_NORMAL) {
        /* onig_error_code_to_str() writes into a UChar buffer. */
        UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
        onig_error_code_to_str(s, r, &einfo);
        fprintf(stderr, "ERROR: %s\n", (char* )s);

        return -1;
    }

    fprintf(stderr, "number of names: %d\n", onig_number_of_names(reg));

    region = onig_region_new();
    if (region == NULL) {
        /* name_callback dereferences the region, so do not go on without it. */
        fprintf(stderr, "ERROR: could not allocate region\n");
        onig_free(reg);
        onig_end();

        return -1;
    }

    /* Search the whole string: start at its beginning, range up to its end. */
    end   = str + strlen((const char* )str);
    start = str;
    range = end;
    r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
    if (r >= 0) {
        fprintf(stderr, "match at %d\n\n", r);
        (void)onig_foreach_name(reg, name_callback, (void* )region);
    }
    else if (r == ONIG_MISMATCH) {
        fprintf(stderr, "search fail\n");
    } else { /* error */
        UChar s[ONIG_MAX_ERROR_MESSAGE_LEN];
        onig_error_code_to_str(s, r);
        fprintf(stderr, "ERROR: %s\n", (char* )s);
        /* Fall through to the shared cleanup instead of returning here. */
        status = -1;
    }

    onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
    onig_free(reg);
    onig_end();

    return status;
}

Compile and run

file: 2_compile_and_run.sh

#!/bin/bash
# Build main.c against the static Oniguruma tree under ./tmp and run it.
# Abort on the first failure so a stale ./tmp/main is never executed after
# a failed compile.
set -euo pipefail

clang -o tmp/main \
    -Wall -Wextra -Weverything \
        -Wno-padded \
        -Wno-unused-parameter \
        -Wno-strict-prototypes \
    main.c \
    -I./tmp/onig-5.9.6 \
    -L./tmp/onig-5.9.6/.libs -lonig

./tmp/main

This gives

number of names: 7
match at 0

dd (3): (8-10) *
MM (5): (14-16) *
mm (2): (5-7) *
HH (4): (11-13) *
SS (6): (17-19) *
tz (7): (20-23) *
yyyy (1): (0-4) *

Further reading:

We have only seen the tip of the iceberg here. There is much more:

Check the examples to see how you can work with the encoding.

If you want to know more about named groups in regular expressions, you could check the Python documentation.

If you are not quite familiar with regular expressions, you should read the excellent book "Mastering Regular Expressions" by Jeffrey E. F. Friedl.