QuickCheck for C using STL set

In QuickCheck version 1.42.1 we released eqc_cpp, a library to help testing C++ code with Quviq’s QuickCheck.

QuickCheck already had support for automatic generation of tests for the C language. Functions and types defined in C got automatically their counterpart in Erlang, such that QuickCheck could call these functions with the right arguments.

This has now been extended to C++. With eqc_cpp C++ is handled by auto-generating C-wrapper code and then using the exisiting C-binding in QuickCheck. Since it is infeasible to write and maintain a C++ parser we have based the C++ support on the SWIG Tool, a widely used tool for creating wrapper code for C/C++ (although using it to access C++ from C is most likely not the normal use case). (Note: For template instantiation to work you need to use SWIG >= 3.0.12)

STL set

As a running example we are going to test some of the functionality of the Standard Template Library (STL) set implementation This example is complicated enough to illustrate the concept, while not having to write and explain lots of C++ code. We decided to test the following functions in STL set:

  • bool empty() const; – Check whether the set is empty.
  • size_t size() const; – Return the size of (number of elements in) the set.
  • iterator find(const Elem &x); – Look for a particular value in the set. If the element is present an iterator pointing to the element is returned, otherwise an iterator pointing to set<Elem>::end() is returned.
  • pair<iterator, bool> insert(const Elem &x); – Insert an element into the set. The result is a pair. The first component of the pair is an iterator pointing to the inserted element (or to the already existing element if it was already present). The second component is true if the element was inserted, and false if it was already present.
  • size_t erase(const Elem &x); – Remove an element from the set, returns 1 if the element was removed, and 0 otherwise.
  • iterator begin(); – Returns an iterator referring to the first element in the set container (or the past-the-end element if the set is empty).
  • iterator end(); – Returns an iterator referring to the past-the-end element in the set container.

Calling C++ from Erlang

For eqc_cpp to generate wrapper code for STL set we need to produce a SWIG-file that contains the necessary definitions. A first attempt could look like this (where we name an instance of a set of integers set_int):

// Module name is not used by eqc_cpp, but SWIG dictates it should be present
%module unused

%{
  #include <set>
%}

namespace std {

  template <class T, class U> class pair{};

  template <class Elem>
  class set {
    public:

      class iterator;

      set();
      bool empty() const;
      size_t size() const;
      void clear();
      iterator begin();
      iterator end();
      iterator find(const Elem &x);
      pair<iterator, bool> insert(const Elem &x);
      size_t erase(const Elem &x);
  };
}

%template(set_int) std::set<int>;

Note that we need to include a declaration of the pair template, since it is used in the return type of insert. With the SWIG-file you define what part of the C++ API you want to create wrappers for. It should completely describe this API.

This SWIG-file can be given to the Erlang module eqc_cpp (for example in an Erlang shell):

284> eqc_cpp:start(set, [{swig_file, "stl_set.swg}, verbose]).

+++ Called swig                                                  [   136ms ]
SWIG Parse result:
  - Class instances:
      set_int              ==> std::set<int>
      set_int_iterator     ==> std::set<int>::iterator
      pair                 ==> std::pair<std::set<int>::iterator, bool>  (auto instantiated)

+++ Called eqc_c:start:
  eqc_c:start(set,
              [{c_src, "/tmp/__eqc_tmp1504519839758850_wrap.h"},{additional_files, "/tmp/__eqc_tmp1504519839759607_wrap.o"},{cflags, "-lstdc++"},
               {rename, []}, verbose]).
< ... >
ok

And this results in the creation of an Erlang module set with Erlang functions for the defined API:

289> set:module_info(exports).
[{pair_new,0},
 {set_int_clear,1},
 {set_int_delete,1},
 {set_int_empty,1},
 {set_int_begin,1},
 {set_int_end,1},
 {set_int_iterator_delete,1},
 {set_int_iterator_new,0},
 {module_info,0},
 {module_info,1},
 {set_int_new,0},
 {set_int_size,1},
 {set_int_erase,2},
 {set_int_find,2},
 {set_int_insert,2}]

Some quick experiments in the shell shows that the wrapping seems to work, but also reveals a problem:

292> S = set:set_int_new().
{ptr,"set_int",140682019733552}
293> set:set_int_size(S).
0
295> set:set_int_insert(S, 7).
{ptr,"pair",140682020782096}
296> set:set_int_find(S, 7).
{ptr,"set_int_iterator",140682019733600}

Since we cannot directly access objects in C the wrapping translates all functions that return an object (or a reference to an object) into a function that returns a pointer to something opaque. Thus there is not really much we can do with the result from insert or from find, since all we have is a pointer to something that represents an iterator or a pair respectively. The solution is to provide more detail to the SWIG-file, and thus reveal more details to eqc_cpp. We expand the definitions of iterator and pair into:

template <class T, class U>
class pair {
  public:
    T first;
    U second;
};

class iterator {
  public:
    bool operator ==(const iterator &i) const;
    iterator operator ++() const;
    Elem operator *() const;
};

The result are functions that can be used to destruct a pair (pair_get_first and pair_get_second) and functions that can do something sensible with iterators (set_int_iterator==, set_int_iterator++, and set_int_iterator*).

Now we have created a link between Erlang and C++ such that from Erlang we can call C++ functions and inspect the results.

Modelling sets

We now have everything that we need to be able to write a simple QuickCheck state machine model for STL set. QuickCheck models are written in Erlang and are used to automatically generate test cases from.

In order to keep the model simple each generate test case makes use of a single set. The state is a simple list-representation of a set. The tested property is a pretty standard eqc_statem property where the eqc_cpp is started in ?SETUP and the C-binding is restarted before each test, lastly the set is created and passed to run_commands.

-module(stl_set_eqc).

-compile([export_all, nowarn_export_all]).

-include_lib("eqc/include/eqc.hrl").
-include_lib("eqc/include/eqc_statem.hrl").

-define(SWG_FILE, "./stl_set.swg").

%% -- State ------------------------------------------------------------------

initial_state() -> [].

%% -- Property ---------------------------------------------------------------

%% The property.
prop_ok() ->
  ?SETUP(fun() -> start(), fun() -> ok end end,
  ?FORALL(Cmds, commands(?MODULE),
  begin
    eqc_c:restart(),
    S = set:set_int_new(),
    HSR={_, _, Res} = run_commands(Cmds, [{set, S}]),
    pretty_commands(?MODULE, Cmds, HSR,
    check_command_names(?MODULE, Cmds,
      Res == ok))
  end)).

start() ->
  eqc_cpp:start(set, [{swig_file, ?SWG_FILE}, verbose]).

The only thing missing now is to model each operation. The easiest operation to model is size. It takes a single parameter, the set, it does not affect the model state (no _next) and the postcondition just compares the size of the set with the length of our model state list:

%% --- size ---

size_args(_) -> [{var, set}].

size(S) -> set:set_int_size(S).

size_post(Xs, [_S], V) ->
  eq(V, length(Xs)).

Of course it is quite boring to measure the size of the empty set, we better insert something into the set. Calls to insert take the set and a random (small) integer. The model state is updated by adding the possibly new number – since we use lists:umerge we will keep an ordered list (we come back to this in a short while) without duplicates. The postcondition we check is that the model and the system under test (SUT) agree on whether the inserted number was new or not.

%% --- insert ---

insert_args(_) -> [{var, set}, int()].

insert(S, N) ->
  P = set:set_int_insert(S, N),
  set:pair_get_second(P).

insert_next(Xs, _V, [_S, X]) ->
  lists:umerge([X], Xs).

insert_post(Xs, [_S, X], V) ->
  eq(V, not lists:member(X, Xs)).

Running the property with these two operations works well:

372> eqc:quickcheck(stl_set_eqc:prop_ok()).
....................................................................................................
OK, passed 100 tests

50.8% {eqc_cpp_stl_set_eqc,size,1}
49.2% {eqc_cpp_stl_set_eqc,insert,2}
true

Modelling the remaining operations, find and erase, proceeds similarly. The most interesting bit is getting the postcondition for find right – we need to compare the result of set:set_int_find with set:set_int_end using the overloaded iterator== operator. We choose to perform these operations in the find call rather than the postcondition to make test cases easier to read.

%% --- find ---

find_args(_) -> [{var, set}, int()].

find(S, X) ->
  not set:'set_int_iterator=='(set:set_int_find(S, X), set:set_int_end(S)).

find_post(Xs, [_Set, X], V) ->
  eq(V, lists:member(X, Xs)).

%% --- erase ---

erase_args(_) -> [{var, set}, int()].

erase(S, X) -> set:set_int_erase(S, X).

erase_next(Xs, _V, [_S, X]) ->
  Xs -- [X].

erase_post(Xs, [_S, X], V) ->
  eq(V, length([ Y || Y <- Xs, X == Y ])).

As mentioned above we do keep the model list sorted. There is a good reason for this, if we read the manual for STL set we find the following information “Internally, the elements in a set are always sorted following a specific strict weak ordering criterion…”. To test if this is really true we need to compare the model state (that should be sorted correctly) with the result of iterating through the C++ set. We model this as the operation to_list, that iterate from set_int_begin to set_int_end checking that the set is indeed sorted.

%% --- to_list ---

to_list_args(_) -> [{var, set}].

to_list(S) -> iterate(set:set_int_begin(S), set:set_int_end(S)).

to_list_post(Xs, [_S], V) ->
  eq(V, Xs).

iterate(I, End) -> iterate(I, End, []).

iterate(I, End, Acc) ->
  case set:'set_int_iterator=='(I, End) of
    true  -> lists:reverse(Acc);
    false ->
      X = set:'set_int_iterator*'(I),
      iterate(set:'set_int_iterator++'(I), End, [X | Acc])
  end.

At times it is rather awkward to do C/C++ operations on the Erlang side. In these cases one can use the SWIG extend-functionality, which makes it possible to write bits of code that are included in the wrapping as if it was part of the C++ code. For example, we saw that it was a bit cumbersome to compare the result of find with end to conclude whether the number was found in the set or not. A better approach could be to add an is_member function “to” the set object. This can be achieved by adding the following to the SWIG file (inside the set class):

%extend{
  bool is_member(const Elem &x){
    return $self->find(x) != $self->end();
  }
}

$self is the SWIG syntax for this and we simply do the comparison on the C++ side (which is trivial because of the overloaded ==) and end up with a function that just returns a boolean. Internally eqc_cpp handle this by adding a C++ function with this functionality, and then proceeds to wrap the added function just like the other member functions. (Note: This is not a real member function, thus there is no access to private class variables etc.)

If we now add is_member to the model we could remove the find function from it if we like. We are able to test the find function through the simpler to write is_member function:

%% --- is_member ---

is_member_args(_) -> [{var, set}, int()].

is_member(S, X) ->
  set:set_int_is_member(S, X).

is_member_post(Xs, [_Set, X], V) ->
  eq(V, lists:member(X, Xs)).

Tests generated by the resulting model run just fine, after all the STL set functionality is rather well tested and we did not really expect to find any issues.

381> eqc:quickcheck(stl_set_eqc:prop_ok()).
....................................................................................................
OK, passed 100 tests

17.9% {eqc_cpp_stl_set_eqc,erase,2}
17.5% {eqc_cpp_stl_set_eqc,is_member,2}
16.9% {eqc_cpp_stl_set_eqc,find,2}
16.2% {eqc_cpp_stl_set_eqc,insert,2}
15.8% {eqc_cpp_stl_set_eqc,size,1}
15.7% {eqc_cpp_stl_set_eqc,to_list,1}
true

To make sure we could find issues if there were any, we can plant a bug in the specification:

to_list_post(Xs, [_S], V) ->
  eq(V, lists:reverse(Xs)).

Instead of requiring that to_list return the elements in sorted order we specify that they should be returned in reverse sorted order. Now running the tests quickly finds a counterexample:

381> eqc:quickcheck(stl_set_eqc:prop_ok()).
....Failed! After 4 tests.
< ... >
Shrinking xxxxxxx..xx...xxx.xxxxx(6 times)
[{model,eqc_cpp_stl_set_eqc},
 {set,{var,1},{call,eqc_cpp_stl_set_eqc,insert,[{var,set},0]}},
 {set,{var,2},{call,eqc_cpp_stl_set_eqc,insert,[{var,set},1]}},
 {set,{var,3},{call,eqc_cpp_stl_set_eqc,to_list,[{var,set}]}}]

eqc_cpp_stl_set_eqc:insert(Set, 0) -> true
eqc_cpp_stl_set_eqc:insert(Set, 1) -> true
eqc_cpp_stl_set_eqc:to_list(Set) -> [0, 1]

Reason:
  Post-condition failed:
  [0, 1] /= [1, 0]
false

Final version

Below are the final versions of the SWIG-file and the QuickCheck state machine model (without the bug).

// Module name is not used by eqc_cpp, but SWIG dictates it should be present
%module unused

%{
  #include <set>
%}

namespace std {

  template <class T, class U>
  class pair {
    public:
      T first;
      U second;
  };

  template <class Elem>
  class set {
    public:
      class iterator {
        public:
          bool operator ==(const iterator &i) const;
          iterator operator ++() const;
          Elem operator *() const;
      };

      set();
      bool empty() const;
      size_t size() const;
      void clear();
      iterator begin();
      iterator end();
      iterator find(const Elem &x);
      pair<iterator, bool> insert(const Elem &x);
      size_t erase(const Elem &x);

      %extend{
        bool is_member(const Elem &x){
          return $self->find(x) != $self->end();
        }
      }
  };

}

%template(set_int) std::set<int>;
%%% File        : stl_set_eqc.erl
%%% Author      : Ulf Norell
%%% Description : Testing STL set implementation
%%% Created     : 28 Aug 2017 by Ulf Norell
-module(stl_set_eqc).

-compile([export_all, nowarn_export_all]).


-include_lib("eqc/include/eqc.hrl").
-include_lib("eqc/include/eqc_statem.hrl").

-define(SWG_FILE, "../examples/eqc_cpp/stl_set/stl_set.swg").

%% -- State ------------------------------------------------------------------

initial_state() -> [].

%% -- Operations -------------------------------------------------------------

%% --- insert ---

insert_args(_) -> [{var, set}, int()].

insert(S, N) ->
  P = set:set_int_insert(S, N),
  set:pair_get_second(P).

insert_next(Xs, _V, [_S, X]) ->
  lists:umerge([X], Xs).

insert_post(Xs, [_S, X], V) ->
  eq(V, not lists:member(X, Xs)).

%% --- find ---

find_args(_) -> [{var, set}, int()].

find(S, X) ->
  not set:'set_int_iterator=='(set:set_int_find(S, X), set:set_int_end(S)).

find_post(Xs, [_Set, X], V) ->
  eq(V, lists:member(X, Xs)).

%% --- is_member ---

is_member_args(_) -> [{var, set}, int()].

is_member(S, X) ->
  set:set_int_is_member(S, X).

is_member_post(Xs, [_Set, X], V) ->
  eq(V, lists:member(X, Xs)).

%% --- erase ---

erase_args(_) -> [{var, set}, int()].

erase(S, X) -> set:set_int_erase(S, X).

erase_next(Xs, _V, [_S, X]) ->
  Xs -- [X].

erase_post(Xs, [_S, X], V) ->
  eq(V, length([ Y || Y <- Xs, X == Y ])).

%% --- size ---

size_args(_) -> [{var, set}].

size(S) -> set:set_int_size(S).

size_post(Xs, [_S], V) ->
  eq(V, length(Xs)).

%% --- to_list ---

to_list_args(_) -> [{var, set}].

to_list(S) -> iterate(set:set_int_begin(S), set:set_int_end(S)).

to_list_post(Xs, [_S], V) ->
  eq(V, Xs).

%% -- Common -----------------------------------------------------------------

iterate(I, End) -> iterate(I, End, []).

iterate(I, End, Acc) ->
  case set:'set_int_iterator=='(I, End) of
    true  -> lists:reverse(Acc);
    false ->
      X = set:'set_int_iterator*'(I),
      iterate(set:'set_int_iterator++'(I), End, [X | Acc])
  end.

%% -- Property ---------------------------------------------------------------

%% The property.
prop_ok(V) ->
  ?SETUP(fun() -> start(), fun() -> ok end end,
  ?FORALL(Cmds, commands(?MODULE),
  begin
    eqc_c:restart(),
    S = set:set_int_new(),
    HSR={_, _, Res} = run_commands(?MODULE, Cmds, [{set, S}]),
    pretty_commands(?MODULE, Cmds, HSR,
    check_command_names(?MODULE, Cmds,
      Res == ok))
  end)).

start() ->
  start([]).

start(ExtraOpts) ->
  eqc_cpp:start(set, [{swig_file, ?SWG_FILE}] ++ ExtraOpts).