/*
 *  $Id: cmap_classify.c 29036 2025-12-22 19:19:15Z yeti-dn $
 *  Copyright (C) 2021 David Necas (Yeti).
 *  E-mail: yeti@gwyddion.net.
 *
 *  This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along with this program; if not, write to the
 *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include "config.h"
#include <string.h>
#include <gtk/gtk.h>
#include <libgwyddion/gwymacros.h>
#include <libgwyddion/gwymath.h>
#include <libgwyddion/gwythreads.h>
#include <libprocess/gwyprocess.h>
#include <libgwydgets/gwydataview.h>
#include <libgwydgets/gwystock.h>
#include <libgwymodule/gwymodule-cmap.h>
#include <app/gwyapp.h>
#include <app/gwymoduleutils.h>
#include "libgwyddion/gwyomp.h"

#define RUN_MODES (GWY_RUN_INTERACTIVE)

#define PURCRIT 1e-2
#define CDEBUG 0

enum {
    PREVIEW_SIZE = 360,
    /* 16 is good for current processors; increasing it to 32 might not hurt in the future. */
    BLOCK_SIZE = 16,
    NCRITERIA = 10,
    MAXRULES = 100,
    MAXBRANCHES = 20, //10
    MIN_N = 1,
    MAX_N = 100,
    RESPONSE_SHOW_TREE = 1000,
};

/*
 * Quantity type identifiers.
 *
 * NOTE(review): not referenced in the visible part of the file; confirm it is still needed.
 */
typedef enum {
    CLASSIFY_QUANTITY_VALUE = 0,
    CLASSIFY_QUANTITY_LOG   = 1,
    CLASSIFY_QUANTITY_SLOPE = 2,
    CLASSIFY_QUANTITY_NTYPES
} ClassifyQuantityType;

typedef enum {
    CLASSIFY_DISPLAY_MASK_A   = 0,
    CLASSIFY_DISPLAY_MASK_B   = 1,
    CLASSIFY_DISPLAY_RESULT_A = 2,
    CLASSIFY_DISPLAY_RESULT_B = 3,
    CLASSIFY_DISPLAY_NTYPES
} ClassifyDisplayType;

/*
 * Parameter identifiers.
 *
 * The ids from PARAM_USE on are bases of ranges NCRITERIA ids long: criterion i uses PARAM_USE + i, PARAM_CURVE + i,
 * PARAM_ENABLE_SEGMENT + i, PARAM_SEGMENT + i and PARAM_QUANTITY + i.
 */
enum {
    PARAM_MASK_A,
    PARAM_MASK_B,
    PARAM_DISPLAY,
    PARAM_ENABLE_ABSCISSA,
    PARAM_ABSCISSA,
    PARAM_OUTLIERS,
    PARAM_N,
    PARAM_SUBSAMPLING,
    PARAM_THRESHOLD,
    PARAM_SEED,
    /* Id of the informational "Result:" row of the parameter table; it is not defined as a stored parameter. */
    INFO_RESULT,
    PARAM_USE,
    PARAM_CURVE = PARAM_USE + NCRITERIA,
    PARAM_ENABLE_SEGMENT = PARAM_CURVE + NCRITERIA,
    PARAM_SEGMENT = PARAM_ENABLE_SEGMENT + NCRITERIA,
    PARAM_QUANTITY = PARAM_SEGMENT + NCRITERIA,
};

/* Function reducing a data line to a single number. */
typedef gdouble (*LineStatFunc)(GwyDataLine *dataline);

/* One row of the table of available quantities. */
typedef struct {
    /* Untranslated name (marked with N_() in the table). */
    const gchar *name;
    /* Identifier of the quantity; used as the value of the PARAM_QUANTITY + i parameters. */
    GwyLineStatQuantity quantity;
    /* Function evaluating the quantity for a data line. */
    LineStatFunc func;
} ClassifyQuantityInfo;

/* One decision tree stored as parallel arrays indexed by rule number; each array holds at most MAXRULES rules. */
typedef struct
{
    /* Number of rules; presumably the count of valid entries in the arrays below -- TODO confirm. */
    int nrules;
    /* Which parameter (data field) each rule uses for the decision. */
    int rule_parameter[MAXRULES];
    /* Threshold each rule uses for the decision. */
    double rule_threshold[MAXRULES];
    /* Where to continue on the "high" outcome: a result (-1 = Result A, -2 = Result B) or the next rule. */
    int rule_goto_high[MAXRULES];
    /* Where to continue on the "low" outcome: a result (-1 = Result A, -2 = Result B) or the next rule. */
    int rule_goto_low[MAXRULES];
} CTree;

/* A set of decision trees together with a threshold; created by classifier_init(ntrees, threshold). */
typedef struct {
    /* The trees; presumably an array of ntrees items -- TODO confirm in classifier_init(). */
    CTree *ct;
    int ntrees;
    gdouble threshold;
} Classifier;


/* Data and parameters of one module invocation; filled in classify(). */
typedef struct {
    /* Module parameters loaded from settings. */
    GwyParams *params;
    /* The current curve map. */
    GwyLawn *lawn;
    /* Output fields with the pixel dimensions of the curve map; created and released in classify(). */
    GwyDataField *result_a;
    GwyDataField *result_b;
    /* Masks selected with PARAM_MASK_A and PARAM_MASK_B; NULL initially, updated in param_changed(). */
    GwyDataField *mask_a;
    GwyDataField *mask_b;
    /* Number of segments of the curve map as returned by gwy_lawn_get_n_segments(). */
    gint nsegments;
    /* The classifier; NULL initially. */
    Classifier *cl;
} ModuleArgs;

/* State of the dialog; lives on the stack of run_gui() and is passed to all signal handlers. */
typedef struct {
    ModuleArgs *args;
    GtkWidget *dialog;
    GwyParamTable *table_options;
    /* Private container holding the preview image and the displayed mask. */
    GwyContainer *data;
    /* Arrays of NCRITERIA widgets, one per criterion row, allocated in run_gui(). */
    GtkWidget **combo_curve;
    /* Items of combo_segment are only created when the curve map has segments. */
    GtkWidget **combo_segment;
    GtkWidget **combo_quantity;
    GtkWidget **toggle_segment_enable;
    GtkWidget **toggle_use;
} ModuleGUI;

static gboolean                    module_register        (void);
static GwyParamDef*                define_module_params   (void);
static void                        classify               (GwyContainer *data,
                                                           GwyRunType runtype);
static GwyDialogOutcome            run_gui                (ModuleArgs *args,
                                                           GwyContainer *data,
                                                           gint id);
static void                        param_changed          (ModuleGUI *gui,
                                                           gint id);
static void                        dialog_response        (GwyDialog *dialog,
                                                           gint response,
                                                           ModuleGUI *gui);
static void                        preview                (gpointer user_data);
static gdouble                     get_data_line_range    (GwyDataLine *dataline);
static gdouble                     get_data_line_Rz       (GwyDataLine *dataline);
static gdouble                     get_data_line_Rt       (GwyDataLine *dataline);
static gdouble                     get_data_line_slope    (GwyDataLine *dataline);
static gdouble                     get_max_pos_abscissa   (GwyDataLine *dataline,
                                                           GwyDataLine *abscissa);
static gdouble                     get_min_pos_abscissa   (GwyDataLine *dataline,
                                                           GwyDataLine *abscissa);
static gint                        extract_data_line      (GwyLawn *lawn,
                                                           GwyDataLine *target,
                                                           gint col,
                                                           gint row,
                                                           gint curveno,
                                                           gint segment);
static const ClassifyQuantityInfo* find_quantity          (GwyLineStatQuantity quantity);
static void                        use_changed            (GtkToggleButton *check,
                                                           ModuleGUI *gui);
static void                        curve_changed          (GtkComboBox *combo,
                                                           ModuleGUI *gui);
static void                        segment_changed        (GtkComboBox *combo,
                                                           ModuleGUI *gui);
static void                        quantity_changed       (GtkComboBox *combo,
                                                           ModuleGUI *gui);
static void                        enable_segment_changed (GtkToggleButton *check,
                                                           ModuleGUI *gui);
static void                        sanitise_params        (ModuleArgs *args);

static void                        run_classification     (ModuleGUI *gui);
static gchar*                      create_report          (ModuleGUI *gui);
static Classifier*                 classifier_init        (gint ntrees,
                                                           gdouble threshold);
static void                        classifier_destroy     (Classifier *cl);
static gboolean                    classifier_train_full  (Classifier *cl,
                                                           GwyDataField **cldata,
                                                           gint ncriteria,
                                                           gdouble subsampling,
                                                           gint seed,
                                                           GwyDataField *mask_a,
                                                           GwyDataField *mask_b);
static void                        classifier_run         (Classifier *cl,
                                                           GwyDataField **cldata,
                                                           GwyDataField *result_a,
                                                           GwyDataField *result_b);
static void                        ctree_run              (CTree *ct,
                                                           GwyDataField **cldata,
                                                           GwyDataField *result_a,
                                                           GwyDataField *result_b);

/* XXX: This is more or less identical to tools/linestat.c. */
/*
 * Table of quantities selectable for a criterion: name, identifier and evaluation function.
 *
 * define_module_params() builds the choices of the PARAM_QUANTITY + i parameters from the names and identifiers.
 * Note that get_data_line_Rz() and get_data_line_Rt() modify the data line passed to them.
 */
static const ClassifyQuantityInfo quantities[] =  {
    { N_("Mean"),            GWY_LINE_STAT_MEAN,         gwy_data_line_get_avg,       },
    { N_("Median"),          GWY_LINE_STAT_MEDIAN,       gwy_data_line_get_median,    },
    { N_("Minimum"),         GWY_LINE_STAT_MINIMUM,      gwy_data_line_get_min,       },
    { N_("Maximum"),         GWY_LINE_STAT_MAXIMUM,      gwy_data_line_get_max,       },
    { N_("Rq (RMS)"),        GWY_LINE_STAT_RMS,          gwy_data_line_get_rms,       },
    { N_("Developed length"),   GWY_LINE_STAT_LENGTH,    gwy_data_line_get_length,    },
    { N_("Slope"),              GWY_LINE_STAT_SLOPE,     get_data_line_slope,         },
    { N_("tan β<sub>0</sub>"),  GWY_LINE_STAT_TAN_BETA0, gwy_data_line_get_tan_beta0, },
    { N_("Ra"),              GWY_LINE_STAT_RA,           gwy_data_line_get_ra,        },
    { N_("Rz"),              GWY_LINE_STAT_RZ,           get_data_line_Rz,            },
    { N_("Rt"),              GWY_LINE_STAT_RT,           get_data_line_Rt,            },
    { N_("Skew"),            GWY_LINE_STAT_SKEW,         gwy_data_line_get_skew,      },
    { N_("Excess kurtosis"), GWY_LINE_STAT_KURTOSIS,     gwy_data_line_get_kurtosis,  },
    { N_("Range"),           GWY_LINE_STAT_RANGE,        get_data_line_range,         },
    { N_("Variation"),          GWY_LINE_STAT_VARIATION, gwy_data_line_get_variation, },
    { N_("Min. position"),   GWY_LINE_STAT_MINPOS,       gwy_data_line_min_pos_i,     },
    { N_("Max. position"),   GWY_LINE_STAT_MAXPOS,       gwy_data_line_max_pos_i,     },
};

/* Module description handed to the module system by GWY_MODULE_QUERY2(). */
static GwyModuleInfo module_info = {
    GWY_MODULE_ABI_VERSION,
    /* Registration function. */
    &module_register,
    /* Description. */
    N_("Classifies curves in a curve map"),
    "Petr Klapetek <klapetek@gwyddion.net>",
    "1.0",
    "Petr Klapetek & David Nečas (Yeti)",
    "2025",
};

GWY_MODULE_QUERY2(module_info, cmap_classify)

/* Registers the "cmap_classify" curve map function, implemented by classify(); always returns TRUE. */
static gboolean
module_register(void)
{
    gwy_curve_map_func_register("cmap_classify", (GwyCurveMapFunc)&classify,
                                N_("/_Classify..."), NULL,
                                RUN_MODES, GWY_MENU_FLAG_CURVE_MAP,
                                N_("Classify curves"));
    return TRUE;
}

/*
 * Returns the parameter definitions of the module.
 *
 * The definitions are built on the first call and cached in a static variable; later calls return the same object.
 * Besides the fixed parameters, NCRITERIA instances of each per-criterion parameter are defined, with ids
 * PARAM_USE + i, PARAM_CURVE + i, PARAM_SEGMENT + i, PARAM_ENABLE_SEGMENT + i and PARAM_QUANTITY + i and settings
 * names "use<i>", "curve<i>", "segment<i>", "enable_segment<i>" and "quantity<i>".
 */
static GwyParamDef*
define_module_params(void)
{
    static const GwyEnum displays[] = {
        { N_("Mask A"),   CLASSIFY_DISPLAY_MASK_A   },
        { N_("Mask B"),   CLASSIFY_DISPLAY_MASK_B   },
        { N_("Result A"), CLASSIFY_DISPLAY_RESULT_A },
        { N_("Result B"), CLASSIFY_DISPLAY_RESULT_B },
    };
    static GwyEnum *functions = NULL;
    static GwyParamDef *paramdef = NULL;
    guint i;

    if (paramdef)
        return paramdef;

    /* Name/value pairs for the quantity choice, extracted from the table of quantities. */
    functions = gwy_enum_fill_from_struct(NULL, G_N_ELEMENTS(quantities), quantities, sizeof(ClassifyQuantityInfo),
                                          G_STRUCT_OFFSET(ClassifyQuantityInfo, name),
                                          G_STRUCT_OFFSET(ClassifyQuantityInfo, quantity));

    paramdef = gwy_param_def_new();
    gwy_param_def_set_function_name(paramdef, gwy_curve_map_func_current());
    gwy_param_def_add_int(paramdef, PARAM_N, "n", _("Populatio_n"), MIN_N, MAX_N, 10);
    /* Every parameter needs its own settings name; "n" already belongs to PARAM_N. */
    gwy_param_def_add_percentage(paramdef, PARAM_SUBSAMPLING, "subsampling", _("_Subsampling"), 0.6);
    gwy_param_def_add_percentage(paramdef, PARAM_THRESHOLD, "threshold", _("_Success threshold"), 0.6);
    gwy_param_def_add_image_id(paramdef, PARAM_MASK_A, "mask_a", _("Mask A"));
    gwy_param_def_add_image_id(paramdef, PARAM_MASK_B, "mask_b", _("Mask B"));
    gwy_param_def_add_lawn_curve(paramdef, PARAM_ABSCISSA, "abscissa", _("Abscissa"));
    gwy_param_def_add_boolean(paramdef, PARAM_ENABLE_ABSCISSA, "enable_abscissa", NULL, FALSE);
    gwy_param_def_add_boolean(paramdef, PARAM_OUTLIERS, "outliers", _("Ignore _outliers"), FALSE);
    gwy_param_def_add_seed(paramdef, PARAM_SEED, "seed", NULL);
    gwy_param_def_add_gwyenum(paramdef, PARAM_DISPLAY, "display", _("_Display"),
                              displays, G_N_ELEMENTS(displays), CLASSIFY_DISPLAY_MASK_A);

    /* The generated names are never freed.  The definitions are created once and kept for the program lifetime;
     * presumably the definition keeps the name pointers -- do not free them without checking. */
    for (i = 0; i < NCRITERIA; i++)
        gwy_param_def_add_boolean(paramdef, PARAM_USE + i, g_strdup_printf("use%u", i),
                                  NULL, FALSE);
    for (i = 0; i < NCRITERIA; i++)
        gwy_param_def_add_lawn_curve(paramdef, PARAM_CURVE + i, g_strdup_printf("curve%u", i), NULL);

    for (i = 0; i < NCRITERIA; i++)
        gwy_param_def_add_lawn_segment(paramdef, PARAM_SEGMENT + i, g_strdup_printf("segment%u", i), NULL);

    for (i = 0; i < NCRITERIA; i++)
        gwy_param_def_add_boolean(paramdef, PARAM_ENABLE_SEGMENT + i, g_strdup_printf("enable_segment%u", i),
                                  NULL, FALSE);
    for (i = 0; i < NCRITERIA; i++)
        gwy_param_def_add_gwyenum(paramdef, PARAM_QUANTITY + i, g_strdup_printf("quantity%u", i), NULL,
                                  functions, G_N_ELEMENTS(quantities), GWY_LINE_STAT_MEAN);

    return paramdef;
}

/*
 * Module function: classifies curves of the current curve map.
 *
 * Creates two output fields with the lateral geometry of the curve map, runs the dialog and, when the dialog
 * finishes with a result, adds both fields to @data as new images.
 */
static void
classify(GwyContainer *data, GwyRunType runtype)
{
    GwyDialogOutcome outcome = GWY_DIALOG_PROCEED;
    GwyDataField *field_a, *field_b;
    GwyLawn *lawn = NULL;
    ModuleArgs args;
    gint oldid, newid;

    g_return_if_fail(runtype & RUN_MODES);
    g_return_if_fail(g_type_from_name("GwyLayerPoint"));

    gwy_app_data_browser_get_current(GWY_APP_LAWN, &lawn,
                                     GWY_APP_LAWN_ID, &oldid,
                                     0);
    g_return_if_fail(GWY_IS_LAWN(lawn));

    /* Result A copies dimensions, offsets and lateral units from the curve map; result B is made alike. */
    field_a = gwy_data_field_new(gwy_lawn_get_xres(lawn), gwy_lawn_get_yres(lawn),
                                 gwy_lawn_get_xreal(lawn), gwy_lawn_get_yreal(lawn), TRUE);
    gwy_data_field_set_xoffset(field_a, gwy_lawn_get_xoffset(lawn));
    gwy_data_field_set_yoffset(field_a, gwy_lawn_get_yoffset(lawn));
    gwy_si_unit_assign(gwy_data_field_get_si_unit_xy(field_a), gwy_lawn_get_si_unit_xy(lawn));
    field_b = gwy_data_field_new_alike(field_a, TRUE);

    args.lawn = lawn;
    args.mask_a = NULL;
    args.mask_b = NULL;
    args.cl = NULL;
    args.result_a = field_a;
    args.result_b = field_b;
    args.nsegments = gwy_lawn_get_n_segments(lawn);
    args.params = gwy_params_new_from_settings(define_module_params());

    sanitise_params(&args);

    if (runtype == GWY_RUN_INTERACTIVE) {
        outcome = run_gui(&args, data, oldid);
        gwy_params_save_to_settings(args.params);
    }

    /* A cancelled dialog never reports GWY_DIALOG_HAVE_RESULT so it simply falls through to the cleanup. */
    if (outcome == GWY_DIALOG_HAVE_RESULT) {
        newid = gwy_app_data_browser_add_data_field(field_a, data, TRUE);
        gwy_app_set_data_field_title(data, newid, _("Classify result A"));
        gwy_app_channel_log_add(data, -1, newid, "cmap::cmap_classify", NULL);

        newid = gwy_app_data_browser_add_data_field(field_b, data, TRUE);
        gwy_app_set_data_field_title(data, newid, _("Classify result B"));
        gwy_app_channel_log_add(data, -1, newid, "cmap::cmap_classify", NULL);
    }

    g_object_unref(field_a);
    g_object_unref(field_b);
    g_object_unref(args.params);
}

/*
 * Image chooser filter: accepts image @id of @data when it has a mask and its data field is laterally compatible
 * with the field passed as @user_data.
 */
static gboolean
mask_filter(GwyContainer *data, gint id, gpointer user_data)
{
    GwyDataField *reference = (GwyDataField*)user_data;
    GwyDataField *candidate = gwy_container_get_object(data, gwy_app_get_data_key_for_id(id));
    GwyDataField *mask = NULL;
    gboolean has_mask;

    has_mask = gwy_container_gis_object(data, gwy_app_get_mask_key_for_id(id), &mask);
    if (has_mask
        && !gwy_data_field_check_compatibility(reference, candidate, GWY_DATA_COMPATIBILITY_LATERAL))
        return TRUE;

    return FALSE;
}

/*
 * Builds and runs the module dialog.
 *
 * The dialog consists of a preview of the curve map, a parameter table with the general options and a table of
 * NCRITERIA criterion rows laid out in two column groups.  Each row has a Use check box, a curve chooser, a segment
 * enabler with a segment chooser (the chooser only when the curve map has segments) and a quantity chooser.
 *
 * @args: Module arguments; the parameters are modified as the user changes the controls.
 * @data: Container of the curve map, used to fetch the preview image and its palette.
 * @id: Id of the curve map in @data.
 *
 * Returns the outcome of gwy_dialog_run().
 */
static GwyDialogOutcome
run_gui(ModuleArgs *args, GwyContainer *data, gint id)
{
    GtkWidget *hbox, *label, *dataview, *align, *combo, *table, *toggle;
    GwyParamTable *param_table;
    GwyParams *params = args->params;
    GwyDialog *dialog;
    ModuleGUI gui;
    GwyDataField *field;
    GwyDialogOutcome outcome;
    const guchar *gradient;
    guint i, row = 0, vshift = 0;
    static const GwyEnum functions[] = {
    { N_("Mean"),              GWY_LINE_STAT_MEAN       },
    { N_("Median"),            GWY_LINE_STAT_MEDIAN     },
    { N_("Minimum"),           GWY_LINE_STAT_MINIMUM    },
    { N_("Maximum"),           GWY_LINE_STAT_MAXIMUM    },
    { N_("Rq (RMS)"),          GWY_LINE_STAT_RMS        },
    { N_("Developed length"),  GWY_LINE_STAT_LENGTH,    },
    { N_("Slope"),             GWY_LINE_STAT_SLOPE,     },
    { N_("tan β<sub>0</sub>"), GWY_LINE_STAT_TAN_BETA0, },
    { N_("Ra"),                GWY_LINE_STAT_RA         },
    { N_("Rz"),                GWY_LINE_STAT_RZ         },
    { N_("Rt"),                GWY_LINE_STAT_RT         },
    { N_("Skew"),              GWY_LINE_STAT_SKEW       },
    { N_("Excess kurtosis"),   GWY_LINE_STAT_KURTOSIS   },
    { N_("Range"),             GWY_LINE_STAT_RANGE      },
    { N_("Variation"),         GWY_LINE_STAT_VARIATION, },
    { N_("Min. position"),     GWY_LINE_STAT_MINPOS     },
    { N_("Max. position"),     GWY_LINE_STAT_MAXPOS     },
    };

    /* The preview lives in a private container holding the curve map preview image and its palette. */
    gwy_clear(&gui, 1);
    gui.args = args;
    gui.data = gwy_container_new();
    field = gwy_container_get_object(data, gwy_app_get_lawn_preview_key_for_id(id));
    gwy_container_set_object(gui.data, gwy_app_get_data_key_for_id(0), field);
    if (gwy_container_gis_string(data, gwy_app_get_lawn_palette_key_for_id(id), &gradient))
        gwy_container_set_const_string(gui.data, gwy_app_get_data_palette_key_for_id(0), gradient);

    gui.dialog = gwy_dialog_new(_("Classify"));
    dialog = GWY_DIALOG(gui.dialog);

    /* Buttons are added one by one because of the custom "Show tree" response. */
    gtk_dialog_add_button(GTK_DIALOG(dialog), _("_Update"), GWY_RESPONSE_UPDATE);
    gtk_dialog_add_button(GTK_DIALOG(dialog), _("_Show tree"), RESPONSE_SHOW_TREE);
    gtk_dialog_add_button(GTK_DIALOG(dialog), _("_Reset"), GWY_RESPONSE_RESET);
    gtk_dialog_add_button(GTK_DIALOG(dialog), GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL);
    gtk_dialog_add_button(GTK_DIALOG(dialog), GTK_STOCK_OK, GTK_RESPONSE_OK);
    gtk_dialog_set_default_response(GTK_DIALOG(dialog), GTK_RESPONSE_OK);

    hbox = gwy_hbox_new(0);
    gwy_dialog_add_content(GWY_DIALOG(gui.dialog), hbox, TRUE, TRUE, 0);

    align = gtk_alignment_new(0.0, 0.0, 0.0, 0.0);
    gtk_box_pack_start(GTK_BOX(hbox), align, FALSE, FALSE, 0);

    dataview = gwy_create_preview(gui.data, 0, PREVIEW_SIZE, TRUE);
    gtk_container_add(GTK_CONTAINER(align), dataview);

    /* General options. */
    param_table = gui.table_options = gwy_param_table_new(args->params);
    gwy_param_table_append_image_id(param_table, PARAM_MASK_A);
    gwy_param_table_data_id_set_filter(param_table, PARAM_MASK_A, mask_filter, field, NULL);

    gwy_param_table_append_image_id(param_table, PARAM_MASK_B);
    gwy_param_table_data_id_set_filter(param_table, PARAM_MASK_B, mask_filter, field, NULL);

    gwy_param_table_append_lawn_curve(param_table, PARAM_ABSCISSA, args->lawn);
    gwy_param_table_add_enabler(param_table, PARAM_ENABLE_ABSCISSA, PARAM_ABSCISSA);
    gwy_param_table_append_checkbox(param_table, PARAM_OUTLIERS);
    gwy_param_table_append_slider(param_table, PARAM_N);
    gwy_param_table_append_slider(param_table, PARAM_SUBSAMPLING);
    gwy_param_table_append_slider(param_table, PARAM_THRESHOLD);
    gwy_param_table_append_seed(param_table, PARAM_SEED);

    gwy_param_table_append_info(param_table, INFO_RESULT, _("Result:"));

    gwy_param_table_append_combo(param_table, PARAM_DISPLAY);

    gtk_box_pack_start(GTK_BOX(hbox), gwy_param_table_widget(param_table), TRUE, TRUE, 0);
    gwy_dialog_add_param_table(dialog, param_table);

    /* Table of criteria; the headers are repeated for the left and the right column group. */
    hbox = gwy_hbox_new(20);
    gwy_dialog_add_content(GWY_DIALOG(gui.dialog), hbox, TRUE, TRUE, 4);

    table = gtk_table_new(12, 12, FALSE);
    gtk_box_pack_start(GTK_BOX(hbox), table, TRUE, TRUE, 0);

    label = gwy_label_new_header(_("Curve"));
    gtk_misc_set_alignment(GTK_MISC(label), 0.0, 0.5);
    gtk_table_attach(GTK_TABLE(table), label, 1, 2, row, row+1,
                     GTK_FILL, 0, 0, 0);

    label = gwy_label_new_header(_("Segment"));
    gtk_misc_set_alignment(GTK_MISC(label), 0.0, 0.5);
    gtk_table_attach(GTK_TABLE(table), label, 2, 4, row, row+1,
                     GTK_FILL, 0, 0, 0);

    label = gwy_label_new_header(_("Quantity"));
    gtk_misc_set_alignment(GTK_MISC(label), 0.0, 0.5);
    gtk_table_attach(GTK_TABLE(table), label, 4, 5, row, row+1,
                     GTK_FILL, 0, 0, 0);

    gtk_table_set_col_spacing(GTK_TABLE(table), 5, 20);

    label = gwy_label_new_header(_("Curve"));
    gtk_misc_set_alignment(GTK_MISC(label), 0.0, 0.5);
    gtk_table_attach(GTK_TABLE(table), label, 7, 8, row, row+1,
                     GTK_FILL, 0, 0, 0);

    label = gwy_label_new_header(_("Segment"));
    gtk_misc_set_alignment(GTK_MISC(label), 0.0, 0.5);
    gtk_table_attach(GTK_TABLE(table), label, 8, 10, row, row+1,
                     GTK_FILL, 0, 0, 0);

    label = gwy_label_new_header(_("Quantity"));
    gtk_misc_set_alignment(GTK_MISC(label), 0.0, 0.5);
    gtk_table_attach(GTK_TABLE(table), label, 10, 11, row, row+1,
                     GTK_FILL, 0, 0, 0);
    row++;

    /* Zero-filled so that segment choosers which are never created (no segments) are NULL, not garbage. */
    gui.combo_curve = g_new0(GtkWidget*, NCRITERIA);
    gui.combo_segment = g_new0(GtkWidget*, NCRITERIA);
    gui.combo_quantity = g_new0(GtkWidget*, NCRITERIA);
    gui.toggle_use = g_new0(GtkWidget*, NCRITERIA);
    gui.toggle_segment_enable = g_new0(GtkWidget*, NCRITERIA);

    for (i = 0; i < NCRITERIA; i++) {
        /* The second half of the criteria goes to the right column group, starting again below the header. */
        if (i == NCRITERIA/2) {
            vshift = 6;
            row = 1;
        }
        /* Every widget carries its criterion index as "id" so the shared handlers know which row changed. */
        gui.toggle_use[i] = toggle = gtk_check_button_new();
        g_object_set_data(G_OBJECT(toggle), "id", GUINT_TO_POINTER(i));
        gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle),
                                     gwy_params_get_boolean(params, PARAM_USE + i));
        g_signal_connect(toggle, "toggled",
                         G_CALLBACK(use_changed), &gui);
        gtk_table_attach(GTK_TABLE(table), toggle,
                         vshift, vshift+1, row, row+1, GTK_FILL, 0, 0, 0);

        gui.combo_curve[i] = combo = gwy_combo_box_lawn_curve_new(G_CALLBACK(curve_changed), &gui,
                                                                  args->lawn,
                                                                  gwy_params_get_int(params,
                                                                                     PARAM_CURVE + i));
        g_object_set_data(G_OBJECT(combo), "id", GUINT_TO_POINTER(i));
        gtk_table_attach(GTK_TABLE(table), combo,
                         vshift+1, vshift+2, row, row+1, GTK_FILL, 0, 0, 0);

        gui.toggle_segment_enable[i] = toggle = gtk_check_button_new();
        g_object_set_data(G_OBJECT(toggle), "id", GUINT_TO_POINTER(i));
        gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle),
                                     gwy_params_get_boolean(params, PARAM_ENABLE_SEGMENT + i));
        g_signal_connect(toggle, "toggled",
                         G_CALLBACK(enable_segment_changed), &gui);
        gtk_table_attach(GTK_TABLE(table), toggle,
                         vshift+2, vshift+3, row, row+1, GTK_FILL, 0, 0, 0);

        if (args->nsegments) {
            gui.combo_segment[i] = combo = gwy_combo_box_lawn_segment_new(G_CALLBACK(segment_changed), &gui,
                                                                          args->lawn,
                                                                          gwy_params_get_int(params,
                                                                                             PARAM_SEGMENT + i));
            g_object_set_data(G_OBJECT(combo), "id", GUINT_TO_POINTER(i));
            gtk_table_attach(GTK_TABLE(table), combo,
                             vshift+3, vshift+4, row, row+1, GTK_FILL, 0, 0, 0);
        }

        gui.combo_quantity[i] = combo = gwy_enum_combo_box_new(functions, G_N_ELEMENTS(functions),
                                                               G_CALLBACK(quantity_changed), &gui,
                                                               gwy_params_get_enum(params, PARAM_QUANTITY + i), TRUE);
        g_object_set_data(G_OBJECT(combo), "id", GUINT_TO_POINTER(i));
        gtk_table_attach(GTK_TABLE(table), combo,
                         vshift+4, vshift+5, row, row+1, GTK_FILL, 0, 0, 0);
        row++;
    }
    g_signal_connect_swapped(gui.table_options, "param-changed", G_CALLBACK(param_changed), &gui);
    g_signal_connect_after(dialog, "response", G_CALLBACK(dialog_response), &gui);
    gwy_dialog_set_preview_func(dialog, GWY_PREVIEW_UPON_REQUEST, preview, &gui, NULL);

    outcome = gwy_dialog_run(dialog);

    g_object_unref(gui.data);

    /* Only the pointer arrays are released; the widgets belong to the dialog.  The handlers cannot validly run
     * after this function returns anyway because gui itself is on the stack. */
    g_free(gui.combo_curve);
    g_free(gui.combo_segment);
    g_free(gui.combo_quantity);
    g_free(gui.toggle_use);
    g_free(gui.toggle_segment_enable);

    return outcome;
}

/*
 * "response" handler of the main dialog.
 *
 * Only handles RESPONSE_SHOW_TREE: opens a modal window with the text produced by create_report() in a scrolled
 * label.  The window destroys itself on any response.  Other responses are ignored here.
 */
static void
dialog_response(GwyDialog *dialog, gint response, ModuleGUI *gui)
{
    GtkWidget *mdialog, *label, *swindow;
    gchar *report;

    if (response != RESPONSE_SHOW_TREE)
        return;

    mdialog = gtk_dialog_new_with_buttons(_("Classifier tree"),
                                          GTK_WINDOW(dialog),
                                          GTK_DIALOG_MODAL | GTK_DIALOG_DESTROY_WITH_PARENT,
                                          /* XXX: Use a stock OK button! */
                                          _("_OK"),
                                          GTK_RESPONSE_NONE,
                                          NULL);

    swindow = gtk_scrolled_window_new(NULL, NULL);
    gtk_widget_set_size_request(swindow, 400, 400);
    gtk_container_add(GTK_CONTAINER(gtk_dialog_get_content_area(GTK_DIALOG(mdialog))), swindow);

    /* The label keeps its own copy of the text, so the report string must be released here. */
    report = create_report(gui);
    label = gtk_label_new(report);
    g_free(report);

    gtk_scrolled_window_add_with_viewport(GTK_SCROLLED_WINDOW(swindow), label);

    g_signal_connect_swapped(mdialog,
                             "response",
                             G_CALLBACK(gtk_widget_destroy),
                             mdialog);

    gtk_widget_show_all(mdialog);
}

/* "toggled" handler of the Use check boxes: stores the state to PARAM_USE + row and notifies the param table. */
static void
use_changed(GtkToggleButton *check, ModuleGUI *gui)
{
    guint rowno = GPOINTER_TO_UINT(g_object_get_data(G_OBJECT(check), "id"));
    gint param_id = PARAM_USE + rowno;

    gwy_params_set_boolean(gui->args->params, param_id, gtk_toggle_button_get_active(check));
    gwy_param_table_param_changed(gui->table_options, param_id);
}

/* Handler of the curve choosers: stores the active item to PARAM_CURVE + row and notifies the param table. */
static void
curve_changed(GtkComboBox *combo, ModuleGUI *gui)
{
    guint rowno = GPOINTER_TO_UINT(g_object_get_data(G_OBJECT(combo), "id"));
    gint param_id = PARAM_CURVE + rowno;

    gwy_params_set_curve(gui->args->params, param_id, gtk_combo_box_get_active(combo));
    gwy_param_table_param_changed(gui->table_options, param_id);
}
/* Handler of the segment choosers: stores the active item to PARAM_SEGMENT + row and notifies the param table. */
static void
segment_changed(GtkComboBox *combo, ModuleGUI *gui)
{
    guint rowno = GPOINTER_TO_UINT(g_object_get_data(G_OBJECT(combo), "id"));
    gint param_id = PARAM_SEGMENT + rowno;

    gwy_params_set_int(gui->args->params, param_id, gtk_combo_box_get_active(combo));
    gwy_param_table_param_changed(gui->table_options, param_id);
}
/*
 * Handler of the quantity choosers: stores the selected quantity to PARAM_QUANTITY + row and notifies the param
 * table.
 *
 * The combo is an enum combo box, so the selected enum value is read with gwy_enum_combo_box_get_active().  The row
 * index given by gtk_combo_box_get_active() equals the enum value only as long as the items happen to be listed in
 * the order of their values.
 */
static void
quantity_changed(GtkComboBox *combo, ModuleGUI *gui)
{
    guint i = GPOINTER_TO_UINT(g_object_get_data(G_OBJECT(combo), "id"));

    gwy_params_set_enum(gui->args->params, PARAM_QUANTITY + i, gwy_enum_combo_box_get_active(combo));
    gwy_param_table_param_changed(gui->table_options, PARAM_QUANTITY + i);
}
/* "toggled" handler of the segment enablers: stores the state to PARAM_ENABLE_SEGMENT + row and notifies the
 * param table. */
static void
enable_segment_changed(GtkToggleButton *check, ModuleGUI *gui)
{
    guint rowno = GPOINTER_TO_UINT(g_object_get_data(G_OBJECT(check), "id"));
    gint param_id = PARAM_ENABLE_SEGMENT + rowno;

    gwy_params_set_boolean(gui->args->params, param_id, gtk_toggle_button_get_active(check));
    gwy_param_table_param_changed(gui->table_options, param_id);
}

/*
 * Reacts to a change of parameter @id; a negative @id means everything may have changed.
 *
 * Re-reads the masks, updates sensitivity of the criterion rows and switches the field shown over the preview.
 * Any change except that of PARAM_DISPLAY also clears both results, resets the result info text, disables the
 * "Show tree" button and invalidates the dialog.
 */
static void
param_changed(ModuleGUI *gui, gint id)
{
    ModuleArgs *args = gui->args;
    GwyParams *params = args->params;
    const gboolean everything = (id < 0);
    const gboolean display_only = (id == PARAM_DISPLAY);
    gboolean row_toggled;
    gint k;

    if (everything || id == PARAM_MASK_A)
        args->mask_a = gwy_params_get_mask(params, PARAM_MASK_A);

    if (everything || id == PARAM_MASK_B)
        args->mask_b = gwy_params_get_mask(params, PARAM_MASK_B);

    row_toggled = ((id >= PARAM_ENABLE_SEGMENT && id < (PARAM_ENABLE_SEGMENT + NCRITERIA))
                   || (id >= PARAM_USE && id < (PARAM_USE + NCRITERIA)));
    if (everything || row_toggled) {
        for (k = 0; k < NCRITERIA; k++) {
            gboolean used = gwy_params_get_boolean(params, PARAM_USE + k);

            /* Segment choosers exist only when the curve map has segments. */
            if (args->nsegments) {
                gboolean segmented = used && gwy_params_get_boolean(params, PARAM_ENABLE_SEGMENT + k);

                gtk_widget_set_sensitive(gui->combo_segment[k], segmented);
            }
            gtk_widget_set_sensitive(gui->combo_curve[k], used);
            gtk_widget_set_sensitive(gui->combo_quantity[k], used);
            gtk_widget_set_sensitive(gui->toggle_segment_enable[k], used);
        }
    }

    if (!display_only) {
        gwy_data_field_clear(args->result_a);
        gwy_data_field_clear(args->result_b);
        gwy_param_table_info_set_valuestr(gui->table_options, INFO_RESULT, _("No result yet"));
        gtk_dialog_set_response_sensitive(GTK_DIALOG(gui->dialog), RESPONSE_SHOW_TREE, FALSE);
    }

    if (everything || display_only || id == PARAM_MASK_A || id == PARAM_MASK_B) {
        GwyDataField *shown = NULL;

        switch ((ClassifyDisplayType)gwy_params_get_enum(params, PARAM_DISPLAY)) {
            case CLASSIFY_DISPLAY_MASK_A:
            shown = args->mask_a;
            break;

            case CLASSIFY_DISPLAY_MASK_B:
            shown = args->mask_b;
            break;

            case CLASSIFY_DISPLAY_RESULT_A:
            shown = args->result_a;
            break;

            case CLASSIFY_DISPLAY_RESULT_B:
            shown = args->result_b;
            break;

            default:
            break;
        }
        /* When the selected field does not exist the previously shown one is kept. */
        if (shown)
            gwy_container_set_object_by_name(gui->data, "/0/mask", shown);
    }

    if (!display_only)
        gwy_dialog_invalidate(GWY_DIALOG(gui->dialog));
}

/* Preview function of the dialog: runs the classification, announces that both result fields changed and tells
 * the dialog a result is available. */
static void
preview(gpointer user_data)
{
    ModuleGUI *gui = (ModuleGUI*)user_data;
    GwyDataField *field_a = gui->args->result_a;
    GwyDataField *field_b = gui->args->result_b;

    run_classification(gui);
    gwy_data_field_data_changed(field_a);
    gwy_data_field_data_changed(field_b);
    gwy_dialog_have_result(GWY_DIALOG(gui->dialog));
}

/* Returns the difference between the maximum and the minimum value of @dataline. */
static gdouble
get_data_line_range(GwyDataLine *dataline)
{
    gdouble lowest, highest;

    gwy_data_line_get_min_max(dataline, &lowest, &highest);

    return highest - lowest;
}

/*
static gdouble
gwy_data_line_get_Rt_destructive(GwyDataLine *dline)
{
    gwy_data_line_add(dline, -gwy_data_line_get_avg(dline));
    return gwy_data_line_get_xtm(dline, 1, 1);
}

static gdouble
gwy_data_line_get_Rz_destructive(GwyDataLine *dline)
{
    gwy_data_line_add(dline, -gwy_data_line_get_avg(dline));
    return gwy_data_line_get_xtm(dline, 5, 1);
}
*/

/*
 * Subtracts the mean value from @dataline, modifying it in place, and returns gwy_data_line_get_xtm() of the
 * result with arguments 1, 1.
 *
 * FIXME: not yet using abscissa, just for reference.
 */
static gdouble
get_data_line_Rt(GwyDataLine *dataline)
{
    gdouble mean = gwy_data_line_get_avg(dataline);

    gwy_data_line_add(dataline, -mean);

    return gwy_data_line_get_xtm(dataline, 1, 1);
}

/*
 * Subtracts the mean value from @dataline, modifying it in place, and returns gwy_data_line_get_xtm() of the
 * result with arguments 5, 1.
 *
 * FIXME: not yet using abscissa, just for reference.
 */
static gdouble
get_data_line_Rz(GwyDataLine *dataline)
{
    gdouble mean = gwy_data_line_get_avg(dataline);

    gwy_data_line_add(dataline, -mean);

    return gwy_data_line_get_xtm(dataline, 5, 1);
}

/*
 * Returns the second coefficient reported by gwy_data_line_get_line_coeffs() for @dataline, multiplied by the
 * resolution and divided by the real size of the line.
 *
 * FIXME: not yet using abscissa, just for reference.
 */
static gdouble
get_data_line_slope(GwyDataLine *dataline)
{
    gdouble coeff;

    gwy_data_line_get_line_coeffs(dataline, NULL, &coeff);
    coeff *= dataline->res;

    return coeff/dataline->real;
}

/*
 * Returns the abscissa value at the position of the (first) maximum of @dataline.
 *
 * The running maximum is tracked by index, so the comparison and the stored maximum always refer to the same
 * sample and nothing past the end of the data is read.  Starting from sample 0 also makes all-negative data work.
 *
 * Assumes @abscissa has at least as many samples as @dataline -- TODO confirm against callers.
 */
static gdouble
get_max_pos_abscissa(GwyDataLine *dataline, GwyDataLine *abscissa)
{
    const gdouble *data, *adata;
    gint i, res, maxpos = 0;

    res = gwy_data_line_get_res(dataline);
    data = gwy_data_line_get_data(dataline);
    adata = gwy_data_line_get_data(abscissa);

    for (i = 1; i < res; i++) {
        if (data[i] > data[maxpos])
            maxpos = i;
    }

    return adata[maxpos];
}
/*
 * Returns the abscissa value at the position of the (first) minimum of @dataline.
 *
 * The running minimum is tracked by index, so the comparison and the stored minimum always refer to the same
 * sample and nothing past the end of the data is read.
 *
 * Assumes @abscissa has at least as many samples as @dataline -- TODO confirm against callers.
 */
static gdouble
get_min_pos_abscissa(GwyDataLine *dataline, GwyDataLine *abscissa)
{
    const gdouble *data, *adata;
    gint i, res, minpos = 0;

    res = gwy_data_line_get_res(dataline);
    data = gwy_data_line_get_data(dataline);
    adata = gwy_data_line_get_data(abscissa);

    for (i = 1; i < res; i++) {
        if (data[i] < data[minpos])
            minpos = i;
    }

    return adata[minpos];
}

/*
 * Describes the target of a rule branch as a newly allocated string.
 *
 * The codes -1 and -2 denote the leaves "Result A" and "Result B"; any other value is printed as the number of the
 * rule to continue with.  The caller owns the returned string.
 */
static gchar*
format_goto(gint n)
{
    switch (n) {
        case -1:
            return g_strdup("mark pixel as Result A");

        case -2:
            return g_strdup("mark pixel as Result B");

        default:
            return g_strdup_printf("go to Rule %d", n);
    }
}

/*
 * Formats the condition of rule number @rule of tree @ct as a newly allocated, human readable string.
 *
 * @curveno, @segment (negative means the whole curve) and @quantity describe the criterion the rule tests; the
 * threshold is printed using the value format derived from the SI unit of the curve.  The caller owns the returned
 * string.
 *
 * The value format is released before returning (previously the release call was placed after the return
 * statement and never executed).
 */
static gchar*
format_rule(GwyLawn *lawn, CTree *ct, gint rule, gint curveno, gint segment, gint quantity)
{
    gchar datastring[50];
    gchar *text;
    GwySIUnit *siunit = gwy_lawn_get_si_unit_curve(lawn, curveno);
    GwySIValueFormat *vf = gwy_si_unit_get_format(siunit,
                                                  GWY_SI_UNIT_FORMAT_VFMARKUP,
                                                  ct->rule_threshold[rule],
                                                  NULL);

    if (segment >= 0)
        g_snprintf(datastring, sizeof(datastring), "segment %s of %s",
                   gwy_lawn_get_segment_label(lawn, segment),
                   gwy_lawn_get_curve_label(lawn, curveno));
    else
        g_snprintf(datastring, sizeof(datastring), "%s",
                   gwy_lawn_get_curve_label(lawn, curveno));

    /* NOTE(review): @quantity is used directly as an index into quantities[], whereas find_quantity() looks entries
     * up by their .quantity member -- confirm the two numberings agree. */
    text = g_strdup_printf("if %s of %s\n     is bigger than %.*f %s",
                           quantities[quantity].name,
                           datastring,
                           vf->precision + 1,
                           ct->rule_threshold[rule]/vf->magnitude,
                           vf->units);

    gwy_si_unit_value_format_free(vf);

    return text;
}

/*
 * Builds a textual description of the first tree of the trained classifier.
 *
 * Returns a newly allocated string owned by the caller, or NULL when no classifier exists yet.
 *
 * Rules store the index of the criterion among the *enabled* criteria (that is how run_classification() fills the
 * criterion fields), so the index is mapped back to the parameter slot through PARAM_USE before the curve, segment
 * and quantity are looked up.  Temporary strings returned by format_rule() and format_goto() are freed here, and
 * the output buffer is only allocated once we know there is something to report.
 */
static gchar*
create_report(ModuleGUI *gui)
{
    ModuleArgs *args = gui->args;
    GwyParams *params = args->params;
    Classifier *cl = args->cl;
    GString *text;
    CTree *ct;
    gchar *rulestr, *highstr, *lowstr;
    gint critmap[NCRITERIA];
    gint i, n, crit, nused = 0;
    gint curveno, segment, quantity;
    gboolean segment_enabled;

    if (cl == NULL)
        return NULL;

    /* Map the position among enabled criteria to the criterion slot. */
    for (i = 0; i < NCRITERIA; i++) {
        if (gwy_params_get_boolean(params, PARAM_USE + i))
            critmap[nused++] = i;
    }

    text = g_string_new(NULL);
    ct = cl->ct;
    /* FIXME: this is wrong; do not iterate over n, the rules are stored in a different order. */
    for (n = 0; n < ct->nrules; n++) {
        crit = ct->rule_parameter[n];
        /* Indices outside the map (parameters changed since training) are used as they are. */
        if (crit >= 0 && crit < nused)
            crit = critmap[crit];

        g_string_append_printf(text, _("Rule %d of %d:\n"), n, ct->nrules);
        curveno = gwy_params_get_int(params, PARAM_CURVE + crit);
        segment_enabled = gwy_params_get_boolean(params, PARAM_ENABLE_SEGMENT + crit);
        /* Same condition as in run_classification(): segments only apply when the lawn has any. */
        segment = (args->nsegments && segment_enabled) ? gwy_params_get_int(params, PARAM_SEGMENT + crit) : -1;
        quantity = gwy_params_get_enum(params, PARAM_QUANTITY + crit);

        rulestr = format_rule(args->lawn, ct, n, curveno, segment, quantity);
        highstr = format_goto(ct->rule_goto_high[n]);
        lowstr = format_goto(ct->rule_goto_low[n]);

        g_string_append_printf(text, "%s\n", rulestr);
        g_string_append_printf(text, "     %s\n", highstr);
        g_string_append_printf(text, "else %s\n", lowstr);

        g_free(rulestr);
        g_free(highstr);
        g_free(lowstr);
    }

    return g_string_free(text, FALSE);
}

/*
 * Copies curve @curveno of lawn pixel (@col, @row) into @target.
 *
 * When @segment is non-negative only that segment of the curve is copied, using the segment boundaries stored for
 * the pixel.  @target is resized to the number of copied samples and its real size is set to the same number.
 *
 * Returns the number of copied samples; zero means the curve (or the segment) is empty and @target was not touched.
 */
static gint
extract_data_line(GwyLawn *lawn, GwyDataLine *target,
                  gint col, gint row, gint curveno, gint segment)
{
    const gdouble *curve;
    const gint *bounds;
    gdouble *dest;
    gint start = 0, npts;

    curve = gwy_lawn_get_curve_data_const(lawn, col, row, curveno, &npts);
    if (npts == 0)
        return 0;

    if (segment >= 0) {
        /* Segment boundaries are stored as pairs (from, to). */
        bounds = gwy_lawn_get_segments(lawn, col, row, NULL);
        start = bounds[2*segment];
        npts = bounds[2*segment + 1] - start;
        if (npts == 0)
            return 0;
    }

    gwy_data_line_resample(target, npts, GWY_INTERPOLATION_NONE);
    dest = gwy_data_line_get_data(target);
    gwy_assign(dest, curve + start, npts);
    gwy_data_line_set_real(target, npts);

    return npts;
}

/*
 * Looks up the entry of the quantities[] table whose .quantity member equals @quantity.
 *
 * An unknown quantity is treated as a programming error (assertion failure).
 */
static const ClassifyQuantityInfo*
find_quantity(GwyLineStatQuantity quantity)
{
    const ClassifyQuantityInfo *info = quantities;
    const ClassifyQuantityInfo *end = quantities + G_N_ELEMENTS(quantities);

    for (; info < end; info++) {
        if (info->quantity == quantity)
            return info;
    }

    g_assert_not_reached();
    return NULL;
}

/* Placeholder for parameter sanitisation; it currently does nothing and @args is unused. */
static void
sanitise_params(G_GNUC_UNUSED ModuleArgs *args)
{
 //   GwyParams *params = args->params;
 //   GwyLawn *lawn = args->lawn;
}

/**  Classifier code; it is now the same as the one used for images and the two should be unified. **/

/*
 * Computes one data field per enabled criterion from the lawn curves, trains the classifier on them using the user
 * masks A and B and runs it, storing the outcome in args->result_a and args->result_b and a summary in the GUI.
 *
 * Notes on behaviour:
 * - With no enabled criterion there is nothing to classify and the function returns immediately.
 * - The abscissa is only meaningful for the minimum/maximum position quantities; any other quantity (or a pixel
 *   whose abscissa curve is empty) is evaluated with the plain line statistics function, so every pixel always
 *   receives a defined value.
 * - When training does not succeed the function returns without touching the result fields.
 * - The per-pixel loop is serial; OpenMP parallelisation of it is currently disabled.
 */
static void
run_classification(ModuleGUI *gui)
{
    GwyDataField *mask_a, *mask_b, *mask;
    GwyDataField **cldata;
    GwyDataLine *dline, *adline = NULL;
    ModuleArgs *args = gui->args;
    GwyParams *params = args->params;
    gint ncriteria = 0;
    gchar report[80];
    gdouble *data, res_a, res_b, res_total;
    LineStatFunc lsfunc = NULL;
    GwyLawn *lawn = args->lawn;
    gint xres = gwy_lawn_get_xres(lawn), yres = gwy_lawn_get_yres(lawn);
    gint npixels = xres*yres;
    gint i, n, k, col, row;
    gboolean outliers = gwy_params_get_boolean(params, PARAM_OUTLIERS);
    gboolean abscissa_enabled = gwy_params_get_boolean(params, PARAM_ENABLE_ABSCISSA);
    gint ntrees = gwy_params_get_int(params, PARAM_N);
    gdouble subsampling = gwy_params_get_double(params, PARAM_SUBSAMPLING);
    gdouble threshold = gwy_params_get_double(params, PARAM_THRESHOLD);
    gint abscissa = gwy_params_get_int(params, PARAM_ABSCISSA);
    gint seed = gwy_params_get_int(params, PARAM_SEED);
    gint curveno, segment;
    GwyLineStatQuantity quantity;
    gboolean segment_enabled, use_abscissa, trained;

    mask_a = args->mask_a;
    mask_b = args->mask_b;

    for (i = 0; i < NCRITERIA; i++) {
        if (gwy_params_get_boolean(params, PARAM_USE + i))
            ncriteria++;
    }

    if (CDEBUG)
        printf("filling the data for %d criteria\n", ncriteria);

    /* Everything below indexes cldata[0]; without any criterion there is nothing to do. */
    if (!ncriteria)
        return;

    /* Create the data sets, one field for each enabled criterion. */
    cldata = g_new(GwyDataField*, ncriteria);
    for (i = 0; i < ncriteria; i++)
        cldata[i] = gwy_data_field_new(xres, yres, xres, yres, FALSE);

    /* Scratch lines reused for all pixels; extract_data_line() resizes them as needed. */
    dline = gwy_data_line_new(1, 1.0, FALSE);
    if (abscissa_enabled)
        adline = gwy_data_line_new(1, 1.0, FALSE);

    n = 0;
    for (i = 0; i < NCRITERIA; i++) {
        if (!gwy_params_get_boolean(params, PARAM_USE + i))
            continue;

        curveno = gwy_params_get_int(params, PARAM_CURVE + i);
        segment_enabled = gwy_params_get_boolean(params, PARAM_ENABLE_SEGMENT + i);
        segment = (args->nsegments && segment_enabled) ? gwy_params_get_int(params, PARAM_SEGMENT + i) : -1;
        quantity = gwy_params_get_enum(params, PARAM_QUANTITY + i);
        lsfunc = find_quantity(quantity)->func;
        data = gwy_data_field_get_data(cldata[n]);
        /* Only the position quantities have an abscissa-based variant. */
        use_abscissa = (adline && (quantity == GWY_LINE_STAT_MINPOS || quantity == GWY_LINE_STAT_MAXPOS));

        for (k = 0; k < npixels; k++) {
            col = k % xres;
            row = k/xres;

            if (!extract_data_line(lawn, dline, col, row, curveno, segment)) {
                data[k] = 0;
                continue;
            }

            if (use_abscissa && extract_data_line(lawn, adline, col, row, abscissa, segment)) {
                if (quantity == GWY_LINE_STAT_MINPOS)
                    data[k] = get_min_pos_abscissa(dline, adline);
                else
                    data[k] = get_max_pos_abscissa(dline, adline);
            }
            else
                data[k] = lsfunc(dline);
        }
        n++;
    }

    g_object_unref(dline);
    if (adline)
        g_object_unref(adline);

    if (outliers) {
        /* Replace outliers (beyond 3.0 in the units of gwy_data_field_mask_outliers()) by Laplace interpolation. */
        mask = gwy_data_field_new_alike(cldata[0], FALSE);
        for (n = 0; n < ncriteria; n++) {
            gwy_data_field_mask_outliers(cldata[n], mask, 3.0);
            gwy_data_field_laplace_solve(cldata[n], mask, -1, 1.0);
        }
        gwy_object_unref(mask);
    }

    /* Train the classifier using the data set, mask_a and mask_b. */
    gwy_app_wait_start(GTK_WINDOW(gui->dialog), _("Training classifier..."));

    if (args->cl) {
        /* classifier_destroy() releases only the trees; the struct comes from classifier_init() and is freed here. */
        classifier_destroy(args->cl);
        g_free(args->cl);
    }
    args->cl = classifier_init(ntrees, threshold);
    trained = classifier_train_full(args->cl, cldata, ncriteria, subsampling, seed, mask_a, mask_b);
    gwy_app_wait_finish();
    if (!trained)
        goto cleanup;

    /* Run the classification, creating result_a and result_b when they do not exist yet. */
    if (!args->result_a)
        args->result_a = gwy_data_field_duplicate(mask_b);
    if (!args->result_b)
        args->result_b = gwy_data_field_duplicate(mask_a);

    classifier_run(args->cl, cldata, args->result_a, args->result_b);

    res_a = gwy_data_field_get_sum(args->result_a);
    res_b = gwy_data_field_get_sum(args->result_b);
    res_total = res_a + res_b;
    /* Avoid 0/0 when no pixel ended up in either class. */
    g_snprintf(report, sizeof(report), _("A: %d points, %2.1f %%, B: %d points, %2.1f %%"),
               (gint)res_a, res_total > 0.0 ? 100*res_a/res_total : 0.0,
               (gint)res_b, res_total > 0.0 ? 100*res_b/res_total : 0.0);
    gwy_param_table_info_set_valuestr(gui->table_options, INFO_RESULT, report);

    if (ntrees == 1)
        gtk_dialog_set_response_sensitive(GTK_DIALOG(gui->dialog), RESPONSE_SHOW_TREE, TRUE);

cleanup:
    if (CDEBUG)
        printf("freeing the data\n");
    for (i = 0; i < ncriteria; i++)
        gwy_object_unref(cldata[i]);
    g_free(cldata);
}

/*
 * Scores a classification against the user masks using the weighted Gini impurity.
 *
 * @mask_a, @mask_b:     user defined masks A and B (ground truth).
 * @result_a, @result_b: computed masks A and B.
 * @selection:           only pixels with non-zero selection are evaluated, or NULL to evaluate whole images (still
 *                       based on @mask_a and @mask_b only).
 * @a_purity, @b_purity: receive the Gini impurity of the pixels classified as A and as B; zero means the class
 *                       contains members of just one mask.
 * @sumsa, @sumsb:       receive the total weight of mask A and mask B pixels that were classified at all.
 * @swap:                receives 1 when more marked pixels landed in the opposite class than in their own, 0
 *                       otherwise.
 *
 * Returns the two impurities weighted by the number of marked pixels in each class.  When no marked pixel is
 * classified the weights are 0/0 and the returned value is not a number.
 */
static gdouble
get_score(GwyDataField *mask_a, GwyDataField *mask_b, GwyDataField *result_a, GwyDataField *result_b,
          GwyDataField *selection, gdouble *a_purity, gdouble *b_purity, gdouble *sumsa, gdouble *sumsb, gint *swap)
{
    gint npix = gwy_data_field_get_xres(mask_a)*gwy_data_field_get_yres(mask_a);
    const gdouble *ma = gwy_data_field_get_data(mask_a);
    const gdouble *mb = gwy_data_field_get_data(mask_b);
    const gdouble *ra = gwy_data_field_get_data(result_a);
    const gdouble *rb = gwy_data_field_get_data(result_b);
    const gdouble *sel = selection ? gwy_data_field_get_data(selection) : NULL;
    gdouble a_in_a = 0, b_in_a = 0, count_a = 0;
    gdouble a_in_b = 0, b_in_b = 0, count_b = 0;
    gdouble frac_a_in_a = 0, frac_b_in_a = 0, frac_a_in_b = 0, frac_b_in_b = 0;
    gdouble gini_a, gini_b, score;
    gint k;

    for (k = 0; k < npix; k++) {
        if (sel && !sel[k])
            continue;

        a_in_a += ma[k]*ra[k];
        b_in_a += mb[k]*ra[k];
        count_a += ma[k]*ra[k] + mb[k]*ra[k];
        a_in_b += ma[k]*rb[k];
        b_in_b += mb[k]*rb[k];
        count_b += ma[k]*rb[k] + mb[k]*rb[k];
    }

    /* Composition of each computed class; an empty class keeps zero fractions. */
    if ((a_in_a+b_in_a) > 0) {
        frac_a_in_a = a_in_a/(a_in_a + b_in_a);
        frac_b_in_a = b_in_a/(a_in_a + b_in_a);
    }
    if ((a_in_b+b_in_b) > 0) {
        frac_a_in_b = a_in_b/(a_in_b + b_in_b);
        frac_b_in_b = b_in_b/(a_in_b + b_in_b);
    }

    gini_a = frac_a_in_a*(1-frac_a_in_a) + frac_b_in_a*(1-frac_b_in_a);
    gini_b = frac_a_in_b*(1-frac_a_in_b) + frac_b_in_b*(1-frac_b_in_b);
    score = count_a*gini_a/(count_a+count_b) + count_b*gini_b/(count_a+count_b);

    *sumsa = a_in_a + a_in_b;
    *sumsb = b_in_a + b_in_b;
    *a_purity = gini_a;
    *b_purity = gini_b;

    if (CDEBUG > 2)
        printf(" pura %g purb %g score %g  sumaina %g  sumainb %g  sumbina %g"
               "  sumbinb %g  nma %g nmb %g    painb %g pbinb %g\n",
               gini_a, gini_b, score,
               a_in_a, a_in_b, b_in_a, b_in_b, count_a, count_b, frac_a_in_b, frac_b_in_b);

    /* The split is inverted when misplaced marked pixels outweigh the correctly placed ones. */
    *swap = ((b_in_a+a_in_b) > (a_in_a+b_in_b)) ? 1 : 0;

    return score;
}

/* Debugging helper: dumps all rules of tree @ct to standard output. */
static void
print_ct(CTree *ct)
{
    int rule = 0;

    printf("Printing tree, it has %d rules\n", ct->nrules);
    while (rule < ct->nrules) {
        printf("Node %d: ------------------------\n", rule);
        printf("if quantity %d is bigger than %g:\n", ct->rule_parameter[rule], ct->rule_threshold[rule]);
        printf("     goto %d\n", ct->rule_goto_high[rule]);
        printf("else goto %d\n", ct->rule_goto_low[rule]);
        printf("--------------------------------\n");
        rule++;
    }
}

/*
 * Finds the best splitting criterion and threshold value.
 *
 * Every criterion (field in @cldata) except @lastcrit is tried with thresholds stepping from its minimum towards
 * its maximum in steps of 1/20 of the range; the range is taken with @selection passed as the mask.  Each candidate
 * is evaluated as a one-rule tree (above threshold is class A, below is class B) and scored with get_score(); the
 * candidate with the lowest score wins.
 *
 * @lastcrit:            criterion that was used last time, to be skipped (a value matching no index skips nothing).
 * @threshold:           receives the threshold that splits most efficiently.
 * @a_purity, @b_purity: receive the impurities get_score() reported for sets A and B of the best split.
 * @gini:                receives the best (lowest) score.
 * @sumsa, @sumsb:       receive the mask sums get_score() reported for the best split.
 * @swap:                receives the swap flag get_score() reported for the best split.
 * @result_a, @result_b: scratch fields; on return they hold the classification by the best rule.
 *
 * Returns the index of the best criterion (field in @cldata) for splitting.
 *
 * NOTE(review): ct is a local tree of which only rule 0 is filled in, and its goto targets are assigned inside the
 * criterion loop.  If no criterion gets past the @lastcrit check the final ctree_run() uses unassigned targets --
 * confirm callers never request that.
 */
static gint
get_next_split(GwyDataField **cldata, gint ncriteria,
               GwyDataField *mask_a, GwyDataField *mask_b, GwyDataField *selection, gdouble *threshold,
               gdouble *a_purity, gdouble *b_purity, gdouble *gini, gdouble *sumsa, gdouble *sumsb,
               GwyDataField *result_a, GwyDataField *result_b,
               gint lastcrit, gint *swap)
{
   gint n, bestcrit, bestswap, bestswapfactor, myswap;
   gdouble bestscore, bestth, bestgini, bestthreshold, min, max, step, th, score, suma, sumb;
   gdouble apur, bpur, bestapur, bestbpur, bestapurity, bestbpurity, bestsuma, bestsumb, bestsumsa, bestsumsb;
   CTree ct;
   gint xres = gwy_data_field_get_xres(result_a);
   gint yres = gwy_data_field_get_yres(result_a);

   if (CDEBUG > 1)
       printf("Called get next split\n");

   //go through criteria (fields in cldata) one by one and all possible thresholds, searching for lowest gini impurity
   bestgini = 1;
   bestcrit = 0;
   bestthreshold = 0;
   bestapurity = 0;
   bestbpurity = 0;
   bestsumsa = 0;
   bestsumsb = 0;
   bestswapfactor = 0;

   for (n = 0; n < ncriteria; n++) {
       if (n == lastcrit)
           continue; //skip what was used for splitting last time

       ct.rule_parameter[0] = n;
       ct.rule_goto_high[0] = -1;  //bigger than threshold will be class A
       ct.rule_goto_low[0] = -2;   //lower than threshold will be class B

       min = gwy_data_field_area_get_min(cldata[ct.rule_parameter[0]], selection, 0, 0,
                                         xres, yres);
       max = gwy_data_field_area_get_max(cldata[ct.rule_parameter[0]], selection, 0, 0,
                                         xres, yres);
       if (CDEBUG > 1)
           printf("criterion %d min %g max %g\n", ct.rule_parameter[0], min, max);
       step = (max-min)/20;

       //best values found for this criterion only
       bestscore = 1;
       bestth = 0;
       bestapur = 0;
       bestbpur = 0;
       bestsuma = 0;
       bestsumb = 0;
       bestswap = 0;

       //a constant criterion cannot split anything
       if (fabs(min - max) < 1e-15 || step <= 0) {
           if (CDEBUG > 1)
              printf("min == max (%g), or step is zero, nothing to solve\n", max);
           continue;
       }

       for (th = min; th < max; th += step) {
           ct.rule_threshold[0] = th;
           ctree_run(&ct, cldata, result_a, result_b);

           if (CDEBUG > 2)
               printf("threshold %g %g %g %g ", th, min, max, step);

           //only here do we find out whether the criterion is inverted (reported in myswap)
           score = get_score(mask_a, mask_b, result_a, result_b, selection, &apur, &bpur, &suma, &sumb, &myswap);
           if (CDEBUG > 3)
               printf("n %d  th %g  score %g  apur %g  bpur %g  sums %g %g swap %d, min %g max %g step %g\n",
                      n, th, score, apur, bpur, suma, sumb, myswap, min, max, step);
           if (score < bestscore) {
               bestscore = score;
               bestth = th;
               bestapur = apur;
               bestbpur = bpur;
               bestsuma = suma;
               bestsumb = sumb;
               bestswap = myswap;
           }
       }
       if (CDEBUG)
           printf("best threshold for quantity n: %d  gini %g threshold %g  purities %g %g  sum %g %g\n",
                  n, bestscore, bestth, bestapur, bestbpur, bestsuma, bestsumb);

       //keep the best criterion over all criteria
       if (bestscore < bestgini) {
           bestgini = bestscore;
           bestcrit = n;
           bestthreshold = bestth;
           bestapurity = bestapur;
           bestbpurity = bestbpur;
           bestsumsa = bestsuma;
           bestsumsb = bestsumb;
           bestswapfactor = bestswap;
       }
   }
   if (CDEBUG)
       printf("Get branch result: criterion %d gini %g threshold %g  purities %g %g  sums %g %g\n",
              bestcrit, bestgini, bestthreshold, bestapurity, bestbpurity, bestsumsa, bestsumsb);

   //fill the results with mask of a and b - consider swap?
   ct.rule_parameter[0] = bestcrit;
   ct.rule_threshold[0] = bestthreshold;
   //if (!bestswapfactor)
       ctree_run(&ct, cldata, result_a, result_b);
   /*else
       ctree_run(&ct, cldata, result_b, result_a);
   */

   *threshold = bestthreshold;
   *gini = bestgini;
   *a_purity = bestapurity;
   *b_purity = bestbpurity;
   *sumsa = bestsumsa;
   *sumsb = bestsumsb;
   *swap = bestswapfactor;

   return bestcrit;
}

/*
 * Recursively grows tree @ct: finds the best split for the pixels in @selection (NULL means all pixels), stores it
 * as rule number *@n and, where the resulting classes are not pure enough, continues with sub-branches.
 *
 * @cldata, @ncriteria: criterion fields and their number.
 * @mask_a, @mask_b:    user defined masks used as ground truth.
 * @n:                  index of the rule to fill; it is advanced as sub-branches are added.
 * @lastcrit:           criterion used by the parent rule, excluded from this split (-1 for none).
 *
 * Returns 0 when a rule was stored at the index this call started with.  Otherwise no rule was created, because one
 * side of the split has no members, and the return value is the leaf code (-1 for class A, -2 for class B) the
 * parent should point to instead.
 *
 * All temporary fields are released on every return path, including the single-criterion shortcut.
 */
static gint
process_branch(CTree *ct, GwyDataField **cldata, GwyDataField *mask_a, GwyDataField *mask_b,
               gint ncriteria, gint *n, GwyDataField *selection, gint lastcrit)
{
    GwyDataField *result_a = gwy_data_field_new_alike(cldata[0], TRUE);
    GwyDataField *result_b = gwy_data_field_new_alike(cldata[0], TRUE);
    GwyDataField *sel_a = gwy_data_field_new_alike(cldata[0], TRUE);
    GwyDataField *sel_b = gwy_data_field_new_alike(cldata[0], TRUE);
    gdouble apur, bpur, gini, threshold, sumsa, sumsb;
    gint thisn = *n;
    gint nextn, ret, swap, retval = 0;
    gint goto_high, goto_low;

    if (CDEBUG)
        printf("Processing branch %d\n", thisn);

    /* The criterion used by the parent (lastcrit) is not considered for this split. */
    ct->rule_parameter[thisn] = get_next_split(cldata, ncriteria,
                                               mask_a, mask_b, selection, &threshold,
                                               &apur, &bpur, &gini, &sumsa, &sumsb,
                                               result_a, result_b, lastcrit, &swap);

    /* Results: class A means above the threshold, class B below it; result_a and result_b are the masks of
     * identified members of class A and B. */
    ct->rule_threshold[thisn] = threshold;
    if (CDEBUG)
        printf("(%d) sugggested rule for split: crit %d  threshold %g, purities %g %g  sums %g %g swap %d\n",
               thisn, ct->rule_parameter[thisn], ct->rule_threshold[thisn], apur, bpur, sumsa, sumsb, swap);

    if (swap) {
        /* The threshold has the opposite meaning compared to the ground truth masks: higher values are class B,
         * lower values class A.  The split point is fine, only the leaves are exchanged. */
        goto_high = -2;
        goto_low = -1;
    }
    else {
        goto_high = -1;
        goto_low = -2;
    }

    if (ncriteria == 1) {
        /* We have nothing else to split with; this rule is final. */
        ct->rule_goto_high[thisn] = goto_high;
        ct->rule_goto_low[thisn] = goto_low;
        ct->nrules++;

        if (CDEBUG)
            printf("There was just one criterium for splitting, stopping.\n.");
        retval = 0;
        goto cleanup;
    }

    if (sumsa == 0 || sumsb == 0) {
        /* One of the branches has no members, so report this and do not create a new rule. */
        if (sumsa >= sumsb)
            retval = goto_high;
        else
            retval = goto_low;
        if (CDEBUG)
            printf("Error: one branch does not have members, stop further branching and return %g (sums %g %g)\n",
                   (gdouble)retval, sumsa, sumsb);
    }
    else {
        /* Set up a new rule; A still means A, we only set the right goto direction. */
        if (CDEBUG)
            printf("Rule accepted and will be further developed\n");
        ct->nrules++;
        retval = 0;

        if (apur < PURCRIT || (*n >= (MAXBRANCHES-1))) {
            ct->rule_goto_high[thisn] = goto_high;

            if (CDEBUG)
                printf("(%d) step high: with branch A we are done (purity %g < %g, nbr %d >= %d), response is -1\n",
                       thisn, apur, PURCRIT,
                       *n, MAXBRANCHES-1);
        }
        else {
            *n += 1;
            nextn = *n;
            ct->rule_goto_high[thisn] = nextn;
            if (CDEBUG)
                printf("(%d) step high: go to next branch at index %d\n", thisn, ct->rule_goto_high[thisn]);

            /* Create the actual selection, combining the previous selection with the last result_a. */
            if (selection == NULL)
                gwy_data_field_copy(result_a, sel_a, FALSE);
            else
                gwy_data_field_multiply_fields(sel_a, selection, result_a);

            if (CDEBUG)
                printf("(%d) selection for next process %d has %g points\n",
                       thisn, nextn, gwy_data_field_get_sum(sel_a));
            if (CDEBUG)
                printf("now will process branch A with number %d\n", nextn);

            if ((ret = process_branch(ct, cldata,
                                      mask_a, mask_b, ncriteria, n, sel_a, ct->rule_parameter[thisn])) != 0) {
                if (CDEBUG)
                    printf("Branch could not be further developed, goto_high in this branch %d will be %d\n",
                           thisn, ret);
                /* The child created no rule: point straight to its leaf and give the index back. */
                ct->rule_goto_high[thisn] = ret;
                *n -= 1;
            }
        }

        if (bpur < PURCRIT || *n >= (MAXBRANCHES-1)) { /* TODO: make this adjustable */
            ct->rule_goto_low[thisn] = goto_low;

            if (CDEBUG)
                printf("(%d) step low: with branch B we are done (purity %g < %g), response is %d, sums %g %g\n",
                       thisn, bpur, PURCRIT, ct->rule_goto_low[thisn], sumsa, sumsb);
        }
        else {
            *n += 1;
            nextn = *n;
            ct->rule_goto_low[thisn] = nextn;
            if (CDEBUG)
                printf("(%d) step low: go to next branch at index %d\n", thisn, ct->rule_goto_low[thisn]);

            /* Create the actual selection, combining the previous selection with the last result_b. */
            if (selection == NULL)
                gwy_data_field_copy(result_b, sel_b, FALSE);
            else
                gwy_data_field_multiply_fields(sel_b, selection, result_b);

            if (CDEBUG)
                printf("(%d) selection for next process %d has %g points\n",
                       thisn, nextn, gwy_data_field_get_sum(sel_b));
            if (CDEBUG)
                printf("now will process branch B with number %d\n", nextn);

            if ((ret = process_branch(ct, cldata,
                                      mask_a, mask_b, ncriteria, n, sel_b, ct->rule_parameter[thisn])) != 0) {
                /* We could not branch further, stop here. */
                if (CDEBUG)
                    printf("Branch could not be further developed, goto_high in this branch %d will be %d\n",
                           thisn, ret);
                ct->rule_goto_low[thisn] = ret;
                *n -= 1;
            }
        }
    }

    if (CDEBUG)
        printf("End of processing branch %d\n", thisn);

cleanup:
    gwy_object_unref(result_a);
    gwy_object_unref(result_b);
    gwy_object_unref(sel_a);
    gwy_object_unref(sel_b);

    return retval;
}

/*
 * Trains a single tree @ct on the criterion fields @cldata, restricted to @selection, starting from rule 0 with no
 * criterion excluded.  The tree is dumped to standard output when debugging is enabled.
 */
static void
train_tree(CTree *ct, GwyDataField **cldata,
           gint ncriteria, GwyDataField *mask_a, GwyDataField *mask_b,
           GwyDataField *selection)
{
    gint rule_index = 0;

    process_branch(ct, cldata, mask_a, mask_b, ncriteria, &rule_index, selection, -1);

    if (!CDEBUG)
        return;

    print_ct(ct);
}

/*
 * Allocates a classifier with room for @ntrees trees and remembers the voting @threshold.
 *
 * The trees themselves are left uninitialised; they are filled in by training.
 */
static Classifier*
classifier_init(gint ntrees, gdouble threshold)
{
    Classifier *forest = g_new(Classifier, 1);

    forest->threshold = threshold;
    forest->ntrees = ntrees;
    forest->ct = g_new(CTree, ntrees);

    return forest;
}

/* Releases the array of trees owned by @cl.  The Classifier structure itself is not freed here. */
static void
classifier_destroy(Classifier *cl)
{
    CTree *trees = cl->ct;

    g_free(trees);
}

/*
 * Sets up the whole forest: trains every tree of @cl on its own random subset of pixels.
 *
 * For each tree a fraction @subsampling of all pixels (at least one pixel) is drawn without repetition using a
 * generator seeded with @seed, and the tree is trained on that selection.  Progress is reported through
 * gwy_app_wait_set_fraction().
 *
 * Returns TRUE when all trees were trained, FALSE when the progress call asked to stop.  The random generator and
 * the selection field are released in both cases.
 */
static gboolean
classifier_train_full(Classifier *cl, GwyDataField **cldata,
                      gint ncriteria, gdouble subsampling, gint seed,
                      GwyDataField *mask_a, GwyDataField *mask_b)
{
    CTree *ct;
    GwyDataField *selection = gwy_data_field_new_alike(cldata[0], FALSE);
    gint i, n, pos;
    gdouble *seldata;
    gint ntot = gwy_data_field_get_xres(mask_a)*gwy_data_field_get_yres(mask_a);
    /* Never ask for more distinct pixels than exist, otherwise the drawing loop could not terminate. */
    gdouble nwanted = MIN(subsampling*ntot, (gdouble)ntot);
    GRand *rng = g_rand_new();
    gboolean ok = TRUE;

    g_rand_set_seed(rng, seed);

    if (CDEBUG)
        printf("Classifier train started on %d data sets\n", ncriteria);

    for (n = 0; n < cl->ntrees; n++) {
        ct = cl->ct + n;
        ct->nrules = 0;

        gwy_data_field_clear(selection);
        seldata = gwy_data_field_get_data(selection);
        i = 0;
        do {
            pos = g_rand_int_range(rng, 0, ntot);
            if (!seldata[pos]) {
                seldata[pos] = 1;
                i++;
            }
        } while (i < nwanted);

        train_tree(ct, cldata, ncriteria, mask_a, mask_b, selection);
        if (!gwy_app_wait_set_fraction((gdouble)n/(gdouble)cl->ntrees)) {
            ok = FALSE;
            break;
        }
    }

    g_rand_free(rng);
    gwy_object_unref(selection);

    return ok;
}

/*
 * Runs a single tree on a single point (@xpos, @ypos) of the image.
 *
 * Starting at rule 0, the value of the rule's criterion field is compared with the rule threshold and the
 * corresponding branch is followed.  A negative branch target is a leaf and is returned as the result.  If no leaf
 * is reached within 1000 steps, -3 is returned.
 */
static gint
run_ct(CTree *ct, GwyDataField **cldata, gint xpos, gint ypos)
{
    gint rule = 0, step, target;
    gdouble value;

    for (step = 0; step < 1000; step++) {
        value = gwy_data_field_get_val(cldata[ct->rule_parameter[rule]], xpos, ypos);
        target = (value > ct->rule_threshold[rule]) ? ct->rule_goto_high[rule] : ct->rule_goto_low[rule];
        if (target < 0)
            return target;

        rule = target;
    }

    /* The tree run did not finish within the step limit. */
    return -3;
}

/*
 * Runs a single tree on the whole image.
 *
 * Pixels the tree assigns to class A (-1) get 1 in @result_a and 0 in @result_b, pixels assigned to class B (-2)
 * the opposite.  Pixels with any other outcome are left untouched in both fields.
 */
static void
ctree_run(CTree *ct, GwyDataField **cldata,
          GwyDataField *result_a, GwyDataField *result_b)
{
    gint ncols = gwy_data_field_get_xres(cldata[0]);
    gint nrows = gwy_data_field_get_yres(cldata[0]);
    gint col, row;

    for (col = 0; col < ncols; col++) {
        for (row = 0; row < nrows; row++) {
            switch (run_ct(ct, cldata, col, row)) {
                case -1:
                    gwy_data_field_set_val(result_a, col, row, 1);
                    gwy_data_field_set_val(result_b, col, row, 0);
                    break;

                case -2:
                    gwy_data_field_set_val(result_a, col, row, 0);
                    gwy_data_field_set_val(result_b, col, row, 1);
                    break;

                default:
                    break;
            }
        }
    }
}

/*
 * Runs the forest on the whole image.
 *
 * Every tree votes for class A or B in each pixel; the votes are summed in @result_a and @result_b and then
 * thresholded at cl->threshold times the number of trees, giving masks of zeros and ones.
 *
 * The result fields are cleared first so that their previous content (for instance an earlier classification) does
 * not count as votes, and the per-tree scratch fields are cleared before each tree because ctree_run() leaves
 * undecided pixels untouched.
 */
static void
classifier_run(Classifier *cl, GwyDataField **cldata,
               GwyDataField *result_a, GwyDataField *result_b)
{
    gint i;
    GwyDataField *res_a = gwy_data_field_new_alike(result_a, FALSE);
    GwyDataField *res_b = gwy_data_field_new_alike(result_b, FALSE);

    gwy_data_field_clear(result_a);
    gwy_data_field_clear(result_b);

    for (i = 0; i < cl->ntrees; i++) {
        gwy_data_field_clear(res_a);
        gwy_data_field_clear(res_b);
        ctree_run(cl->ct + i, cldata, res_a, res_b);
        gwy_data_field_sum_fields(result_a, result_a, res_a);
        gwy_data_field_sum_fields(result_b, result_b, res_b);
    }

    gwy_data_field_threshold(result_a, cl->threshold*cl->ntrees, 0, 1);
    gwy_data_field_threshold(result_b, cl->threshold*cl->ntrees, 0, 1);

    gwy_object_unref(res_a);
    gwy_object_unref(res_b);
}

/**  end of classifier code**/


/* vim: set cin columns=120 tw=118 et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
