/*
 *  $Id: accurell.c 28808 2025-11-05 18:26:20Z yeti-dn $
 *  Copyright (C) 2025 David Necas (Yeti).
 *  E-mail: yeti@gwyddion.net.
 *
 *  This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along with this program; if not, write to the
 *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include "config.h"
#include <glib/gi18n-lib.h>
#include <math.h>
#include <string.h>
#include <gwy.h>

#include "err.h"

/* Per-column classification flags stored in the is_interesting array of AccurionFile. */
enum {
    /* The scan in find_interesting_channels() found at least two different finite values in the column. */
    IS_INTERESTING = 1u << 0,
    /* The scan in find_interesting_channels() found at least one finite value in the column. */
    HAS_ANY_DATA   = 1u << 1,
};

typedef struct {
    gchar **headers;
    const gchar **unitstrs;
    GwyUnit **units;
    gdouble *q;
    gboolean *is_interesting;
    gdouble *const_values;
    gdouble *data;
    gint ncols;
    gint npoints;
    gint xcol;
    gint ycol;
} AccurionFile;

/* Forward declarations of the functions defined in this file. */
static gboolean     module_register          (void);
static gint         detect_file              (const GwyFileDetectInfo *fileinfo,
                                              gboolean only_name);
static GwyFile*     load_file                (const gchar *filename,
                                              GwyRunModeFlags mode,
                                              GError **error);
static const gchar* parse_header             (const gchar *buffer,
                                              gsize size,
                                              AccurionFile *acfile,
                                              GError **error);
static gint         find_interesting_channels(AccurionFile *acfile);
static void         acfile_free              (AccurionFile *acfile);

/* Module description: ABI version, registration function, blurb, author, version, copyright holder and year. */
static GwyModuleInfo module_info = {
    GWY_MODULE_ABI_VERSION,
    &module_register,
    N_("Imports ASCII Accurion scanning ellipsometry maps."),
    "Yeti <yeti@gwyddion.net>",
    "2.0",
    "David Nečas (Yeti)",
    "2025",
};

/* Makes module_info available to the module loader under the name accurell. */
GWY_MODULE_QUERY2(module_info, accurell)

/*
 * Registers the file type "accurell" with its detection and loading functions.
 *
 * The two trailing NULL arguments mean no further file functions are provided (NOTE(review): presumably these are the
 * save and export slots -- confirm against gwy_file_func_register()).
 *
 * Always returns TRUE.
 */
static gboolean
module_register(void)
{
    gwy_file_func_register("accurell", N_("Accurion exported ellipsometry data (.txt)"),
                           detect_file, load_file,
                           NULL, NULL);
    return TRUE;
}

/*
 * Estimates how likely a file is an Accurion ellipsometry text export.
 *
 * The first up to 1024 bytes of the file head are searched for slashes. Each slash which directly follows one of the
 * known column names, itself preceded by a tab, counts as good; any other slash counts as bad. The score starts at
 * zero and each good slash in excess of the bad ones halves the remaining distance to 100.
 *
 * @fileinfo: File information with the head buffer to inspect.
 * @only_name: TRUE when only the file name may be used for the guess.
 *
 * Returns the detection score; zero when only the name is available or the head is shorter than 30 bytes.
 */
static gint
detect_file(const GwyFileDetectInfo *fileinfo, gboolean only_name)
{
    enum { NHEADERS = 7 };
    static const gchar *expected_headers[NHEADERS] = {
        "Polarizer", "Analyzer", "X-Stage", "Y-Stage", "Z-Stage", "Air-thickness", "NRMSE-thickness",
    };
    static const guint header_lengths[NHEADERS] = { 9, 8, 7, 7, 7, 13, 15 };
    const guchar *slash;
    guint i, tosearch, ngood = 0, nbad = 0;
    gint score = 0;

    /* Name-only detection is a legitimate request, not a programming error, so it must neither log a critical
     * message nor depend on a check which is compiled out with G_DISABLE_CHECKS. We just cannot tell anything from
     * the name. */
    if (only_name)
        return 0;

    if (fileinfo->buffer_len < 30)
        return 0;

    slash = fileinfo->head;
    tosearch = MIN(fileinfo->buffer_len, 1024);
    /* The search always continues one byte after the previous position, so the very first byte is never examined. */
    while ((slash = memchr(slash+1, '/', tosearch - (slash+1 - fileinfo->head)))) {
        for (i = 0; i < NHEADERS; i++) {
            guint len = header_lengths[i];
            if (slash - fileinfo->head < len)
                continue;
            /* Look backward from the slash for a header name. The head buffer holds unsigned bytes, so compare raw
             * memory; the expected names contain no NUL within len bytes, so the result is the same as for a string
             * comparison. */
            if (memcmp(slash - len, expected_headers[i], len))
                continue;
            /* The name must be preceded by a tab. A name starting at the very first byte of the buffer is not
             * accepted either; testing for it first also keeps the read of the preceding byte inside the buffer. */
            if (slash - len == fileinfo->head || *(slash - len - 1) != '\t')
                continue;
            ngood++;
            break;
        }
        if (i == NHEADERS)
            nbad++;
        /* Do not waste time if the file starts with a bunch of slashes. */
        if (nbad >= NHEADERS)
            break;
    }

    /* Only good slashes in excess of the bad ones improve the score. */
    for (i = nbad; i < ngood; i++)
        score = score + (100 - score)/2;

    return score;
}

/* Tells whether x is a usable number, i.e. neither NaN nor infinite. */
static inline gboolean
value_is_normal(gdouble x)
{
    if (gwy_isnan(x) || gwy_isinf(x))
        return FALSE;

    return TRUE;
}

/*
 * Loads an Accurion ASCII export as a set of XYZ surfaces.
 *
 * The first line is parsed as tab-separated column headers by parse_header() and the rest of the file as a table of
 * numbers. Each column marked IS_INTERESTING by find_interesting_channels() becomes one XYZ surface, with the X-Stage
 * and Y-Stage columns providing the coordinates. Columns which have HAS_ANY_DATA set but not IS_INTERESTING are
 * stored as metadata strings, and a copy of the metadata is attached to every surface.
 *
 * @filename: Name of the file to load.
 * @mode: Run mode (unused).
 * @error: Location where the err_*() helpers report failures.
 *
 * Returns the new file, or NULL when the file cannot be read or parsed or contains no interesting column.
 */
static GwyFile*
load_file(const gchar *filename,
          G_GNUC_UNUSED GwyRunModeFlags mode,
          GError **error)
{
    GwyFile *file = NULL;
    GwyContainer *meta = NULL;
    AccurionFile acfile;
    GwySurface *surface;
    gint power10;
    guchar *buffer = NULL;
    const guchar *data_start;
    gsize size;
    GError *err = NULL;
    GwyXYZ *xyz;
    gchar *s, *u;
    gint i, j, k, ninteresting;

    if (!gwy_file_get_contents(filename, &buffer, &size, &err)) {
        err_GET_FILE_CONTENTS(error, &err);
        return NULL;
    }

    gwy_clear(&acfile, 1);
    /* When parse_header() fails it has already released the headers and nothing else in acfile is allocated yet, so
     * only the file contents need to be given up here. */
    if (!(data_start = parse_header(buffer, size, &acfile, error))) {
        gwy_file_abandon_contents(buffer, size, NULL);
        return NULL;
    }

    /* NOTE(review): presumably -1 asks gwy_parse_doubles() to determine the number of rows itself, whereas the
     * number of columns is prescribed by the header -- confirm against the function documentation. */
    acfile.npoints = -1;
    if (!(acfile.data = gwy_parse_doubles(data_start, NULL, GWY_PARSE_DOUBLES_COMPLETELY,
                                          &acfile.npoints, &acfile.ncols, NULL, &err))) {
        err_PARSE_DOUBLES(error, &err);
        goto end;
    }

    ninteresting = find_interesting_channels(&acfile);
    if (!ninteresting) {
        err_NO_DATA(error);
        goto end;
    }

    acfile.q = g_new(gdouble, acfile.ncols);
    acfile.units = g_new0(GwyUnit*, acfile.ncols);
    acfile.unitstrs = g_new0(const gchar*, acfile.ncols);
    /* Split each header in place at the first slash to a name and a unit string. The unit string keeps pointing into
     * the header; columns without a slash get an empty unit and multiplier 1. */
    for (j = 0; j < acfile.ncols; j++) {
        gchar *slash;

        acfile.q[j] = 1.0;
        if ((slash = strchr(acfile.headers[j], '/'))) {
            *slash = '\0';
            acfile.unitstrs[j] = slash + 1;
            acfile.units[j] = gwy_unit_new_parse(acfile.unitstrs[j], &power10);
            acfile.q[j] = gwy_exp10(power10);
        }
        else {
            acfile.units[j] = gwy_unit_new(NULL);
        }
    }
    /* Surfaces have a single lateral unit; the one of the X column is used below. */
    if (!gwy_unit_equal(acfile.units[acfile.xcol], acfile.units[acfile.ycol])) {
        g_warning("X and Y units differ, using X");
    }

    /* Gather columns which have some data but were not marked interesting as metadata, formatted as the first finite
     * value found in the column followed by the unit string, if any. The coordinate columns have their flags cleared
     * so they are skipped. */
    for (j = 0; j < acfile.ncols; j++) {
        if (!(acfile.is_interesting[j] & HAS_ANY_DATA) || (acfile.is_interesting[j] & IS_INTERESTING))
            continue;

        if (!meta)
            meta = gwy_container_new();

        u = (acfile.unitstrs[j] && *acfile.unitstrs[j]
             ? gwy_convert_to_utf8(acfile.unitstrs[j], -1, "ISO-8859-1")
             : NULL);
        s = g_strdup_printf("%g%s%s",
                            acfile.const_values[j],
                            u ? " " : "",
                            u ? u : "");
        g_free(u);
        /* NOTE(review): s is not freed here; presumably the container takes ownership of it -- confirm. */
        gwy_container_set_string_by_name(meta, acfile.headers[j], s);
    }

    file = gwy_file_new_in_construction();
    /* Create one XYZ surface for each interesting column, using the column index as the data id. */
    for (j = 0; j < acfile.ncols; j++) {
        gdouble qx = acfile.q[acfile.xcol], qy = acfile.q[acfile.ycol], qz = acfile.q[j];
        if (j == acfile.xcol || j == acfile.ycol || !(acfile.is_interesting[j] & IS_INTERESTING))
            continue;

        surface = gwy_surface_new_sized(acfile.npoints);
        xyz = gwy_surface_get_data(surface);
        /* Keep only points with all three values finite. Index k counts the kept points; a rejected point is
         * simply overwritten by the next one. */
        for (i = k = 0; i < acfile.npoints; i++) {
            xyz[k].x = acfile.data[i*acfile.ncols + acfile.xcol];
            xyz[k].y = acfile.data[i*acfile.ncols + acfile.ycol];
            xyz[k].z = acfile.data[i*acfile.ncols + j];
            if (value_is_normal(xyz[k].x) && value_is_normal(xyz[k].y) && value_is_normal(xyz[k].z)) {
                xyz[k].x *= qx;
                xyz[k].y *= qy;
                xyz[k].z *= qz;
                k++;
            }
        }
        /* An interesting column was found to have finite values at points with finite coordinates. */
        g_assert(k > 0);
        gwy_surface_resize(surface, k);

        gwy_unit_assign(gwy_surface_get_unit_xy(surface), acfile.units[acfile.xcol]);
        gwy_unit_assign(gwy_surface_get_unit_z(surface), acfile.units[j]);
        gwy_file_pass_xyz(file, j, surface);
        gwy_file_set_title(file, GWY_FILE_XYZ, j, acfile.headers[j], FALSE);
        /* Each surface gets its own copy of the metadata; the local container is released at the end. */
        if (meta)
            gwy_file_pass_meta(file, GWY_FILE_XYZ, j, gwy_container_copy(meta));
        gwy_log_add_import(file, GWY_FILE_XYZ, j, NULL, filename);
    }

    /* Common cleanup for both success and failure; file is still NULL if we got here by an error. */
end:
    acfile_free(&acfile);
    gwy_file_abandon_contents(buffer, size, NULL);
    g_clear_object(&meta);

    return file;
}

/*
 * Parses the first line of the file as tab-separated column headers.
 *
 * On success the header vector is stored to @acfile together with the number of columns and the indices of the
 * X-Stage and Y-Stage columns. On failure no header vector is stored in @acfile and the problem is reported by one
 * of the err_*() helpers.
 *
 * @buffer: File contents.
 * @size: Number of bytes in @buffer.
 * @acfile: File structure to fill.
 * @error: Location for the error report.
 *
 * Returns a pointer into @buffer just after the line break characters following the first line, or NULL on failure.
 */
static const gchar*
parse_header(const gchar *buffer, gsize size, AccurionFile *acfile, GError **error)
{
    gchar **fields;
    gchar *first_line;
    gsize line_end = 0, data_pos;
    gint col, nfields;

    /* Find the end of the first line. */
    while (line_end < size && buffer[line_end] != '\r' && buffer[line_end] != '\n')
        line_end++;

    if (line_end == size) {
        /* There is no line break in the file at all. */
        err_FILE_TYPE(error, "Accurion");
        return NULL;
    }

    /* Data start after the entire run of line break characters. */
    data_pos = line_end;
    while (data_pos < size && (buffer[data_pos] == '\r' || buffer[data_pos] == '\n'))
        data_pos++;

    first_line = g_strndup(buffer, line_end);
    fields = g_strsplit(first_line, "\t", -1);
    g_free(first_line);

    /* We need two coordinates and at least one value. */
    nfields = g_strv_length(fields);
    if (nfields < 3) {
        err_FILE_TYPE(error, "Accurion");
        g_strfreev(fields);
        return NULL;
    }

    acfile->ncols = nfields;
    acfile->xcol = -1;
    acfile->ycol = -1;
    /* Stop looking as soon as both coordinate columns are known. */
    col = 0;
    while (col < nfields && (acfile->xcol == -1 || acfile->ycol == -1)) {
        if (g_str_has_prefix(fields[col], "X-Stage/"))
            acfile->xcol = col;
        else if (g_str_has_prefix(fields[col], "Y-Stage/"))
            acfile->ycol = col;
        col++;
    }

    if (acfile->xcol == -1) {
        err_MISSING_FIELD(error, "X-Stage");
        g_strfreev(fields);
        return NULL;
    }
    if (acfile->ycol == -1) {
        err_MISSING_FIELD(error, "Y-Stage");
        g_strfreev(fields);
        return NULL;
    }

    acfile->headers = fields;
    return buffer + data_pos;
}

/*
 * Classifies the columns of the data table.
 *
 * Allocates and fills the is_interesting and const_values arrays of @acfile. Only rows in which both coordinates are
 * finite are considered. A column gets HAS_ANY_DATA once a finite value is found in it; the value is remembered in
 * const_values. It additionally gets IS_INTERESTING once a finite value different from the remembered one is found.
 * The coordinate columns end up with no flags set.
 *
 * All rows are examined, including the first one, so a column whose only deviating value is in the first row is
 * still recognised as varying.
 *
 * @acfile: File structure with data, ncols, npoints, xcol and ycol already filled.
 *
 * Returns the number of columns marked IS_INTERESTING, not counting the coordinate columns.
 */
static gint
find_interesting_channels(AccurionFile *acfile)
{
    gint ncols = acfile->ncols, npoints = acfile->npoints;
    guint *is_interesting = acfile->is_interesting = g_new0(guint, ncols);
    gdouble *const_values = acfile->const_values = g_new(gdouble, ncols);
    const gdouble *data = acfile->data;
    gint i, j, k, ninteresting = 0;
    gint xcol = acfile->xcol, ycol = acfile->ycol;

    /* Mark only actual data channels (not coordinates) as interesting. But for the classification it is easier to
     * treat the coordinates as two already known interesting channels. */
    is_interesting[xcol] = IS_INTERESTING;
    is_interesting[ycol] = IS_INTERESTING;
    /* The two coordinate columns are never counted in ninteresting, so everything is classified as soon as the
     * count reaches ncols-2. Stopping there cannot change the result because columns already marked interesting are
     * skipped anyway. */
    for (i = 0; i < npoints && ninteresting < ncols - 2; i++) {
        gboolean xok = value_is_normal(data[i*ncols + xcol]);
        gboolean yok = value_is_normal(data[i*ncols + ycol]);
        if (!xok || !yok)
            continue;
        for (j = 0; j < ncols; j++) {
            if (is_interesting[j] & IS_INTERESTING)
                continue;
            k = i*ncols + j;
            if (!value_is_normal(data[k]))
                continue;
            /* The first finite value in the column becomes the reference for the constancy test. */
            if (!is_interesting[j]) {
                is_interesting[j] |= HAS_ANY_DATA;
                const_values[j] = data[k];
                continue;
            }
            if (data[k] == const_values[j])
                continue;
            is_interesting[j] |= IS_INTERESTING;
            ninteresting++;
        }
    }
    /* Remove the temporary marks from the coordinate columns. */
    is_interesting[xcol] = 0;
    is_interesting[ycol] = 0;

    return ninteresting;
}

/*
 * Releases everything owned by @acfile; the structure itself is not freed.
 *
 * Members which were never allocated are NULL and are skipped. The unit strings point inside the headers, so only
 * the array holding the pointers is freed for them.
 */
static void
acfile_free(AccurionFile *acfile)
{
    GwyUnit **units = acfile->units;
    gint col;

    if (units) {
        for (col = 0; col < acfile->ncols; col++)
            g_clear_object(&units[col]);
        g_free(units);
    }

    g_free(acfile->q);
    g_free(acfile->is_interesting);
    g_free(acfile->const_values);
    g_free(acfile->unitstrs);
    g_free(acfile->data);
    g_strfreev(acfile->headers);
}

/* vim: set cin columns=120 tw=118 et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
