/*
 *  $Id: shimadzu.c 29477 2026-02-14 13:29:30Z yeti-dn $
 *  Copyright (C) 2007-2025 David Necas (Yeti).
 *  E-mail: yeti@gwyddion.net.
 *
 *  This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along with this program; if not, write to the
 *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
 * [FILE-MAGIC-FREEDESKTOP]
 * <mime-type type="application/x-shimadzu-spm">
 *   <comment>Shimadzu SPM data</comment>
 *   <magic priority="80">
 *     <match type="string" offset="0" value="Shimadzu SPM File Format"/>
 *   </magic>
 * </mime-type>
 **/

/**
 * [FILE-MAGIC-FREEDESKTOP]
 * <mime-type type="application/x-shimadzu-spm-asc">
 *   <comment>Shimadzu SPM ASCII data</comment>
 *   <magic priority="80">
 *     <match type="string" offset="0" value="ASCII:">
 *       <match type="string" offset="8:9" value="Shimadzu SPM File Format"/>
 *     </match>
 *   </magic>
 * </mime-type>
 **/

/**
 * [FILE-MAGIC-FILEMAGIC]
 * # Shimadzu
 * # It has two variants: binary and text.  The text header seems to identify
 * # text version clearly but the `binary' version can be present in both.
 * 0 string Shimadzu\ SPM\ File\ Format\ Version\x20
 * >&0 regex [0-9.]+ Shimadzu SPM data version %s
 * 0 string ASCII:
 * >8 search/1 Shimadzu\ SPM\ File\ Format\ Version\x20
 * >&0 regex [0-9.]+ Shimadzu SPM text data version %s
 **/

/**
 * [FILE-MAGIC-USERGUIDE]
 * Shimadzu
 * .sph, .spp, .001, .002, etc.
 * Read
 **/

/**
 * [FILE-MAGIC-USERGUIDE]
 * Shimadzu ASCII
 * .txt
 * Read
 **/

#include "config.h"
#include <glib/gi18n-lib.h>
#include <string.h>
#include <stdlib.h>
#include <gwy.h>

#include "err.h"

/* Size in bytes of the leading header region.  The loader parses this many leading bytes as the text header and the
 * binary reader takes raw data from this offset on. */
enum {
    HEADER_SIZE = 32768
};

/* Signature that starts the version line; the version string follows it directly on the same line. */
#define MAGIC "Shimadzu SPM File Format Version "
#define MAGIC_SIZE (sizeof(MAGIC)-1)

/* First line of the ASCII variant; the usual signature line comes right after it. */
#define MAGIC_ASCII "ASCII:"
#define MAGIC_ASCII_SIZE (sizeof(MAGIC_ASCII)-1)

/* Forward declarations of the file-local functions defined below. */
static gboolean      module_register (void);
static gint          detect_file     (const GwyFileDetectInfo *fileinfo,
                                      gboolean only_name);
static GwyFile*      load_file       (const gchar *filename,
                                      GwyRunModeFlags mode,
                                      GError **error);
static GwyField*     read_binary_data(const gchar *buffer,
                                      gsize size,
                                      GHashTable *hash,
                                      GError **error);
static GwyField*     read_text_data  (gchar *buffer,
                                      GHashTable *hash,
                                      GError **error);
static GHashTable*   read_hash       (gchar *buffer,
                                      gint *text_data_start,
                                      GError **error);
static gboolean      get_scales      (GHashTable *hash,
                                      gboolean is_text,
                                      gint *xres,
                                      gint *yres,
                                      gdouble *xreal,
                                      gdouble *yreal,
                                      gdouble *xoff,
                                      gdouble *yoff,
                                      GwyUnit *unit_xy,
                                      gdouble *zscale,
                                      gdouble *zoff,
                                      GwyUnit *unit_z,
                                      GError **error);
static GwyDict* get_metadata    (GHashTable *hash);

/* Module description record handed to GWY_MODULE_QUERY2() below; module_register() is the function it names for
 * registering the module's file type. */
static GwyModuleInfo module_info = {
    GWY_MODULE_ABI_VERSION,
    &module_register,
    N_("Imports Shimadzu SPM data files."),
    "Yeti <yeti@gwyddion.net>",
    "0.9",
    "David Nečas (Yeti)",
    "2007",
};

GWY_MODULE_QUERY2(module_info, shimadzu)

/* Registers the "shimadzu" file type with detect_file() as the detector and load_file() as the loader; the two
 * remaining function slots are left NULL.  Always returns TRUE. */
static gboolean
module_register(void)
{
    gwy_file_func_register("shimadzu", N_("Shimadzu files"), detect_file, load_file, NULL, NULL);
    return TRUE;
}

/*
 * Scores how likely a file is Shimadzu SPM data.
 *
 * Detection is by content only, so name-only queries always get 0.  A file scores 100 when it is long enough to
 * hold the header and its head starts with the signature, either directly (binary variant) or one or two bytes
 * after the "ASCII:" line (text variant; one or two bytes cover a one- or two-character line break).
 * Anything else scores 0.
 */
static gint
detect_file(const GwyFileDetectInfo *fileinfo, gboolean only_name)
{
    if (only_name)
        return 0;

    /* load_file() refuses anything shorter than this for both variants, so do not claim such files. */
    if (fileinfo->file_size < HEADER_SIZE + 2)
        return 0;

    if (fileinfo->buffer_len > MAGIC_SIZE
        && memcmp(fileinfo->head, MAGIC, MAGIC_SIZE) == 0)
        return 100;

    /* Version 4 text files start with the ASCII magic prefix. */
    if (fileinfo->buffer_len >= MAGIC_SIZE + MAGIC_ASCII_SIZE + 3
        && memcmp(fileinfo->head, MAGIC_ASCII, MAGIC_ASCII_SIZE) == 0
        && (memcmp(fileinfo->head + MAGIC_ASCII_SIZE+1, MAGIC, MAGIC_SIZE) == 0
            || memcmp(fileinfo->head + MAGIC_ASCII_SIZE+2, MAGIC, MAGIC_SIZE) == 0))
        return 100;

    return 0;
}

/*
 * Loads a Shimadzu SPM file (binary or ASCII variant) as a file with a single image.
 *
 * The whole file is read into memory and the first HEADER_SIZE bytes are copied to a separate NUL-terminated
 * buffer which read_hash() parses (and modifies) in place.  Depending on what the header says, the image is then
 * read either as text starting at the offset reported by read_hash(), or as raw binary data.
 *
 * Returns the new file, or NULL with @error set when the file cannot be read, is too short, lacks the signature,
 * or its header or data cannot be parsed.
 */
static GwyFile*
load_file(const gchar *filename,
          G_GNUC_UNUSED GwyRunModeFlags mode,
          GError **error)
{
    GwyFile *file = NULL;
    GwyField *dfield = NULL;
    GError *err = NULL;
    gchar *buffer = NULL, *head = NULL;
    GHashTable *hash = NULL;
    const gchar *title;
    gsize size = 0;
    gint text_data_start = 0;

    if (!g_file_get_contents(filename, &buffer, &size, &err)) {
        err_GET_FILE_CONTENTS(error, &err);
        return NULL;
    }
    if (size < HEADER_SIZE + 2) {
        err_TOO_SHORT(error);
        goto end;
    }
    if (memcmp(buffer, MAGIC, MAGIC_SIZE) != 0
        && !(memcmp(buffer, MAGIC_ASCII, MAGIC_ASCII_SIZE) == 0
             && (memcmp(buffer + MAGIC_ASCII_SIZE+1, MAGIC, MAGIC_SIZE) == 0
                 || memcmp(buffer + MAGIC_ASCII_SIZE+2, MAGIC, MAGIC_SIZE) == 0))) {
        err_FILE_TYPE(error, "Shimadzu");
        goto end;
    }

    /* The header is parsed destructively, so work on a terminated copy and keep the file contents intact. */
    head = g_memdup(buffer, HEADER_SIZE+1);
    head[HEADER_SIZE] = '\0';

    /* text_data_start is set to nonzero if data are text */
    if (!(hash = read_hash(head, &text_data_start, error)))
        goto end;

    if (text_data_start)
        dfield = read_text_data(buffer + text_data_start, hash, error);
    else
        dfield = read_binary_data(buffer, size, hash, error);

    if (!dfield)
        goto end;

    file = gwy_file_new_in_construction();
    gwy_file_pass_image(file, 0, dfield);
    gwy_file_pass_meta(file, GWY_FILE_IMAGE, 0, get_metadata(hash));

    title = g_hash_table_lookup(hash, "Channel");
    if (title && *title)
        gwy_file_set_title(file, GWY_FILE_IMAGE, 0, title, FALSE);
    else
        gwy_image_title_fall_back(file, 0);

    gwy_log_add_import(file, GWY_FILE_IMAGE, 0, NULL, filename);

end:
    /* The hash may not exist if we bailed out early or header parsing failed. */
    if (hash)
        g_hash_table_destroy(hash);
    g_free(head);
    g_free(buffer);

    return file;
}

/*
 * Creates the image from raw binary data that follow the header.
 *
 * @buffer is the entire file content of @size bytes; raw samples start at offset HEADER_SIZE and are read as
 * little-endian.  The header field "DataType" selects the sample type: "short" is read as unsigned 16bit integers,
 * "float" as single precision floats (both compared case-insensitively).  Dimensions, units and the value scaling
 * come from get_scales().  The image is flipped after reading.
 *
 * Returns the new field, or NULL with @error set when "DataType" is missing or unsupported, the scales cannot be
 * obtained, or the file is too small for the declared dimensions.
 */
static GwyField*
read_binary_data(const gchar *buffer,
                 gsize size,
                 GHashTable *hash,
                 GError **error)
{
    GwyRawDataType rawtype;
    gint xres, yres;
    guint64 expected;
    gdouble xreal, yreal, zscale, xoff, yoff, zoff;
    GwyUnit *unitxy, *unitz;
    GwyField *dfield = NULL;
    const gchar *s;

    if (!(s = g_hash_table_lookup(hash, "DataType"))) {
        err_MISSING_FIELD(error, "DataType");
        return NULL;
    }

    if (g_ascii_strcasecmp(s, "short") == 0)
        rawtype = GWY_RAW_DATA_UINT16;
    else if (g_ascii_strcasecmp(s, "float") == 0)
        rawtype = GWY_RAW_DATA_FLOAT;
    else {
        err_UNSUPPORTED(error, "DataType");
        return NULL;
    }

    unitxy = gwy_unit_new(NULL);
    unitz = gwy_unit_new(NULL);

    if (!get_scales(hash, FALSE, &xres, &yres, &xreal, &yreal, &xoff, &yoff, unitxy, &zscale, &zoff, unitz, error))
        goto fail;

    /* Do the size arithmetic in 64 bits.  In 32 bits the product can wrap around to a small number for large
     * declared dimensions, making a short file pass the check and the conversion below read past the buffer.
     * NOTE(review): relies on get_scales() having rejected non-positive dimensions via err_DIMENSION(). */
    expected = (guint64)xres*(guint64)yres*gwy_raw_data_size(rawtype) + HEADER_SIZE;
    if (expected > G_MAXUINT) {
        /* Too large for the usual 32bit size-mismatch report; only accept it if the file really is that big. */
        if (expected > size) {
            err_TOO_SHORT(error);
            goto fail;
        }
    }
    else if (err_SIZE_MISMATCH(error, (guint)expected, size, FALSE))
        goto fail;

    dfield = gwy_field_new(xres, yres, xreal, yreal, FALSE);
    gwy_field_set_xoffset(dfield, xoff);
    gwy_field_set_yoffset(dfield, yoff);
    gwy_unit_assign(gwy_field_get_unit_xy(dfield), unitxy);
    gwy_unit_assign(gwy_field_get_unit_z(dfield), unitz);
    gwy_convert_raw_data(buffer + HEADER_SIZE, (gsize)xres*(gsize)yres, 1,
                         rawtype, GWY_BYTE_ORDER_LITTLE_ENDIAN,
                         gwy_field_get_data(dfield), zscale, zoff);
    gwy_field_flip(dfield, FALSE, TRUE);

fail:
    g_object_unref(unitxy);
    g_object_unref(unitz);
    return dfield;
}

/*
 * Creates the image from text data.
 *
 * @buffer points to the start of the textual values and is modified in place: commas and semicolons are turned
 * into spaces before the numbers are parsed.  Dimensions, lateral units and offsets come from get_scales(); the
 * value unit and the power-of-ten factor applied to the values come solely from the "DATA Unit" header entry.
 *
 * Returns the new field, or NULL with @error set when the scales cannot be obtained or the values fail to parse.
 */
static GwyField*
read_text_data(gchar *buffer,
               GHashTable *hash,
               GError **error)
{
    gint xres, yres;
    gdouble xreal, yreal, xoff, yoff, zscale, zoff;
    GwyUnit *unitxy = gwy_unit_new(NULL), *unitz = gwy_unit_new(NULL);
    GwyField *field = NULL;
    GError *parse_err = NULL;

    if (get_scales(hash, TRUE, &xres, &yres, &xreal, &yreal, &xoff, &yoff, unitxy, &zscale, &zoff, unitz, error)) {
        const gchar *zunit_str;

        field = gwy_field_new(xres, yres, xreal, yreal, FALSE);
        gwy_field_set_xoffset(field, xoff);
        gwy_field_set_yoffset(field, yoff);
        gwy_unit_assign(gwy_field_get_unit_xy(field), unitxy);

        /* The z scale from get_scales() is not used for text data; the unit string alone decides the factor. */
        zunit_str = g_hash_table_lookup(hash, "DATA Unit");
        zscale = gwy_exp10(gwy_unit_set_from_string(gwy_field_get_unit_z(field), zunit_str));

        g_strdelimit(buffer, ",;", ' ');
        if (gwy_parse_doubles(buffer, gwy_field_get_data(field), 0, &yres, &xres, NULL, &parse_err))
            gwy_field_multiply(field, zscale);
        else {
            err_PARSE_DOUBLES(error, &parse_err);
            g_clear_object(&field);
        }
    }

    g_object_unref(unitxy);
    g_object_unref(unitz);
    return field;
}

/*
 * Parses the text header into a hash table.
 *
 * @buffer is the NUL-terminated header text.  It is modified in place and the stored values point into it (they
 * are not copied), so it must stay alive for as long as the table is used.  Keys are newly allocated and owned by
 * the table.
 *
 * Ordinary "Name: value" lines are stored under the key "SECTION::Name", where SECTION is the most recent
 * "[SECTION]" line; lines before any section get just "Name".  The line after "[PROCESS PROFILE]" is stored
 * whole as "ProcessProfile" and the line after "[COMMENT]" as "Comment".  The rest of the signature line is
 * stored as "Version".
 *
 * Parsing stops at a line starting with the 0x1a character (binary data follow), or at a section whose name
 * starts with "DATA " (text data follow).  In the latter case @text_data_start receives the offset of the line
 * following the section line and "DATA Unit" is set from the "Unit(...)" part of the section name, or to "1" if
 * that cannot be parsed.  Otherwise @text_data_start is set to 0.
 *
 * Returns the table, or NULL with @error set when a line that should be "Name: value" has no colon.
 */
static GHashTable*
read_hash(gchar *buffer,
          gint *text_data_start,
          GError **error)
{
    enum {
        WHATEVER = 0,
        PROCESS_PROFILE,
        COMMENT,
    } next_is;
    GHashTable *hash;
    gchar *p, *line, *value;
    GString *key;

    *text_data_start = 0;
    p = buffer;
    hash = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
    line = gwy_str_next_line(&p);
    /* Version 4 text data. */
    if (gwy_strequal(line, MAGIC_ASCII))
        line = gwy_str_next_line(&p);

    /* Holds the current "SECTION::" prefix; field names are appended temporarily to form full keys. */
    key = g_string_new(NULL);

    g_hash_table_insert(hash, g_strdup("Version"), line + MAGIC_SIZE);
    next_is = WHATEVER;
    while ((line = gwy_str_next_line(&p))) {
        gsize len;
        gchar *rb;

        if (line[0] == '/')
            line++;

        if (line[0] == '\x1a') {
            /* Apparently a binary data marker */
            *text_data_start = 0;
            break;
        }

        g_strstrip(line);
        /* sections */
        if (line[0] == '[' && (rb = strchr(line, ']'))) {
            *rb = '\0';
            line++;
            g_strstrip(line);
            gwy_debug("section %s", line);
            g_string_assign(key, line);
            g_string_append(key, "::");
            if (gwy_strequal(line, "PROCESS PROFILE")) {
                next_is = PROCESS_PROFILE;
                continue;
            }
            if (gwy_strequal(line, "COMMENT")) {
                next_is = COMMENT;
                continue;
            }
            if (g_str_has_prefix(line, "DATA ")) {
                /* Leave line pointing at the unit part of the section name; it is parsed after the loop. */
                line += strlen("DATA");
                *text_data_start = p - buffer;
                break;
            }
            next_is = WHATEVER;
            /* Other sectioning seems to be uninteresting. */
            continue;
        }

        if (next_is == PROCESS_PROFILE) {
            g_hash_table_insert(hash, g_strdup("ProcessProfile"), line);
            next_is = WHATEVER;
            continue;
        }
        if (next_is == COMMENT) {
            g_hash_table_insert(hash, g_strdup("Comment"), line);
            next_is = WHATEVER;
            continue;
        }

        next_is = WHATEVER;
        value = strchr(line, ':');
        if (!value) {
            g_set_error(error, GWY_MODULE_FILE_ERROR,
                        GWY_MODULE_FILE_ERROR_DATA,
                        _("Missing colon in header line."));
            /* Release the key prefix too, not just the table. */
            g_string_free(key, TRUE);
            g_hash_table_destroy(hash);
            return NULL;
        }
        *value = '\0';
        value++;
        g_strstrip(line);
        g_strstrip(value);
        /* Append the field name for the lookup key, then cut it off again to get the bare section prefix back. */
        len = key->len;
        g_string_append(key, line);
        gwy_debug("%s = %s", key->str, value);
        g_hash_table_replace(hash, g_strdup(key->str), value);
        g_string_truncate(key, len);
    }

    if (*text_data_start) {
        g_strstrip(line);
        if (!g_str_has_prefix(line, "Unit(") || !g_str_has_suffix(line, ")")) {
            g_warning("Cannot parse DATA unit: %s", line);
            g_hash_table_insert(hash, g_strdup("DATA Unit"), "1");
        }
        else {
            /* Keep only what is between the parentheses. */
            line += strlen("Unit(");
            line[strlen(line)-1] = '\0';
            g_hash_table_insert(hash, g_strdup("DATA Unit"), line);
        }
    }

    g_string_free(key, TRUE);

    return hash;
}

/*
 * Extracts image dimensions, physical sizes, offsets, units and value scaling from the parsed header.
 *
 * The "SCANNING PARAMS" entries PixelsX/Y/Z and SizeX/Y/Z are mandatory; OffsetX/Y/Z are optional and default to
 * zero.  Sizes and offsets are numbers followed by a unit; the numbers are returned multiplied by the power of ten
 * of the unit prefix.  @unit_xy is set from SizeX and @unit_z from SizeZ, or from "UNIT::Unit" if it is present.
 * @zscale is SizeZ divided by PixelsZ, further multiplied by the "UNIT" section factors when that section exists.
 * An offset whose unit differs from the corresponding size unit is ignored with a warning.
 *
 * @is_text tells whether the data are text; it only influences the phase offset applied to binary data.
 *
 * Returns TRUE on success, FALSE with @error set when a mandatory entry is missing or a pixel dimension is
 * rejected by err_DIMENSION().
 */
static gboolean
get_scales(GHashTable *hash,
           gboolean is_text,
           gint *xres, gint *yres,
           gdouble *xreal, gdouble *yreal,
           gdouble *xoff, gdouble *yoff,
           GwyUnit *unit_xy,
           gdouble *zscale,
           gdouble *zoff,
           GwyUnit *unit_z,
           GError **error)
{
    GwyUnit *unit;
    gint power10, zp;
    gchar *p;
    gboolean has_unit = FALSE;

    /* Dimensions are mandatory. */
    if (!require_keys(hash, error,
                      "SCANNING PARAMS::PixelsX", "SCANNING PARAMS::PixelsY", "SCANNING PARAMS::PixelsZ",
                      "SCANNING PARAMS::SizeX", "SCANNING PARAMS::SizeY", "SCANNING PARAMS::SizeZ",
                      NULL))
        return FALSE;

    *xres = atoi(g_hash_table_lookup(hash, "SCANNING PARAMS::PixelsX"));
    if (err_DIMENSION(error, *xres))
        return FALSE;
    *yres = atoi(g_hash_table_lookup(hash, "SCANNING PARAMS::PixelsY"));
    if (err_DIMENSION(error, *yres))
        return FALSE;

    /* Scratch unit for comparing the units of the other quantities with unit_xy and unit_z. */
    unit = gwy_unit_new(NULL);

    p = g_hash_table_lookup(hash, "SCANNING PARAMS::SizeX");
    *xreal = g_ascii_strtod(p, &p);
    sanitise_real_size(xreal, "x size");
    power10 = gwy_unit_set_from_string(unit_xy, p);
    *xreal *= gwy_exp10(power10);

    p = g_hash_table_lookup(hash, "SCANNING PARAMS::SizeY");
    *yreal = g_ascii_strtod(p, &p);
    sanitise_real_size(yreal, "y size");
    power10 = gwy_unit_set_from_string(unit, p);
    *yreal *= gwy_exp10(power10);
    if (!gwy_unit_equal(unit, unit_xy)) {
        g_warning("X and Y units differ, using X");
    }

    zp = atoi(g_hash_table_lookup(hash, "SCANNING PARAMS::PixelsZ"));
    if (!zp) {
        g_warning("Z pixels is 0, fixing to 1");
        zp = 1;
    }
    p = g_hash_table_lookup(hash, "SCANNING PARAMS::SizeZ");
    *zscale = g_ascii_strtod(p, &p);
    *zoff = 0.0;
    power10 = gwy_unit_set_from_string(unit_z, p);
    *zscale *= gwy_exp10(power10)/zp;
    /* XXX: Version 4 can have UNIT section that takes precedence.  The Conv factor may not be enough.  Apparently,
     * binary phase data need subtracting 180 deg because data are unsigned.  Also, all this is ignored by the text
     * data reader which just reads DATA Unit. Bite me. */
    if ((p = g_hash_table_lookup(hash, "UNIT::Unit"))) {
        const gchar *s = g_hash_table_lookup(hash, "UNIT::Name");
        has_unit = TRUE;
        power10 = gwy_unit_set_from_string(unit_z, p);
        *zscale *= gwy_exp10(power10);
        if ((p = g_hash_table_lookup(hash, "UNIT::Conv")))
            *zscale *= g_ascii_strtod(p, NULL);

        /* The UNIT section may lack Name; only compare when it is present. */
        if (!is_text && s && gwy_strequal(s, "Phase"))
            *zoff = -180.0;
    }

    /* Offsets are optional. */
    *xoff = 0.0;
    if ((p = g_hash_table_lookup(hash, "SCANNING PARAMS::OffsetX"))) {
        *xoff = g_ascii_strtod(p, &p);
        power10 = gwy_unit_set_from_string(unit, p);
        if (gwy_unit_equal(unit, unit_xy))
            *xoff *= gwy_exp10(power10);
        else {
            g_warning("X offset units differ from X size units, ignoring.");
            *xoff = 0.0;
        }
    }

    *yoff = 0.0;
    if ((p = g_hash_table_lookup(hash, "SCANNING PARAMS::OffsetY"))) {
        *yoff = g_ascii_strtod(p, &p);
        power10 = gwy_unit_set_from_string(unit, p);
        if (gwy_unit_equal(unit, unit_xy))
            *yoff *= gwy_exp10(power10);
        else {
            g_warning("Y offset units differ from Y size units, ignoring.");
            *yoff = 0.0;
        }
    }

    /* Don't know what to do with the offset when UNIT section is present.  It seems to be always 0 in wrong units,
     * so skip it. */
    if (!has_unit) {
        if ((p = g_hash_table_lookup(hash, "SCANNING PARAMS::OffsetZ"))) {
            *zoff = g_ascii_strtod(p, &p);
            power10 = gwy_unit_set_from_string(unit, p);
            if (gwy_unit_equal(unit, unit_z))
                *zoff *= gwy_exp10(power10);
            else {
                g_warning("Z offset units differ from Z size units, ignoring.");
                *zoff = 0.0;
            }
        }
    }

    g_object_unref(unit);

    return TRUE;
}

/* GHashTable iteration callback: stores one header entry in the metadata dictionary passed as @user_data.
 * Entries whose key or value is not valid UTF-8 are skipped. */
static void
add_metadata(gpointer hkey, gpointer hvalue, gpointer user_data)
{
    GwyDict *dict = (GwyDict*)user_data;
    const gchar *name = (const gchar*)hkey;
    const gchar *text = (const gchar*)hvalue;

    if (!g_utf8_validate(name, -1, NULL))
        return;
    if (!g_utf8_validate(text, -1, NULL))
        return;

    gwy_dict_set_const_string_by_name(dict, name, text);
}

/* Builds a new metadata dictionary by running add_metadata() over every entry of the parsed header. */
static GwyDict*
get_metadata(GHashTable *hash)
{
    GwyDict *dict;

    dict = gwy_dict_new_in_construction();
    g_hash_table_foreach(hash, add_metadata, dict);

    return dict;
}

/* vim: set cin columns=120 tw=118 et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
