/*
 *  $Id: zyvex.c 29329 2026-01-23 16:22:18Z yeti-dn $
 *  ZyVector Scanz .zad file format import module
 *  Copyright (C) 2025 Zyvex Labs LLC.
 *  Author: Moutaz Haq.
 *  E-mail: mhaq@zyvexlabs.com.
 *
 *  This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along with this program; if not, write to the
 *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include "config.h"
#include <glib/gi18n-lib.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <gwy.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>

#include "gwyzlib.h"
#include "err.h"

#define MODULE_VERSION "1.0"
#define FILE_TYPE_NAME "ZyVector Scanz"
#define FILE_EXT ".zad"
#define MIN_ZAD_VERSION 5
#define MAX_ZAD_VERSION 7

/* Forward declarations of the module entry points; they are referenced by module_info and by
 * module_register() before their definitions appear in this file. */
static gboolean module_register(void);
static gint     detect_file    (const GwyFileDetectInfo *fileinfo,
                                gboolean only_name);
static GwyFile* load_file      (const gchar *filename,
                                GwyRunModeFlags mode,
                                GError **error);

/* Module description handed to GWY_MODULE_QUERY() below.
 * The initialiser is positional: ABI version, registration function and a translatable description come first.
 * NOTE(review): the remaining strings look like author, module version, copyright holder and year -- confirm the
 * field order against the GwyModuleInfo declaration. */
static GwyModuleInfo module_info = {
    GWY_MODULE_ABI_VERSION,
    &module_register,
    N_("Imports ZyVector Scanz files."),
    "Moutaz Haq <mhaq@zyvexlabs.com>",
    MODULE_VERSION,
    "Zyvex Labs LLC",
    "2025"
};

GWY_MODULE_QUERY(module_info)

/* Module registration callback referenced from module_info.
 * Registers the "zyvex" file function with detect_file() and load_file(); the two trailing callback arguments are
 * passed as NULL.
 * Always returns TRUE. */
static gboolean
module_register(void)
{
    gwy_file_func_register("zyvex", N_("ZyVector Scanz files (.zad)"), detect_file, load_file, NULL, NULL);
    return TRUE;
}

/* Factor by which ScanSize values are multiplied in create_field() before they are used as the real dimensions of
 * a field whose lateral unit is set to "m". */
#define Nanometer 1e-9
/* Short alias for gwy_strequal(). */
#define strequal gwy_strequal

/* types used for values directly from a .zad file */
typedef gint64 zad_int;
typedef gdouble zad_float;

/* Description of one <ScanBuf> element, filled in by read_scan_buf().
 * The pixel data are not stored here; they are decoded on demand from @node by decode_scan_buf_dataz64(). */
typedef struct ScanBuf {
    /* Value of the "ChanName" attribute; also used as the image title in load_file(). */
    const gchar *name;  /* owned by ScanBuf */
    /* Value of the "Units" attribute (surrounding '' quotes removed), or a guess based on @name when missing. */
    const gchar *units;  /* owned by ScanBuf */
    /* From the "Dims" attribute. dims[0] is the row length and dims[1] the number of rows, as they are used in
     * create_field(). */
    zad_int dims[2];
    /* From "ValidRows" (defaults to dims[1]); rows with index >= valid_rows are set to NAN in create_field(). */
    zad_int valid_rows;
    /* From "MaxValue" (defaults to 0x7fff); used as divisor in get_z_scale(). */
    zad_int max_value;
    /* From "MaxChannelValue"; when missing it is guessed from @max_value as 0x7fff or 0x7ffff. */
    zad_int max_channel_value;
    /* TRUE when the "ScaleBy" attribute is present; get_z_scale() then returns @scale_by directly. */
    gboolean has_scale_by;
    gdouble scale_by;
    /* Raw sample type, from "DType" or guessed from @max_value when "DType" is missing. */
    GwyRawDataType data_type;
    int data_item_size;  /* size of data item in bytes */
    /* Low 32 bits of "CRCA"; compared with the CRC-32 of the decompressed data. */
    guint32 crc_data;
    /* Low 32 bits of "CRCC"; compared with the CRC-32 of the base64-decoded (still compressed) data. */
    guint32 crc_dataz;
    /* Borrowed pointer into the XML document; only valid while the document is alive. */
    xmlNodePtr node;  /* for decoding DataZ64 when needed */
} ScanBuf;

static ScanBuf*
new_scan_buf(const gchar* name,
             const gchar* units)
{
    ScanBuf *scan_buf;
    scan_buf = g_new0(ScanBuf, 1);
    scan_buf->name = g_strdup(name);
    scan_buf->units = g_strdup(units);
    return scan_buf;
}

/* Release @scan_buf together with the name and units strings it owns.
 * Passing NULL is allowed and does nothing. */
static void
free_scan_buf(ScanBuf *scan_buf)
{
    if (scan_buf) {
        g_free((gchar*)scan_buf->units);
        g_free((gchar*)scan_buf->name);
        g_free(scan_buf);
    }
}

/* Contents of the <ScanData> element, filled in by read_scan_data(). */
typedef struct ScanData {
    /* From the "ScanSize" attribute; multiplied by Nanometer when used as field dimensions. */
    zad_float scan_size[2];
    /* From "VernierZ", or copied from SystemData for version 5 files; used in the "nm" branch of get_z_scale(). */
    zad_float vernier_z;
    /* Array of ScanBuf*, one per <ScanBuf> element; created with free_scan_buf() as the element free function. */
    GPtrArray *scan_bufs;
} ScanData;

static ScanData*
new_scan_data(void)
{
    ScanData *scan_data;
    scan_data = g_new0(ScanData, 1);
    scan_data->scan_bufs = g_ptr_array_new_with_free_func((GDestroyNotify)&free_scan_buf);
    return scan_data;
}

/* Release @scan_data and drop its reference to the ScanBuf list.
 * Passing NULL is allowed and does nothing. */
static void
free_scan_data(ScanData *scan_data)
{
    if (scan_data) {
        g_ptr_array_unref(scan_data->scan_bufs);
        g_free(scan_data);
    }
}

/* Convenience accessors for the ScanBuf list held by a ScanData.
 * Each value-producing expansion is fully parenthesised so that it can be used inside a larger expression (for
 * instance directly followed by `->') without the cast or member access binding to the wrong operand. */
#define scan_data_append_scan_buf(sd, buf) g_ptr_array_add((sd)->scan_bufs, (buf))
#define scan_data_num_scan_bufs(sd) ((sd)->scan_bufs->len)
#define scan_data_get_scan_buf(sd, i) ((ScanBuf*)((sd)->scan_bufs->pdata[(i)]))

/* Contents of the <SystemData> element, filled in by read_system_data(). */
typedef struct SystemData {
    /* From "CalibXYZ", which must have exactly four entries; only calib_xyz[2] is used in this file (in the "nm"
     * and "1/nm" branches of get_z_scale()). */
    zad_float calib_xyz[4];
    /* From "MaxPiezoVoltage" (defaults to 150.0). */
    zad_float max_piezo_voltage;
    /* From "PreampMaxCurr", or computed as 10^(10 - PreampGain) when that attribute is missing. */
    zad_float preamp_max_curr;
    /* From "VernierZ" (defaults to 0.0); required for version 5 files, where ScanData takes its value from here. */
    zad_float vernier_z;
} SystemData;

/* Skip leading and trailing whitespace in a string segment.
 * The string segment is from @start to @end, where @end points to the first
 * character NOT in the string segment (which can be the NUL terminator).
 * @out_start is set to point to the first non-whitespace character in the string
 * segment.
 * @out_end is set to point to the first character after the last non-whitespace
 * character in the string segment.
 * @out_start == @out_end if the string segment contains only whitespace or
 * is empty.
 * @out_start and @out_end may point to the same variables that were passed as @start and @end. */
static inline void
skip_whitespace(const gchar* start, const char* end,
                const gchar **out_start, const gchar **out_end)
{
    while (start < end && g_ascii_isspace(*start))
        start++;
    /* Look at the character before @end instead of decrementing first.  This way @end never moves before @start,
     * not even temporarily for an empty or all-whitespace segment, which would be undefined pointer arithmetic. */
    while (end > start && g_ascii_isspace(end[-1]))
        end--;

    *out_start = start;
    *out_end = end;
}

/* Set @error to a GWY_MODULE_FILE_ERROR_DATA error stating that data type @name is invalid or unsupported. */
static inline void
err_DATA_TYPE_NAME(GError **error,
                   gchar *name)
{
    g_set_error(error,
                GWY_MODULE_FILE_ERROR, GWY_MODULE_FILE_ERROR_DATA,
                _("Data type `%s' is invalid or unsupported."),
                name);
}

/* File detection callback.
 * With @only_name set, returns 20 when the lowercased file name ends with FILE_EXT and 0 otherwise.
 * Otherwise the file head is examined textually: it must start with "<?xml", contain "<SCANZ_DATA" followed by
 * whitespace, and after that the word "version", an `=' sign and a quoted number in the range
 * MIN_ZAD_VERSION..MAX_ZAD_VERSION (whitespace around `=' is permitted).
 * Returns 100 when all of this holds and 0 otherwise. */
static gint
detect_file(const GwyFileDetectInfo *fileinfo, gboolean only_name)
{
    static const gchar root_tag[] = "<SCANZ_DATA";
    static const gchar version_attr[] = "version";
    const gchar *p;
    gint64 zad_version;

    if (only_name) {
        if (g_str_has_suffix(fileinfo->name_lowercase, FILE_EXT))
            return 20;
        return 0;
    }

    /* XML declaration at the very beginning. */
    p = (const gchar*)fileinfo->head;
    if (!g_str_has_prefix(p, "<?xml"))
        return 0;

    /* Root element tag; it must be followed by whitespace, i.e. have attributes. */
    p = strstr(p, root_tag);
    if (!p)
        return 0;
    p += sizeof(root_tag) - 1;
    if (!g_ascii_isspace(*p))
        return 0;
    p++;

    /* version = "N" or version = 'N' somewhere after the tag name. */
    p = strstr(p, version_attr);
    if (!p)
        return 0;
    p += sizeof(version_attr) - 1;
    while (g_ascii_isspace(*p))
        p++;
    if (*p != '=')
        return 0;
    p++;
    while (g_ascii_isspace(*p))
        p++;
    if (*p != '\'' && *p != '"')
        return 0;
    p++;

    zad_version = g_ascii_strtoll(p, NULL, 0);
    if (zad_version < MIN_ZAD_VERSION || zad_version > MAX_ZAD_VERSION)
        return 0;

    return 100;
}

/* Split a tuple-like string such as "(x, y, ...)" or "[x, y, ...]" into its items.
 * After leading and trailing whitespace is ignored, @str must start with `(' or `[' and end with the matching
 * closing bracket.  The text between the brackets is split on ',' and each piece has surrounding whitespace
 * removed.  A trailing comma does not produce an extra empty item.
 * Returns a NULL-terminated string array to be freed with g_strfreev(), or NULL if @str is badly formatted. */
static gchar**
unpack_string_tuple(const gchar *str)
{
    const gchar *first, *last, *cursor, *comma;
    GPtrArray *items;
    gchar closing;

    skip_whitespace(str, str + strlen(str), &first, &last);
    if (last - first < 2)
        return NULL;

    if (*first == '(')
        closing = ')';
    else if (*first == '[')
        closing = ']';
    else
        return NULL;

    if (last[-1] != closing)
        return NULL;

    /* Narrow the segment to the text between the brackets. */
    first++;
    last--;

    items = g_ptr_array_new();
    for (cursor = first; cursor < last; cursor = comma + 1) {
        /* An item extends to the next comma or, failing that, to the end of the interior. */
        comma = memchr(cursor, ',', last - cursor);
        if (!comma)
            comma = last;
        g_ptr_array_add(items, g_strstrip(g_strndup(cursor, comma - cursor)));
    }
    g_ptr_array_add(items, NULL);

    return (gchar**)g_ptr_array_free(items, FALSE);
}

/* Create a new GArray with the same element size and the same elements as @array.
 * The elements are added with g_array_append_vals() so that the length of the new array is set as well; writing to
 * the reserved storage directly would leave the new array with zero length.
 * The result should be released with g_array_unref(). */
static GArray*
copy_garray(const GArray *array)
{
    guint element_size = g_array_get_element_size((GArray*)array);
    GArray *new_array = g_array_sized_new(FALSE, FALSE, element_size, array->len);

    if (array->len)
        g_array_append_vals(new_array, array->data, array->len);
    return new_array;
}

/* Parse @str as zad_int, ignoring leading and trailing whitespace.
 * The base is auto-detected by g_ascii_strtoull() (base argument 0) and the unsigned 64bit result is converted to
 * zad_int.
 * Fails if @str is empty, contains only whitespace, or has any characters which are not part of the number.
 * Stores the result in @out_value on success; @out_value is not touched on failure.
 * Returns FALSE on error. */
static gboolean
parse_int(const gchar* str, zad_int* out_value)
{
    const gchar *first, *last;
    gchar *stop = NULL;
    guint64 parsed;

    skip_whitespace(str, str + strlen(str), &first, &last);
    if (first == last)
        return FALSE;

    parsed = g_ascii_strtoull(first, &stop, 0);
    /* The number must end exactly where the trimmed text ends. */
    if (stop != last)
        return FALSE;

    *out_value = parsed;
    return TRUE;
}

/* Parse @str as zad_float using g_ascii_strtod(), ignoring leading and trailing whitespace.
 * Fails if @str is empty, contains only whitespace, or has any characters which are not part of the number.
 * Stores the result in @out_value on success; @out_value is not touched on failure.
 * Returns FALSE on error. */
static gboolean
parse_float(const gchar* str, zad_float* out_value)
{
    const gchar *first, *last;
    gchar *stop = NULL;
    zad_float parsed;

    skip_whitespace(str, str + strlen(str), &first, &last);
    if (first == last)
        return FALSE;

    parsed = g_ascii_strtod(first, &stop);
    /* The number must end exactly where the trimmed text ends. */
    if (stop != last)
        return FALSE;

    *out_value = parsed;
    return TRUE;
}

/* Fetch attribute @attr_name of XML @node as a GLib-allocated string.
 * The libxml2 string is duplicated and then released with xmlFree(), so the caller only deals with g_free().
 * Returns NULL if the attribute is missing. */
static gchar*
get_attr_string(xmlNodePtr node,
                const gchar* attr_name)
{
    xmlChar *raw = xmlGetProp(node, (const xmlChar*)attr_name);
    gchar *copy = NULL;

    if (raw) {
        copy = g_strdup((gchar*)raw);
        xmlFree(raw);
    }
    return copy;
}

/* Read attribute @attr_name of XML @node as zad_int.
 * When the attribute is missing, @default_value is stored in @out_value and the call succeeds.
 * If @out_missing is not NULL, it is set to TRUE if the attribute is missing and to FALSE otherwise.
 * Returns FALSE if the attribute is present but cannot be parsed with parse_int(). */
static gboolean
get_attr_int(xmlNodePtr node,
             const gchar* attr_name,
             zad_int default_value,
             zad_int *out_value,
             gboolean *out_missing)
{
    gchar *text = get_attr_string(node, attr_name);
    gboolean parsed_ok;

    if (out_missing)
        *out_missing = (text == NULL);

    if (!text) {
        *out_value = default_value;
        return TRUE;
    }

    parsed_ok = parse_int(text, out_value);
    g_free(text);
    return parsed_ok;
}

/* Read attribute @attr_name of XML @node as zad_float.
 * When the attribute is missing, @default_value is stored in @out_value and the call succeeds.
 * If @out_missing is not NULL, it is set to TRUE if the attribute is missing and to FALSE otherwise.
 * Returns FALSE if the attribute is present but cannot be parsed with parse_float(). */
static gboolean
get_attr_float(xmlNodePtr node,
               const gchar* attr_name,
               zad_float default_value,
               zad_float *out_value,
               gboolean *out_missing)
{
    gchar *text = get_attr_string(node, attr_name);
    gboolean parsed_ok;

    if (out_missing)
        *out_missing = (text == NULL);

    if (!text) {
        *out_value = default_value;
        return TRUE;
    }

    parsed_ok = parse_float(text, out_value);
    g_free(text);
    return parsed_ok;
}

/* Read attribute @attr_name of XML @node as a GArray of zad_int.
 * The attribute text must have the form "(x, y, ...)" or "[x, y, ...]".
 * If @out_missing is not NULL, it is set to TRUE if the attribute is missing and to FALSE otherwise.
 * When the attribute is missing, @out_values receives a copy of @default_values (or NULL if @default_values is
 * NULL) and the call succeeds.
 * On success with the attribute present, @out_values receives a new GArray.
 * Returns FALSE if the text is malformed or any item fails to parse; @out_values is not touched in that case. */
static gboolean
get_attr_int_array(xmlNodePtr node,
                   const gchar* attr_name,
                   const GArray* default_values,
                   GArray **out_values,
                   gboolean *out_missing)
{
    gchar *text;
    gchar **tokens, **tok;
    GArray *parsed;
    gboolean all_ok = TRUE;

    text = get_attr_string(node, attr_name);
    if (out_missing)
        *out_missing = (text == NULL);
    if (!text) {
        *out_values = default_values ? copy_garray(default_values) : NULL;
        return TRUE;
    }

    tokens = unpack_string_tuple(text);
    g_free(text);
    if (!tokens)
        return FALSE;

    parsed = g_array_sized_new(FALSE, FALSE, sizeof(zad_int), g_strv_length(tokens));
    for (tok = tokens; *tok; tok++) {
        zad_int item;

        if (!parse_int(*tok, &item)) {
            all_ok = FALSE;
            break;
        }
        g_array_append_val(parsed, item);
    }
    g_strfreev(tokens);

    if (!all_ok) {
        g_array_unref(parsed);
        return FALSE;
    }

    *out_values = parsed;
    return TRUE;
}

/* Read attribute @attr_name of XML @node as a GArray of zad_float.
 * The attribute text must have the form "(x, y, ...)" or "[x, y, ...]".
 * If @out_missing is not NULL, it is set to TRUE if the attribute is missing and to FALSE otherwise.
 * When the attribute is missing, @out_values receives a copy of @default_values (or NULL if @default_values is
 * NULL) and the call succeeds.
 * On success with the attribute present, @out_values receives a new GArray.
 * Returns FALSE if the text is malformed or any item fails to parse; @out_values is not touched in that case. */
static gboolean
get_attr_float_array(xmlNodePtr node,
                     const gchar* attr_name,
                     const GArray* default_values,
                     GArray **out_values,
                     gboolean *out_missing)
{
    gchar *text;
    gchar **tokens, **tok;
    GArray *parsed;
    gboolean all_ok = TRUE;

    text = get_attr_string(node, attr_name);
    if (out_missing)
        *out_missing = (text == NULL);
    if (!text) {
        *out_values = default_values ? copy_garray(default_values) : NULL;
        return TRUE;
    }

    tokens = unpack_string_tuple(text);
    g_free(text);
    if (!tokens)
        return FALSE;

    parsed = g_array_sized_new(FALSE, FALSE, sizeof(zad_float), g_strv_length(tokens));
    for (tok = tokens; *tok; tok++) {
        zad_float item;

        if (!parse_float(*tok, &item)) {
            all_ok = FALSE;
            break;
        }
        g_array_append_val(parsed, item);
    }
    g_strfreev(tokens);

    if (!all_ok) {
        g_array_unref(parsed);
        return FALSE;
    }

    *out_values = parsed;
    return TRUE;
}

/* Find all elements under @node by @name and return them in a new nodeset.
 * The search evaluates the XPath expression ".//<name>" relative to @node.
 * The returned nodeset should be freed with xmlXPathFreeNodeSet(); the nodes themselves still belong to the
 * document.
 * Return NULL if no such elements were found or on error. */
static xmlNodeSetPtr
find_elems_by_name(xmlXPathContextPtr xpathCtx,
                   xmlNodePtr node,
                   const gchar* name)
{
    xmlXPathObjectPtr xpathObj;
    xmlNodeSetPtr nodeset;
    gchar *loc;

    /* Build the expression on the heap; a variable-length array is avoided because its size depends on @name. */
    loc = g_strconcat(".//", name, NULL);
    xpathObj = xmlXPathNodeEval(node, (const xmlChar*)loc, xpathCtx);
    g_free(loc);
    if (!xpathObj)
        return NULL;
    if (xpathObj->type != XPATH_NODESET) {
        xmlXPathFreeObject(xpathObj);
        return NULL;
    }
    nodeset = xpathObj->nodesetval;
    /* Despite its name, this function actually frees everything in a
     * XPathObject _except_ the nodesetval. */
    xmlXPathFreeNodeSetList(xpathObj);
    /* An empty result may be represented by a NULL nodesetval, so check the pointer before looking inside. */
    if (!nodeset || nodeset->nodeNr < 1) {
        if (nodeset)
            xmlXPathFreeNodeSet(nodeset);
        return NULL;
    }
    return nodeset;
}

/* Find the first element called @name under @node, in the order returned by find_elems_by_name().
 * The returned node still belongs to the document; only the temporary nodeset is freed here.
 * Return NULL if no such element was found or on error. */
static xmlNodePtr
find_elem_by_name(xmlXPathContextPtr xpathCtx,
                  xmlNodePtr node,
                  const gchar* name)
{
    xmlNodeSetPtr matches = find_elems_by_name(xpathCtx, node, name);
    xmlNodePtr first = NULL;

    if (matches) {
        first = matches->nodeTab[0];
        xmlXPathFreeNodeSet(matches);
    }
    return first;
}

/* Compute the factor which create_field() passes (after a power-of-ten correction) to gwy_convert_raw_data() for
 * the raw samples of @scan_buf.
 * An explicit ScaleBy value always wins.  Otherwise the factor is derived from the units string and the
 * calibration values in @system_data and @scan_data.  Unrecognised units give 1.0.
 * NOTE(review): MaxValue is used as a divisor and is not checked for zero here or in read_scan_buf(). */
static gdouble
get_z_scale(const SystemData *system_data, const ScanData *scan_data, const ScanBuf *scan_buf)
{
    const gchar *units = scan_buf->units;
    const gdouble full_range = scan_buf->max_value;
    const gdouble channel_range = scan_buf->max_channel_value;
    const gdouble z_calib = system_data->calib_xyz[2];
    const gdouble piezo_voltage = system_data->max_piezo_voltage;

    if (scan_buf->has_scale_by)
        return scan_buf->scale_by;

    if (strequal(units, "nm"))
        return scan_data->vernier_z / 10.0 * z_calib * piezo_voltage / full_range;

    if (strequal(units, "nA"))
        return system_data->preamp_max_curr / full_range;

    if (strequal(units, "dB"))
        return log10(full_range) * 20.0 / full_range;

    /* 1/nm is used for d log(I) /d Z */
    if (strequal(units, "1/nm"))
        return log(channel_range) * channel_range / full_range / piezo_voltage / z_calib;

    if (strequal(units, "DACU"))
        return channel_range / full_range;

    return 1.0;  /* unknown units */
}

/* Decode the pixel data of @scan_buf from the "DataZ64" attribute of its XML node.
 * The attribute is base64-decoded, its CRC-32 is compared with crc_dataz, it is unpacked with
 * gwyzlib_unpack_compressed_data(), the unpacked size is compared with dims[0]*dims[1]*data_item_size and finally
 * the CRC-32 of the unpacked data is compared with crc_data.
 * Returns the unpacked data, to be freed with g_free(), or NULL with @error set if any step fails. */
static guchar*
decode_scan_buf_dataz64(const ScanBuf *scan_buf,
                        GError **error)
{
    const guint32 crc_seed = crc32(0, NULL, 0);
    xmlChar *encoded;
    guchar *packed, *unpacked;
    gsize packed_size, unpacked_size, wanted_size;
    guint32 crc;

    /* Don't use get_attr_string because it makes a copy of the string.
     * The attribute can be huge so we don't want unnecessary copies. */
    encoded = xmlGetProp(scan_buf->node, (const xmlChar*)"DataZ64");
    if (!encoded) {
        err_MISSING_FIELD(error, "ScanBuf.DataZ64");
        return NULL;
    }

    packed = g_base64_decode((const gchar*)encoded, &packed_size);
    xmlFree(encoded);

    /* Checksum of the compressed stream. */
    crc = crc32(crc_seed, packed, packed_size);
    if (crc != scan_buf->crc_dataz) {
        g_set_error(error, GWY_MODULE_FILE_ERROR, GWY_MODULE_FILE_ERROR_DATA,
                    _("Compressed data CRC mismatch for channel `%s'."), scan_buf->name);
        g_free(packed);
        return NULL;
    }

    unpacked_size = 0;
    unpacked = gwyzlib_unpack_compressed_data(packed, &packed_size, NULL, &unpacked_size, error);
    g_free(packed);
    if (!unpacked)
        return NULL;

    wanted_size = scan_buf->dims[0] * scan_buf->dims[1] * scan_buf->data_item_size;
    if (err_SIZE_MISMATCH(error, wanted_size, unpacked_size, TRUE)) {
        g_free(unpacked);
        return NULL;
    }

    /* Checksum of the uncompressed samples. */
    crc = crc32(crc_seed, unpacked, unpacked_size);
    if (crc != scan_buf->crc_data) {
        g_set_error(error, GWY_MODULE_FILE_ERROR, GWY_MODULE_FILE_ERROR_DATA,
                    _("Data CRC mismatch for channel `%s'."), scan_buf->name);
        g_free(unpacked);
        return NULL;
    }

    return unpacked;
}

/* Read the attributes of one <ScanBuf> element @node into a new ScanBuf.
 * Required attributes are ChanName, Dims (exactly two items), CRCA and CRCC.  Units, ValidRows, MaxValue,
 * MaxChannelValue, ScaleBy and DType are optional and get defaults or guesses when missing.
 * The pixel data are not decoded here; @node is remembered in the result for decode_scan_buf_dataz64(), so the
 * result must not outlive the XML document.
 * Returns the new ScanBuf, to be released with free_scan_buf(), or NULL with @error set on failure. */
static ScanBuf*
read_scan_buf(xmlNodePtr node, GError **error)
{
    ScanBuf *scan_buf = NULL;
    gchar *name, *units, *dtype;
    GArray *dims_array = NULL;
    zad_int crc_data, crc_dataz;
    gboolean success, missing;

    name = get_attr_string(node, "ChanName");
    if (!name) {
        err_INVALID(error, "ScanBuf.ChanName");
        goto fail;
    }

    units = get_attr_string(node, "Units");
    if (!units) {
        /* Old ZAD files do not have the 'Units' attribute.
         * Luckily, their ChanType, which should also be the first part of ChanName,
         * can only be "Topo" or "Current". */
        if (g_str_has_prefix(name, "Topo")) {
            units = g_strdup("nm");
        }
        else if (g_str_has_prefix(name, "Current")) {
            units = g_strdup("nA");
        }
        else {
            /* Still allow loading the data but with unknown units */
            units = g_strdup("");
        }
    }
    else {
        /* Some versions of SCANZ erroneously saved Units in '' quotes. */
        gsize units_len = strlen(units);
        if (units_len >= 2 && units[0] == '\'' && units[units_len - 1] == '\'') {
            gchar *old_units = units;
            units = g_strndup(old_units + 1, units_len - 2);
            g_free(old_units);
        }
    }

    /* new_scan_buf() makes its own copies so the temporaries can go right away. */
    scan_buf = new_scan_buf(name, units);
    g_free(name);
    g_free(units);

    success = get_attr_int_array(node, "Dims", NULL, &dims_array, NULL);
    if (!success || !dims_array || dims_array->len != 2) {
        if (dims_array)
            g_array_unref(dims_array);
        err_INVALID(error, "ScanBuf.Dims");
        goto fail;
    }
    gwy_assign(scan_buf->dims, dims_array->data, 2);
    g_array_unref(dims_array);

    /* Dims are 64bit values from the file, but create_field() keeps pixel indices in gint and the generic dimension
     * check below presumably takes an int-sized argument too (err.h is not visible here).  Reject values outside
     * the gint range first so that a huge value cannot be narrowed to a small, valid-looking one. */
    if (scan_buf->dims[0] < G_MININT || scan_buf->dims[0] > G_MAXINT
        || scan_buf->dims[1] < G_MININT || scan_buf->dims[1] > G_MAXINT) {
        err_INVALID(error, "ScanBuf.Dims");
        goto fail;
    }

    if (err_DIMENSION(error, scan_buf->dims[0]) || err_DIMENSION(error, scan_buf->dims[1]))
        goto fail;

    /* All rows are considered valid unless the file says otherwise. */
    success = get_attr_int(node, "ValidRows", scan_buf->dims[1], &scan_buf->valid_rows, NULL);
    if (!success || scan_buf->valid_rows < 0) {
        err_INVALID(error, "ScanBuf.ValidRows");
        goto fail;
    }

    success = get_attr_int(node, "MaxValue", 0x7fff, &scan_buf->max_value, NULL);
    if (!success) {
        err_INVALID(error, "ScanBuf.MaxValue");
        goto fail;
    }

    success = get_attr_int(node, "MaxChannelValue", 0, &scan_buf->max_channel_value, &missing);
    if (!success) {
        err_INVALID(error, "ScanBuf.MaxChannelValue");
        goto fail;
    }
    if (missing) {
        /* Guess the channel range from MaxValue: multiples of 0x7fff get 0x7fff, anything else 0x7ffff. */
        if (scan_buf->max_value % 0x7fff == 0)
            scan_buf->max_channel_value = 0x7fff;
        else
            scan_buf->max_channel_value = 0x7ffff;
    }

    success = get_attr_float(node, "ScaleBy", 0.0, &scan_buf->scale_by, &missing);
    if (!success) {
        err_INVALID(error, "ScanBuf.ScaleBy");
        goto fail;
    }
    scan_buf->has_scale_by = !missing;

    dtype = get_attr_string(node, "DType");
    if (!dtype) {
        /* No explicit sample type: choose 16bit or 32bit integers according to MaxValue. */
        if (scan_buf->max_value < 0x8000) {
            scan_buf->data_type = GWY_RAW_DATA_SINT16;
            scan_buf->data_item_size = 2;
        }
        else {
            scan_buf->data_type = GWY_RAW_DATA_SINT32;
            scan_buf->data_item_size = 4;
        }
    }
    else if (strequal(dtype, "int16")) {
        scan_buf->data_type = GWY_RAW_DATA_SINT16;
        scan_buf->data_item_size = 2;
    }
    else if (strequal(dtype, "int32")) {
        scan_buf->data_type = GWY_RAW_DATA_SINT32;
        scan_buf->data_item_size = 4;
    }
    else if (strequal(dtype, "float32")) {
        scan_buf->data_type = GWY_RAW_DATA_FLOAT;
        scan_buf->data_item_size = 4;
    }
    else {
        err_DATA_TYPE_NAME(error, dtype);
        g_free(dtype);
        goto fail;
    }
    g_free(dtype);

    /* Both checksums are mandatory.  Only their low 32 bits are kept. */
    success = get_attr_int(node, "CRCA", 0, &crc_data, &missing);
    if (!success || missing) {
        err_INVALID(error, "ScanBuf.CRCA");
        goto fail;
    }
    scan_buf->crc_data = (guint64)crc_data & G_GUINT64_CONSTANT(0xFFFFFFFF);

    success = get_attr_int(node, "CRCC", 0, &crc_dataz, &missing);
    if (!success || missing) {
        err_INVALID(error, "ScanBuf.CRCC");
        goto fail;
    }
    scan_buf->crc_dataz = (guint64)crc_dataz & G_GUINT64_CONSTANT(0xFFFFFFFF);

    scan_buf->node = node;

    return scan_buf;

fail:
    free_scan_buf(scan_buf);
    return NULL;
}

/* Read the <ScanData> element @node and all <ScanBuf> elements found below it.
 * ScanSize is required and must have exactly two items; both are passed through sanitise_real_size().
 * For version 5 files VernierZ is taken from @system_data, otherwise from the element itself (default 10.0).
 * Returns a new ScanData, to be released with free_scan_data(), or NULL with @error set on failure (including the
 * case that no ScanBuf element exists). */
static ScanData*
read_scan_data(int version,
               const SystemData *system_data,
               xmlNodePtr node,
               xmlXPathContextPtr xpathCtx,
               GError **error)
{
    ScanData *scan_data = new_scan_data();
    xmlNodeSetPtr bufs = NULL;
    GArray *size = NULL;
    gint k;

    if (!get_attr_float_array(node, "ScanSize", NULL, &size, NULL) || !size || size->len != 2) {
        if (size)
            g_array_unref(size);
        err_INVALID(error, "ScanData.ScanSize");
        goto fail;
    }
    gwy_assign(scan_data->scan_size, size->data, 2);
    g_array_unref(size);

    /* zad_float is gdouble so the sizes can be sanitised in place. */
    sanitise_real_size(&scan_data->scan_size[0], "x size");
    sanitise_real_size(&scan_data->scan_size[1], "y size");

    if (version == 5)
        scan_data->vernier_z = system_data->vernier_z;
    else if (!get_attr_float(node, "VernierZ", 10.0, &scan_data->vernier_z, NULL)) {
        err_INVALID(error, "ScanData.VernierZ");
        goto fail;
    }
    /* VernierZ is disabled in SCANZ and defaults to 10.0 but some versions of
     * SCANZ erroneously defaulted to 0.5 after it was disabled. */
    if (scan_data->vernier_z == 0.5)
        scan_data->vernier_z = 10.0;

    bufs = find_elems_by_name(xpathCtx, node, "ScanBuf");
    if (!bufs) {
        err_NO_DATA(error);
        goto fail;
    }
    for (k = 0; k < bufs->nodeNr; k++) {
        ScanBuf *scan_buf = read_scan_buf(bufs->nodeTab[k], error);

        if (!scan_buf)
            goto fail;
        scan_data_append_scan_buf(scan_data, scan_buf);
    }
    goto cleanup;

fail:
    free_scan_data(scan_data);
    scan_data = NULL;

cleanup:
    if (bufs)
        xmlXPathFreeNodeSet(bufs);
    return scan_data;
}

/* Read the <SystemData> element @node.
 * CalibXYZ is required and must have exactly four items.  MaxPiezoVoltage defaults to 150.0.  PreampMaxCurr is
 * either given directly or computed from PreampGain; one of the two must be present.  VernierZ is required only for
 * version 5 files.
 * Returns a new SystemData, to be released with g_free(), or NULL with @error set on failure. */
static SystemData*
read_system_data(int version,
                 xmlNodePtr node,
                 GError **error)
{
    SystemData *sysdata = g_new0(SystemData, 1);
    GArray *calib = NULL;
    gboolean absent;

    if (!get_attr_float_array(node, "CalibXYZ", NULL, &calib, NULL) || !calib || calib->len != 4) {
        if (calib)
            g_array_unref(calib);
        err_INVALID(error, "SystemData.CalibXYZ");
        goto fail;
    }
    gwy_assign(sysdata->calib_xyz, calib->data, 4);
    g_array_unref(calib);

    if (!get_attr_float(node, "MaxPiezoVoltage", 150.0, &sysdata->max_piezo_voltage, NULL)) {
        err_INVALID(error, "SystemData.MaxPiezoVoltage");
        goto fail;
    }

    if (!get_attr_float(node, "PreampMaxCurr", 0.0, &sysdata->preamp_max_curr, &absent)) {
        err_INVALID(error, "SystemData.PreampMaxCurr");
        goto fail;
    }
    if (absent) {
        /* Fall back to deriving the maximum current from the preamplifier gain. */
        zad_int preamp_gain;

        if (!get_attr_int(node, "PreampGain", 0, &preamp_gain, &absent)) {
            err_INVALID(error, "SystemData.PreampGain");
            goto fail;
        }
        if (absent) {
            err_MISSING_FIELD(error, "SystemData.PreampMaxCurr");
            goto fail;
        }
        sysdata->preamp_max_curr = gwy_exp10(10 - preamp_gain);
    }

    if (!get_attr_float(node, "VernierZ", 0.0, &sysdata->vernier_z, &absent)) {
        err_INVALID(error, "SystemData.VernierZ");
        goto fail;
    }
    if (version == 5 && absent) {
        err_MISSING_FIELD(error, "SystemData.VernierZ");
        goto fail;
    }

    return sysdata;

fail:
    g_free(sysdata);
    return NULL;
}

/* Create a field with the data of @scan_buf.
 * The pixel data are decoded with decode_scan_buf_dataz64() and converted from little-endian raw samples using the
 * factor from get_z_scale(), corrected by the power of ten returned when the units string is parsed.
 * Real dimensions come from the scan size multiplied by Nanometer.
 * Rows from valid_rows onwards are filled with NAN.
 * Returns the new field, or NULL with @error set if decoding the data fails. */
static GwyField*
create_field(const SystemData* system_data,
             const ScanData* scan_data,
             const ScanBuf* scan_buf,
             GError **error)
{
    /* Pixel counts and indices are kept in gsize; the product of the two dimensions need not fit into gint. */
    const gsize xres = scan_buf->dims[0];
    const gsize yres = scan_buf->dims[1];
    const gsize npixels = xres*yres;
    guchar *data;
    GwyField *field;
    gint unit_power10;
    gdouble z_scale;

    z_scale = get_z_scale(system_data, scan_data, scan_buf);

    data = decode_scan_buf_dataz64(scan_buf, error);
    if (!data)
        return NULL;

    field = gwy_field_new(scan_buf->dims[0], scan_buf->dims[1],
                          scan_data->scan_size[0] * Nanometer,
                          scan_data->scan_size[1] * Nanometer,
                          FALSE);
    gwy_unit_set_from_string(gwy_field_get_unit_xy(field), "m");
    unit_power10 = gwy_unit_set_from_string(gwy_field_get_unit_z(field), scan_buf->units);
    z_scale *= gwy_exp10(unit_power10);  /* converts from prefixed to base SI units */
    gwy_convert_raw_data(data, npixels, 1,
                         scan_buf->data_type, GWY_BYTE_ORDER_LITTLE_ENDIAN,
                         gwy_field_get_data(field), z_scale, 0.0);
    g_free(data);

    if (scan_buf->valid_rows < scan_buf->dims[1]) {
        /* Set invalid rows to NAN */
        gdouble *pixels = gwy_field_get_data(field);
        gsize k;

        /* read_scan_buf() rejects negative valid_rows, so the conversion to gsize is safe. */
        for (k = (gsize)scan_buf->valid_rows * xres; k < npixels; k++)
            pixels[k] = NAN;
    }

    return field;
}

/* File loading callback.
 * Parses @filename as XML, checks that the root element is SCANZ_DATA with a supported version, reads the
 * SystemData and ScanData elements and creates one image per ScanBuf.  Each image gets its channel name as title,
 * a mask from gwy_mask_nans_in_field() when one is returned, and an import log entry.
 * Returns the new file, or NULL with @error set on failure.  All intermediate data and the XML document are
 * released before returning in both cases. */
static GwyFile*
load_file(const gchar *filename,
          G_GNUC_UNUSED GwyRunModeFlags mode,
          GError **error)
{
    GwyFile *file = NULL;
    xmlDocPtr doc;
    xmlXPathContextPtr xpathCtx = NULL;
    xmlNodePtr root, system_data_node, scan_data_node;
    SystemData *system_data = NULL;
    ScanData *scan_data = NULL;
    zad_int version;
    gboolean missing;
    guint i;

    /* An unparsable document and a wrong root element are reported the same way. */
    doc = xmlReadFile(filename, NULL, XML_PARSE_HUGE | XML_PARSE_NOERROR | XML_PARSE_NOWARNING);
    root = doc ? xmlDocGetRootElement(doc) : NULL;
    if (!root || root->type != XML_ELEMENT_NODE || !strequal((const char*)root->name, "SCANZ_DATA")) {
        err_FILE_TYPE(error, FILE_TYPE_NAME);
        goto finish;
    }

    if (!get_attr_int(root, "version", 0, &version, &missing) || missing) {
        err_MISSING_FIELD(error, "version");
        goto finish;
    }
    if (version < MIN_ZAD_VERSION || version > MAX_ZAD_VERSION) {
        err_UNSUPPORTED(error, "version");
        goto finish;
    }

    /* Locate both mandatory elements before reading either of them. */
    xpathCtx = xmlXPathNewContext(doc);
    system_data_node = find_elem_by_name(xpathCtx, root, "SystemData");
    if (!system_data_node) {
        err_MISSING_FIELD(error, "SystemData");
        goto finish;
    }
    scan_data_node = find_elem_by_name(xpathCtx, root, "ScanData");
    if (!scan_data_node) {
        err_MISSING_FIELD(error, "ScanData");
        goto finish;
    }

    system_data = read_system_data(version, system_data_node, error);
    if (!system_data)
        goto finish;
    scan_data = read_scan_data(version, system_data, scan_data_node, xpathCtx, error);
    if (!scan_data)
        goto finish;

    /* read_scan_data() already checked for err_NO_DATA so no need to check here. */
    file = gwy_file_new_in_construction();
    for (i = 0; i < scan_data_num_scan_bufs(scan_data); i++) {
        ScanBuf *scan_buf = scan_data_get_scan_buf(scan_data, i);
        GwyField *field;
        GwyNield *mask;

        field = create_field(system_data, scan_data, scan_buf, error);
        if (!field) {
            /* Give up on the whole file, including the images already added. */
            g_clear_object(&file);
            goto finish;
        }
        mask = gwy_mask_nans_in_field(field, TRUE);
        gwy_file_pass_image(file, i, field);
        gwy_file_set_title(file, GWY_FILE_IMAGE, i, (const guchar*)scan_buf->name, FALSE);
        if (mask)
            gwy_file_pass_image_mask(file, i, mask);
        gwy_check_nonsquare_image(file, i);
        gwy_log_add_import(file, GWY_FILE_IMAGE, i, NULL, filename);
    }

finish:
    free_scan_data(scan_data);
    g_free(system_data);
    if (xpathCtx)
        xmlXPathFreeContext(xpathCtx);
    if (doc)
        xmlFreeDoc(doc);
    return file;
}

/* vim: set cin columns=120 tw=118 et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
