/*
 *  $Id: serialize.c 28485 2025-09-03 08:07:07Z yeti-dn $
 *  Copyright (C) 2009-2025 David Nečas (Yeti).
 *  E-mail: yeti@gwyddion.net.
 *
 *  This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along with this program; if not, write to the
 *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include "config.h"
#include <string.h>
#include <glib/gi18n-lib.h>

#include "libgwyddion/gwyddion.h"
#include "libgwyddion/serializable-internal.h"

/* Compile-time assumptions of the serialiser: gsize is 64 bits wide, floating point values have the same size as the
 * unsigned integer types they are written through (gdouble as guint64, gfloat as guint32), and the machine byte order
 * is either plain little endian or plain big endian. */
G_STATIC_ASSERT(GLIB_SIZEOF_SIZE_T == 8);
G_STATIC_ASSERT(sizeof(gdouble) == sizeof(guint64));
G_STATIC_ASSERT(sizeof(gfloat) == sizeof(guint32));
G_STATIC_ASSERT(G_BYTE_ORDER == G_LITTLE_ENDIAN || G_BYTE_ORDER == G_BIG_ENDIAN);

/* Maximum size of the output buffer in bytes (4 MiB). For small objects we allocate smaller buffers because we know
 * the total serialised size in advance. So this is the size to which the output of large objects is chunked. */
enum {
    GWY_SERIALIZE_BUFFER_SIZE = 4*1024*1024,
};

/* Output buffer state. The sizes are actually in bytes because it is more convenient. */
typedef struct {
    /* Stream the buffered data are written to when the buffer is flushed. */
    GOutputStream *output;
    /* Error location passed on to the stream writing function. */
    GError **error;
    /* Total size of the buffer. */
    gsize len;
    /* Number of bytes still free in the buffer. */
    gsize bfree;
    /* Current write position; it lies (len - bfree) bytes after the start of the allocated block. */
    guchar *data;
    /* Representation of sizes in the file; its numerical value is also used as the byte size of a stored size. */
    GwySerializeSizeType sizetype;
} GwySerializeBuffer;

static gsize    calculate_sizes(GwySerializableGroup *group,
                                GwySerializeSizeType sizetype);
static gboolean dump_to_stream (const GwySerializableGroup *group,
                                GwySerializeBuffer *buffer);

/* Allocates the data block of @buffer and marks it as entirely free.
 *
 * @max_size is the total number of bytes which will be written through the buffer. The buffer is never larger than
 * GWY_SERIALIZE_BUFFER_SIZE and its size is always a positive multiple of 8. Fields other than len, bfree and data
 * are not touched. */
static void
buffer_alloc(GwySerializeBuffer *buffer, gsize max_size)
{
    /* For small objects, allocate a buffer just large enough to hold the serialised data. For large ones, allocate
     * a large but reasonable buffer. */
    max_size = MIN(GWY_SERIALIZE_BUFFER_SIZE, max_size);

    /* A zero-sized buffer would never have any free space and the chunked writers would spin forever on it. */
    if (!max_size)
        max_size = 8;

    /* Round up to a whole multiple of 8. The byte-swapping writers store 16, 32 and 64bit items directly into the
     * buffer and flush it as a whole, so its size must be divisible by the largest item size. */
    if (max_size % 8 != 0)
        max_size += 8 - max_size % 8;

    buffer->len = buffer->bfree = max_size;
    buffer->data = g_malloc(max_size);
}

/* Writes the bytes accumulated in @buffer to the output stream and rewinds the buffer so that it is empty again.
 *
 * Returns TRUE if there was nothing to write or the write succeeded, FALSE if writing to the stream failed. */
static gboolean
buffer_finish(GwySerializeBuffer *buffer)
{
    const gsize used = buffer->len - buffer->bfree;

    if (used == 0)
        return TRUE;

    /* The data pointer is the write cursor; step back over the used part to reach the beginning of the block. */
    guchar *start = buffer->data - used;

    buffer->data = start;
    buffer->bfree = buffer->len;
    return g_output_stream_write_all(buffer->output, start, used, NULL, NULL, buffer->error);
}

/* Releases the data block of @buffer. Any bytes still sitting in the buffer are discarded, not written. */
static void
buffer_dealloc(GwySerializeBuffer *buffer)
{
    const gsize used = buffer->len - buffer->bfree;

    /* Rewind the write cursor to the beginning of the block because that is the pointer which must be freed. */
    buffer->bfree = buffer->len;
    buffer->data -= used;
    GWY_FREE(buffer->data);
}

/* Copies @size bytes from @data to the buffer, writing the buffer to the output stream each time it becomes full.
 *
 * Returns TRUE on success, FALSE if writing to the stream failed. */
static gboolean
buffer_write(GwySerializeBuffer *buffer,
             gconstpointer data,
             gsize size)
{
    const guchar *src = data;

    /* As long as the remaining input can fill the free space completely, top the buffer up and flush all of it. */
    while (size >= buffer->bfree) {
        const gsize chunk = buffer->bfree;

        memcpy(buffer->data, src, chunk);
        src += chunk;
        size -= chunk;

        /* Rewind the write cursor to the beginning of the block and mark the buffer empty. */
        buffer->data -= buffer->len - chunk;
        buffer->bfree = buffer->len;
        if (!g_output_stream_write_all(buffer->output, buffer->data, buffer->len, NULL, NULL, buffer->error))
            return FALSE;
    }

    /* The rest is strictly smaller than the free space. Just store it. */
    memcpy(buffer->data, src, size);
    buffer->bfree -= size;
    buffer->data += size;

    return TRUE;
}

/* Writes a size (byte size or item count) to the buffer as a little-endian integer, either 64bit or 32bit according
 * to the size type of @buffer.
 *
 * Returns TRUE on success, FALSE if writing failed or the size type is invalid. */
static gboolean
buffer_write_size(GwySerializeBuffer *buffer,
                  gsize size)
{
    if (buffer->sizetype == GWY_SERIALIZE_SIZE_64BIT) {
        guint64 le64 = size;

        le64 = GUINT64_TO_LE(le64);
        return buffer_write(buffer, &le64, sizeof(guint64));
    }

    g_return_val_if_fail(buffer->sizetype == GWY_SERIALIZE_SIZE_32BIT, FALSE);

    /* Complain when any of the upper 32 bits is set but carry on. We basically do not care what happens when the
     * value is not representable. All options mean a broken file. */
    if (size > G_GUINT64_CONSTANT(0xffffffff)) {
        g_critical("A size is not representable by 32bit integer. Serialised data will be invalid.");
    }

    guint32 le32 = size;

    le32 = GUINT32_TO_LE(le32);
    return buffer_write(buffer, &le32, sizeof(guint32));
}

/* Writes @n 16bit items from @data16 to the buffer in little-endian byte order, writing the buffer to the output
 * stream each time it becomes full.
 *
 * Returns TRUE on success, FALSE if writing to the stream failed. */
static gboolean
buffer_write16(GwySerializeBuffer *buffer,
               const guint16 *data16,
               gsize n)
{
    /* On little-endian machines the data already have the file byte order and can be copied verbatim. */
    if (G_BYTE_ORDER == G_LITTLE_ENDIAN)
        return buffer_write(buffer, data16, sizeof(guint16)*n);

    /* Only swap aligned data.
     * First, we do not want to mess with the leftover bytes, second, the mem-swapping instructions usually work much
     * better on aligned data.*/
    if (buffer->bfree % sizeof(guint16) != 0) {
        if (!buffer_finish(buffer))
            return FALSE;
    }

    /* While the remaining items can fill all the free space, fill it and flush the entire buffer. */
    while (n >= buffer->bfree/sizeof(guint16)) {
        guint16 *bdata16 = (guint16*)buffer->data;
        for (gsize i = buffer->bfree/sizeof(guint16); i; i--) {
            /* The default swapping macros evaluate the value multiple times. */
            guint16 v = *(data16++);
            *(bdata16++) = GUINT16_SWAP_LE_BE(v);
        }
        n -= buffer->bfree/sizeof(guint16);
        /* Rewind the write cursor to the beginning of the block. */
        buffer->data -= buffer->len - buffer->bfree;
        buffer->bfree = buffer->len;
        if (!g_output_stream_write_all(buffer->output, buffer->data, buffer->len, NULL, NULL, buffer->error))
            return FALSE;
    }

    /* Fewer items than the free space can hold remain. Convert exactly the @n leftover items; filling all the free
     * space here would read past the end of @data16. */
    guint16 *bdata16 = (guint16*)buffer->data;
    for (gsize i = n; i; i--) {
        /* The default swapping macros evaluate the value multiple times. */
        guint16 v = *(data16++);
        *(bdata16++) = GUINT16_SWAP_LE_BE(v);
    }
    buffer->data += sizeof(guint16)*n;
    buffer->bfree -= sizeof(guint16)*n;

    return TRUE;
}

/* Writes @n 32bit items from @data32 to the buffer in little-endian byte order, writing the buffer to the output
 * stream each time it becomes full.
 *
 * Returns TRUE on success, FALSE if writing to the stream failed. */
static gboolean
buffer_write32(GwySerializeBuffer *buffer,
               const guint32 *data32,
               gsize n)
{
    /* On little-endian machines the data already have the file byte order and can be copied verbatim. */
    if (G_BYTE_ORDER == G_LITTLE_ENDIAN)
        return buffer_write(buffer, data32, sizeof(guint32)*n);

    /* Only swap aligned data.
     * First, we do not want to mess with the leftover bytes, second, the mem-swapping instructions usually work much
     * better on aligned data.*/
    if (buffer->bfree % sizeof(guint32) != 0) {
        if (!buffer_finish(buffer))
            return FALSE;
    }

    /* While the remaining items can fill all the free space, fill it and flush the entire buffer. */
    while (n >= buffer->bfree/sizeof(guint32)) {
        guint32 *bdata32 = (guint32*)buffer->data;
        for (gsize i = buffer->bfree/sizeof(guint32); i; i--) {
            /* The default swapping macros evaluate the value multiple times. */
            guint32 v = *(data32++);
            *(bdata32++) = GUINT32_SWAP_LE_BE(v);
        }
        n -= buffer->bfree/sizeof(guint32);
        /* Rewind the write cursor to the beginning of the block. */
        buffer->data -= buffer->len - buffer->bfree;
        buffer->bfree = buffer->len;
        if (!g_output_stream_write_all(buffer->output, buffer->data, buffer->len, NULL, NULL, buffer->error))
            return FALSE;
    }

    /* Fewer items than the free space can hold remain. Convert exactly the @n leftover items; filling all the free
     * space here would read past the end of @data32. */
    guint32 *bdata32 = (guint32*)buffer->data;
    for (gsize i = n; i; i--) {
        /* The default swapping macros evaluate the value multiple times. */
        guint32 v = *(data32++);
        *(bdata32++) = GUINT32_SWAP_LE_BE(v);
    }
    buffer->data += sizeof(guint32)*n;
    buffer->bfree -= sizeof(guint32)*n;

    return TRUE;
}

/* Writes @n 64bit items from @data64 to the buffer in little-endian byte order, writing the buffer to the output
 * stream each time it becomes full.
 *
 * Returns TRUE on success, FALSE if writing to the stream failed. */
static gboolean
buffer_write64(GwySerializeBuffer *buffer,
               const guint64 *data64,
               gsize n)
{
    /* On little-endian machines the data already have the file byte order and can be copied verbatim. */
    if (G_BYTE_ORDER == G_LITTLE_ENDIAN)
        return buffer_write(buffer, data64, sizeof(guint64)*n);

    /* Only swap aligned data.
     * First, we do not want to mess with the leftover bytes, second, the mem-swapping instructions usually work much
     * better on aligned data.*/
    if (buffer->bfree % sizeof(guint64) != 0) {
        if (!buffer_finish(buffer))
            return FALSE;
    }

    /* While the remaining items can fill all the free space, fill it and flush the entire buffer. */
    while (n >= buffer->bfree/sizeof(guint64)) {
        guint64 *bdata64 = (guint64*)buffer->data;
        for (gsize i = buffer->bfree/sizeof(guint64); i; i--) {
            /* The default swapping macros evaluate the value multiple times. */
            guint64 v = *(data64++);
            *(bdata64++) = GUINT64_SWAP_LE_BE(v);
        }
        n -= buffer->bfree/sizeof(guint64);
        /* Rewind the write cursor to the beginning of the block. */
        buffer->data -= buffer->len - buffer->bfree;
        buffer->bfree = buffer->len;
        if (!g_output_stream_write_all(buffer->output, buffer->data, buffer->len, NULL, NULL, buffer->error))
            return FALSE;
    }

    /* Fewer items than the free space can hold remain. Convert exactly the @n leftover items; filling all the free
     * space here would read past the end of @data64. */
    guint64 *bdata64 = (guint64*)buffer->data;
    for (gsize i = n; i; i--) {
        /* The default swapping macros evaluate the value multiple times. */
        guint64 v = *(data64++);
        *(bdata64++) = GUINT64_SWAP_LE_BE(v);
    }
    buffer->data += sizeof(guint64)*n;
    buffer->bfree -= sizeof(guint64)*n;

    return TRUE;
}

/**
 * gwy_serialize_gio:
 * @serializable: A serialisable object.
 * @output: Output stream to write the serialised object to.
 * @error: Location to store the error occurring, %NULL to ignore.  Errors from %G_IO_ERROR domain can occur.
 *
 * Serialises an object to GIO stream in GWY format.
 *
 * The object is itemised, the items are written to @output using the default size representation and then the
 * itemisation is finished and the items are freed, regardless of whether the writing succeeded.
 *
 * The data writing employs internal buffering.  If the output stream is already buffered (e.g.,
 * #GBufferedOutputStream), the output will be unnecessarily buffered twice.
 *
 * Returns: %TRUE if the operation succeeded, %FALSE if it failed.
 **/
gboolean
gwy_serialize_gio(GwySerializable *serializable,
                  GOutputStream *output,
                  GError **error)
{
    g_return_val_if_fail(GWY_IS_SERIALIZABLE(serializable), FALSE);
    g_return_val_if_fail(G_IS_OUTPUT_STREAM(output), FALSE);

    GwySerializableGroup *itemized = gwy_serializable_itemize(serializable);
    const gboolean written = gwy_serialize_group_gio(itemized, GWY_SERIALIZE_SIZE_DEFAULT, output, error);

    /* Finish the itemisation and release the group also when the writing failed. */
    gwy_serializable_group_done(itemized);
    gwy_serializable_done(serializable);
    gwy_serializable_group_free(itemized, FALSE, NULL);

    return written;
}

/**
 * gwy_serialize_group_gio:
 * @group: Itemised object components.
 * @sizetype: Type of size representation.
 * @output: Output stream to write the serialised object to.
 * @error: Location to store the error occurring, %NULL to ignore.  Errors from %G_IO_ERROR domain can occur.
 *
 * Serialises itemised data to GIO stream in GWY format.
 *
 * This is a low-level function, meant for the case when @group was created or modified manually or the itemisation
 * was run separately for some other reason. Use gwy_serialize_gio() for normal object serialisation. Remember to call
 * gwy_serializable_group_done() on the group and gwy_serializable_done() on the top-level object if needed. This
 * function only carries out the serialisation to an output stream.
 *
 * The byte sizes stored in @group and all groups nested in it are recalculated by this function.
 *
 * The data writing employs internal buffering.  If the output stream is already buffered (e.g.,
 * #GBufferedOutputStream), the output will be unnecessarily buffered twice.
 *
 * Returns: %TRUE if the operation succeeded, %FALSE if it failed.
 **/
gboolean
gwy_serialize_group_gio(GwySerializableGroup *group,
                        GwySerializeSizeType sizetype,
                        GOutputStream *output,
                        GError **error)
{
    g_return_val_if_fail(group, FALSE);
    g_return_val_if_fail(sizetype == GWY_SERIALIZE_SIZE_32BIT || sizetype == GWY_SERIALIZE_SIZE_64BIT, FALSE);
    g_return_val_if_fail(G_IS_OUTPUT_STREAM(output), FALSE);

    /* The total size must be known beforehand, both for the headers and for choosing the buffer size. */
    calculate_sizes(group, sizetype);

    GwySerializeBuffer buffer = {
        .output = output,
        .error = error,
        .sizetype = sizetype,
    };
    buffer_alloc(&buffer, group->size_bytes);

    const gboolean written = dump_to_stream(group, &buffer);

    buffer_dealloc(&buffer);

    return written;
}

/**
 * ctype_size:
 * @ctype: Component type.
 *
 * Computes the size of an atomic type based on its type letter.
 *
 * Returns: Size in bytes, 0 for arrays and other nonatomic types.
 **/
static inline gsize G_GNUC_CONST
ctype_size(GwySerializableCType ctype)
{
    switch (ctype) {
        case GWY_SERIALIZABLE_INT8:
        case GWY_SERIALIZABLE_BOOLEAN:
        return sizeof(guint8);

        case GWY_SERIALIZABLE_INT16:
        return sizeof(guint16);

        case GWY_SERIALIZABLE_INT32:
        return sizeof(gint32);

        case GWY_SERIALIZABLE_INT64:
        return sizeof(gint64);

        case GWY_SERIALIZABLE_DOUBLE:
        return sizeof(gdouble);

        case GWY_SERIALIZABLE_FLOAT:
        return sizeof(gfloat);

        /* Everything else is not an atomic type. */
        default:
        return 0;
    }
}

/* Calculates the serialised byte size of @group, including its name and size header, recursing into nested groups,
 * and stores it to the size_bytes field of each group visited.
 *
 * The value is also returned for convenience. It permits us to declare item as const even when recursing, because we
 * do not access the fields after the size changes. If an item of unknown type is encountered, zero is returned and
 * the size of @group is not updated. */
static gsize
calculate_sizes(GwySerializableGroup *group,
                GwySerializeSizeType sizetype)
{
    const gsize nitems = group->n;
    /* Group header: the nul-terminated name followed by the stored size. */
    gsize total = strlen(group->name)+1 + sizetype;

    for (gsize k = 0; k < nitems; k++) {
        const GwySerializableItem *item = group->items + k;
        const GwySerializableCType ctype = item->ctype;
        gsize atom_size;

        /* Item header: the nul-terminated name followed by one byte for ctype. */
        total += strlen(item->name)+1 + 1;

        /* Atomic values. */
        atom_size = ctype_size(ctype);
        if (atom_size) {
            total += atom_size;
            continue;
        }

        /* Arrays of atomic values have the uppercase letter of the item type. They are preceded by item count. */
        atom_size = ctype_size(g_ascii_tolower(ctype));
        if (atom_size) {
            g_warn_if_fail(item->array_size != 0);
            total += sizetype + atom_size*item->array_size;
            continue;
        }

        if (ctype == GWY_SERIALIZABLE_STRING) {
            total += strlen((const gchar*)item->value.v_string)+1;
            continue;
        }

        if (ctype == GWY_SERIALIZABLE_OBJECT || ctype == GWY_SERIALIZABLE_BOXED) {
            g_assert(item->flags & GWY_SERIALIZABLE_IS_GROUP);
            total += calculate_sizes(item->value.v_group, sizetype);
            continue;
        }

        if (ctype == GWY_SERIALIZABLE_STRING_ARRAY) {
            const gsize nstrings = item->array_size;

            total += sizetype;
            for (gsize m = 0; m < nstrings; m++)
                total += strlen((const gchar*)item->value.v_string_array[m])+1;
            continue;
        }

        if (ctype == GWY_SERIALIZABLE_OBJECT_ARRAY) {
            g_assert(item->flags & GWY_SERIALIZABLE_IS_GROUP);

            const gsize ngroups = item->array_size;
            GwySerializableGroup **subgroups = item->value.v_group_array;

            total += sizetype;
            for (gsize m = 0; m < ngroups; m++)
                total += calculate_sizes(subgroups[m], sizetype);
            continue;
        }

        g_return_val_if_reached(0);
    }

    group->size_bytes = total;
    return total;
}

/* Writes @group, including all groups nested in it, to @buffer.
 *
 * The buffer is only written to the output stream when it fills up; the last partially filled buffer is left for the
 * caller to flush. Flushing after each nested group would defeat the buffering for objects with many small
 * components.
 *
 * Returns TRUE on success, FALSE if writing failed or an unknown or inconsistent item was encountered. */
static gboolean
dump_group(const GwySerializableGroup *group, GwySerializeBuffer *buffer)
{
    /* The size stored in GWY files excludes the name and itself. */
    gsize namelen = strlen(group->name) + 1;
    gsize size = group->size_bytes - namelen - buffer->sizetype;

    if (!buffer_write(buffer, group->name, namelen))
        return FALSE;
    if (!buffer_write_size(buffer, size))
        return FALSE;

    for (gsize i = 0; i < group->n; i++) {
        const GwySerializableItem *item = group->items + i;
        /* Use the single-byte type here to facilitate writing. */
        const guint8 ctype = item->ctype;
        gsize len = strlen(item->name) + 1;
        /* Array types are denoted by uppercase letters. */
        gboolean is_array = g_ascii_isupper(ctype);

        if (!buffer_write(buffer, item->name, len))
            return FALSE;

        if (!buffer_write(buffer, &ctype, sizeof(guint8)))
            return FALSE;

        if (is_array) {
            gsize array_size = item->array_size;
            g_warn_if_fail(array_size);

            /* All arrays start with the number of items. */
            if (!buffer_write_size(buffer, array_size))
                return FALSE;

            if (ctype == GWY_SERIALIZABLE_INT64_ARRAY || ctype == GWY_SERIALIZABLE_DOUBLE_ARRAY) {
                if (!buffer_write64(buffer, item->value.v_uint64_array, array_size))
                    return FALSE;
            }
            else if (ctype == GWY_SERIALIZABLE_STRING_ARRAY) {
                gchar **strings = item->value.v_string_array;
                for (gsize j = 0; j < array_size; j++) {
                    if (!buffer_write(buffer, strings[j], strlen(strings[j])+1))
                        return FALSE;
                }
            }
            else if (ctype == GWY_SERIALIZABLE_OBJECT_ARRAY) {
                g_return_val_if_fail(item->flags & GWY_SERIALIZABLE_IS_GROUP, FALSE);
                GwySerializableGroup **groups = item->value.v_group_array;
                for (gsize j = 0; j < array_size; j++) {
                    if (!dump_group(groups[j], buffer))
                        return FALSE;
                }
            }
            else if (ctype == GWY_SERIALIZABLE_INT32_ARRAY || ctype == GWY_SERIALIZABLE_FLOAT_ARRAY) {
                if (!buffer_write32(buffer, item->value.v_uint32_array, array_size))
                    return FALSE;
            }
            else if (ctype == GWY_SERIALIZABLE_INT8_ARRAY) {
                if (!buffer_write(buffer, item->value.v_uint8_array, array_size))
                    return FALSE;
            }
            else if (ctype == GWY_SERIALIZABLE_INT16_ARRAY) {
                if (!buffer_write16(buffer, item->value.v_uint16_array, array_size))
                    return FALSE;
            }
            else {
                g_return_val_if_reached(FALSE);
            }
        }
        else if (ctype == GWY_SERIALIZABLE_OBJECT || ctype == GWY_SERIALIZABLE_BOXED) {
            g_return_val_if_fail(item->flags & GWY_SERIALIZABLE_IS_GROUP, FALSE);
            if (!dump_group(item->value.v_group, buffer))
                return FALSE;
        }
        else if (ctype == GWY_SERIALIZABLE_INT64 || ctype == GWY_SERIALIZABLE_DOUBLE) {
            guint64 v = GUINT64_TO_LE(item->value.v_uint64);
            if (!buffer_write(buffer, &v, sizeof(guint64)))
                return FALSE;
        }
        else if (ctype == GWY_SERIALIZABLE_INT32 || ctype == GWY_SERIALIZABLE_FLOAT) {
            guint32 v = GUINT32_TO_LE(item->value.v_uint32);
            if (!buffer_write(buffer, &v, sizeof(guint32)))
                return FALSE;
        }
        else if (ctype == GWY_SERIALIZABLE_BOOLEAN) {
            /* Normalise booleans to 0 or 1. */
            guint8 v = !!item->value.v_boolean;
            if (!buffer_write(buffer, &v, sizeof(guint8)))
                return FALSE;
        }
        else if (ctype == GWY_SERIALIZABLE_STRING) {
            const gchar *s = item->value.v_string;
            if (!buffer_write(buffer, s, strlen(s)+1))
                return FALSE;
        }
        else if (ctype == GWY_SERIALIZABLE_INT8) {
            if (!buffer_write(buffer, &item->value.v_uint8, sizeof(guint8)))
                return FALSE;
        }
        else if (ctype == GWY_SERIALIZABLE_INT16) {
            guint16 v = GUINT16_TO_LE(item->value.v_uint16);
            if (!buffer_write(buffer, &v, sizeof(guint16)))
                return FALSE;
        }
        else {
            g_return_val_if_reached(FALSE);
        }
    }

    return TRUE;
}

/**
 * dump_to_stream:
 * @group: Itemised object components.
 * @buffer: Serialization output buffer.
 *
 * Writes an itemised object tree into an output stream.
 *
 * Byte-swapping and similar transforms are done on-the-fly, as necessary. The sizes of @group and all nested groups
 * must have been already calculated. The buffer is flushed once, after the entire tree has been written.
 *
 * Returns: %TRUE if the operation succeeded, %FALSE if it failed.
 **/
static gboolean
dump_to_stream(const GwySerializableGroup *group, GwySerializeBuffer *buffer)
{
    if (!dump_group(group, buffer))
        return FALSE;

    return buffer_finish(buffer);
}

/**
 * SECTION: serialize
 * @title: Serialize
 * @short_description: Object serialisation
 *
 * Functions available here at this moment implement the GWY binary data format, version 3.
 *
 * <refsect2>
 * <title>Details of Serialization</title>
 * <para>The following information is not necessary for implementing the #GwySerializable interface in your classes,
 * but it can help prevent wrong decisions about the serialised representation of your objects.  Also, it might help
 * with implementing a different serialisation backend than GWY files.</para>
 * <para>Serialization occurs in several steps.</para>
 * <para>First, all objects are recursively asked to represent themselves as #GwySerializableGroup, which is a list
 * of items. The groups can contain nested groups in @v_group item values to represent object- and boxed-valued
 * components. The creation of the items may require the allocation of temporary data structures, if the internal
 * object representation does not match the serialised representation. This step is independent on the serialisation
 * backend format.</para>
 * <para>Second, the top-level group is written to the output stream, recursing into contained groups, byte-swapping or
 * otherwise normalizing the data on the fly if necessary. This part strongly depends on the storage format.</para>
 * <para>Finally, the virtual method done() is called on all objects in the tree which define it.  This step frees the
 * temporary storage allocated in the itemisation step, if any. It is done in the reverse order to that in which the
 * objects have been serialised, although this should not matter. The serialiser does the recursion. Hence, objects do
 * not need to implement done() if they do not need it themselves but some of their member objects might. At the very
 * end the groups are freed too.</para>
 * </refsect2>
 **/

/* vim: set cin columns=120 tw=118 et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
