/*
 *  $Id: unit.c 28911 2025-11-24 18:27:42Z yeti-dn $
 *  Copyright (C) 2004-2025 David Necas (Yeti), Petr Klapetek.
 *  E-mail: yeti@gwyddion.net, klapetek@gwyddion.net.
 *
 *  This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along with this program; if not, write to the
 *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include "config.h"
#include <string.h>
#include <stdlib.h>

#include "libgwyddion/macros.h"
#include "libgwyddion/utils.h"
#include "libgwyddion/math.h"
#include "libgwyddion/enum.h"
#include "libgwyddion/unit.h"
#include "libgwyddion/serializable-utils.h"

#define TYPE_NAME "GwyUnit"

#define unit_index(u,i) g_array_index((u)->units,GwySimpleUnit,(i))

/* Positions of signal ids in signals[]; NUM_SIGNALS is the size of that array. */
enum {
    SGNL_VALUE_CHANGED,
    NUM_SIGNALS
};

/* Number of base units: the size of base_si_units[] and of the powers[] member of GwyUnitDecomposition. */
enum {
    GWY_UNIT_N_BASE = 7,
};

/* Positions of items in serializable_items[]; NUM_ITEMS is the size of that array. */
enum {
    ITEM_UNITSTR,
    NUM_ITEMS
};

/* One component of a compound unit.  Elements of this type are stored in the units array of the private data
 * (see unit_index()). */
typedef struct {
    /* Name of the unit component, stored as a quark. */
    GQuark unit;
    /* Power of the component. */
    gshort power;
    /* NOTE(review): not referenced in this part of the file; meaning to be confirmed. */
    gshort traits;
} GwySimpleUnit;

/* Function formatting the integer @power into @str in one particular output style. */
typedef void (*FormatPowerFunc)(GString *str, gint power);

/* Description of one output style.  The members are initialised positionally in the format_style_* constants, so
 * their order must not change. */
typedef struct {
    /* Style-specific formatter of powers. */
    FormatPowerFunc format_power;
    /* Multiplication sign string; NULL in the styles that are not "vf" variants. */
    const gchar *multiplier;
    /* Separator strings used when composing the unit string.  NOTE(review): the exact use is in format_do(), which is
     * not visible here. */
    const gchar *unit_times;
    const gchar *unit_division;
    const gchar *power_unit_separator;
} GwySIStyleSpec;

/* Translation table entry used by fix_unit_name() and check_for_long_name(). */
typedef struct {
    /* Length of @name in bytes. */
    guint len;
    /* The symbol (or prefix symbol) the name is replaced with. */
    const gchar *symbol;
    /* The long or odd spelling to look for. */
    const gchar *name;
} GwyUnitLongName;

/* Expression of a derived unit in terms of the units listed in base_si_units[]. */
typedef struct {
    /* Symbol of the derived unit. */
    const gchar *unit;
    /* Powers of the individual base units, in the order of base_si_units[]. */
    gint powers[GWY_UNIT_N_BASE];
    /* Numerical factor of the decomposition. */
    gdouble factor;
} GwyUnitDecomposition;

struct _GwyUnitPrivate {
    /* Power of 10 working value: reset and accumulated during parsing and set before each format_do() call. */
    gint power10;
    /* Array of GwySimpleUnit components; empty for an empty unit. */
    GArray *units;

    /* NOTE(review): presumably a temporary string used during serialization; not referenced in this part of the
     * file. */
    gchar *ser_unitstr;
};

static void             finalize              (GObject *object);
static void             serializable_init     (GwySerializableInterface *iface);
static void             serializable_itemize  (GwySerializable *serializable,
                                               GwySerializableGroup *group);
static void             serializable_done     (GwySerializable *serializable);
static gboolean         serializable_construct(GwySerializable *serializable,
                                               GwySerializableGroup *group,
                                               GwyErrorList **error_list);
static GwySerializable* serializable_copy     (GwySerializable *serializable);
static void             serializable_assign   (GwySerializable *destination,
                                               GwySerializable *source);
static gboolean         parse                 (GwyUnit *siunit,
                                               const gchar *string);
static GwyUnit*         power_do              (GwyUnit *siunit,
                                               gint power,
                                               GwyUnit *result);
static GwyUnit*         canonicalize_do       (GwyUnit *siunit);
static gboolean         equal_do              (GwyUnit *siunit1,
                                               GwyUnit *siunit2,
                                               gboolean strict);
static gboolean         equal_direct          (GwyUnit *siunit1,
                                               GwyUnit *siunit2);
static const gchar*     format_prefix         (gint power);
static void             format_do             (GwyUnit *siunit,
                                               const GwySIStyleSpec *fs,
                                               GwyValueFormat *vf,
                                               gboolean magnitude_fixed);
static void             format_power_plain    (GString *string,
                                               gint power);
static void             format_power_pango    (GString *string,
                                               gint power);
static void             format_power_TeX      (GString *string,
                                               gint power);
static void             format_power_unicode  (GString *string,
                                               gint power);

/* Table mapping SI prefix strings to the corresponding powers of 10.
 *
 * Canonical form must be always first, because this table is used for reverse mapping too.  Only the power -6 has
 * several spellings here. */
static const GwyEnum SI_prefixes[] = {
    { "k",     3  },
    { "c",    -2  },
    { "d",    -1  },
    { "m",    -3  },
    { "M",     6  },
    { "µ",    -6  },
    /* People are extremely creative when it comes to µ replacements... */
    { "μ",    -6  },
    { "~",    -6  },
    { "u",    -6  },
    { "G",     9  },
    { "n",    -9  },
    { "T",     12 },
    { "p",    -12 },
    { "P",     15 },
    { "f",    -15 },
    { "E",     18 },
    { "a",    -18 },
    { "Z",     21 },
    { "z",    -21 },
    { "Y",     24 },
    { "y",    -24 },
};

/* Units that can conflict with prefixes: each of them starts with a character which is also a prefix listed in
 * SI_prefixes[] (for instance "Pa" could be misread as peta-a and "cd" as centi-d). */
static const gchar *known_units[] = {
    "deg", "Pa", "cd", "mol", "cal", "px", "pt", "cps", "cts", "Gy", "Gauss"
};

/* XXX: The SI base unit of mass is the kilogram.  But there is no way we can generally keep the kilo prefix there, so
 * we decompose to prefixable units -- and the prefixable unit is the gram.  Yes, this means we are not technically
 * decomposing to *base* SI units.
 *
 * The order of the units here defines the order of powers[] in GwyUnitDecomposition. */
static const gchar *base_si_units[GWY_UNIT_N_BASE] = {
    "m", "g", "s", "A", "K", "mol", "cd",
};

/* Decompositions of derived units to the units in base_si_units[].
 *
 * Each row gives the powers of the base units (columns as in the header comment below) and a numerical factor.
 * Because mass is expressed in grams here, a unit containing the kilogram to the power p carries the factor 10^(3p);
 * the factor of eV in addition includes the conversion from electronvolts to joules. */
static const GwyUnitDecomposition derived_units[] = {
    /*           m   g   s   A   K mol  cd   */
    { "Hz",   {  0,  0, -1,  0,  0,  0,  0 }, 1.0            },
    { "N",    {  1,  1, -2,  0,  0,  0,  0 }, 1.0e3          },
    { "Pa",   { -1,  1, -2,  0,  0,  0,  0 }, 1.0e3          },
    { "J",    {  2,  1, -2,  0,  0,  0,  0 }, 1.0e3          },
    { "eV",   {  2,  1, -2,  0,  0,  0,  0 }, 1.60217653e-16 },
    { "W",    {  2,  1, -3,  0,  0,  0,  0 }, 1.0e3          },
    { "C",    {  0,  0,  1,  1,  0,  0,  0 }, 1.0            },
    { "V",    {  2,  1, -3, -1,  0,  0,  0 }, 1.0e3          },
    { "F",    { -2, -1,  4,  2,  0,  0,  0 }, 1.0e-3         },
    { "H",    {  2,  1, -2, -2,  0,  0,  0 }, 1.0e3          },
    { "Ω",    {  2,  1, -3, -2,  0,  0,  0 }, 1.0e3          },
    { "S",    { -2, -1,  3,  2,  0,  0,  0 }, 1.0e-3         },
    { "T",    {  0,  1, -2, -1,  0,  0,  0 }, 1.0e3          },
    { "Wb",   {  2,  1, -2, -1,  0,  0,  0 }, 1.0e3          },
    { "Bq",   {  0,  0, -1,  0,  0,  0,  0 }, 1.0            },
    { "Sv",   {  2,  0, -2,  0,  0,  0,  0 }, 1.0            },
    { "Gy",   {  2,  0, -2,  0,  0,  0,  0 }, 1.0            },
};

/* Unit formats
 *
 * The initialisers are positional: power formatter, multiplier, unit_times, unit_division and power_unit_separator
 * (see GwySIStyleSpec).  The "vf" variants differ from the plain ones only by having a multiplication sign. */
static const GwySIStyleSpec format_style_plain = {
    &format_power_plain, NULL, " ", "/", " "
};
static const GwySIStyleSpec format_style_markup = {
    &format_power_pango, NULL, " ", "/", " "
};
static const GwySIStyleSpec format_style_vfmarkup = {
    &format_power_pango, "× ", " ", "/", " "
};
static const GwySIStyleSpec format_style_TeX = {
    &format_power_TeX, NULL, "\\,", "/", "\\,"
};
static const GwySIStyleSpec format_style_vfTeX = {
    &format_power_TeX, "\\times", "\\,", "/", "\\,"
};
static const GwySIStyleSpec format_style_unicode = {
    &format_power_unicode, NULL, " ", "/", " "
};
static const GwySIStyleSpec format_style_vfunicode = {
    &format_power_unicode, "× ", " ", "/", " "
};

/* Style specifications indexed directly by the GwyUnitFormatStyle value, see find_style_spec().  The first slot is
 * NULL -- presumably it corresponds to GWY_UNIT_FORMAT_NONE, which find_style_spec() rejects (to be confirmed
 * against the enum definition). */
static const GwySIStyleSpec *format_styles[] = {
    NULL,
    &format_style_plain,
    &format_style_markup,
    &format_style_vfmarkup,
    &format_style_TeX,
    &format_style_vfTeX,
    &format_style_unicode,
    &format_style_vfunicode,
};

/* Signal ids, filled in gwy_unit_class_init(). */
static guint signals[NUM_SIGNALS];
/* Parent class, set in gwy_unit_class_init() and used to chain up in finalize(). */
static GObjectClass *parent_class = NULL;

/* Serialized representation: a single string item called "unitstr". */
static const GwySerializableItem serializable_items[NUM_ITEMS] = {
    { .name = "unitstr", .ctype = GWY_SERIALIZABLE_STRING, },
};

/* Registers the GwyUnit type with private data and the GwySerializable interface.  The macro provides
 * gwy_unit_parent_class and gwy_unit_get_instance_private() used below. */
G_DEFINE_TYPE_WITH_CODE(GwyUnit, gwy_unit, G_TYPE_OBJECT,
                        G_ADD_PRIVATE(GwyUnit)
                        G_IMPLEMENT_INTERFACE(GWY_TYPE_SERIALIZABLE, serializable_init))

/* Fills the GwySerializable interface table with the implementations from this file. */
static void
serializable_init(GwySerializableInterface *iface)
{
    iface->assign    = serializable_assign;
    iface->copy      = serializable_copy;
    iface->construct = serializable_construct;
    iface->done      = serializable_done;
    iface->itemize   = serializable_itemize;
}

/* Class initialiser: remembers the parent class, installs finalize() and registers the value-changed signal. */
static void
gwy_unit_class_init(GwyUnitClass *klass)
{
    GType unit_type = G_TYPE_FROM_CLASS(klass);

    parent_class = gwy_unit_parent_class;
    G_OBJECT_CLASS(klass)->finalize = finalize;

    /**
     * GwyUnit::value-changed:
     * @gwysiunit: The #GwyUnit which received the signal.
     *
     * The ::value-changed signal is emitted whenever unit changes.
     */
    guint signal_id = g_signal_new("value-changed", unit_type,
                                   G_SIGNAL_RUN_FIRST,
                                   G_STRUCT_OFFSET(GwyUnitClass, value_changed),
                                   NULL, NULL,
                                   g_cclosure_marshal_VOID__VOID,
                                   G_TYPE_NONE, 0);
    signals[SGNL_VALUE_CHANGED] = signal_id;
    g_signal_set_va_marshaller(signal_id, unit_type, g_cclosure_marshal_VOID__VOIDv);
}

#if 0
/* Debugging helper, compiled out.  Prints @name followed by all components of @units, each as unit(power), to the
 * error output. */
static void
debug_print_unit(GArray *units, const gchar *name)
{
    guint i;

    g_printerr("%s: ", name);
    for (i = 0; i < units->len; i++) {
        g_printerr(" %s(%d)",
                   g_quark_to_string(g_array_index(units, GwySimpleUnit, i).unit),
                   g_array_index(units, GwySimpleUnit, i).power);
    }
    g_printerr("\n");
}
#endif

/* Instance initialiser: attaches the private data and creates an empty array of unit components. */
static void
gwy_unit_init(GwyUnit *siunit)
{
    siunit->priv = gwy_unit_get_instance_private(siunit);
    siunit->priv->units = g_array_new(FALSE, FALSE, sizeof(GwySimpleUnit));
}

/* Releases the array of unit components and chains up to the parent class. */
static void
finalize(GObject *object)
{
    GwyUnitPrivate *priv = ((GwyUnit*)object)->priv;

    GWY_FREE_ARRAY(priv->units);

    G_OBJECT_CLASS(parent_class)->finalize(object);
}

/**
 * gwy_unit_new: (constructor)
 * @unit_string: (nullable):
 *               Unit string (it can be %NULL for an empty unit).
 *
 * Creates a new unit from string representation.
 *
 * Unit string represents unit with no prefixes (e. g. "m", "N", "A", etc.)
 *
 * Returns: (transfer full):
 *          A new unit.
 **/
GwyUnit*
gwy_unit_new(const char *unit_string)
{
    /* Same as gwy_unit_new_parse(), just without asking for the power of 10. */
    GwyUnit *siunit = gwy_unit_new_parse(unit_string, NULL);

    return siunit;
}

/**
 * gwy_unit_new_parse:
 * @unit_string: (nullable): Unit string (it can be %NULL for an empty unit).
 * @power10: (out) (optional): Where power of 10 should be stored (or %NULL).
 *
 * Creates a new unit from string representation.
 *
 * This is a more powerful version of gwy_unit_new(): @unit_string may be a relatively complex unit, with prefixes,
 * like "pA/s" or "km^2". Beside conversion to a base unit like "A/s" or "m^2" it also computes the power of 10 one
 * has to multiply the base unit with to get an equivalent of @unit_string.
 *
 * For example, for <literal>"pA/s"</literal> it will store -12 to @power10 because 1 pA/s is 1e-12 A/s, for
 * <literal>"km^2"</literal> it will store 6 to @power10 because 1 km^2 is 1e6 m^2.
 *
 * Returns: (transfer full):
 *          A new unit.
 **/
GwyUnit*
gwy_unit_new_parse(const char *unit_string, gint *power10)
{
    GwyUnit *newunit = g_object_new(GWY_TYPE_UNIT, NULL);

    /* The return value of parse() is not checked.  The power of 10 is taken from the private data afterwards. */
    parse(newunit, unit_string);
    if (power10 != NULL)
        *power10 = newunit->priv->power10;

    return newunit;
}

/**
 * gwy_unit_clear:
 * @siunit: A unit.
 *
 * Sets a unit to empty.
 **/
void
gwy_unit_clear(GwyUnit *siunit)
{
    g_return_if_fail(GWY_IS_UNIT(siunit));

    GArray *components = siunit->priv->units;

    /* Only modify the unit, and emit the signal, when it is not empty already. */
    if (components->len) {
        g_array_set_size(components, 0);
        g_signal_emit(siunit, signals[SGNL_VALUE_CHANGED], 0);
    }
}

/**
 * gwy_unit_set_from_string:
 * @siunit: A unit.
 * @unit_string: (nullable): Unit string to set @siunit from.
 *
 * Changes an unit according to string representation.
 *
 * See gwy_unit_new_parse() for a discussion of the returned power of 10.
 *
 * The string can be %NULL to make the unit empty, but you can just use gwy_unit_clear() then.
 *
 * Returns: The power of 10 corresponding to @unit_string.
 **/
gint
gwy_unit_set_from_string(GwyUnit *siunit,
                         const gchar *unit_string)
{
    g_return_val_if_fail(GWY_IS_UNIT(siunit), 0);

    parse(siunit, unit_string);

    /* Fetch the power of 10 before the signal is emitted. */
    gint power10 = siunit->priv->power10;

    /* The signal is emitted unconditionally, even if the new unit is the same as the old one. */
    g_signal_emit(siunit, signals[SGNL_VALUE_CHANGED], 0);

    return power10;
}

/* Looks up the style specification for @style.  An invalid style is reported with g_critical() and the plain style is
 * used instead, so the function never returns NULL. */
static inline const GwySIStyleSpec*
find_style_spec(GwyUnitFormatStyle style)
{
    gboolean is_valid = (style != GWY_UNIT_FORMAT_NONE && (guint)style < G_N_ELEMENTS(format_styles));

    if (!is_valid) {
        g_critical("Invalid format style %d.", style);
        return format_styles[GWY_UNIT_FORMAT_PLAIN];
    }

    return format_styles[style];
}

/**
 * gwy_unit_get_string:
 * @siunit: A unit.
 * @style: Unit format style.
 *
 * Obtains string representing a unit.
 *
 * Returns: A newly allocated string that represents the base unit (with no prefixes).
 **/
gchar*
gwy_unit_get_string(GwyUnit *siunit, GwyUnitFormatStyle style)
{
    g_return_val_if_fail(GWY_IS_UNIT(siunit), NULL);

    /* The base unit is requested, so format it with zero power of 10. */
    siunit->priv->power10 = 0;

    /* A temporary value format on the stack; only its string is of interest. */
    GwyValueFormat vf;
    vf.units_gstring = g_string_new(NULL);
    vf.precision = 3;
    vf.magnitude = 1.0;

    const GwySIStyleSpec *spec = find_style_spec(style);
    format_do(siunit, spec, &vf, TRUE);

    /* Give the character data to the caller and free only the GString structure. */
    return g_string_free(vf.units_gstring, FALSE);
}

/* Be more averse to powers of 10 when we have plain numbers.
 *
 * If the unit is empty, the chosen magnitude is 1e-3 and the value is at least 1e-3, the magnitude is changed to 1
 * and the precision is increased by three digits to compensate. */
static void
unpower_format_for_plain_numbers(GwyValueFormat *vf, gdouble value, GwyUnit *siunit)
{
    gboolean unitless = !siunit->priv->units->len;

    if (!unitless || vf->magnitude != 1e-3 || !(value >= 1e-3))
        return;

    vf->magnitude = 1.0;
    vf->precision += 3;
}

/**
 * gwy_unit_get_format_for_power10:
 * @siunit: A unit.
 * @style: Unit format style.
 * @power10: Power of 10, in the same sense as gwy_unit_new_parse() returns it.
 * @format: (nullable): A value format to set-up, may be %NULL, a new value format is allocated then.
 *
 * Finds format for representing a specific power-of-10 multiple of a unit.
 *
 * The values should be then printed as value/@format->magnitude [@format->units] with @format->precision decimal
 * places.
 *
 * This function does not change the precision field of @format.
 *
 * Returns: The value format.  If @format was %NULL, a newly allocated format is returned, otherwise (modified)
 *          @format itself is returned.
 **/
GwyValueFormat*
gwy_unit_get_format_for_power10(GwyUnit *siunit,
                                GwyUnitFormatStyle style,
                                gint power10,
                                GwyValueFormat *format)
{
    g_return_val_if_fail(GWY_IS_UNIT(siunit), NULL);

    const GwySIStyleSpec *spec = find_style_spec(style);

    /* Create a new value format if the caller has not passed any. */
    GwyValueFormat *vf = format;
    if (vf == NULL) {
        vf = g_new0(GwyValueFormat, 1);
        vf->units_gstring = g_string_new(NULL);
    }

    /* The magnitude is given by the caller; format_do() takes the power of 10 from the private data. */
    vf->magnitude = gwy_exp10(power10);
    siunit->priv->power10 = power10;
    format_do(siunit, spec, vf, TRUE);

    return vf;
}

/**
 * gwy_unit_get_format:
 * @siunit: A unit.
 * @style: Unit format style.
 * @value: Value the format should be suitable for.
 * @format: (nullable): A value format to set-up, may be %NULL, a new value format is allocated then.
 *
 * Finds a good format for representing a value.
 *
 * The values should be then printed as value/@format->magnitude [@format->units] with @format->precision decimal
 * places.
 *
 * Returns: The value format.  If @format was %NULL, a newly allocated format is returned, otherwise (modified)
 *          @format itself is returned.
 **/
GwyValueFormat*
gwy_unit_get_format(GwyUnit *siunit,
                    GwyUnitFormatStyle style,
                    gdouble value,
                    GwyValueFormat *format)
{
    g_return_val_if_fail(GWY_IS_UNIT(siunit), NULL);

    const GwySIStyleSpec *spec = find_style_spec(style);

    /* Create a new value format if the caller has not passed any. */
    GwyValueFormat *vf = format;
    if (vf == NULL) {
        vf = g_new0(GwyValueFormat, 1);
        vf->units_gstring = g_string_new(NULL);
    }

    gdouble absval = fabs(value);
    if (absval) {
        /* Use 1/36 of the value as the step that should be still distinguishable. */
        vf->magnitude = gwy_math_humanize_numbers(absval/36, absval, &vf->precision);
        unpower_format_for_plain_numbers(vf, absval, siunit);
    }
    else {
        /* Zero gives no hint about the scale; use fixed defaults. */
        vf->magnitude = 1;
        vf->precision = 2;
    }

    siunit->priv->power10 = GWY_ROUND(log10(vf->magnitude));
    format_do(siunit, spec, vf, FALSE);

    return vf;
}

/**
 * gwy_unit_get_format_with_resolution:
 * @siunit: A unit.
 * @style: Unit format style.
 * @maximum: The maximum value to be represented.
 * @resolution: The smallest step (approximately) that should make a visible difference in the representation.
 * @format: (nullable): A value format to set-up, may be %NULL, a new value format is allocated then.
 *
 * Finds a good format for representing a range of values with given resolution.
 *
 * The values should be then printed as value/@format->magnitude [@format->units] with @format->precision decimal
 * places.
 *
 * Returns: The value format.  If @format was %NULL, a newly allocated format is returned, otherwise (modified)
 *          @format itself is returned.
 **/
GwyValueFormat*
gwy_unit_get_format_with_resolution(GwyUnit *siunit,
                                    GwyUnitFormatStyle style,
                                    gdouble maximum,
                                    gdouble resolution,
                                    GwyValueFormat *format)
{
    g_return_val_if_fail(GWY_IS_UNIT(siunit), NULL);

    const GwySIStyleSpec *spec = find_style_spec(style);

    /* Create a new value format if the caller has not passed any. */
    GwyValueFormat *vf = format;
    if (vf == NULL) {
        vf = g_new0(GwyValueFormat, 1);
        vf->units_gstring = g_string_new(NULL);
    }

    /* Only the absolute values matter. */
    gdouble absmax = fabs(maximum);
    gdouble absres = fabs(resolution);
    if (absmax) {
        vf->magnitude = gwy_math_humanize_numbers(absres, absmax, &vf->precision);
        unpower_format_for_plain_numbers(vf, absmax, siunit);
    }
    else {
        /* Zero maximum gives no hint about the scale; use fixed defaults. */
        vf->magnitude = 1;
        vf->precision = 2;
    }

    siunit->priv->power10 = GWY_ROUND(log10(vf->magnitude));
    format_do(siunit, spec, vf, FALSE);

    return vf;
}

/**
 * gwy_unit_get_format_with_digits:
 * @siunit: A unit.
 * @style: Unit format style.
 * @maximum: The maximum value to be represented.
 * @sdigits: The number of significant digits the value should have.
 * @format: (nullable): A value format to set-up, may be %NULL, a new value format is allocated then.
 *
 * Finds a good format for representing a values with given number of significant digits.
 *
 * The values should be then printed as value/@format->magnitude [@format->units] with @format->precision decimal
 * places.
 *
 * Returns: The value format.  If @format was %NULL, a newly allocated format is returned, otherwise (modified)
 *          @format itself is returned.
 **/
GwyValueFormat*
gwy_unit_get_format_with_digits(GwyUnit *siunit,
                                GwyUnitFormatStyle style,
                                gdouble maximum,
                                gint sdigits,
                                GwyValueFormat *format)
{
    g_return_val_if_fail(GWY_IS_UNIT(siunit), NULL);

    const GwySIStyleSpec *spec = find_style_spec(style);

    /* Create a new value format if the caller has not passed any. */
    GwyValueFormat *vf = format;
    if (vf == NULL) {
        vf = g_new0(GwyValueFormat, 1);
        vf->units_gstring = g_string_new(NULL);
    }

    gdouble absmax = fabs(maximum);
    if (absmax) {
        /* The requested number of significant digits determines the step that should be distinguishable. */
        gdouble step = absmax/gwy_exp10(sdigits);

        vf->magnitude = gwy_math_humanize_numbers(step, absmax, &vf->precision);
        unpower_format_for_plain_numbers(vf, absmax, siunit);
    }
    else {
        /* Zero maximum gives no hint about the scale; show the requested number of digits as decimal places. */
        vf->magnitude = 1;
        vf->precision = sdigits;
    }

    siunit->priv->power10 = GWY_ROUND(log10(vf->magnitude));
    format_do(siunit, spec, vf, FALSE);

    return vf;
}

/**
 * gwy_unit_equal:
 * @siunit1: First unit.
 * @siunit2: Second unit.
 *
 * Checks whether two units are equal.
 *
 * The comparison is strict, so for instance eV is not considered equal to J.
 *
 * Returns: %TRUE if the units are equal.
 **/
gboolean
gwy_unit_equal(GwyUnit *siunit1, GwyUnit *siunit2)
{
    /* Here we do not want eV to be equal to J. */
    return equal_do(siunit1, siunit2, TRUE);
}

/**
 * gwy_unit_equal_string:
 * @siunit: A unit.
 * @unit_string: (nullable): Unit string (it can be %NULL for an empty unit).
 *
 * Checks whether an unit corresponds to given string.
 *
 * Any power-of-ten prefixes are ignored.  This function is mostly useful for quick commensurability checks with
 * simple units such as "m" and for checking whether a unit is non-empty (by comparing with %NULL or an empty string).
 *
 * Returns: %TRUE if the units is equivalent to the given string.
 **/
gboolean
gwy_unit_equal_string(GwyUnit *siunit,
                      const gchar *unit_string)
{
    g_return_val_if_fail(GWY_IS_UNIT(siunit), FALSE);

    GwyUnitPrivate *priv = siunit->priv;

    /* An empty or missing string can only match an empty unit. */
    if (!unit_string || !*unit_string)
        return !priv->units->len;

    /* Fast path for simple units such as "m": avoid parsing when the only component has exactly this name.  The
     * power must be checked too because the quark holds just the name; otherwise a unit like m^2 would be reported
     * as equal to "m". */
    if (priv->units->len == 1) {
        GwySimpleUnit *unit = &unit_index(priv, 0);
        if (unit->power == 1 && gwy_strequal(g_quark_to_string(unit->unit), unit_string))
            return TRUE;
        /* If they differ we cannot make any conclusion. */
    }

    /* General case: parse the string to a temporary unit and compare the units non-strictly. */
    GwyUnit *tmpunit = gwy_unit_new_parse(unit_string, NULL);
    gboolean retval = equal_do(siunit, tmpunit, FALSE);
    g_object_unref(tmpunit);

    return retval;
}

/* Finds the first entry of @longnames (with @n entries, sorted by increasing length) matching the string @s of
 * length @len.
 *
 * With @exact_match the entire string must be byte-wise identical to the entry name.  Otherwise it is sufficient
 * that the string starts with the entry name, compared case-insensitively in the ASCII sense; it is up to the caller
 * to deal with the remainder.
 *
 * Returns the matching entry, or NULL if there is none. */
static const GwyUnitLongName*
check_for_long_name(const gchar *s, guint len,
                    const GwyUnitLongName *longnames, guint n,
                    gboolean exact_match)
{
    const GwyUnitLongName *entry, *stop = longnames + n;

    for (entry = longnames; entry < stop; entry++) {
        /* The table is sorted by length, so no later entry can fit to the string either. */
        if (entry->len > len)
            break;

        if (exact_match) {
            if (entry->len == len && memcmp(s, entry->name, len) == 0)
                return entry;
        }
        else if (g_ascii_strncasecmp(s, entry->name, entry->len) == 0)
            return entry;
    }

    return NULL;
}

/* Fix all kinds of sloppy and strange notations.
 *
 * The unit name in @str is modified in place:
 * - a leading Latin 1 degree sign or masculine ordinal indicator byte is replaced with the UTF-8 degree sign,
 * - odd names (matched exactly) are replaced with the corresponding symbols,
 * - long names such as "nanometres", optionally with a long prefix and a plural s, are replaced with the prefix and
 *   unit symbols, e.g. "nm".
 * Anything not recognised is left unchanged. */
static void
fix_unit_name(GString *str)
{
    /* Keep the lists sorted by length so that we can give up quickly and only attempt to translate long names. */

    /* Special silly names which cannot take prefixes and we only accept as exact matches. */
    static const GwyUnitLongName odd_names[] = {
        { 1, "Å",   "\xc5",    },
        { 1, "deg", "\xb0",    },
        { 1, "deg", "\xba",    },
        { 2, "deg", "°",       },
        { 2, "Å",   "AA",      },
        { 2, "Å",   "Å",       },
        { 3, "Å",   "Ang",     },
        { 3, "Å",   "ang",     },
        { 3, "",    "arb",     },
        { 3, "µm",  "mum",     },
        { 4, "",    "a.u.",    },
        { 6, "",    "counts",  },
        { 6, "µm",  "micron",  },
        { 7, "µm",  "microns", },
    };

    /* Long names and their prefixes. */
    static const GwyUnitLongName long_names[] = {
        { 3, "s",   "sec",      },
        { 3, "deg", "Deg",      },
        { 3, "Ω",   "Ohm",      },
        { 4, "V",   "Volt",     },
        { 4, "W",   "Watt",     },
        { 5, "Hz",  "Hertz",    },
        { 5, "F",   "Farad",    },
        { 5, "H",   "Henry",    },
        { 5, "J",   "Joule",    },
        { 5, "m",   "meter",    },
        { 5, "m",   "metre",    },
        { 5, "px",  "pixel",    },
        { 5, "T",   "Tesla",    },
        { 5, "Wb",  "Weber",    },
        { 6, "A",   "Ampere",   },
        { 6, "deg", "degree",   },
        { 6, "K",   "Kelvin",   },
        { 6, "N",   "Newton",   },
        { 6, "Pa",  "Pascal",   },
        { 6, "rad", "radian",   },
        { 6, "s",   "second",   },
        { 7, "cd",  "candela",  },
        { 7, "S",   "Siemens",  },
        { 8, "Å",   "Angstrom", },
    };

    static const GwyUnitLongName long_pfxes[] = {
        { 4, "a", "atto", },
        { 4, "G", "giga", },
        { 4, "k", "kilo", },
        { 4, "M", "mega", },
        { 4, "n", "nano", },
        { 4, "P", "peta", },
        { 4, "p", "pico", },
        { 4, "T", "tera", },
        { 5, "c", "centi", },
        { 5, "f", "femto", },
        { 5, "µ", "micro", },
        { 5, "m", "milli", },
    };

    const GwyUnitLongName *xname;
    const gchar *s = str->str, *prefix = "";
    guint i, len = str->len;

    if (!len)
        return;

    /* b0 = degree sign
     * ba = masculine ordinal indicator (yes, people use it for degrees) */
    if (len > 1 && (s[0] == '\xb0' || s[0] == '\xba')) {
        g_string_erase(str, 0, 1);
        g_string_prepend(str, "°");
        return;
    }

    /* Fix silly names. */
    if ((xname = check_for_long_name(s, len, odd_names, G_N_ELEMENTS(odd_names), TRUE))) {
        g_string_assign(str, xname->symbol);
        return;
    }

    /* Fix long names, possibly prefixed.  Start with splitting off the long prefix, if any. */
    if ((xname = check_for_long_name(s, len, long_pfxes, G_N_ELEMENTS(long_pfxes), FALSE))) {
        prefix = xname->symbol;
        s += xname->len;
        len -= xname->len;
    }

    /* The rest must be an entire long name, possibly followed by a plural s.  We must not stop at the first name the
     * rest merely starts with because some names are the beginnings of others ("sec" of "second", "Deg" of "degree")
     * and the longer ones would then never be recognised. */
    for (i = 0; i < G_N_ELEMENTS(long_names); i++) {
        guint ll = long_names[i].len;

        /* The table is sorted by length, so no later name can fit either. */
        if (ll > len)
            break;
        if (ll != len && !(ll+1 == len && g_ascii_tolower(s[len-1]) == 's'))
            continue;

        if (g_ascii_strncasecmp(s, long_names[i].name, ll) == 0) {
            /* Both strings are static, so it does not matter that s points to the string being overwritten. */
            g_string_assign(str, prefix);
            g_string_append(str, long_names[i].symbol);
            return;
        }
    }
}

/* Parses an integer written with Unicode superscript digits, optionally preceded by the superscript minus sign, at
 * the beginning of the UTF-8 string @s.
 *
 * If at least one digit is found the number is stored to @n, the position after it is stored to @end (if it is not
 * NULL) and TRUE is returned.  Otherwise FALSE is returned and nothing is stored. */
static gboolean
parse_exponent_unicode(const gchar *s, gint *n, const gchar **end)
{
    /* The superscript digits are encoded with two or three bytes in UTF-8, hence the strings instead of chars. */
    static const gchar *const superscripts[10] = {
        "⁰", "¹", "²", "³", "⁴", "⁵", "⁶", "⁷", "⁸", "⁹",
    };
    static const gchar superminus[] = "⁻";
    const gchar *pos = s, *digits_start = s;
    gint value = 0, sign = 1;

    if (strncmp(pos, superminus, sizeof(superminus)-1) == 0) {
        sign = -1;
        pos += sizeof(superminus)-1;
        /* Do not count standalone minus as a number. */
        digits_start = pos;
    }

    while (*pos) {
        guint digit;

        for (digit = 0; digit < 10; digit++) {
            gsize diglen = strlen(superscripts[digit]);

            if (strncmp(pos, superscripts[digit], diglen) == 0) {
                value = 10*value + digit;
                pos += diglen;
                break;
            }
        }
        /* Not a superscript digit; the number ends here. */
        if (digit == 10)
            break;
    }

    if (pos == digits_start)
        return FALSE;

    *n = value*sign;
    if (end)
        *end = pos;
    return TRUE;
}

/* Parses a numerical multiplier at the beginning of @string and adds the corresponding power of 10, multiplied by
 * @sign, to the power of 10 of @siunit.
 *
 * The multiplier must be a power of 10, for instance "1e-3", "100" or "10" followed by an exponent written as
 * <sup>-3</sup>, ^-3 or with Unicode superscript characters.  A finite number which is not a positive power of 10 is
 * skipped but it does not contribute anything.
 *
 * Returns the position in @string after the multiplier; it is @string itself when no number is found. */
static const gchar*
parse_numerical_multiplier(GwyUnit *siunit, const gchar *string, gint sign)
{
    const gchar *end;
    gint n, power10;
    gdouble q = g_ascii_strtod(string, (gchar**)&end);

    /* Unfortunately, g_ascii_strtod() can parse Nanometre as NaN + ometre.
     * Accept only known numbers as multipliers. */
    if (end == string || gwy_isinf(q) || gwy_isnan(q))
        return string;

    string = end;

    /* The sign must be checked before taking the logarithm.  The logarithm of a non-positive number is infinite or
     * NaN and rounding it to an integer would be undefined. */
    if (q <= 0) {
        /* g_warning("Bad multiplier %g", q); */
        return string;
    }

    power10 = GWY_ROUND(log10(q));
    if (fabs(log(q/gwy_exp10(power10))) > 1e-13) {
        /* g_warning("Bad multiplier %g", q); */
        return string;
    }

    if (g_str_has_prefix(string, "<sup>")) {
        string += strlen("<sup>");
        n = strtol(string, (gchar**)&end, 10);
        if (end == string) {
            /* g_warning("Bad exponent %s", string); */
        }
        else if (!g_str_has_prefix(end, "</sup>")) {
            /* g_warning("Expected </sup> after exponent"); */
        }
        else {
            power10 *= n;
            /* The closing tag belongs to the multiplier; do not leave it there to be taken for a unit. */
            end += strlen("</sup>");
        }
        string = end;
    }
    else if (string[0] == '^') {
        string++;
        n = strtol(string, (gchar**)&end, 10);
        if (end == string) {
            /* g_warning("Bad exponent %s", string); */
        }
        else
            power10 *= n;
        string = end;
    }
    else if (parse_exponent_unicode(string, &n, &end)) {
        power10 *= n;
        string = end;
    }

    siunit->priv->power10 += sign*power10;
    return string;
}

/*
 * Parses a unit string and replaces the content of @siunit with the result.
 *
 * The unit list and power of 10 of @siunit are reset first.  A %NULL or empty @string leaves the unit empty and
 * counts as success.  Strings containing control characters or some punctuation are rejected.  Strings which are
 * not valid UTF-8 are converted from ISO-8859-1 before parsing.
 *
 * Tokens are separated by spaces and at most one meaningful division sign.  Each token may carry an SI prefix and
 * a power.  Prefixes and numerical multipliers are accumulated in the power of 10 of @siunit.  The unit list is
 * canonicalized at the end.
 *
 * Returns %TRUE if the string was processed, %FALSE if it was rejected or could not be converted to UTF-8.
 */
static gboolean
parse(GwyUnit *siunit, const gchar *string)
{
    gchar *p, *e;
    gboolean dividing = FALSE, beginning = TRUE;
    gchar *utf8string = NULL;

    GwyUnitPrivate *priv = siunit->priv;
    g_array_set_size(priv->units, 0);
    priv->power10 = 0;

    if (!string || !*string)
        return TRUE;

    /* give up when it looks too wild */
    const gchar *end = strpbrk(string,
                               "\177\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
                               "\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
                               "!#$&(),:;=?@\\[]_`|{}");
    if (end) {
        /* g_warning("Invalid character 0x%02x", *end); */
        return FALSE;
    }

    /* If the string is not UTF-8, assume it's Latin 1.  This is what people usually have in various files. */
    if (!g_utf8_validate(string, -1, NULL)) {
        string = utf8string = gwy_convert_to_utf8(string, -1, "ISO-8859-1");
        if (!string)
            return FALSE;
    }

    /* may start with a multiplier, but it must be a power of 10 */
    while (g_ascii_isspace(string[0]))
        string++;

    if (string[0] == '*')
        string++;
    else if (strncmp(string, "×", sizeof("×")-1) == 0)
        string += sizeof("×")-1;
    else if (strncmp(string, "x10", sizeof("x10")-1) == 0) {
        /* Only recognise x as a multiplier when followed by 10. Otherwise it can be a unit or something. */
        string++;
    }

    GString *buf = g_string_new(NULL);

    /* Check a. u. with a space specially here because we then split on spaces so putting it to odd_names[] does
     * not help. */
    if (gwy_strequal(string, "a. u."))
        string += strlen(string);

    /* the rest are units */
    while (*string) {
        /* here we actually parse the number, also after a division sign */
        if (beginning || dividing) {
            string = parse_numerical_multiplier(siunit, string, dividing ? -1 : 1);
            while (g_ascii_isspace(*string))
                string++;
        }

        /* units are separated with whitespace and maybe a division sign; a slash directly following '<' belongs to
         * a closing markup tag and does not end the token */
        end = string;
        do {
            end = strpbrk(end, " /");
            if (!end || end == string || *end != '/' || *(end-1) != '<')
                break;
            end++;
        } while (TRUE);
        if (!end)
            end = string + strlen(string);

        g_string_set_size(buf, 0);
        g_string_append_len(buf, string, end - string);
        fix_unit_name(buf);

        /* get prefix, but be careful not to split mol to mili-ol */
        gint pfpower = 0;
        gboolean may_split_prefix = buf->len > 1;
        if (may_split_prefix) {
            for (guint i = 0; i < G_N_ELEMENTS(known_units); i++) {
                if (g_str_has_prefix(buf->str, known_units[i]) && !g_ascii_isalpha(buf->str[strlen(known_units[i])])) {
                    may_split_prefix = FALSE;
                    break;
                }
            }
        }
        /* also don't split prefixes of long words, they are unlikely to be symbols. */
        if (may_split_prefix && buf->len > 4) {
            guint i;
            for (i = 0; i < buf->len; i++) {
                if (!g_ascii_isalpha(buf->str[i]))
                    break;
            }
            if (i == buf->len)
                may_split_prefix = FALSE;
        }

        if (may_split_prefix) {
            for (guint i = 0; i < G_N_ELEMENTS(SI_prefixes); i++) {
                const gchar *pfx = SI_prefixes[i].name;

                if (g_str_has_prefix(buf->str, pfx) && g_ascii_isalpha(buf->str[strlen(pfx)])) {
                    pfpower = SI_prefixes[i].value;
                    g_string_erase(buf, 0, strlen(pfx));
                    break;
                }
            }
        }

        /* get unit power; initialise all fields so that nothing indeterminate is ever stored in the unit array */
        GwySimpleUnit unit = { .unit = 0, .power = 1, .traits = 0 };
        if (buf->len && (p = strstr(buf->str + 1, "<sup>"))) {
            unit.power = strtol(p + strlen("<sup>"), &e, 10);
            if (e == p + strlen("<sup>") || !g_str_has_prefix(e, "</sup>")) {
                /* g_warning("Bad power %s", p); */
                unit.power = 1;
            }
            else if (!unit.power || abs(unit.power) > 12) {
                /* g_warning("Bad power %d", unit.power); */
                unit.power = 1;
            }
            g_string_truncate(buf, p - buf->str);
        }
        else if (buf->len && (p = strchr(buf->str + 1, '^'))) {
            unit.power = strtol(p + 1, &e, 10);
            if (e == p + 1 || *e) {
                /* g_warning("Bad power %s", p); */
                unit.power = 1;
            }
            else if (!unit.power || abs(unit.power) > 12) {
                /* g_warning("Bad power %d", unit.power); */
                unit.power = 1;
            }
            g_string_truncate(buf, p - buf->str);
        }
        else if (buf->len) {
            /* Try to find a Unicode exponent first by looking for a non-letter character. */
            gint i = 1, j;
            while (g_ascii_isalpha(buf->str[i]))
                i++;
            if (parse_exponent_unicode(buf->str + i, &j, (const gchar**)&e)) {
                unit.power = j;
                g_string_truncate(buf, i);
            }
            else {
                /* Are we really desperate?  Yes, we are!  Take trailing digits and minus signs as the power. */
                i = buf->len;
                while (i && (g_ascii_isdigit(buf->str[i-1]) || buf->str[i-1] == '-'))
                    i--;
                if (i != buf->len) {
                    unit.power = strtol(buf->str + i, NULL, 10);
                    if (!unit.power || abs(unit.power) > 12) {
                        /* g_warning("Bad power %d", unit.power); */
                        unit.power = 1;
                    }
                    g_string_truncate(buf, i);
                }
            }
        }

        /* handle some ugly, but quite common units */
        if (gwy_strequal(buf->str, "Å")) {
            pfpower -= 10;
            g_string_assign(buf, "m");
        }
        else if (gwy_strequal(buf->str, "%")) {
            pfpower -= 2;
            g_string_assign(buf, "");
        }
        else if (gwy_strequal(buf->str, "‰")) {
            pfpower -= 3;
            g_string_assign(buf, "");
        }
        else if (gwy_strequal(buf->str, "м")) {
            g_string_assign(buf, "m");
        }

        /* elementary sanity */
        if (!g_utf8_validate(buf->str, -1, (const gchar**)&p)) {
            /* g_warning("Unit string is not valid UTF-8"); */
            g_string_truncate(buf, p - buf->str);
        }
        if (!buf->len) {
            /* maybe it's just percentage.  cross fingers and proceed. */
            if (dividing)
                unit.power = -unit.power;
            priv->power10 += unit.power * pfpower;
        }
        else if (!g_ascii_isalpha(buf->str[0]) && (guchar)buf->str[0] < 128) {
            /* g_warning("Invalid base unit: %s", buf->str); */
        }
        else {
            /* append it */
            unit.unit = g_quark_from_string(buf->str);
            if (dividing)
                unit.power = -unit.power;
            gwy_debug("<%s:%u> %d\n", buf->str, unit.unit, unit.power);
            priv->power10 += unit.power * pfpower;
            g_array_append_val(priv->units, unit);
        }

        /* TODO: scan known obscure units */

        /* get to the next token, looking for division */
        while (g_ascii_isspace(*end))
            end++;
        if (*end == '/') {
            if (dividing) {
                /* g_warning("Cannot group multiple divisions"); */
            }
            dividing = TRUE;
            end++;
            while (g_ascii_isspace(*end))
                end++;
        }
        string = end;
        beginning = FALSE;
    }

    g_string_free(buf, TRUE);
    g_free(utf8string);

    canonicalize_do(siunit);

    return TRUE;
}

/**
 * gwy_unit_multiply:
 * @siunit1: A unit.
 * @siunit2: A unit.
 * @result: (nullable):
 *          A unit to set to product of @siunit1 and @siunit2.  It is safe to pass one of @siunit1, @siunit2. It
 *          can be %NULL too, a new unit is created then and returned.
 *
 * Multiplies two units.
 *
 * This is a shorthand for gwy_unit_power_multiply() with both powers equal to 1.
 *
 * Returns: When @result is %NULL, a newly created unit that has to be dereferenced when no longer used later.
 *          Otherwise @result itself is simply returned, its reference count is NOT increased.
 **/
GwyUnit*
gwy_unit_multiply(GwyUnit *siunit1,
                  GwyUnit *siunit2,
                  GwyUnit *result)
{
    /* All argument checking is left to gwy_unit_power_multiply(). */
    return gwy_unit_power_multiply(siunit1, 1, siunit2, 1, result);
}

/**
 * gwy_unit_divide:
 * @siunit1: A unit.
 * @siunit2: A unit.
 * @result: (nullable):
 *          A unit to set to quotient of @siunit1 and @siunit2.  It is safe to pass one of @siunit1, @siunit2. It
 *          can be %NULL too, a new unit is created then and returned.
 *
 * Divides two units.
 *
 * This is a shorthand for gwy_unit_power_multiply() with powers 1 and -1.
 *
 * Returns: When @result is %NULL, a newly created unit that has to be dereferenced when no longer used later.
 *          Otherwise @result itself is simply returned, its reference count is NOT increased.
 **/
GwyUnit*
gwy_unit_divide(GwyUnit *siunit1,
                GwyUnit *siunit2,
                GwyUnit *result)
{
    /* Division is multiplication by the divisor raised to power -1. */
    return gwy_unit_power_multiply(siunit1, 1, siunit2, -1, result);
}

/**
 * gwy_unit_power:
 * @siunit: A unit.
 * @power: Power to raise @siunit to.
 * @result: (nullable):
 *          A unit to set to power of @siunit.  It is safe to pass @siunit itself.  It can be %NULL too, a new
 *          unit is created then and returned.
 *
 * Computes a power of a unit.
 *
 * The value-changed signal is emitted on the returned unit.
 *
 * Returns: When @result is %NULL, a newly created unit that has to be dereferenced when no longer used later.
 *          Otherwise @result itself is simply returned, its reference count is NOT increased.
 **/
GwyUnit*
gwy_unit_power(GwyUnit *siunit,
               gint power,
               GwyUnit *result)
{
    g_return_val_if_fail(GWY_IS_UNIT(siunit), NULL);
    g_return_val_if_fail(!result || GWY_IS_UNIT(result), NULL);

    /* Create the target lazily, only after the arguments have been validated. */
    GwyUnit *target = result ? result : gwy_unit_new(NULL);

    power_do(siunit, power, target);
    g_signal_emit(target, signals[SGNL_VALUE_CHANGED], 0);

    return target;
}

/*
 * Sets @result to @siunit raised to @power, without emitting any signal.
 *
 * The powers are computed in a scratch array, so @siunit and @result may be the same object.  Zero @power makes the
 * unit list of @result empty.
 */
static GwyUnit*
power_do(GwyUnit *siunit,
         gint power,
         GwyUnit *result)
{
    GwyUnitPrivate *srcpriv = siunit->priv, *dstpriv = result->priv;
    GArray *scaled = g_array_new(FALSE, FALSE, sizeof(GwySimpleUnit));

    dstpriv->power10 = power*srcpriv->power10;

    /* For zero power the scratch array simply stays empty. */
    if (power != 0) {
        g_array_append_vals(scaled, srcpriv->units->data, srcpriv->units->len);
        for (guint k = 0; k < scaled->len; k++)
            g_array_index(scaled, GwySimpleUnit, k).power *= power;
    }

    /* Replace the content of the result only now, when the source is no longer needed. */
    g_array_set_size(dstpriv->units, 0);
    g_array_append_vals(dstpriv->units, scaled->data, scaled->len);
    g_array_free(scaled, TRUE);

    return result;
}

/**
 * gwy_unit_nth_root:
 * @siunit: A unit.
 * @ipower: The root to take: 2 means a quadratic root, 3 means cubic root, etc.
 * @result: (nullable):
 *          A unit to set to the root of @siunit.  It is safe to pass @siunit itself.  It can be %NULL too, a new
 *          unit is created then and returned.
 *
 * Calculates n-th root of a unit.
 *
 * This operation fails if the result would have fractional powers that are not representable by #GwyUnit.  In such
 * case @result is not modified.
 *
 * Returns: On success: When @result is %NULL, a newly created unit that has to be dereferenced when no longer used
 *          later, otherwise @result itself is simply returned, its reference count is NOT increased. On failure %NULL
 *          is always returned.
 **/
GwyUnit*
gwy_unit_nth_root(GwyUnit *siunit,
                  gint ipower,
                  GwyUnit *result)
{
    g_return_val_if_fail(GWY_IS_UNIT(siunit), NULL);
    g_return_val_if_fail(!result || GWY_IS_UNIT(result), NULL);
    g_return_val_if_fail(ipower != 0, NULL);

    /* Every power must be divisible by @ipower, otherwise the root is not representable. */
    GwyUnitPrivate *priv = siunit->priv;
    GArray *source = priv->units;
    for (guint k = 0; k < source->len; k++) {
        if (g_array_index(source, GwySimpleUnit, k).power % ipower != 0)
            return NULL;
    }

    if (!result)
        result = gwy_unit_new(NULL);

    /* The power of 10 is not subject to the divisibility check; integer division is used. */
    GwyUnitPrivate *rpriv = result->priv;
    rpriv->power10 = priv->power10/ipower;

    /* When @result is @siunit itself the powers are divided in place, otherwise the units are copied first. */
    GArray *target = rpriv->units;
    if (target != source) {
        g_array_set_size(target, 0);
        g_array_append_vals(target, source->data, source->len);
    }
    for (guint k = 0; k < target->len; k++)
        g_array_index(target, GwySimpleUnit, k).power /= ipower;

    g_signal_emit(result, signals[SGNL_VALUE_CHANGED], 0);

    return result;
}

/**
 * gwy_unit_power_multiply:
 * @siunit1: A unit.
 * @power1: Power to raise @siunit1 to.
 * @siunit2: A unit.
 * @power2: Power to raise @siunit2 to.
 * @result: (nullable):
 *          A unit to set to @siunit1^@power1*@siunit2^@power2. It is safe to pass @siunit1 or @siunit2.  It can
 *          be %NULL too, a new unit is created then and returned.
 *
 * Computes the product of two units raised to arbitrary powers.
 *
 * This is the most complex unit arithmetic function.  It can be easily chained when more than two units are to be
 * multiplied.
 *
 * The value-changed signal is emitted on the returned unit.
 *
 * Returns: When @result is %NULL, a newly created unit that has to be dereferenced when no longer used later.
 *          Otherwise @result itself is simply returned, its reference count is NOT increased.
 **/
GwyUnit*
gwy_unit_power_multiply(GwyUnit *siunit1,
                        gint power1,
                        GwyUnit *siunit2,
                        gint power2,
                        GwyUnit *result)
{
    GwyUnit *op2 = NULL;

    g_return_val_if_fail(GWY_IS_UNIT(siunit1), NULL);
    g_return_val_if_fail(GWY_IS_UNIT(siunit2), NULL);
    g_return_val_if_fail(!result || GWY_IS_UNIT(result), NULL);

    if (!result)
        result = gwy_unit_new(NULL);

    /* Try to avoid hard work by making siunit2 the simpler one */
    GwyUnitPrivate *priv1 = siunit1->priv, *priv2 = siunit2->priv;
    if ((!priv1->units->len && priv2->units->len)
        || (!power1 && power2)
        || (siunit2 == result && siunit1 != result)) {
        GWY_SWAP(GwyUnit*, siunit1, siunit2);
        GWY_SWAP(gint, power1, power2);
        GWY_SWAP(GwyUnitPrivate*, priv1, priv2);
    }
    if (!power2 || !priv2->units->len) {
        /* We can do this only if we won't use siunit2 for anything as it can be the same object as result.
         * NOTE(review): the power of 10 of siunit2 is not added here, unlike in the general case below -- confirm
         * this is intended for unit-less operands. */
        power_do(siunit1, power1, result);
        canonicalize_do(result);
        /* The result has been modified, so notify listeners exactly as the general case does. */
        g_signal_emit(result, signals[SGNL_VALUE_CHANGED], 0);
        return result;
    }

    /* Operate on a temporary copy in the general case to ensure siunit2 and result are different objects.*/
    if (siunit2 == result) {
        op2 = gwy_unit_copy(siunit2);
        siunit2 = op2;
        priv2 = siunit2->priv;
    }
    power_do(siunit1, power1, result);

    /* Merge the second operand: add powers to matching units and append the units not present yet. */
    GwyUnitPrivate *rpriv = result->priv;
    rpriv->power10 += power2*priv2->power10;
    for (guint i = 0; i < priv2->units->len; i++) {
        GwySimpleUnit *unit2 = &unit_index(priv2, i);
        guint j;

        for (j = 0; j < rpriv->units->len; j++) {
            GwySimpleUnit *unit = &unit_index(rpriv, j);
            gwy_debug("[%d] %u == [%d] %u", i, unit2->unit, j, unit->unit);
            if (unit2->unit == unit->unit) {
                unit->power += power2*unit2->power;
                break;
            }
        }
        if (j == rpriv->units->len) {
            g_array_append_val(rpriv->units, *unit2);
            GwySimpleUnit *unit = &g_array_index(rpriv->units, GwySimpleUnit, rpriv->units->len - 1);
            unit->power *= power2;
        }
    }
    canonicalize_do(result);
    g_clear_object(&op2);
    g_signal_emit(result, signals[SGNL_VALUE_CHANGED], 0);

    return result;
}

/**
 * gwy_unit_factor_to_base:
 * @siunit: A unit.
 * @result: (nullable):
 *          A unit to set to decomposed @siunit.  It is safe to pass @siunit itself.  It can be %NULL too, a new
 *          unit is created then and returned.
 * @mfactor: (out) (optional):
 *           Location to store multiplicative factor between @siunit and @result.  For instance, for electronvolt the
 *           value 1.60217653e-16 would be stored (the factor 1000 comes from kilogram).  Pass %NULL if you are only
 *           interested in dimension equality.
 *
 * Factors a possibly derived unit to base units.
 *
 * For instance, if @siunit was set to "N/m" the result will be "kg/s^2".
 *
 * Normally the result will consist only of the base seven units.  However, recognised non-units (and
 * pseudounits) in @siunit, such as "px" are left intact in the decomposition.
 *
 * Also note that the decomposition is done to prefixable units. Kilogram is not prefixable (gram is) and there is no
 * general way to keep the kilo- on the kilograms when deriving units for different powers of 10.  Therefore, the
 * calculated factor corresponds to decomposition to grams.
 *
 * You must multiply the corresponding data with @mfactor if you intend to use the @result for them instead of
 * @siunit!
 *
 * Returns: When @result is %NULL, a newly created unit that has to be dereferenced when no longer used later.
 *          Otherwise @result itself is simply returned, its reference count is NOT increased.
 **/
GwyUnit*
gwy_unit_factor_to_base(GwyUnit *siunit,
                        GwyUnit *result,
                        gdouble *mfactor)
{
    GwyUnit *factored = NULL;
    GwySimpleUnit *unit;
    GwySimpleUnit bunit;
    const GwyUnitDecomposition *decomp;
    const gchar *unitstr;
    gdouble mf = 1.0;
    guint i, j, k;

    g_return_val_if_fail(GWY_IS_UNIT(siunit), NULL);
    g_return_val_if_fail(!result || GWY_IS_UNIT(result), NULL);

    /* When the caller wants in-place decomposition, build it in a temporary unit because the source unit list is
     * read while the result list is being filled. */
    if (!result)
        result = gwy_unit_new(NULL);
    else if (result == siunit)
        result = factored = gwy_unit_new(NULL);
    else
        g_array_set_size(result->priv->units, 0);

    GwyUnitPrivate *priv = siunit->priv, *rpriv = result->priv;
    for (i = 0; i < priv->units->len; i++) {
        unit = &unit_index(priv, i);
        unitstr = g_quark_to_string(unit->unit);
        /* Try to find the simple unit in known derived_units[] */
        for (j = 0; j < G_N_ELEMENTS(derived_units); j++) {
            decomp = derived_units + j;
            if (gwy_strequal(unitstr, decomp->unit)) {
                for (k = 0; k < GWY_UNIT_N_BASE; k++) {
                    if (!decomp->powers[k])
                        continue;

                    bunit.unit = g_quark_from_static_string(base_si_units[k]);
                    bunit.power = decomp->powers[k] * unit->power;
                    bunit.traits = 0;
                    g_array_append_val(rpriv->units, bunit);
                }
                /* The conversion factor belongs to the derived unit as a whole.  Apply it once, raised to the power
                 * with which the unit occurs, not once per base unit it decomposes to. */
                mf *= pow(decomp->factor, unit->power);
                break;
            }
        }
        /* If we cannot find it, just copy it over. */
        if (j == G_N_ELEMENTS(derived_units)) {
            bunit = *unit;
            g_array_append_val(rpriv->units, bunit);
        }
    }

    canonicalize_do(result);
    /* If caller passes @result == @siunit, we have to modify @siunit and throw away the allocated unit.  Otherwise
     * @result is either some other unit or newly allocated and we just return it.  */
    if (result == factored) {
        g_array_set_size(priv->units, 0);
        g_array_append_vals(priv->units, rpriv->units->data, rpriv->units->len);
        result = siunit;
        g_object_unref(factored);
    }

    if (mfactor)
        *mfactor = mf;

    return result;
}

/*
 * Brings the unit list of @siunit to a canonical form: each unit occurs at most once and no unit has zero power.
 * The relative order of the remaining units is preserved.  Returns @siunit.
 *
 * FIXME: Consider sorting SI units g, m, s, A, cd, mol.
 */
static GwyUnit*
canonicalize_do(GwyUnit *siunit)
{
    GArray *units = siunit->priv->units;

    /* Merge every unit into an earlier entry of the same unit, if there is one. */
    guint pos = 0;
    while (pos < units->len) {
        GwySimpleUnit *current = &g_array_index(units, GwySimpleUnit, pos);
        gboolean merged = FALSE;

        for (guint k = 0; k < pos; k++) {
            GwySimpleUnit *earlier = &g_array_index(units, GwySimpleUnit, k);
            if (earlier->unit == current->unit) {
                earlier->power += current->power;
                g_array_remove_index(units, pos);
                merged = TRUE;
                break;
            }
        }

        /* After a removal the next unit has moved to the current position, so advance only when nothing merged. */
        if (!merged)
            pos++;
    }

    /* Drop units whose powers have cancelled out. */
    for (pos = 0; pos < units->len; ) {
        if (g_array_index(units, GwySimpleUnit, pos).power)
            pos++;
        else
            g_array_remove_index(units, pos);
    }

    return siunit;
}

/*
 * Compares two units.
 *
 * Units with identical unit lists are always equal.  Otherwise both are factored to base units and the
 * decompositions are compared.  With @strict the multiplicative factors of the two decompositions must agree too;
 * without it only the dimensions are compared.
 */
static gboolean
equal_do(GwyUnit *siunit1, GwyUnit *siunit2, gboolean strict)
{
    if (siunit1 == siunit2)
        return TRUE;

    g_return_val_if_fail(GWY_IS_UNIT(siunit1), FALSE);
    g_return_val_if_fail(GWY_IS_UNIT(siunit2), FALSE);
    if (equal_direct(siunit1, siunit2))
        return TRUE;

    gdouble factor1 = 0.0, factor2 = 0.0;
    GwyUnit *base1 = gwy_unit_factor_to_base(siunit1, NULL, &factor1);
    GwyUnit *base2 = gwy_unit_factor_to_base(siunit2, NULL, &factor2);

    /* The factor ratio is only examined for strict comparison. */
    gboolean factors_agree = !strict || fabs(log(factor1/factor2)) < 1e-12;
    gboolean same = factors_agree && equal_direct(base1, base2);

    g_object_unref(base1);
    g_object_unref(base2);

    return same;
}

/*
 * Checks whether two units have the same unit lists, irrespective of order.
 *
 * Only unit identifiers and powers are compared.  Powers of 10 are not considered.
 */
static gboolean
equal_direct(GwyUnit *siunit1, GwyUnit *siunit2)
{
    if (siunit1 == siunit2)
        return TRUE;

    GArray *units1 = siunit1->priv->units, *units2 = siunit2->priv->units;
    if (units1->len != units2->len)
        return FALSE;

    for (guint i = 0; i < units1->len; i++) {
        const GwySimpleUnit *wanted = &g_array_index(units1, GwySimpleUnit, i);
        const GwySimpleUnit *match = NULL;

        /* Look up the first entry for the same unit in the other list. */
        for (guint j = 0; j < units2->len && !match; j++) {
            const GwySimpleUnit *candidate = &g_array_index(units2, GwySimpleUnit, j);
            if (candidate->unit == wanted->unit)
                match = candidate;
        }
        if (!match || match->power != wanted->power)
            return FALSE;
    }

    return TRUE;
}

/*
 * Formats the unit, including its power of 10, to the units string of @format using style @fs.
 *
 * The power of 10 is preferably expressed as an SI prefix of one of the units.  When no unit can take the prefix and
 * @magnitude_fixed is %FALSE, the power of 10 of @siunit and the magnitude and precision of @format may be adjusted
 * to make a prefix possible.  If that fails too, an explicit power of 10 is prepended.
 *
 * Note this function can modify the power of 10 stored in @siunit.
 */
static void
format_do(GwyUnit *siunit,
          const GwySIStyleSpec *fs,
          GwyValueFormat *format,
          gboolean magnitude_fixed)
{
    GString *string = format->units_gstring;
    /* The initial value only silences compiler warnings; prefix is always assigned when prefix_bearer >= 0. */
    const gchar *prefix = "No GCC, this can't be used uninitialized";
    GwyUnitPrivate *priv = siunit->priv;
    GwySimpleUnit *unit;
    gint i, prefix_bearer, move_me_to_end, moveby;

    g_assert(string);
    g_string_truncate(string, 0);

    /* If there is a single unit with negative exponent, move it to the end
     * TODO: we may want more sophistication here */
    move_me_to_end = -1;
    if (priv->units->len > 1) {
        for (i = 0; i < priv->units->len; i++) {
            unit = &unit_index(priv, i);
            if (unit->power < 0) {
                /* A second negative power found: give up moving anything. */
                if (move_me_to_end >= 0) {
                    move_me_to_end = -1;
                    break;
                }
                move_me_to_end = i;
            }
        }
    }

    /* Find a victim to prepend a prefix to.  Mwhahaha.  The power of 10 must be divisible by three times the unit
     * power.  The unit moved to the end is considered only when no other unit fits. */
    prefix_bearer = -1;
    if (priv->power10) {
        for (i = 0; i < priv->units->len; i++) {
            if (i == move_me_to_end)
                continue;
            unit = &unit_index(priv, i);
            if (priv->power10 % (3*abs(unit->power)) == 0) {
                prefix_bearer = i;
                break;
            }
        }
    }
    if (priv->power10 && prefix_bearer < 0 && move_me_to_end >= 0) {
        unit = &unit_index(priv, move_me_to_end);
        if (priv->power10 % (3*abs(unit->power)) == 0)
            prefix_bearer = move_me_to_end;
    }
    /* When we did not find any suitable prefix bearer, try moving the magnitude while keeping significant digits. */
    if (!magnitude_fixed && priv->power10 && prefix_bearer < 0) {
        /* First try moving decimal dot right. */
        for (moveby = 3; prefix_bearer < 0 && moveby <= format->precision + 1; moveby += 3) {
            for (i = 0; i < priv->units->len; i++) {
                if (i == move_me_to_end)
                    continue;
                unit = &unit_index(priv, i);
                if ((priv->power10 - moveby) % (3*abs(unit->power)) == 0) {
                    priv->power10 -= moveby;
                    format->magnitude /= gwy_exp10(moveby);
                    format->precision -= moveby;
                    /* We allow moving one digit too far there, but we must avoid making the precision negative. */
                    format->precision = MAX(format->precision, 0);
                    prefix_bearer = i;
                    break;
                }
            }
        }
        /* Then left, just once because we do not have any sanity check here.
         * XXX: is this part a good idea? */
        if (prefix_bearer < 0 && format->precision < 3) {
            moveby = 3;
            for (i = 0; i < priv->units->len; i++) {
                if (i == move_me_to_end)
                    continue;
                unit = &unit_index(priv, i);
                if ((priv->power10 + moveby) % (3*abs(unit->power)) == 0) {
                    priv->power10 += moveby;
                    format->magnitude *= gwy_exp10(moveby);
                    format->precision += moveby;
                    prefix_bearer = i;
                    break;
                }
            }
        }
    }
    /* Check whether we are not out of prefix range. */
    if (prefix_bearer >= 0) {
        unit = &unit_index(priv, prefix_bearer);
        prefix = format_prefix(priv->power10/unit->power);
        if (!prefix)
            prefix_bearer = -1;
    }

    /* If we were unable to place the prefix, we must add a power of 10. */
    if (priv->power10 && prefix_bearer < 0) {
        if (fs->multiplier)
            g_string_append(string, fs->multiplier);
        /* A few small powers are written out as plain numbers. */
        switch (priv->power10) {
            case -1:
            g_string_append(string, "0.1");
            break;

            case 1:
            g_string_append(string, "10");
            break;

            case 2:
            g_string_append(string, "100");
            break;

            default:
            g_string_append(string, "10");
            fs->format_power(string, priv->power10);
            break;
        }
        if (fs->power_unit_separator && priv->units->len)
            g_string_append(string, fs->power_unit_separator);
    }

    /* Append units. */
    for (i = 0; i < priv->units->len; i++) {
        if (i == move_me_to_end)
            continue;
        /* Put a separator before every unit except the first one printed.  The first printed unit has index 1
         * only when unit 0 is the one moved to the end, i.e. when move_me_to_end is zero. */
        if (i > 1 || (i && move_me_to_end)) {
            g_string_append(string, fs->unit_times);
        }
        unit = &unit_index(priv, i);
        if (i == prefix_bearer)
            g_string_append(string, prefix);
        g_string_append(string, g_quark_to_string(unit->unit));
        if (unit->power != 1)
            fs->format_power(string, unit->power);
    }
    /* The unit moved to the end follows a division sign, so its power is written with the opposite sign. */
    if (move_me_to_end >= 0) {
        g_string_append(string, fs->unit_division);
        unit = &unit_index(priv, move_me_to_end);
        if (move_me_to_end == prefix_bearer)
            g_string_append(string, prefix);
        g_string_append(string, g_quark_to_string(unit->unit));
        if (unit->power != -1)
            fs->format_power(string, -unit->power);
    }

    format->units = format->units_gstring->str;
}

/*
 * Finds the SI prefix corresponding to decimal @power.
 *
 * Returns the prefix name of the first entry in SI_prefixes[] with matching value, or %NULL if there is none.
 */
static const gchar*
format_prefix(gint power)
{
    guint i = 0;

    while (i < G_N_ELEMENTS(SI_prefixes) && SI_prefixes[i].value != power)
        i++;

    return i < G_N_ELEMENTS(SI_prefixes) ? SI_prefixes[i].name : NULL;
}

/* Appends @power to @string in plain text style, i.e. as a caret followed by the decimal number. */
static void
format_power_plain(GString *string, gint power)
{
    g_string_append_printf(string, "^%d", power);
}

/* Appends @power to @string as the decimal number enclosed in <sup> and </sup> tags. */
static void
format_power_pango(GString *string, gint power)
{
    g_string_append_printf(string, "<sup>%d</sup>", power);
}

/*
 * Appends @power to @string as a TeX superscript.
 *
 * Powers consisting of a single digit are written bare, anything else is enclosed in braces.
 */
static void
format_power_TeX(GString *string, gint power)
{
    const gboolean single_digit = (power >= 0 && power <= 9);

    if (single_digit) {
        g_string_append_printf(string, "^%d", power);
        return;
    }
    g_string_append_printf(string, "^{%d}", power);
}

/*
 * Appends @power to @string using Unicode superscript characters.
 *
 * The power is first printed as a decimal number and then each character is translated to its superscript form.
 * Superscript one, two and three have their own literals; the remaining digits are computed as offsets from
 * U+2070.
 */
static void
format_power_unicode(GString *string, gint power)
{
    gchar digits[16];

    g_snprintf(digits, sizeof(digits), "%d", power);
    for (const gchar *c = digits; *c; c++) {
        switch (*c) {
            case '1':
            g_string_append(string, "¹");
            break;

            case '2':
            g_string_append(string, "²");
            break;

            case '3':
            g_string_append(string, "³");
            break;

            case '-':
            g_string_append(string, "⁻");
            break;

            case '0':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            g_string_append_unichar(string, 0x2070 + *c - '0');
            break;

            default:
            g_warning("Weird digits in exponent %s\n", digits);
            g_string_append_c(string, *c);
            break;
        }
    }
}

/*
 * Adds the serialized representation of the unit to @group.
 *
 * Nothing is added for a unit with an empty unit list.  Otherwise the unit is formatted as a plain string, which is
 * kept in the private data until serializable_done() frees it.
 */
static void
serializable_itemize(GwySerializable *serializable, GwySerializableGroup *group)
{
    GwyUnit *unit = GWY_UNIT(serializable);
    GwyUnitPrivate *priv = unit->priv;

    if (priv->units->len) {
        /* The string of any previous serialization must have been released already. */
        g_assert(!priv->ser_unitstr);
        priv->ser_unitstr = gwy_unit_get_string(unit, GWY_UNIT_FORMAT_PLAIN);
        gwy_serializable_group_append_string(group, &serializable_items[ITEM_UNITSTR], priv->ser_unitstr);
    }
}

/* Releases the temporary unit string created by serializable_itemize(). */
static void
serializable_done(GwySerializable *serializable)
{
    GwyUnit *unit = GWY_UNIT(serializable);
    GWY_FREE(unit->priv->ser_unitstr);
}

/*
 * Restores the unit from deserialized items.
 *
 * The items of @group are filtered into a local copy of the item template, the unit is set from the unit string
 * item and the string is freed afterwards.  Always returns %TRUE.
 */
static gboolean
serializable_construct(GwySerializable *serializable, GwySerializableGroup *group, GwyErrorList **error_list)
{
    GwySerializableItem its[NUM_ITEMS];

    gwy_assign(its, serializable_items, NUM_ITEMS);
    gwy_deserialize_filter_items(its, NUM_ITEMS, group, TYPE_NAME, error_list);

    GwySerializableItem *stritem = &its[ITEM_UNITSTR];
    gwy_unit_set_from_string(GWY_UNIT(serializable), stritem->value.v_string);
    g_free(stritem->value.v_string);

    return TRUE;
}

/*
 * Copies the unit list and power of 10 from @source to @destination.
 *
 * Returns %TRUE if @destination was modified, %FALSE if it already had the same content as @source and was left
 * untouched.
 */
static gboolean
copy_units(GwyUnit *source, GwyUnit *destination)
{
    GwyUnitPrivate *spriv = source->priv, *dpriv = destination->priv;
    GArray *unitsrc = spriv->units, *unitdest = dpriv->units;

    /* Compare, not copy: the destination must only be reported as changed when the content really differs.  Empty
     * lists are handled separately because their data pointers can be NULL and must not be passed to memcmp(). */
    if (dpriv->power10 == spriv->power10
        && unitdest->len == unitsrc->len
        && (!unitsrc->len || !memcmp(unitdest->data, unitsrc->data, unitsrc->len*sizeof(GwySimpleUnit))))
        return FALSE;

    g_array_set_size(unitdest, 0);
    if (unitsrc->len)
        g_array_append_vals(unitdest, unitsrc->data, unitsrc->len);
    dpriv->power10 = spriv->power10;
    return TRUE;
}

/* Creates a new empty unit and fills it with the units and power of 10 of @serializable. */
static GwySerializable*
serializable_copy(GwySerializable *serializable)
{
    GwyUnit *copy = gwy_unit_new(NULL);
    copy_units(GWY_UNIT(serializable), GWY_UNIT(copy));
    return GWY_SERIALIZABLE(copy);
}

/*
 * Serializable assign method.
 *
 * Copies the content of @source to @destination and emits the value-changed signal on @destination when copy_units()
 * reports that the destination was modified.
 */
static void
serializable_assign(GwySerializable *destination, GwySerializable *source)
{
    gboolean changed = copy_units(GWY_UNIT(source), GWY_UNIT(destination));

    if (!changed)
        return;

    g_signal_emit(destination, signals[SGNL_VALUE_CHANGED], 0);
}

/**
 * gwy_unit_copy:
 * @unit: A unit to duplicate.
 *
 * Creates a new unit which is a duplicate of an existing one.
 *
 * This function is a convenience wrapper around gwy_serializable_copy().
 *
 * Returns: (transfer full):
 *          A newly created copy of the unit.
 **/
GwyUnit*
gwy_unit_copy(GwyUnit *unit)
{
    if (GWY_IS_UNIT(unit))
        return GWY_UNIT(gwy_serializable_copy(GWY_SERIALIZABLE(unit)));

    /* The argument is not a unit.  Complain using the assertion, but if execution continues past it (assertions can
     * be compiled out), still hand back a valid empty unit because returning NULL probably would crash something
     * soon. */
    g_assert(GWY_IS_UNIT(unit));
    return g_object_new(GWY_TYPE_UNIT, NULL);
}

/**
 * gwy_unit_assign:
 * @destination: Target unit.
 * @source: Source unit.
 *
 * Sets the content of one unit to be the same as the content of another.
 *
 * Assigning a unit to itself does nothing.
 *
 * This function is a convenience wrapper around gwy_serializable_assign().
 **/
void
gwy_unit_assign(GwyUnit *destination, GwyUnit *source)
{
    g_return_if_fail(GWY_IS_UNIT(destination));
    g_return_if_fail(GWY_IS_UNIT(source));

    /* Self-assignment is a no-op. */
    if (destination == source)
        return;

    gwy_serializable_assign(GWY_SERIALIZABLE(destination), GWY_SERIALIZABLE(source));
}

/**
 * SECTION: unit
 * @title: GwyUnit
 * @short_description: unit representation, physical quantity formatting
 *
 * #GwyUnit object represents a physical unit, it can be created from a unit string with gwy_unit_new().
 *
 * GwyUnit is also responsible for prefix selection and, more generally, formatting of physical quantities (see also
 * gwymath for pure number formatting functions).  There are several functions computing value format (as
 * a #GwyValueFormat structure) with given resolution -- gwy_unit_get_format_with_resolution(), or number of
 * significant digits -- gwy_unit_get_format_with_digits().
 **/

/**
 * GwyUnitFormatStyle:
 * @GWY_UNIT_FORMAT_NONE: No units.  This value is unused by #GwyUnit itself and must not be requested as
 *                           a format style.
 * @GWY_UNIT_FORMAT_PLAIN: Plain style, as one would use on a text terminal.
 * @GWY_UNIT_FORMAT_MARKUP: Pango markup, for units usable standalone.
 * @GWY_UNIT_FORMAT_VFMARKUP: Pango markup, for units directly after value.
 * @GWY_UNIT_FORMAT_TEX: Representation that can be typeset by TeX, for units usable standalone.
 * @GWY_UNIT_FORMAT_VFTEX: Representation that can be typeset by TeX, for units directly after value.
 * @GWY_UNIT_FORMAT_UNICODE: Representation in which exponents are rendered as Unicode characters, for units usable
 *                              standalone.
 * @GWY_UNIT_FORMAT_VFUNICODE: Representation in which exponents are rendered as Unicode characters, for units
 *                                directly after value.
 *
 * Physical quantity formatting style.
 *
 * The VF-variants differ from the non-VF ones by a multiplication sign at the start of units (where appropriate).
 **/

/**
 * GwyValueFormat:
 * @magnitude: Number to divide a quantity by (a power of 1000).
 * @precision: Number of decimal places to format a quantity to.
 * @units: Units to put after quantity divided by @magnitude.  This is actually an alias to @units_gstring->str.
 * @units_gstring: #GString used to represent @units internally.
 *
 * A physical quantity formatting information.
 *
 * The @magnitude and @precision fields can be directly modified if necessary. Units must be always set with
 * gwy_value_format_set_units() to update the internal representation properly.
 */

/* vim: set cin columns=120 tw=118 et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
