/*
 *  $Id: tip.c 29540 2026-02-24 14:00:46Z yeti-dn $
 *  Copyright (C) 2003-2026 David Necas (Yeti), Petr Klapetek.
 *  E-mail: yeti@gwyddion.net, klapetek@gwyddion.net.
 *
 *  This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along with this program; if not, write to the
 *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include "config.h"
#include <string.h>
#include <glib/gi18n-lib.h>

#include "libgwyddion/macros.h"
#include "libgwyddion/arithmetic.h"
#include "libgwyddion/extend.h"
#include "libgwyddion/stats.h"
#include "libgwyddion/tip.h"

#include "libgwyddion/omp.h"
#include "libgwyddion/internal.h"

// NB: The dilation and erosion functions are structurally the same, just taking the maximum of image + tip or the
// minimum of image − tip. You are supposed to mirror the tip (structuring element) beforehand appropriately! This way
// we scan the tip image memory linearly forward in both cases.
/* Dilation value for one pixel whose whole tip footprint lies inside the image.
 *
 * @src points to the image pixel lying under the first tip pixel and @xres is the image row stride. The tip is read
 * as @tyres consecutive rows of @txres values. Returns the largest image + tip sum over the footprint. */
static inline gdouble
dilate_interior(const gdouble *src, gint xres,
                const gdouble *tip, gint txres, gint tyres)
{
    gdouble best = -G_MAXDOUBLE;
    const gdouble *tiprow = tip;

    for (gint row = 0; row < tyres; row++) {
        const gdouble *imgrow = src + row*xres;
        for (gint col = 0; col < txres; col++) {
            gdouble sum = imgrow[col] + tiprow[col];
            if (sum > best)
                best = sum;
        }
        tiprow += txres;
    }
    return best;
}

/* Dilation value for pixel (@j,@i) when the tip footprint may stick out of the image.
 *
 * Image coordinates are clamped to the valid range, i.e. border pixels are repeated outwards. The tip is anchored
 * at its pixel (txres/2, tyres/2). Returns the largest image + tip sum over the footprint. */
static inline gdouble
dilate_border(const gdouble *src, gint xres, gint yres,
              const gdouble *tip, gint txres, gint tyres,
              gint j, gint i)
{
    gint ioff = tyres/2, joff = txres/2;
    gdouble best = -G_MAXDOUBLE;

    for (gint ti = 0; ti < tyres; ti++) {
        gint isrc = CLAMP(i + ti - ioff, 0, yres-1);
        const gdouble *imgrow = src + isrc*xres;
        const gdouble *tiprow = tip + ti*txres;

        for (gint tj = 0; tj < txres; tj++) {
            gint jsrc = CLAMP(j + tj - joff, 0, xres-1);
            gdouble sum = imgrow[jsrc] + tiprow[tj];
            if (sum > best)
                best = sum;
        }
    }
    return best;
}

/* Erosion value for one pixel whose whole tip footprint lies inside the image.
 *
 * @src points to the image pixel lying under the first tip pixel and @xres is the image row stride. The tip is read
 * as @tyres consecutive rows of @txres values. Returns the smallest image − tip difference over the footprint. */
static inline gdouble
erode_interior(const gdouble *src, gint xres,
               const gdouble *tip, gint txres, gint tyres)
{
    gdouble lowest = G_MAXDOUBLE;
    const gdouble *tiprow = tip;

    for (gint row = 0; row < tyres; row++) {
        const gdouble *imgrow = src + row*xres;
        for (gint col = 0; col < txres; col++) {
            gdouble diff = imgrow[col] - tiprow[col];
            if (diff < lowest)
                lowest = diff;
        }
        tiprow += txres;
    }
    return lowest;
}

/* Erosion value for pixel (@j,@i) when the tip footprint may stick out of the image.
 *
 * Image coordinates are clamped to the valid range, i.e. border pixels are repeated outwards. The tip is anchored
 * at its pixel (txres-1 - txres/2, tyres-1 - tyres/2), which is the opposite parity to dilate_border(). Returns the
 * smallest image − tip difference over the footprint. */
static inline gdouble
erode_border(const gdouble *src, gint xres, gint yres,
             const gdouble *tip, gint txres, gint tyres,
             gint j, gint i)
{
    gint ioff = tyres-1 - tyres/2, joff = txres-1 - txres/2;
    gdouble lowest = G_MAXDOUBLE;

    for (gint ti = 0; ti < tyres; ti++) {
        gint isrc = CLAMP(i + ti - ioff, 0, yres-1);
        const gdouble *imgrow = src + isrc*xres;
        const gdouble *tiprow = tip + ti*txres;

        for (gint tj = 0; tj < txres; tj++) {
            gint jsrc = CLAMP(j + tj - joff, 0, xres-1);
            gdouble diff = imgrow[jsrc] - tiprow[tj];
            if (diff < lowest)
                lowest = diff;
        }
    }
    return lowest;
}

/* Dilates image @s (@xres × @yres) with tip @t (@txres × @tyres) and stores the result to @d.
 *
 * The tip must already be mirrored by the caller. (@joff,@ioff) is the tip pixel aligned with the image pixel being
 * computed. Pixels whose whole tip footprint fits inside the image go through dilate_interior(); all others go
 * through dilate_border(), which clamps coordinates and hard-codes the offsets (txres/2, tyres/2), so callers are
 * expected to pass the same offsets here.
 *
 * Each thread handles the row range obtained from gwy_omp_chunk_start()/gwy_omp_chunk_end(). Returns FALSE when the
 * cancellation flag ended up set (presumably by gwy_omp_set_fraction_check_cancel()), TRUE otherwise. */
static gboolean
dilate(const gdouble *s, gdouble *d, gint xres, gint yres,
       const gdouble *t, gint txres, gint tyres, gint joff, gint ioff,
       GwySetFractionFunc set_fraction)
{
    gboolean cancelled = FALSE, *pcancelled = &cancelled;

#ifdef _OPENMP
#pragma omp parallel if (gwy_threads_are_enabled()) default(none) \
            shared(s,d,t,xres,yres,txres,tyres,ioff,joff,set_fraction,pcancelled)
#endif
    {
        gint ifrom = gwy_omp_chunk_start(yres), ito = gwy_omp_chunk_end(yres);

        for (gint i = ifrom; i < ito; i++) {
            gdouble *drow = d + i*xres;
            gint top = i - ioff;
            gboolean rows_fit = (top >= 0 && top + tyres <= yres);

            for (gint j = 0; j < xres; j++) {
                gint left = j - joff;

                if (rows_fit && left >= 0 && left + txres <= xres)
                    drow[j] = dilate_interior(s + top*xres + left, xres, t, txres, tyres);
                else
                    drow[j] = dilate_border(s, xres, yres, t, txres, tyres, j, i);
            }

            if (gwy_omp_set_fraction_check_cancel(set_fraction, i, ifrom, ito, pcancelled))
                break;
        }
    }

    return !cancelled;
}

/* Erodes image @s (@xres × @yres) with tip @t (@txres × @tyres) and stores the result to @d.
 *
 * NB: Structurally this is dilate() with minimum of differences instead of maximum of sums. The caller is supposed
 * to mirror the tip beforehand as appropriate! (@joff,@ioff) is the tip pixel aligned with the image pixel being
 * computed. Pixels whose whole tip footprint fits inside the image go through erode_interior(); all others go
 * through erode_border(), which clamps coordinates and hard-codes the offsets (txres-1 - txres/2,
 * tyres-1 - tyres/2), so callers are expected to pass the same offsets here.
 *
 * Each thread handles the row range obtained from gwy_omp_chunk_start()/gwy_omp_chunk_end(). Returns FALSE when the
 * cancellation flag ended up set (presumably by gwy_omp_set_fraction_check_cancel()), TRUE otherwise. */
static gboolean
erode(const gdouble *s, gdouble *d, gint xres, gint yres,
      const gdouble *t, gint txres, gint tyres, gint joff, gint ioff,
      GwySetFractionFunc set_fraction)
{
    gboolean cancelled = FALSE, *pcancelled = &cancelled;

#ifdef _OPENMP
#pragma omp parallel if (gwy_threads_are_enabled()) default(none) \
            shared(s,d,t,xres,yres,txres,tyres,ioff,joff,set_fraction,pcancelled)
#endif
    {
        gint ifrom = gwy_omp_chunk_start(yres), ito = gwy_omp_chunk_end(yres);

        for (gint i = ifrom; i < ito; i++) {
            gdouble *drow = d + i*xres;
            gint top = i - ioff;
            gboolean rows_fit = (top >= 0 && top + tyres <= yres);

            for (gint j = 0; j < xres; j++) {
                gint left = j - joff;

                if (rows_fit && left >= 0 && left + txres <= xres)
                    drow[j] = erode_interior(s + top*xres + left, xres, t, txres, tyres);
                else
                    drow[j] = erode_border(s, xres, yres, t, txres, tyres, j, i);
            }

            if (gwy_omp_set_fraction_check_cancel(set_fraction, i, ifrom, ito, pcancelled))
                break;
        }
    }

    return !cancelled;
}

/**
 * gwy_field_tip_dilation:
 * @field: A data field with surface topography.
 * @tip: Tip data field. Its pixel size must match @field pixel size.
 * @result: Data field where to store dilated surface to.
 * @max_is_zero: %TRUE to behave as if @tip maximum was zero, %FALSE to use @tip as given.
 * @set_fraction: (scope call) (nullable): Function that sets fraction to output (or %NULL).
 * @set_message: (scope call) (nullable): Function that sets message to output (or %NULL).
 *
 * Performs the tip convolution (dilation) with a data field.
 *
 * The operation is the morphological dilation of @field by @tip. With @max_is_zero set to %TRUE the tip is shifted
 * so that its maximum is zero, which keeps the absolute surface height in place and is usually what is wanted for SPM
 * tip operations. Pass %FALSE for a general morphological dilation with the structuring element taken literally.
 *
 * @result is resized to the dimensions of @field. If the operation is cancelled the size and contents of @result
 * field are undefined. Cancellation can only occur if non-%NULL @set_fraction or @set_message is passed.
 *
 * Returns: %TRUE if the operation finished, %FALSE if it was cancelled.
 **/
gboolean
gwy_field_tip_dilation(GwyField *field,
                       GwyField *tip,
                       GwyField *result,
                       gboolean max_is_zero,
                       GwySetFractionFunc set_fraction,
                       GwySetMessageFunc set_message)
{
    g_return_val_if_fail(GWY_IS_FIELD(field), FALSE);
    g_return_val_if_fail(GWY_IS_FIELD(tip), FALSE);
    g_return_val_if_fail(GWY_IS_FIELD(result), FALSE);

    if (set_message && !set_message(_("Dilation...")))
        return FALSE;
    if (set_fraction && !set_fraction(0.0))
        return FALSE;

    gint xres = field->xres, yres = field->yres;
    gwy_field_resize(result, xres, yres);
    gwy_field_invalidate(result);

    /* Work on a private, mirrored copy of the tip; dilate() scans the structuring element linearly forward and
     * expects it already flipped. The optional shift preserves the field height as the original implementation
     * does. */
    GwyField *mirrored = gwy_field_copy(tip);
    gwy_field_flip(mirrored, TRUE, TRUE);
    if (max_is_zero) {
        gdouble tipmax = gwy_field_max(mirrored);
        gwy_field_add(mirrored, -tipmax);
    }

    gint txres = tip->xres, tyres = tip->yres;
    gboolean finished = dilate(field->priv->data, result->priv->data, xres, yres,
                               mirrored->priv->data, txres, tyres, txres/2, tyres/2,
                               set_fraction);

    g_object_unref(mirrored);
    return finished;
}

/**
 * gwy_field_tip_erosion:
 * @field: A data field with surface topography.
 * @tip: Tip data field. Its pixel size must match @field pixel size.
 * @result: Data field where to store eroded surface to.
 * @max_is_zero: %TRUE to behave as if @tip maximum was zero, %FALSE to use @tip as given.
 * @set_fraction: (scope call) (nullable): Function that sets fraction to output (or %NULL).
 * @set_message: (scope call) (nullable): Function that sets message to output (or %NULL).
 *
 * Performs the surface reconstruction (erosion) with a data field.
 *
 * The function is equivalent to the morphological erosion operation. Passing @max_is_zero as %TRUE ensures the
 * absolute surface height does not shift, which is usually convenient for SPM tip operations. If you use the function
 * to carry out general morphological erosion, you may want to pass %FALSE.
 *
 * @result is resized to the dimensions of @field. If the operation is cancelled the size and contents of @result
 * field are undefined. Cancellation can only occur if non-%NULL @set_fraction or @set_message is passed.
 *
 * Returns: %TRUE if the operation finished, %FALSE if it was cancelled.
 **/
gboolean
gwy_field_tip_erosion(GwyField *field,
                      GwyField *tip,
                      GwyField *result,
                      gboolean max_is_zero,
                      GwySetFractionFunc set_fraction,
                      GwySetMessageFunc set_message)
{
    g_return_val_if_fail(GWY_IS_FIELD(field), FALSE);
    g_return_val_if_fail(GWY_IS_FIELD(tip), FALSE);
    g_return_val_if_fail(GWY_IS_FIELD(result), FALSE);

    if ((set_message && !set_message(_("Erosion...")))
        || (set_fraction && !set_fraction(0.0)))
        return FALSE;

    gint xres = field->xres, yres = field->yres;
    gwy_field_resize(result, xres, yres);
    /* The result data are written directly through priv->data below, so any cached properties of @result must be
     * dropped, exactly as gwy_field_tip_dilation() does. */
    gwy_field_invalidate(result);

    /* Preserve the field height as original implementation does. The tip is used unmirrored here, in contrast to
     * dilation, which flips it. */
    GwyField *mytip = gwy_field_copy(tip);
    if (max_is_zero)
        gwy_field_add(mytip, -gwy_field_max(mytip));

    gint txres = tip->xres, tyres = tip->yres;
    // The dilation and erosion must use the opposite parities to properly cancel and not move the image.
    gboolean ok = erode(field->priv->data, result->priv->data, xres, yres,
                        mytip->priv->data, txres, tyres, txres-1 - txres/2, tyres-1 - tyres/2,
                        set_fraction);

    g_object_unref(mytip);

    return ok;
}

/* Fills the certainty map @cmap for image @field, given its erosion @eroded by @tip.
 *
 * @field, @eroded and @cmap are all indexed with the row stride of @field, so they must have identical dimensions.
 * (@xc,@yc) is the tip apex position within @tip and @tol is the absolute tolerance used when comparing heights for
 * equality. For each tip position fully inside the image, the tip pixels touching the eroded surface are counted; if
 * there is exactly one contact, the touched pixel is marked with 1 in @cmap. Other @cmap pixels are left untouched,
 * so the caller must clear @cmap beforehand.
 *
 * Returns FALSE when the cancellation flag ended up set (presumably by gwy_omp_set_fraction_check_cancel()), TRUE
 * otherwise. */
static gboolean
certainty_map(GwyField *field, GwyField *eroded, GwyNield *cmap, GwyField *tip,
              gint xc, gint yc, gdouble tol,
              GwySetFractionFunc set_fraction)
{
    gint xres = field->xres, yres = field->yres;
    gint txres = tip->xres, tyres = tip->yres;
    const gdouble *d = field->priv->data, *e = eroded->priv->data, *t = tip->priv->data;
    gint *c = cmap->priv->data;

    /* Here Villarrubia scans only pixels in the interior in the image. Pixels where the tip could be in contact with
     * something outside the image are conservatively marked as not certain.
     *
     * We counter this by extending the image beforehand, which looks rather silly. The main advantage of doing it
     * this way is probably that the inner loop can always go through the entire tip and does not need to care about
     * coordinates being outside the image. It also keeps this procedure independent of the extension, so we can
     * easily choose how or whether to do that. */
    gboolean cancelled = FALSE, *pcancelled = &cancelled;

#ifdef _OPENMP
#pragma omp parallel if (gwy_threads_are_enabled()) default(none) \
            shared(d,t,e,c,xres,yres,txres,tyres,xc,yc,tol,set_fraction,pcancelled)
#endif
    {
        gint ifrom = gwy_omp_chunk_start(yres+1 - tyres) + yc;
        gint ito = gwy_omp_chunk_end(yres+1 - tyres) + yc;

        for (gint i = ifrom; i < ito; i++) {
            for (gint j = xc; j <= xres + xc - txres; j++) {
                gdouble z = d[i*xres + j];
                gint count = 0, x, y;
                const gdouble *tt = t;
                /* ti is the tip ROW (paired with yc and the image row stride) and tj the tip COLUMN (paired with
                 * xc), so the outer loop must run over tyres and the inner one over txres. Only then does tt, which
                 * walks the tip linearly, stay in step with (tj,ti), and erow[tj] stay inside the image row, for
                 * non-square tips. */
                for (gint ti = 0; ti < tyres; ti++) {
                    const gdouble *erow = e + xres*(ti + i - yc) + j - xc;
                    for (gint tj = 0; tj < txres; tj++) {
                        // Villarrubia works with integers and has an exact equality here. Use a tiny but finite
                        // tolerance since we have floating point numbers.
                        if (fabs(z - *tt - erow[tj]) < tol) {
                            if (++count == 2)
                                break;
                            x = tj + j - xc;
                            y = ti + i - yc;
                        }
                        tt++;
                    }
                    // If we put the same condition directly to the for-cycle, GCC thinks x and y can be used
                    // uninitialised. Bite me.
                    if (count == 2)
                        break;
                }
                /* One contact = good recon. */
                /* This is OK with parallelisation because if we write from multiple threads to the same location, we
                 * write the same value. It is not that nice generally, because we determine the certainty for
                 * (x,y) but would be interested in the certainty for (j,i). In other words, there are probably
                 * undetected certain pixels (as can be seen in almost-but-not-quite flat regions) as the algorithm
                 * does not know how to check the certainty of a specific image pixel.  */
                if (count == 1)
                    c[y*xres + x] = 1;
            }

            if (gwy_omp_set_fraction_check_cancel(set_fraction, i, ifrom, ito, pcancelled))
                break;
        }
    }

    return !cancelled;
}

/**
 * gwy_field_tip_certainty_map:
 * @field: A data field with surface topography.
 * @tip: Tip data field. Its pixel size must match @field pixel size.
 * @result: Number field to fill with the certainty map.
 * @trust_border: %TRUE to try to assign certainty also to pixels close to the border, %FALSE to always consider pixels
 *                uncertain if the tip could touch something outside the image.
 * @set_fraction: (scope call) (nullable): Function that sets fraction to output (or %NULL).
 * @set_message: (scope call) (nullable): Function that sets message to output (or %NULL).
 *
 * Estimates tip certainty map for a data field.
 *
 * The function implements the certainty map algorithm published by Villarrubia. The certainty map is a mask of points
 * where tip did not directly touch the surface.
 *
 * If the operation is cancelled, @result is empty. Cancellation can only occur if non-%NULL @set_fraction or
 * @set_message is passed.
 *
 * Returns: %TRUE if the operation finished, %FALSE if it was cancelled.
 **/
gboolean
gwy_field_tip_certainty_map(GwyField *field,
                            GwyField *tip,
                            GwyNield *result,
                            gboolean trust_border,
                            GwySetFractionFunc set_fraction,
                            GwySetMessageFunc set_message)
{
    g_return_val_if_fail(GWY_IS_FIELD(field), FALSE);
    g_return_val_if_fail(GWY_IS_FIELD(tip), FALSE);
    g_return_val_if_fail(GWY_IS_NIELD(result), FALSE);

    /* NOTE(review): @result is only cleared, never resized, here. It apparently must already have the dimensions of
     * @field – confirm against callers. */
    gwy_nield_clear(result);
    if ((set_message && !set_message(_("Erosion...")))
        || (set_fraction && !set_fraction(0.0)))
        return FALSE;

    GwyField *invtip = gwy_field_copy(tip);
    gwy_field_flip(invtip, TRUE, TRUE);

    /* With @trust_border, pad the image on all sides by a bit more than half of the tip, filling the padding with the
     * image minimum. The map is then computed on the padded image and cropped back at the end. */
    gint txres = invtip->xres, tyres = invtip->yres, hext = 0, vext = 0;
    GwyField *extended;
    if (trust_border) {
        hext = txres/2 + 1;
        vext = tyres/2 + 1;
        extended = gwy_field_extend(field, hext, hext, vext, vext,
                                    GWY_EXTERIOR_FIXED_VALUE, gwy_field_min(field), FALSE);
    }
    else
        extended = g_object_ref(field);

    GwyField *eroded = gwy_field_new_alike(extended, FALSE);
    gint xres = field->xres, yres = field->yres;
    gboolean ok = FALSE;

    if (!erode(extended->priv->data, eroded->priv->data, xres + 2*hext, yres + 2*vext,
               invtip->priv->data, txres, tyres, txres-1 - txres/2, tyres-1 - tyres/2,
               set_fraction))
        goto finalise;

    GwyNield *extresult = trust_border ? gwy_field_new_nield_alike(eroded) : g_object_ref(result);
    gdouble min, max;
    gwy_field_min_max(tip, &min, &max);
    // The height comparison tolerance is relative to the tip height range.
    ok = certainty_map(extended, eroded, extresult, tip, txres/2, tyres/2, 1e-9*(max - min), set_fraction);
    if (ok) {
        if (trust_border)
            gwy_nield_area_copy(extresult, result, hext, vext, xres, yres, 0, 0);
    }
    else if (!trust_border) {
        /* Without extension certainty_map() wrote directly to @result, which is now partially filled. Clear it
         * again to keep the documented promise of an empty result upon cancellation. */
        gwy_nield_clear(result);
    }
    g_object_unref(extresult);

finalise:
    g_object_unref(invtip);
    g_object_unref(eroded);
    g_object_unref(extended);

    return ok;
}

/**
 * is_good_candidate:
 * @d: Field data.
 * @xres: Number of columns.
 * @yres: Number of rows.
 * @col: Image column.
 * @row: Image row.
 * @delta: Neighbourhood size to search.
 *
 * Determines whether a surface pixel seems suitable for tip refinement.
 *
 * The pixel qualifies if no pixel in its square @delta-neighbourhood (cropped to the image) is strictly higher and
 * the number of neighbourhood pixels with exactly the same height (the pixel itself included) does not exceed one
 * fifth of the neighbourhood area. The latter rejects flat regions.
 */
static gboolean
is_good_candidate(const gdouble *d, gint xres, gint yres,
                  gint col, gint row, gint delta)
{
    gint jfirst = MAX(col - delta, 0), jlast = MIN(col + delta, xres-1);
    gint ifirst = MAX(row - delta, 0), ilast = MIN(row + delta, yres-1);
    gdouble centre = d[row*xres + col];
    gint nequal = 0;

    /* Bail out at the first higher neighbour; such a pixel can never be selected. Villarrubia (or at least Petr's
     * code) instead always scanned the whole neighbourhood for the true maximum and compared it with the pixel at
     * the end, which gives the same answer more slowly.
     *
     * Villarrubia works with discretised heights, which brings some implicit leniency. The comparison here is
     * strict. It is an open question whether almost-but-not-quite local maxima should be accepted too.
     *
     * Speed considerations differ between the estimation modes. Parallelisation is only used in the full estimation,
     * which probably also profits more from the preselection: it has more useless locations and starts from a better
     * tip estimate. So it seems preferable to keep the candidate list tight. */
    for (gint i = ifirst; i <= ilast; i++) {
        const gdouble *drow = d + i*xres;
        for (gint j = jfirst; j <= jlast; j++) {
            gdouble v = drow[j];
            if (v > centre)
                return FALSE;
            nequal += (v == centre);
        }
    }

    /* The pixel is a (non-strict) local maximum. Accept it unless too many neighbours share the same height, i.e.
     * the neighbourhood is flat. */
    gint area = (ilast+1 - ifirst)*(jlast+1 - jfirst);
    return nequal <= area/5;
}

/**
 * tip_estimate_partial_init:
 * @field: A data field with surface topography.
 * @tip: Tip data field.
 * @xc: Tip apex column coordinate.
 * @yc: Tip apex row coordinate.
 * @npixels: Location where to store the returned number of candidate pixels.
 *
 * Finds a list of candidate points where to try tip improvement.
 *
 * Image pixels with rows from @tyres-1-@yc to @yres-1-@yc and columns from @txres-1-@xc to @xres-1-@xc are tested
 * with is_good_candidate(), using a neighbourhood of one tenth of the larger tip dimension (at least one pixel).
 * When the loop runs in parallel, the order of the returned points is not deterministic.
 *
 * Returns: A newly allocated array of pixel coordinates with @npixels elements, to be freed with g_free().
 **/
static GridPoint*
tip_estimate_partial_init(GwyField *field, GwyField *tip,
                          gint xc, gint yc,
                          guint *npixels)
{
    gint xres = field->xres, yres = field->yres;
    gint txres = tip->xres, tyres = tip->yres;
    const gdouble *d = field->priv->data;

    /* How near is the near neighbourhood used for the point selection. */
    gint delta = MAX(MAX(txres, tyres)/10, 1);

    /* The collected coordinates. The initial size is just a guess. */
    GArray *candidates = g_array_sized_new(FALSE, FALSE, sizeof(GridPoint), (gint)sqrt(xres*yres));
#ifdef _OPENMP
#pragma omp parallel for if (gwy_threads_are_enabled()) default(none) \
            shared(d,txres,tyres,xres,yres,xc,yc,delta,candidates)
#endif
    for (gint i = tyres - 1 - yc; i <= yres - 1 - yc; i++) {
        for (gint j = txres - 1 - xc; j <= xres - 1 - xc; j++) {
            if (!is_good_candidate(d, xres, yres, j, i, delta))
                continue;

            /* The array is shared among threads, so appending must be serialised. */
#ifdef _OPENMP
#pragma omp critical
#endif
            {
                GridPoint pt = { .i = i, .j = j };
                g_array_append_val(candidates, pt);
            }
        }
    }

    guint nfound = candidates->len;
    *npixels = nfound;
    /* Dispose of the GArray wrapper but keep and return its data. */
    return (GridPoint*)g_array_free(candidates, FALSE);
}

/**
 * improve_at_single_location:
 * @d: Surface image data.
 * @xres: Number of columns.
 * @yres: Number of rows.
 * @col: Image column.
 * @row: Image row.
 * @t: Tip data. It is modified in place.
 * @txres: Tip number of columns.
 * @tyres: Tip number of rows.
 * @xc: Tip apex column coordinate.
 * @yc: Tip apex row coordinate.
 * @threshold: Threshold for noise suppression.
 * @use_edges: Whether use also edges of image.
 * @goodpixels: Caller-allocated workspace for storing which tip pixels look promising. It must have at least
 *              @txres*@tyres elements.
 *
 * Improve tip estimate from a single surface image location.
 *
 * This is a single-location version of itip_estimate_iter(). For what is this useful? The order of evaluation of the
 * points can affect the execution speed. That is because the image at some locations puts great constraints on the
 * tip shape. If the tip shape is refined by considering these points first, time is saved later (this is especially
 * true with our preselection of promising pixels).
 *
 * Since the function improves the tip at a single point, it allows the user to select the order in which image
 * coordinates are considered. As the tip is continuously refined, more image locations (and tip positions) can be
 * discarded quickly later if we start with the locations which promise the greatest improvements.
 *
 * The function only ever monotonically decreases the values in tip. It can be run in parallel on independent tips,
 * which are then combined using min(). The only issue is that we cannot calculate the number refinements
 * corresponding to a serial execution (in general, the split tips receive together more refinements than one shared
 * tip). The solution is to not care about the exact number of refinements made and use a different criterion, such as
 * the magnitude of the largest improvement.
 *
 * Nothing is done, and zero is returned, if the location is not in the image interior and @use_edges is %FALSE.
 *
 * Returns: The number of tip pixels that were lowered, i.e. a positive number if the tip was improved and zero
 *          otherwise.
 **/
static gint
improve_at_single_location(const gdouble *d, gint xres, gint yres, gint col, gint row,
                           gdouble *t, gint txres, gint tyres, gint xc, gint yc,
                           gdouble threshold, gboolean use_edges, GridPoint *goodpixels)
{
    // Interior means that any image coordinate formed below from (col,row) and two tip coordinates is inside the
    // image.
    gboolean interior = (col >= txres-1 && col <= xres - txres && row >= tyres-1 && row <= yres - tyres);
    gint improved = 0;
    if (!interior && !use_edges)
        return improved;

    gdouble z = d[row*xres + col];

    // Preselect. On average, this should be worth doing even for the edge regions, so do it always.
    //
    // NOTE(review): The image is read here at (col + xc - jd, row + yc - id) without any bounds check, also for edge
    // locations. The candidate ranges in tip_estimate_partial_init() keep these coordinates inside the image; any
    // other caller apparently has to guarantee the same – confirm.
    gint ngood = 0;
    for (gint id = 0; id < tyres; id++) {
        const gdouble *drow = d + (row + yc - id)*xres;
        gdouble *trow = t + id*txres;
        for (gint jd = 0; jd < txres; jd++) {
            /* The following condition is
             * - independent on jtip and itip,
             * - rarely satisfied (especially once the tip has already been improved a bit), and
             * - progressively becoming more false as we modify the tip, but not becoming true if it was not
             *   before.
             * So, instead always checking the entire tip in the inner cycle as Villarrubia does, we evaluate it once
             * at the beginning. Then only iterate over the promising pixels, which are much fewer. This is a major
             * speedup, basically going from O(|T|²) to O(|T|) as – except for the initial improvements – ngood tends
             * to be only O(1). */
            if (z - drow[col + xc - jd] <= trow[jd]) {
                goodpixels[ngood].i = id;
                goodpixels[ngood].j = jd;
                ngood++;
            }
        }
    }

    if (interior) {
        // Interior. The easy and fast part where we do not need to worry about being outside the image, etc.
        for (gint itip = 0; itip < tyres; itip++) {
            gdouble *trow = t + itip*txres;
            for (gint jtip = 0; jtip < txres; jtip++) {
                // Largest value found for this tip pixel; -G_MAXDOUBLE means no good pixel contributed.
                gdouble dil = -G_MAXDOUBLE;
                for (gint igood = 0; igood < ngood; igood++) {
                    /* Introduce auxiliary variables for the translated coordinates. We do not need them here because
                     * we do not use them repeatedly. It makes clear how the code matches the edge regions below,
                     * where we do use them repeatedly. */
                    gint jd = goodpixels[igood].j, id = goodpixels[igood].i;
                    // Read the current tip value; it may have been lowered since the preselection.
                    gdouble tz = t[id*txres + jd];
                    gint j = col + xc - jd, i = row + yc - id;
                    gint jt = col + jtip - jd, it = row + itip - id;
                    /* Keep checking the condition. It is still often false. */
                    if (z - d[xres*i + j] <= tz)
                        dil = fmax(dil, d[xres*it + jt] + tz - z);
                }
                /* Improve the tip pixel: tip → min(tip, dil+threshold). */
                if (dil > -G_MAXDOUBLE) {
                    gdouble imp = trow[jtip] - (dil + threshold);
                    if (imp > 0.0) {
                        trow[jtip] -= imp;
                        improved++;
                    }
                }
            }
        }
    }
    else {
        /* Handle the edge regions, if we must. */
        for (gint itip = 0; itip < tyres; itip++) {
            gdouble *trow = t + itip*txres;
            for (gint jtip = 0; jtip < txres; jtip++) {
                // Largest value found for this tip pixel; -G_MAXDOUBLE means the tip pixel must stay unchanged.
                gdouble dil = -G_MAXDOUBLE;
                for (gint igood = 0; igood < ngood; igood++) {
                    gint jd = goodpixels[igood].j, id = goodpixels[igood].i;
                    gint j = col + xc - jd, i = row + yc - id;
                    /* Determine whether the tip apex at (xc,yc) lies within the domain of the translated image, and
                     * if so, if it is inside (i.e. below or on the surface of) the image. If the apex is outside, no
                     * change is made for (jd,id), no matter what. */
                    if (i < 0 || i >= yres || j < 0 || j >= xres || z - d[i*xres + j] > t[id*txres + jd])
                        continue;

                    /* Determine whether the tip point (jtip,itip) under consideration lies within the domain of the
                     * translated image. If the apex is inside and ij is outside, the worst case is translated image
                     * value → G_MAXDOUBLE. This would result in no change for ANY (jd,id). We therefore reset dil,
                     * abandon the loop over the good pixels and leave this tip pixel (jtip,itip) unchanged. */
                    gint jt = col + jtip - jd, it = row + itip - id;
                    if (it < 0 || it >= yres || jt < 0 || jt >= xres) {
                        dil = -G_MAXDOUBLE;
                        break;
                    }

                    /* If neither is outside, both are inside. Meaning, we proceed as for the interior. */
                    gdouble tz = t[id*txres + jd];
                    if (z - d[xres*i + j] <= tz)
                        dil = fmax(dil, d[it*xres + jt] + tz - z);
                }
                /* Improve the tip pixel: tip → min(tip, dil+threshold). */
                if (dil > -G_MAXDOUBLE) {
                    gdouble imp = trow[jtip] - (dil + threshold);
                    if (imp > 0.0) {
                        trow[jtip] -= imp;
                        improved++;
                    }
                }
            }
        }
    }

    return improved;
}

/* Reports the blind estimation iteration number @iter through @set_message, if there is any.
 *
 * Returns what @set_message returned, or TRUE when @set_message is NULL. */
static gboolean
set_blind_iteration_message(GwySetMessageFunc set_message, gint iter)
{
    gboolean keep_going = TRUE;

    if (set_message) {
        gchar *text = g_strdup_printf(_("Blind tip estimation (iteration %d)..."), iter);
        keep_going = set_message(text);
        g_free(text);
    }

    return keep_going;
}

/* Decides whether the last pass changed @tip enough to justify another one.
 *
 * @tip is invalidated first because its data have been modified directly. @prevtip is overwritten with the result of
 * gwy_field_subtract_fields(prevtip, prevtip, tip) – presumably the difference prevtip − tip – so it no longer holds
 * the previous tip afterwards. Returns TRUE if the maximum of that field exceeds @min_improvement times the height
 * range of @tip. */
static gboolean
check_improvement(GwyField *tip, GwyField *prevtip, gdouble min_improvement)
{
    gwy_field_invalidate(tip);
    gwy_field_subtract_fields(prevtip, prevtip, tip);

    gdouble largest_change = gwy_field_max(prevtip);
    gdouble tipmax = gwy_field_max(tip);
    gdouble tipmin = gwy_field_min(tip);
    gdouble limit = min_improvement*(tipmax - tipmin);

    return largest_change > limit;
}

/**
 * gwy_field_tip_estimate_partial:
 * @field: A data field with surface topography.
 * @tip: Tip data field to be refined. Its pixel size must match the surface pixel size.
 * @threshold: Threshold for noise suppression.
 * @use_edges: Whether use also edges of image.
 * @set_fraction: (scope call) (nullable): Function that sets fraction to output (or %NULL).
 * @set_message: (scope call) (nullable): Function that sets message to output (or %NULL).
 *
 * Performs partial blind tip shape estimation with a data field.
 *
 * The function implements the blind estimation algorithm published by Villarrubia.
 *
 * The tip field must already have the intended pixel dimensions. It may be zero-filled for a fresh estimate or it can
 * be filled with the result of a prior estimate. In the latter case, the initialisation must follow the same
 * convention as the output, i.e. the apex value is zero and all other values are smaller (negative).
 *
 * Note that the threshold must be chosen sufficiently high to suppress small fluctuations due to noise (that would
 * lead to a very sharp tip) but sufficiently low to put the algorithm to work. A value similar to 1/10000 of the
 * surface range can be good. Otherwise we recommend to start with zero threshold and increase it slowly to observe
 * changes and choose the right value.
 *
 * If the operation is cancelled the contents of @tip is a partial estimate (including possibly the tip still being in
 * the initial state). Cancellation can only occur if non-%NULL @set_fraction or @set_message is passed.
 *
 * Returns: A positive number if the operation finished and tip was improved, zero if the operation finished without
 *          improving the tip, and a negative number if the operation was cancelled.
 **/
gint
gwy_field_tip_estimate_partial(GwyField *field,
                               GwyField *tip,
                               gdouble threshold,
                               gboolean use_edges,
                               GwySetFractionFunc set_fraction,
                               GwySetMessageFunc set_message)
{
    g_return_val_if_fail(GWY_IS_FIELD(field), -1);
    g_return_val_if_fail(GWY_IS_FIELD(tip), -1);

    gint txres = tip->xres, tyres = tip->yres;
    gint xc = txres/2, yc = tyres/2;
    guint ncandidates;
    GridPoint *candidates = tip_estimate_partial_init(field, tip, xc, yc, &ncandidates);

    const gdouble *d = field->priv->data;
    gdouble *t = tip->priv->data;
    gint xres = field->xres, yres = field->yres;
    gint iter = 1, improved = 0;
    /* Cancellation is tracked separately from the improvement counter. Otherwise a later improved++ could overwrite
     * the negative marker and the outer loop would have no means to notice the inner loop was cancelled. */
    gboolean cancelled = FALSE;
    GridPoint *goodpixels = g_new(GridPoint, txres*tyres);
    GwyField *prevtip = gwy_field_new_alike(tip, FALSE);

    do {
        if (!set_blind_iteration_message(set_message, iter)) {
            cancelled = TRUE;
            break;
        }

        gwy_field_copy_data(tip, prevtip);
        // FIXME: We can now parallelise this loop because we no longer promise to report exact improvement counts
        // anywhere. However, preselection in improve_at_single_location() makes it pretty fast.
        for (guint i = 0; i < ncandidates; i++) {
            if (improve_at_single_location(d, xres, yres, candidates[i].j, candidates[i].i,
                                           t, txres, tyres, xc, yc,
                                           threshold, use_edges, goodpixels))
                improved++;

            if (set_fraction && !set_fraction((i + 0.5)/ncandidates)) {
                cancelled = TRUE;
                break;
            }
        }
        iter++;
        /* Stop immediately when cancelled; do not start another pass just because the tip has changed. */
    } while (!cancelled && check_improvement(tip, prevtip, 1e-9));

    /* The tip data were modified directly and check_improvement(), which normally invalidates the tip, is skipped
     * upon cancellation. Make sure no stale cached properties survive in any case. */
    gwy_field_invalidate(tip);

    g_free(goodpixels);
    g_free(candidates);
    g_object_unref(prevtip);

    return cancelled ? -1 : improved;
}

/*
 * Performs one refinement pass of the full blind tip estimation.
 *
 * First the image is eroded into @eroded using the flipped copy of the tip stored to @invtip, and the result is
 * dilated with @tip into @opened (i.e. erosion followed by dilation). Then every image location where the data exceed
 * the opened image by more than @threshold is passed to improve_at_single_location() to refine the tip.
 *
 * @iter is only used for the progress message. Both callbacks may be NULL.
 *
 * Returns the number of locations where improve_at_single_location() reported an improvement, or -1 if the operation
 * was cancelled through @set_message or @set_fraction.
 */
static gint
improve_full(GwyField *field, GwyField *eroded, GwyField *opened,
             GwyField *tip, GwyField *invtip, gint xc, gint yc,
             gdouble threshold, gboolean use_edges,
             gint iter,
             GwySetMessageFunc set_message, GwySetFractionFunc set_fraction)
{
    // The erosion needs the structuring element mirrored beforehand (see the note at dilate_interior()), so keep
    // a flipped copy of the current tip in invtip.
    gwy_field_copy_data(tip, invtip);
    gwy_field_flip(invtip, TRUE, TRUE);

    gint xres = field->xres, yres = field->yres;
    gint txres = tip->xres, tyres = tip->yres;
    const gdouble *d = field->priv->data;

    // Erode the image with the flipped tip. The centre passed here is the tip centre (txres/2, tyres/2) expressed in
    // the flipped coordinates. Any of the callbacks or erode() itself returning FALSE means cancellation.
    if ((set_message && !set_message(_("Erosion...")))
        || (set_fraction && !set_fraction(0.0))
        || !erode(d, eroded->priv->data, xres, yres,
                  invtip->priv->data, txres, tyres, txres-1 - txres/2, tyres-1 - tyres/2,
                  set_fraction))
        return -1;

    // Dilate the eroded image with the original (unflipped) tip, which completes the opening.
    if ((set_message && !set_message(_("Dilation...")))
        || (set_fraction && !set_fraction(0.0))
        || !dilate(eroded->priv->data, opened->priv->data, xres, yres,
                   tip->priv->data, txres, tyres, txres/2, tyres/2,
                   set_fraction))
        return -1;

    const gdouble *odata = opened->priv->data;
    gdouble *t = tip->priv->data;
    // The cancellation flag is accessed through a pointer inside the parallel region; next_row is the shared counter
    // from which threads take rows to process.
    gboolean cancelled = FALSE, *pcancelled = &cancelled;
    gint next_row = 0, improved = 0;

    if (!set_blind_iteration_message(set_message, iter))
        return -1;

    // With default(none) every variable used inside the region must be listed explicitly. The per-thread improvement
    // counts are summed by the reduction.
#ifdef _OPENMP
#pragma omp parallel if (gwy_threads_are_enabled()) default(none) \
            reduction(+:improved) \
            shared(d,odata,t,xres,yres,txres,tyres,xc,yc,threshold,use_edges,next_row,set_fraction,pcancelled)
#endif
    {
        // NOTE(review): tt is presumably a thread-private copy of the tip data when running multithreaded and t
        // itself otherwise -- confirm against gwy_omp_if_threads_dup().
        gdouble *tt = gwy_omp_if_threads_dup(t, tyres*txres);
        // Scratch buffer for improve_at_single_location(), one per thread.
        GridPoint *goodpixels = g_new(GridPoint, txres*tyres);
        // ni is the number of image rows at which the tip can be placed.
        gint i, ni = yres+1 - tyres;

        /* Acquire a unique image row number to process. Rows are essentially tasks, but with all the allocation and
         * cancellation around it seems simpler to just express it explicitly.  We need to split the work with row
         * granularity because different blocks take very different time depending on if anything useful is found. */
        while ((i = gwy_omp_atomic_increment_int(&next_row)) < ni) {
            // Convert the task index to the image row; rows run from tyres-1 - yc to yres-1 - yc, columns below
            // analogously from txres-1 - xc to xres-1 - xc.
            gint row = i + tyres-1 - yc;
            const gdouble *drow = d + row*xres, *orow = odata + row*xres;
            for (gint col = txres-1 - xc; col <= xres-1 - xc; col++) {
                // Only locations where the image sticks out above its opening by more than the threshold are
                // examined.
                if (drow[col] - orow[col] > threshold) {
                    if (improve_at_single_location(d, xres, yres, col, row, tt, txres, tyres, xc, yc,
                                                   threshold, use_edges, goodpixels))
                        improved++;
                }
            }
            // Report progress after each row and stop taking further rows once cancellation is flagged.
            if (gwy_omp_set_fraction_check_cancel(set_fraction, i, 0, ni, pcancelled))
                break;
        }
        // NOTE(review): presumably merges the thread's tip copy into t by element-wise minimum and releases the copy
        // -- confirm against gwy_omp_if_threads_min_double().
        gwy_omp_if_threads_min_double(t, tt, tyres*txres);
        g_free(goodpixels);
    }

    if (cancelled)
        return -1;
    return improved;
}

/**
 * gwy_field_tip_estimate_full:
 * @field: A data field with surface topography.
 * @tip: Tip data field to be refined. Its pixel size must match the surface pixel size.
 * @threshold: Threshold for noise suppression.
 * @use_edges: Whether to also use the edges of the image.
 * @set_fraction: (scope call) (nullable): Function that sets fraction to output (or %NULL).
 * @set_message: (scope call) (nullable): Function that sets message to output (or %NULL).
 *
 * Performs full blind tip shape estimation with a data field.
 *
 * The function implements the blind estimation algorithm published by Villarrubia.
 *
 * The tip field must already have the intended pixel dimensions. It may be zero-filled for a fresh estimate or it can
 * be filled with the result of a prior estimate, such as using gwy_field_tip_estimate_partial(). In the latter case,
 * the initialisation must follow the same convention as the output, i.e. the apex value is zero and all other values
 * are smaller (negative).
 *
 * Note that the threshold value must be chosen sufficiently high to suppress small fluctuations due to noise (that
 * would lead to a very sharp tip) but sufficiently low to put the algorithm at work. A value similar to 1/10000 of
 * the surface range can be good. Otherwise we recommend to start with zero threshold and increase it slowly to
 * observe the changes and choose the right value.
 *
 * If the operation is cancelled the contents of @tip is a partial estimate (including possibly the tip still being in
 * the initial state). Cancellation can only occur if non-%NULL @set_fraction or @set_message is passed.
 *
 * Returns: A positive number if the operation finished and tip was improved, zero if the operation finished without
 *          improving the tip, and a negative number if the operation was cancelled.
 **/
gint
gwy_field_tip_estimate_full(GwyField *field,
                            GwyField *tip,
                            gdouble threshold,
                            gboolean use_edges,
                            GwySetFractionFunc set_fraction,
                            GwySetMessageFunc set_message)
{
    g_return_val_if_fail(GWY_IS_FIELD(field), -1);
    g_return_val_if_fail(GWY_IS_FIELD(tip), -1);

    gint txres = tip->xres, tyres = tip->yres;
    gint xc = txres/2, yc = tyres/2;
    gint iter = 1, improved = 0;
    // Work fields: the tip before the current pass, the flipped tip, and two image-sized buffers which
    // improve_full() uses for the eroded and opened image. No pixel scratch buffer is needed here because
    // improve_full() allocates its own.
    GwyField *prevtip = gwy_field_new_alike(tip, FALSE);
    GwyField *invtip = gwy_field_new_alike(tip, FALSE);
    GwyField *buf1 = gwy_field_new_alike(field, FALSE);
    GwyField *buf2 = gwy_field_new_alike(field, FALSE);

    // Repeat refinement passes for as long as check_improvement() reports the tip still changed.
    do {
        gwy_field_copy_data(tip, prevtip);
        gint status = improve_full(field, buf1, buf2, tip, invtip, xc, yc, threshold, use_edges,
                                   iter, set_message, set_fraction);
        // A negative status means the pass was cancelled; stop immediately and report it.
        if (status < 0) {
            improved = -1;
            break;
        }
        improved += status;
        iter++;
    } while (check_improvement(tip, prevtip, 1e-9));

    g_object_unref(invtip);
    g_object_unref(prevtip);
    g_object_unref(buf1);
    g_object_unref(buf2);

    return improved;
}

/**
 * SECTION: tip
 * @title: Tip
 * @short_description: SPM tip morphological operations
 *
 * The functions implement the algorithms published by Villarrubia: convolution, reconstruction, certainty map and
 * blind tip shape estimations.
 *
 * In tip data fields, the sign convention is that the tip points upwards in Z. In other words, the apex corresponds
 * to the largest value. This does not mean it is the shape you would see when looking at the probe from the apex, for
 * example in a SEM micrograph. It is mirrored. The actual shape would be obtained by changing the sign of Z (making
 * the tip image look like a pit instead of peak) and rotating it in 3D in some way to look from the apex.
 *
 * The parity of symmetries in 3D space does not allow one to simultaneously (a) have the apex pointing up (positive
 * Z), (b) avoid mirroring the tip and (c) avoid choosing a preferred direction in the XY plane (for the rotation to
 * look at the tip from the other side). Gwyddion chose to sacrifice (b) and all the operations work consistently with
 * this choice.
 **/

/* vim: set cin columns=120 tw=118 et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
