gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 63b4edd 070/113: Imported work in master, conf


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 63b4edd 070/113: Imported work in master, conflicts fixed
Date: Fri, 16 Apr 2021 10:33:49 -0400 (EDT)

branch: master
commit 63b4edd0ef02bedbbfe626643ff01495b696bf1e
Merge: ee4bf1e 782217a
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Imported work in master, conflicts fixed
    
    A few conflicts with the new `gal_label_clump_significance' function in 3D
    were corrected.
---
 NEWS                  |   1 +
 bin/segment/clumps.c  | 296 ++-----------------------------------
 bin/segment/main.h    |   1 +
 bin/segment/segment.c |  22 ++-
 bootstrap.conf        |   1 +
 doc/Makefile.am       |   4 +-
 doc/gnuastro.texi     | 110 ++++++++++++--
 lib/gnuastro/label.h  |   9 ++
 lib/label.c           | 400 ++++++++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 543 insertions(+), 301 deletions(-)

diff --git a/NEWS b/NEWS
index fafe7df..b1b6b06 100644
--- a/NEWS
+++ b/NEWS
@@ -88,6 +88,7 @@ GNU Astronomy Utilities NEWS                          -*- 
outline -*-
     gal_jpeg_write: Writes a `gal_data_t' into a JPEG file.
     gal_label_grow_indexs: grow known indexs into desired areas.
     gal_label_oversegment: apply over-segmentation to an input dataset.
+    gal_label_clump_significance: measure significance of all clumps in region.
     gal_pdf_name_is_pdf: Returns 1 if given filename is PDF.
     gal_pdf_suffix_is_pdf: Returns 1 if given suffix is PDF.
     gal_pdf_write: Writes a dataset into an PDF file.
diff --git a/bin/segment/clumps.c b/bin/segment/clumps.c
index 56d32c8..0a67803 100644
--- a/bin/segment/clumps.c
+++ b/bin/segment/clumps.c
@@ -219,287 +219,6 @@ clumps_grow_prepare_final(struct clumps_thread_params 
*cltprm)
 /**********************************************************************/
 /*****************             S/N threshold          *****************/
 /**********************************************************************/
-/* In this function we want to find the general information for each clump
-   in an over-segmented labeled array. The signal in each clump is the
-   average signal inside it subtracted by the average signal in the river
-   pixels around it. So this function will go over all the pixels in the
-   object (already found in deblendclumps()) and add them appropriately.
-
-   The output is an array of size cltprm->numinitial*INFO_NCOLS. as listed
-   below.*/
-enum infocols
-  {
-    INFO_X,              /* Flux weighted X center col, 0 by C std.       */
-    INFO_Y,              /* Flux weighted Y center col.                   */
-    INFO_Z,              /* Flux weighted Z center col.                   */
-    INFO_SFF,            /* Sum of non-negative pixels (for X,Y).         */
-    INFO_INSTD,          /* Standard deviation at clump center.           */
-    INFO_INAREA,         /* Tatal area within clump.                      */
-    INFO_RIVAREA,        /* Tatal area within rivers around clump.        */
-    INFO_PEAK_RIVER,     /* Peak (min or max) river value around a clump. */
-    INFO_PEAK_CENTER,    /* Peak (min or max) clump value.                */
-
-    INFO_NCOLS,          /* Total number of columns in the `info' table.  */
-  };
-static void
-clumps_get_raw_info(struct clumps_thread_params *cltprm)
-{
-  struct segmentparams *p=cltprm->clprm->p;
-  size_t ndim=p->input->ndim, *dsize=p->input->dsize;
-
-  size_t i, *a, *af, ii, coord[3];
-  double *row, *info=cltprm->info->array;
-  size_t nngb=gal_dimension_num_neighbors(ndim);
-  struct gal_tile_two_layer_params *tl=&p->cp.tl;
-  float *values=p->conv->array, *std=p->std->array;
-  size_t *dinc=gal_dimension_increment(ndim, dsize);
-  int32_t lab, nlab, *ngblabs, *clabel=p->clabel->array;
-
-  /* Allocate the array to keep the neighbor labels of river pixels. */
-  ngblabs=gal_pointer_allocate(GAL_TYPE_INT32, nngb, 0, __func__, "ngblabs");
-
-  /* Go over all the pixels in this region. */
-  af=(a=cltprm->indexs->array)+cltprm->indexs->size;
-  do
-    if( !isnan(values[ *a ]) )
-      {
-        /* This pixel belongs to a clump. */
-        if( clabel[ *a ]>0 )
-          {
-            /* For easy reading. */
-            row = &info [ clabel[*a] * INFO_NCOLS ];
-
-            /* Get the area and flux. */
-            ++row[ INFO_INAREA ];
-            if( values[*a]>0.0f )
-              {
-                gal_dimension_index_to_coord(*a, ndim, dsize, coord);
-                row[   INFO_SFF ] += values[*a];
-                row[   INFO_X   ] += values[*a] * coord[0];
-                row[   INFO_Y   ] += values[*a] * coord[1];
-                if(ndim==3)
-                  row[ INFO_Z   ] += values[*a] * coord[2];
-              }
-
-            /* In the loop `INFO_INAREA' is just the pixel counter of this
-               clump. The pixels are sorted by flux (decreasing for
-               positive clumps and increasing for negative). So the second
-               extremum value is just the second pixel of the clump. */
-            if( row[ INFO_INAREA ]==1.0f )
-              row[ INFO_PEAK_CENTER ] = values[*a];
-          }
-
-        /* This pixel belongs to a river (has a value of zero and isn't
-           blank). */
-        else
-          {
-            /* We are on a river pixel. So the value of this pixel has to
-               be added to any of the clumps in touches. But since it might
-               touch a labeled region more than once, we use `ngblabs' to
-               keep track of which label we have already added its value
-               to. `ii` is the number of different labels this river pixel
-               has already been considered for. `ngblabs' will keep the list
-               labels. */
-            ii=0;
-            memset(ngblabs, 0, nngb*sizeof *ngblabs);
-
-            /* Look into the 8-connected neighbors (recall that a
-               connectivity of `ndim' means all pixels touching it (even on
-               one vertice). */
-            GAL_DIMENSION_NEIGHBOR_OP(*a, ndim, dsize, ndim, dinc, {
-                /* This neighbor's label. */
-                nlab=clabel[ nind ];
-
-                /* We only want those neighbors that are not rivers (>0) or
-                   any of the flag values. */
-                if(nlab>0)
-                  {
-                    /* Go over all already checked labels and make sure
-                       this clump hasn't already been considered. */
-                    for(i=0;i<ii;++i) if(ngblabs[i]==nlab) break;
-
-                    /* This neighbor clump hasn't been considered yet: */
-                    if(i==ii)
-                      {
-                        ngblabs[ii++] = nlab;
-                        row = &info[ nlab * INFO_NCOLS ];
-
-                        ++row[INFO_RIVAREA];
-                        if( row[INFO_RIVAREA]==1.0f )
-                          row[INFO_PEAK_RIVER] = values[*a];
-                      }
-                  }
-              } );
-          }
-      }
-  while(++a<af);
-
-  /* Based on the position of each clump, find a representative standard
-     deviation. */
-  for(lab=1; lab<=cltprm->numinitclumps; ++lab)
-    {
-      /* To help in reading. */
-      row = &info [ lab * INFO_NCOLS ];
-
-      /* The calculations are only necessary for the clumps that satisfy
-         the minimum area. There is no need to waste time on the smaller
-         ones. */
-      if ( row[INFO_INAREA] > p->snminarea )
-        {
-          /* It might happen that none of the pixels were positive
-             (especially over the undetected regions). In that case, set
-             the total area of the clump to zero so it is no longer
-             considered.*/
-          if( row[INFO_SFF]==0.0f )
-            row[INFO_INAREA]=0;
-          else
-            {
-              /* Find the coordinates of the clump's weighted center. */
-              coord[0]=GAL_DIMENSION_FLT_TO_INT(row[INFO_X]/row[INFO_SFF]);
-              coord[1]=GAL_DIMENSION_FLT_TO_INT(row[INFO_Y]/row[INFO_SFF]);
-              if(ndim==3)
-                coord[2]=GAL_DIMENSION_FLT_TO_INT(row[INFO_Z]/row[INFO_SFF]);
-
-              /* Find the corresponding standard deviation. */
-              row[INFO_INSTD]=( p->std->size>1
-                                ? ( p->std->size==p->input->size
-                                    ? std[gal_dimension_coord_to_index(ndim,
-                                                               dsize, coord)]
-                                    : std[gal_tile_full_id_from_coord(tl,
-                                                                    coord)] )
-                                : std[0] );
-              if(p->variance) row[INFO_INSTD] = sqrt(row[INFO_INSTD]);
-
-              /* For a check
-              printf("---------\n");
-              printf("\t%f --> %zu\n", row[INFO_Y]/row[INFO_SFF], coord[1]);
-              printf("\t%f --> %zu\n", row[INFO_X]/row[INFO_SFF], coord[0]);
-              printf("%u: (%zu, %zu): %.3f\n", lab, coord[1]+1,
-                     coord[0]+1, row[INFO_INSTD]);
-              */
-            }
-        }
-    }
-
-  /* Clean up. */
-  free(dinc);
-  free(ngblabs);
-}
-
-
-
-
-/* Make an S/N table for the clumps in a given region. */
-void
-clumps_make_sn_table(struct clumps_thread_params *cltprm)
-{
-  struct segmentparams *p=cltprm->clprm->p;
-  size_t tablen=cltprm->numinitclumps+1;
-
-  float *snarr;
-  int32_t *indarr=NULL;
-  double C, R, std, Ni, *row;
-  int sky0_det1=cltprm->clprm->sky0_det1;
-  size_t i, ind, counter=0, infodsize[2]={tablen, INFO_NCOLS};
-
-  /* If there were no initial clumps, then ignore this function. */
-  if(cltprm->numinitclumps==0) { cltprm->snind=cltprm->sn=NULL; return; }
-
-
-  /* Allocate the arrays to keep the final S/N table (and possibly S/N
-     index) for this object or tile. */
-  cltprm->sn        = &cltprm->clprm->sn[ cltprm->id ];
-  cltprm->sn->ndim  = 1;                        /* Depends on `cltprm->sn' */
-  cltprm->sn->type  = GAL_TYPE_FLOAT32;
-  cltprm->sn->dsize = gal_pointer_allocate(GAL_TYPE_SIZE_T, 1, 0, __func__,
-                                           "cltprm->sn->dsize");
-  cltprm->sn->array = gal_pointer_allocate(cltprm->sn->type, tablen, 0,
-                                           __func__, "cltprm->sn->array");
-  cltprm->sn->size  = cltprm->sn->dsize[0] = tablen;       /* After dsize. */
-  if( cltprm->clprm->snind )
-    {
-      cltprm->snind        = &cltprm->clprm->snind [ cltprm->id ];
-      cltprm->snind->ndim  = 1;              /* Depends on `cltprm->snind' */
-      cltprm->snind->type  = GAL_TYPE_INT32;
-      cltprm->snind->dsize = gal_pointer_allocate(GAL_TYPE_SIZE_T, 1, 0,
-                                                  __func__,
-                                                  "cltprm->snind->dsize");
-      cltprm->snind->size  = cltprm->snind->dsize[0]=tablen;/* After dsize */
-      cltprm->snind->array = gal_pointer_allocate(cltprm->snind->type,
-                                                   tablen, 0, __func__,
-                                                   "cltprm->snind->array");
-    }
-  else cltprm->snind=NULL;
-
-
-  /* Allocate the array to keep the raw information of each clump. Note the
-     `+1' in `infodsize', this is because the labels begin with 1 and we
-     want each label to have one row on the same label.*/
-  cltprm->info=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 2, infodsize,
-                              NULL, 1, p->cp.minmapsize, NULL, NULL, NULL);
-
-
-  /* First get the raw information necessary for making the S/N table. */
-  clumps_get_raw_info(cltprm);
-
-
-  /* Calculate the signal to noise ratio for successful clumps. */
-  snarr=cltprm->sn->array;
-  if(cltprm->snind) indarr=cltprm->snind->array;
-  for(i=1;i<tablen;++i)
-    {
-      /* For readability. */
-      row = &( ((double *)(cltprm->info->array))[ i * INFO_NCOLS ] );
-      Ni  = row[ INFO_INAREA      ];
-      R   = row[ INFO_PEAK_RIVER  ];
-      C   = row[ INFO_PEAK_CENTER ];
-
-
-      /* If the inner flux is smaller than the outer flux (happens only in
-         noise cases) or the area is smaller than the minimum area to
-         calculate signal-to-noise, then set the S/N of this segment to
-         zero. */
-      if( Ni>p->snminarea )
-        {
-          /* Calculate the Signal to noise ratio, if we are on the noise
-             regions, we don't care about the IDs of the clumps anymore, so
-             store the Signal to noise ratios contiguously (for easy
-             sorting and etc). Note that counter will always be smaller and
-             equal to i. */
-          std=row[INFO_INSTD];
-          ind = sky0_det1 ? i : counter++;
-          if(cltprm->snind) indarr[ind]=i;
-          snarr[ind] = ( p->minima ? R-C : C-R ) / std;
-        }
-      else
-        {
-          /* Only over detections, we should put a NaN when the S/N isn't
-             calculated.  */
-          if(sky0_det1)
-            {
-              snarr[i]=NAN;
-              if(cltprm->snind) indarr[i]=i;
-            }
-        }
-    }
-
-
-  /* If we are in Sky mode, the sizes have to be corrected */
-  if(sky0_det1==0)
-    {
-      cltprm->sn->dsize[0] = cltprm->sn->size = counter;
-      if(cltprm->snind) cltprm->snind->dsize[0] = cltprm->snind->size=counter;
-    }
-
-
-  /* Clean up. */
-  gal_data_free(cltprm->info);
-}
-
-
-
-
-
 /* Correct the labels of the clumps that will be used in determining the
    S/N threshold for true clumps.   */
 static void
@@ -743,7 +462,20 @@ clumps_find_make_sn_table(void *in_prm)
 
 
           /* Make the clump S/N table. */
-          clumps_make_sn_table(&cltprm);
+          cltprm.sn    = &cltprm.clprm->sn[cltprm.id];
+          cltprm.snind = ( cltprm.clprm->snind
+                           ? &cltprm.clprm->snind[cltprm.id]
+                           : NULL );
+          gal_label_clump_significance(p->clumpvals, p->std, p->clabel,
+                                       cltprm.indexs, &p->cp.tl,
+                                       cltprm.numinitclumps, p->snminarea,
+                                       p->variance, clprm->sky0_det1,
+                                       cltprm.sn, cltprm.snind);
+
+
+          /* If there were no clumps, then just set the S/N table to NULL. */
+          if( cltprm.clprm->sn[ cltprm.id ].size==0 )
+            cltprm.snind=cltprm.sn=NULL;
 
 
           /* If the user wanted to check the steps, remove the clumps that
diff --git a/bin/segment/main.h b/bin/segment/main.h
index e632ae5..3a72b3b 100644
--- a/bin/segment/main.h
+++ b/bin/segment/main.h
@@ -90,6 +90,7 @@ struct segmentparams
   gal_data_t          *olabel;  /* Object labels.                         */
   gal_data_t          *clabel;  /* Clumps labels.                         */
   gal_data_t             *std;  /* STD of undetected pixels, per tile.    */
+  gal_data_t       *clumpvals;  /* Values to build clumps (avoid bugs).   */
 
   float               cpscorr;  /* Counts/second correction.              */
   size_t        numdetections;  /* Number of final detections.            */
diff --git a/bin/segment/segment.c b/bin/segment/segment.c
index 8dca3ec..a456c1c 100644
--- a/bin/segment/segment.c
+++ b/bin/segment/segment.c
@@ -95,6 +95,11 @@ segment_convolve(struct segmentparams *p)
       if(p->conv->name) free(p->conv->name);
       gal_checkset_allocate_copy("CONVOLVED", &p->conv->name);
     }
+
+  /* Set the values to build clumps on. We are mainly doing this to avoid
+     the accidentially using different arrays when building clumps on the
+     undetected and detected regions. */
+  p->clumpvals=p->conv;
 }
 
 
@@ -582,7 +587,22 @@ segment_on_threads(void *in_prm)
          the user wants to inspect the steps, this function is called
          multiple times. So we need to avoid over-writing the allocations. */
       if( clprm->sn[ cltprm.id ].dsize==NULL )
-        clumps_make_sn_table(&cltprm);
+        {
+          /* Calculate the S/N table. */
+          cltprm.sn    = &cltprm.clprm->sn[ cltprm.id ];
+          cltprm.snind = ( cltprm.clprm->snind
+                           ? &cltprm.clprm->snind[ cltprm.id ]
+                           : NULL );
+          gal_label_clump_significance(p->clumpvals, p->std, p->clabel,
+                                       cltprm.indexs, &p->cp.tl,
+                                       cltprm.numinitclumps, p->snminarea,
+                                       p->variance, clprm->sky0_det1,
+                                       cltprm.sn, cltprm.snind);
+
+          /* If it didn't succeed, then just set the S/N table to NULL. */
+          if( cltprm.clprm->sn[ cltprm.id ].size==0 )
+            cltprm.snind=cltprm.sn=NULL;
+        }
       else cltprm.sn=&clprm->sn[ cltprm.id ];
 
 
diff --git a/bootstrap.conf b/bootstrap.conf
index ff1cecd..dc8120b 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -223,6 +223,7 @@ gnulib_modules="
     getline
     strcase
     gendocs
+    gpl-3.0
     mbstok_r
     inttypes
     git-version-gen
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 5b496a5..7174cb9 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -47,8 +47,8 @@ TEXI2DVI = texi2dvi -I $(bootstrpdoc) -I $(srcdir)
 
 ## Commands to make the texinfo tools.
 info_TEXINFOS = gnuastro.texi
-gnuastro_TEXINFOS = $(bootstrpdoc)/fdl.texi formath.texi       \
-                    $(srcdir)/authors.texi
+gnuastro_TEXINFOS = $(bootstrpdoc)/fdl.texi $(bootstrpdoc)/gpl-3.0.texi \
+                    $(srcdir)/authors.texi formath.texi
 
 
 ## Files not predefined by Automake, and not in dependencies that must
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 87ca66c..bcb3825 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -207,7 +207,8 @@ sub-component to a title is present.
 * Developing::                  The development environment.
 * Gnuastro programs list::      List and short summary of Gnuastro.
 * Other useful software::       Installing other useful software.
-* GNU Free Doc. License::       GNU Free documentation license.
+* GNU Free Doc. License::       Full text of the GNU Free documentation 
license.
+* GNU General Public License::  Full text of the GNU General public license.
 * Index::                       Index of terms
 
 @detailmenu
@@ -1086,12 +1087,11 @@ reputation.
 
 @cindex GNU General Public License
 @cindex GNU Free Documentation License
-The precise conditions of the licenses for the programs currently
-being distributed that relate to Gnuastro are found in the
-@url{http://www.gnu.org/copyleft/gpl.html, GNU General Public license}
-that accompany them.  This book is covered by the
-@url{http://www.gnu.org/copyleft/fdl.html, GNU Free Documentation
-License}.
+The full text of the licenses for the Gnuastro book and software can be
+respectively found in @ref{GNU General Public License}@footnote{Also
+available in @url{http://www.gnu.org/copyleft/gpl.html}} and @ref{GNU Free
+Doc. License}@footnote{Also available in
+@url{http://www.gnu.org/copyleft/fdl.html}}.
 
 
 
@@ -5087,12 +5087,14 @@ confusion (since they are not changed by Gnuastro 
developers).
 The process of automatically building and importing necessary files into
 the cloned directory is known as @emph{bootstrapping}. All the instructions
 for an automatic bootstrapping are available in @file{bootstrap} and
-configured using @file{bootstrap.conf}. @file{bootstrap} is the only file
-not written by Gnuastro developers but is under version control to enable
-simple bootstrapping immediately after cloning. It is maintained by the GNU
-Portability Library (Gnulib) and this file is an identical copy, so do not
-make any changes in this file since it will be replaced when Gnulib
-releases an update. Make all your changes in @file{bootstrap.conf}.
+configured using @file{bootstrap.conf}. @file{bootstrap} and @file{COPYING}
+(which contains the software copyright notice) are the only files not
+written by Gnuastro developers but under version control to enable simple
+bootstrapping and legal information on usage immediately after
+cloning. @file{bootstrap.conf} is maintained by the GNU Portability Library
+(Gnulib) and this file is an identical copy, so do not make any changes in
+this file since it will be replaced when Gnulib releases an update. Make
+all your changes in @file{bootstrap.conf}.
 
 The bootstrapping process has its own separate set of dependencies, the
 full list is given in @ref{Bootstrapping dependencies}. They are generally
@@ -26755,7 +26757,7 @@ minimum, based on @code{min0_max1}) and its surrounding 
pixels will be
 given a unique label. For demonstration, see Figures 8 and 9 of
 @url{http://arxiv.org/abs/1505.01664, Akhlaghi and Ichikawa [2015]}. If
 @code{topinds!=NULL}, it is assumed to point to an already allocated space
-to write the indexs of the local extrema, otherwise, it is ignored.
+to write the index of each clump's local extrema, otherwise, it is ignored.
 
 The @code{values} dataset must have a 32-bit floating point type
 (@code{GAL_TYPE_FLOAT32}, see @ref{Library data types}) and will only be
@@ -26802,6 +26804,71 @@ input->flags &= ~GAL_DATA_FLAG_HASBLANK; /* Set bit to 
0. */
 @end example
 @end deftypefun
 
+@deftypefun void gal_label_clump_significance (gal_data_t @code{*values}, 
gal_data_t @code{*std}, gal_data_t @code{*label}, gal_data_t @code{*indexs}, 
struct gal_tile_two_layer_params @code{*tl}, size_t @code{numclumps}, size_t 
@code{minarea}, int @code{variance}, int @code{keepsmall}, gal_data_t 
@code{*sig}, gal_data_t @code{*sigind})
+@cindex Clump
+This function is usually called after @code{gal_label_oversegment}, and is
+used as a measure to idenfity which over-segmented ``clumps'' are real and
+which are noise.
+
+A measurement is done on each clump (using the @code{values} and @code{std}
+datasets, see below). To help in multi-threaded environments, the operation
+is only done on pixels which are indexed in @code{indexs}. It is expected
+for @code{indexs} to be sorted by their values in @code{values}. If not
+sorted, the measurement may not be reliable. If sorted in a decreasing
+order, then clump building will start from their highest value and
+vice-versa. See the description of @code{gal_label_oversegment} for more on
+@code{indexs}.
+
+Each ``clump'' (identified by a positive integer) is assumed to be
+surrounded by atleast one river/watershed pixel (with a non-positive
+label). This function will parse the pixels identified in @code{indexs} and
+make a measurement on each clump and over all the river/watershed
+pixels. The number of clumps (@code{numclumps}) must be given as an input
+argument and any clump that is smaller than @code{minarea} is ignored
+(because of scatter). If @code{variance} is non-zero, then the @code{std}
+dataset is interpretted as variance, not standard deviation.
+
+The @code{values} and @code{std} datasets must have a @code{float} (32-bit
+floating point) type. Also, @code{label} and @code{indexs} must
+respectively have @code{int32} and @code{size_t} types. @code{values} and
+@code{label} must have the same size, but @code{std} can have three
+possible sizes: 1) a single element (which will be used for the whole
+dataset, 2) the same size as @code{values} (so a different error can be
+assigned to every pixel), 3) a single value for each tile, based on the
+@code{tl} tessellation (see @ref{Tile grid}). In the last case, a
+tile/value will be associated to each clump based on its flux-weighted
+(only positive values) center.
+
+The main output is an internally allocated, 1-dimensional array with one
+value per label. The array information (length, type and etc) will be
+written into the @code{sig} generic data container. Therefore
+@code{sig->array} must be @code{NULL} when this function is called. After
+this function, the details of the array (number of elements, type and size
+and etc) will be written in to the various components of @code{sig}, see
+the definition of @code{gal_data_t} in @ref{Generic data
+container}. Therefore @code{sig} must already be allocated before calling
+this function.
+
+Optionally (when @code{sigind!=NULL}, similar to @code{sig}) the clump
+labels of each measurement in @code{sig} will be written in
+@code{sigind->array}. If @code{keepsmall} zero, small clumps (where no
+measurement is made) will not be included in the output table.
+
+This function is initially intended for a multi-threaded environment. In
+such cases, you will be writing arrays of clump measures from different
+regions in parallel into an array of @code{gal_data_t}s. You can simply
+allocate (and initialize), such an array with the
+@code{gal_data_array_calloc} function in @ref{Arrays of datasets}. For
+example if the @code{gal_data_t} array is called @code{array}, you can pass
+@code{&array[i]} as @code{sig}.
+
+Along with some other functions in @code{label.h}, this function was
+initially written for @ref{Segment}. The description of the parameter used
+to measure a clump's significance is fully given in @ref{Segment changes
+after publication}.
+
+@end deftypefun
+
 @deftypefun void gal_label_grow_indexs (gal_data_t @code{*labels}, gal_data_t 
@code{*indexs}, int @code{withrivers}, int @code{connectivity})
 Grow the (positive) labels of @code{labels} over the pixels in
 @code{indexs} (see description of @code{gal_label_indexs}). The pixels
@@ -29551,7 +29618,7 @@ $ ./pgdemoXX
 
 
 
-@node GNU Free Doc. License, Index, Other useful software, Top
+@node GNU Free Doc. License, GNU General Public License, Other useful 
software, Top
 @appendix GNU Free Doc. License
 
 @cindex GNU Free Documentation License
@@ -29559,10 +29626,21 @@ $ ./pgdemoXX
 
 
 
+@node GNU General Public License, Index, GNU Free Doc. License, Top
+@appendix GNU Gen. Pub. License v3
+
+@cindex GPL
+@cindex GNU GPL
+@cindex GNU General public license
+@include gpl-3.0.texi
+
+
+
+
 
 
 @c Print the index and finish:
-@node Index,  , GNU Free Doc. License, Top
+@node Index,  , GNU General Public License, Top
 @unnumbered Index: Macros, structures and functions
 All Gnuastro library's exported macros start with @code{GAL_}, and its
 exported structures and functions start with @code{gal_}. This abbreviation
diff --git a/lib/gnuastro/label.h b/lib/gnuastro/label.h
index 679202b..1ee91de 100644
--- a/lib/gnuastro/label.h
+++ b/lib/gnuastro/label.h
@@ -26,6 +26,7 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 /* Include other headers if necessary here. Note that other header files
    must be included before the C++ preparations below */
 #include <gnuastro/data.h>
+#include <gnuastro/tile.h>
 
 /* C++ Preparations */
 #undef __BEGIN_C_DECLS
@@ -66,6 +67,14 @@ gal_label_oversegment(gal_data_t *values, gal_data_t *indexs,
                       gal_data_t *label, size_t *topinds, int min0_max1);
 
 void
+gal_label_clump_significance(gal_data_t *values, gal_data_t *std,
+                             gal_data_t *label, gal_data_t *indexs,
+                             struct gal_tile_two_layer_params *tl,
+                             size_t numclumps, size_t minarea, int variance,
+                             int keepsmall, gal_data_t *sig,
+                             gal_data_t *sigind);
+
+void
 gal_label_grow_indexs(gal_data_t *labels, gal_data_t *indexs, int withrivers,
                       int connectivity);
 
diff --git a/lib/label.c b/lib/label.c
index 4fd8e8d..876f1ec 100644
--- a/lib/label.c
+++ b/lib/label.c
@@ -629,3 +629,403 @@ gal_label_grow_indexs(gal_data_t *labels, gal_data_t 
*indexs, int withrivers,
   /* Clean up. */
   free(dinc);
 }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/**********************************************************************/
+/*************            Clump peak intensity            *************/
+/**********************************************************************/
+static int
+label_clump_significance_sanity(gal_data_t *values, gal_data_t *std,
+                                gal_data_t *label, gal_data_t *indexs,
+                                struct gal_tile_two_layer_params *tl,
+                                gal_data_t *sig, const char *func)
+{
+  size_t *a, *af;
+  float first=NAN, second=NAN, *f=values->array;
+
+  /* Type of values. */
+  if( values->type!=GAL_TYPE_FLOAT32 )
+    error(EXIT_FAILURE, 0, "%s: the values dataset must have a `float' "
+          "type, but it has a `%s' type", func,
+          gal_type_name(values->type, 1));
+
+  /* Type of standard deviation. */
+  if( std->type!=GAL_TYPE_FLOAT32 )
+    error(EXIT_FAILURE, 0, "%s: the standard deviation dataset must have a "
+          "`float' (`float32') type, but it has a `%s' type", func,
+          gal_type_name(std->type, 1));
+
+  /* Type of labels image. */
+  if( label->type!=GAL_TYPE_INT32 )
+    error(EXIT_FAILURE, 0, "%s: the labels dataset must have an `int32' "
+          "type, but it has a `%s' type", func,
+          gal_type_name(label->type, 1));
+
+  /* Dimentionality of the values dataset. */
+  if( values->ndim>3 )
+    error(EXIT_FAILURE, 0, "%s: currently only supports 1, 2 or 3 "
+          "dimensional datasets, but a %zu-dimensional dataset is given",
+          func, values->ndim);
+
+  /* Type of indexs image. */
+  if( indexs->type!=GAL_TYPE_SIZE_T )
+    error(EXIT_FAILURE, 0, "%s: the indexs dataset must have a `size_t' "
+          "type, but it has a `%s' type", func,
+          gal_type_name(label->type, 1));
+
+  /* Dimensionality of indexs (must be 1D). */
+  if( indexs->ndim!=1 )
+    error(EXIT_FAILURE, 0, "%s: the indexs dataset must be a 1D dataset, "
+          "but it has %zu dimensions", func, indexs->ndim);
+
+  /* Similar sizes between values and standard deviation. */
+  if( gal_dimension_is_different(values, label) )
+    error(EXIT_FAILURE, 0, "%s: the values and label arrays don't have the "
+          "same size.", func);
+
+  /* Size of the standard deviation. */
+  if( !( std->size==1
+         || std->size==values->size
+         || (tl && std->size==tl->tottiles) ) )
+    error(EXIT_FAILURE, 0, "%s: the standard deviation dataset has %zu "
+          "elements. But it can only have one of these sizes: 1) a "
+          "single value (used for the whole dataset), 2) The size of "
+          "the values dataset (%zu elements, one value for each "
+          "element), 3) The size of the number of tiles in the input "
+          "tessellation (when a tessellation is given)",
+          func, std->size, values->size);
+
+  /* If the `array' and `dsize' elements of `sig' have already been set. */
+  if(sig->array)
+    error(EXIT_FAILURE, 0, "%s: the dataset that will contain the "
+          "significance values must have NULL pointers for its `array' "
+          "and `dsize' pointers (they will be allocated here)", func);
+
+  /* See if the clumps are to be build starting from local maxima or local
+     minima. */
+  af=(a=indexs->array)+indexs->size;
+  do
+    /* A label may have NAN values. */
+    if( !isnan(f[*a]) )
+      {
+        if( isnan(first) )
+          first=f[*a];
+        else
+          {
+            /* Note that the elements may have equal values, so for
+               `second', we want the first non-blank AND different
+               value. */
+            if( isnan(second) && f[*a]!=first )
+              second=f[*a];
+            else
+              break;
+          }
+      }
+  while(++a<af);
+
+  /* Note that if all the values are blank or there is only one value
+     covered by all the indexs, then both (or one) of `first' or `second'
+     will be NAN. In either case, the significance measure is not going to
+     be meaningful if we assume the clumps start from the maxima or
+     minima. So we won't check if they are NaN or not.*/
+  return first>second ? 1 : 0;
+}
+
+
+
+
+
+/* In this function we want to find the general information for each clump
+   in an over-segmented labeled array. The signal in each clump is the
+   average signal inside it subtracted by the average signal in the river
+   pixels around it. So this function will go over all the pixels in the
+   object (already found in deblendclumps()) and add them appropriately.
+
+   The output is an array of size cltprm->numinitial*INFO_NCOLS. as listed
+   below.*/
+enum infocols
+  {
+    INFO_X,              /* Flux weighted X center col, 0 by C std.       */
+    INFO_Y,              /* Flux weighted Y center col.                   */
+    INFO_Z,              /* Flux weighted Z center col.                   */
+    INFO_SFF,            /* Sum of non-negative pixels (for X,Y).         */
+    INFO_INSTD,          /* Standard deviation at clump center.           */
+    INFO_INAREA,         /* Tatal area within clump.                      */
+    INFO_RIVAREA,        /* Tatal area within rivers around clump.        */
+    INFO_PEAK_RIVER,     /* Peak (min or max) river value around a clump. */
+    INFO_PEAK_CENTER,    /* Peak (min or max) clump value.                */
+
+    INFO_NCOLS,          /* Total number of columns in the `info' table.  */
+  };
+static void
+label_clump_significance_raw(gal_data_t *values_d, gal_data_t *std_d,
+                             gal_data_t *label_d, gal_data_t *indexs,
+                             struct gal_tile_two_layer_params *tl,
+                             double *info, size_t numclumps, size_t minarea,
+                             int variance)
+{
+  size_t ndim=values_d->ndim, *dsize=values_d->dsize;
+
+  double *row;
+  size_t i, *a, *af, ii, coord[3];
+  size_t nngb=gal_dimension_num_neighbors(ndim);
+  float *values=values_d->array, *std=std_d->array;
+  size_t *dinc=gal_dimension_increment(ndim, dsize);
+  int32_t lab, nlab, *ngblabs, *label=label_d->array;
+
+  /* Allocate the array to keep the neighbor labels of river pixels. */
+  ngblabs=gal_pointer_allocate(GAL_TYPE_INT32, nngb, 0, __func__, "ngblabs");
+
+  /* Go over all the pixels in this region. */
+  af=(a=indexs->array)+indexs->size;
+  do
+    if( !isnan(values[ *a ]) )
+      {
+        /* This pixel belongs to a clump. */
+        if( label[ *a ]>0 )
+          {
+            /* For easy reading. */
+            row = &info [ label[*a] * INFO_NCOLS ];
+
+            /* Get the area and flux. */
+            ++row[ INFO_INAREA ];
+            if( values[*a]>0.0f )
+              {
+                gal_dimension_index_to_coord(*a, ndim, dsize, coord);
+                row[   INFO_SFF ] += values[*a];
+                row[   INFO_X   ] += values[*a] * coord[0];
+                row[   INFO_Y   ] += values[*a] * coord[1];
+                if(ndim==3)
+                  row[ INFO_Z   ] += values[*a] * coord[2];
+              }
+
+            /* In the loop `INFO_INAREA' is just the pixel counter of this
+               clump. The pixels are sorted by flux (decreasing for
+               positive clumps and increasing for negative). So the second
+               extremum value is just the second pixel of the clump. */
+            if( row[ INFO_INAREA ]==1.0f )
+              row[ INFO_PEAK_CENTER ] = values[*a];
+          }
+
+        /* This pixel belongs to a river (has a value of zero and isn't
+           blank). */
+        else
+          {
+            /* We are on a river pixel. So the value of this pixel has to
+               be added to any of the clumps in touches. But since it might
+               touch a labeled region more than once, we use `ngblabs' to
+               keep track of which label we have already added its value
+               to. `ii` is the number of different labels this river pixel
+               has already been considered for. `ngblabs' will keep the list
+               labels. */
+            ii=0;
+            memset(ngblabs, 0, nngb*sizeof *ngblabs);
+
+            /* Look into the 8-connected neighbors (recall that a
+               connectivity of `ndim' means all pixels touching it (even on
+               one vertice). */
+            GAL_DIMENSION_NEIGHBOR_OP(*a, ndim, dsize, ndim, dinc, {
+                /* This neighbor's label. */
+                nlab=label[ nind ];
+
+                /* We only want those neighbors that are not rivers (>0) or
+                   any of the flag values. */
+                if(nlab>0)
+                  {
+                    /* Go over all already checked labels and make sure
+                       this clump hasn't already been considered. */
+                    for(i=0;i<ii;++i) if(ngblabs[i]==nlab) break;
+
+                    /* This neighbor clump hasn't been considered yet: */
+                    if(i==ii)
+                      {
+                        ngblabs[ii++] = nlab;
+                        row = &info[ nlab * INFO_NCOLS ];
+
+                        ++row[INFO_RIVAREA];
+                        if( row[INFO_RIVAREA]==1.0f )
+                          row[INFO_PEAK_RIVER] = values[*a];
+                      }
+                  }
+              } );
+          }
+      }
+  while(++a<af);
+
+  /* Based on the position of each clump, find a representative standard
+     deviation. */
+  for(lab=1; lab<=numclumps; ++lab)
+    {
+      /* To help in reading. */
+      row = &info [ lab * INFO_NCOLS ];
+
+      /* The calculations are only necessary for the clumps that satisfy
+         the minimum area. There is no need to waste time on the smaller
+         ones. */
+      if ( row[INFO_INAREA] > minarea )
+        {
+          /* It might happen that none of the pixels were positive
+             (especially over the undetected regions). In that case, set
+             the total area of the clump to zero so it is no longer
+             considered.*/
+          if( row[INFO_SFF]==0.0f )
+            row[INFO_INAREA]=0;
+          else
+            {
+              /* Find the coordinates of the clump's weighted center. */
+              coord[0]=GAL_DIMENSION_FLT_TO_INT(row[INFO_X]/row[INFO_SFF]);
+              coord[1]=GAL_DIMENSION_FLT_TO_INT(row[INFO_Y]/row[INFO_SFF]);
+              if(ndim==3)
+                coord[2]=GAL_DIMENSION_FLT_TO_INT(row[INFO_Z]/row[INFO_SFF]);
+
+              /* Find the corresponding standard deviation. */
+              row[INFO_INSTD]=( std_d->size>1
+                                ? ( std_d->size==values_d->size
+                                    ? std[gal_dimension_coord_to_index(ndim,
+                                                             dsize, coord)]
+                                    : std[gal_tile_full_id_from_coord(tl,
+                                                                    coord)] )
+                                : std[0] );
+              if(variance) row[INFO_INSTD] = sqrt(row[INFO_INSTD]);
+
+              /* For a check
+              printf("---------\n");
+              printf("\t%f --> %zu\n", row[INFO_Y]/row[INFO_SFF], coord[1]);
+              printf("\t%f --> %zu\n", row[INFO_X]/row[INFO_SFF], coord[0]);
+              printf("%u: (%zu, %zu): %.3f\n", lab, coord[1]+1,
+                     coord[0]+1, row[INFO_INSTD]);
+              */
+            }
+        }
+    }
+
+  /* Clean up. */
+  free(dinc);
+  free(ngblabs);
+}
+
+
+
+
+/* Make an S/N table for the clumps in a given region. */
+void
+gal_label_clump_significance(gal_data_t *values, gal_data_t *std,
+                             gal_data_t *label, gal_data_t *indexs,
+                             struct gal_tile_two_layer_params *tl,
+                             size_t numclumps, size_t minarea, int variance,
+                             int keepsmall, gal_data_t *sig,
+                             gal_data_t *sigind)
+{
+  double *info;
+  int max1_min0;
+  float *sigarr;
+  int32_t *indarr=NULL;
+  size_t i, ind, counter=0;
+  double C, R, S, Ni, *row;
+  size_t tablen=numclumps+1;
+
+  /* If there were no initial clumps, then ignore this function. */
+  if(numclumps==0) { sig->size=0; return; }
+
+  /* Basic sanity checks. */
+  max1_min0=label_clump_significance_sanity(values, std, label, indexs,
+                                            tl, sig, __func__);
+
+  /* Allocate the arrays to keep the final significance measure (and
+     possibly the indexs). */
+  sig->ndim  = 1;                        /* Depends on `cltprm->sn' */
+  sig->type  = GAL_TYPE_FLOAT32;
+  if(sig->dsize==NULL)
+    sig->dsize = gal_pointer_allocate(GAL_TYPE_SIZE_T, 1, 0, __func__,
+                                      "sig->dsize");
+  sig->array = gal_pointer_allocate(sig->type, tablen, 0, __func__,
+                                    "sig->array");
+  sig->size  = sig->dsize[0] = tablen;  /* MUST BE AFTER dsize. */
+  info=gal_pointer_allocate(GAL_TYPE_FLOAT64, tablen*INFO_NCOLS, 1,
+                            __func__, "info");
+  if( sigind )
+    {
+      sigind->ndim  = 1;
+      sigind->type  = GAL_TYPE_INT32;
+      sigind->dsize = gal_pointer_allocate(GAL_TYPE_SIZE_T, 1, 0, __func__,
+                                           "sigind->dsize");
+      sigind->size  = sigind->dsize[0] = tablen;/* After dsize */
+      sigind->array = gal_pointer_allocate(sigind->type, tablen, 0, __func__,
+                                           "sigind->array");
+    }
+
+
+  /* First, get the raw information necessary for making the S/N table. */
+  label_clump_significance_raw(values, std, label, indexs, tl, info,
+                               numclumps, minarea, variance);
+
+
+  /* Calculate the signal to noise ratio for successful clumps */
+  sigarr=sig->array;
+  if(sigind) indarr=sigind->array;
+  for(i=1;i<tablen;++i)
+    {
+      /* For readability. */
+      row = &info[ i * INFO_NCOLS ];
+
+      /* If we have a sufficient area and any rivers were actually found
+         for this clump, then do the measurement. */
+      Ni=row[ INFO_INAREA ];
+      if( Ni>minarea && row[ INFO_RIVAREA ])
+        {
+          /* Calculate the significance ratio, if `keepsmall' is not
+             called, we don't care about the IDs of the clumps anymore, so
+             store the Signal to noise ratios contiguously (for easy
+             sorting and etc). Note that counter will always be smaller and
+             equal to i. */
+          S   = row[ INFO_INSTD       ];
+          R   = row[ INFO_PEAK_RIVER  ];
+          C   = row[ INFO_PEAK_CENTER ];
+          ind = keepsmall ? i : counter++;
+
+          if(sigind) indarr[ind]=i;
+          sigarr[ind] = ( max1_min0 ? (C-R) : (R-C) ) / S;
+        }
+      else
+        {
+          /* Only over detections, we should put a NaN when the S/N isn't
+             calculated.  */
+          if(keepsmall)
+            {
+              sigarr[i]=NAN;
+              if(sigind) indarr[i]=i;
+            }
+        }
+    }
+
+
+  /* If we don't want to keep the small clumps, the size of the S/N table
+     has to be corrected. */
+  if(keepsmall==0)
+    {
+      sig->dsize[0] = sig->size = counter;
+      if(sigind) sigind->dsize[0] = sigind->size = counter;
+    }
+
+
+  /* Clean up. */
+  free(info);
+}



reply via email to

[Prev in Thread] Current Thread [Next in Thread]