gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master a5bdb45: Query: new --noblank option


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master a5bdb45: Query: new --noblank option
Date: Wed, 20 Jan 2021 08:47:26 -0500 (EST)

branch: master
commit a5bdb4597a0dfc6ce73d785197b559302a5a5051
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Query: new --noblank option
    
    Until now, if we didn't want rows that have a blank value in a certain
    column, we needed to use the '--range' option, which was annoying (because
    you had to set very large values and generally know the rough value range
    of the column).
    
    With this commit, Query now has a '--noblank' option that, like '--range',
    is modeled on the similarly named option in Table. When '--noblank' is
    given, it will use the 'XXXXX IS NOT NULL' structure in ADQL to achieve its
    goal and let the user simply ask for non-blank rows in a certain column
    without worrying about the value range.
---
 bin/query/args.h  |  27 +++++++++++++
 bin/query/main.h  |   1 +
 bin/query/tap.c   | 113 +++++++++++++++++++++++++++++++++++++++---------------
 bin/query/ui.c    |   5 +++
 bin/query/ui.h    |   3 +-
 doc/gnuastro.texi |   6 +++
 6 files changed, 123 insertions(+), 32 deletions(-)

diff --git a/bin/query/args.h b/bin/query/args.h
index d647e43..60b2474 100644
--- a/bin/query/args.h
+++ b/bin/query/args.h
@@ -203,6 +203,33 @@ struct argp_option program_options[] =
       gal_options_parse_name_and_float64s
     },
     {
+      "range",
+      UI_KEY_RANGE,
+      "STR,FLT:FLT",
+      0,
+      "Range of selected targets in given column.",
+      UI_GROUP_BYCENTER,
+      &p->range,
+      GAL_TYPE_STRING,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET,
+      gal_options_parse_name_and_float64s
+    },
+    {
+      "noblank",
+      UI_KEY_NOBLANK,
+      "STR[,STR]",
+      0,
+      "Remove rows with blank in given columns.",
+      GAL_OPTIONS_GROUP_INPUT,
+      &p->noblank,
+      GAL_TYPE_STRLL,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET
+    },
+    {
       "column",
       UI_KEY_COLUMN,
       "STR",
diff --git a/bin/query/main.h b/bin/query/main.h
index 66b777e..fffd53e 100644
--- a/bin/query/main.h
+++ b/bin/query/main.h
@@ -55,6 +55,7 @@ struct queryparams
   gal_data_t           *center;  /* Center position of query.          */
   gal_data_t           *radius;  /* Radius around center.              */
   gal_data_t            *range;  /* Range of magnitudes to query.      */
+  gal_list_str_t      *noblank;  /* Return rows that aren't blank.     */
   gal_data_t            *width;  /* Width of box around center.        */
   char                  *query;  /* Raw query string.                  */
   gal_list_str_t      *columns;  /* Columns to extract from database.  */
diff --git a/bin/query/tap.c b/bin/query/tap.c
index 49e29c5..8bdbf91 100644
--- a/bin/query/tap.c
+++ b/bin/query/tap.c
@@ -218,15 +218,86 @@ tap_query_construct_spatial(struct queryparams *p)
 
 
 
+/* Append one "COLUMN IS NOT NULL" test to the criteria in '*outstr' for
+   every column name in the 'p->noblank' list, joining the tests with
+   " AND ". '*outstr' may point to NULL when no criteria exist yet. Any
+   string it previously pointed to is freed and replaced by a newly
+   allocated one, which the caller is then responsible for. */
+static void
+tap_query_construct_noblank(struct queryparams *p, char **outstr)
+{
+  gal_list_str_t *tmp;
+  char *noblankstr=NULL, *prevstr=*outstr;

+  for(tmp=p->noblank; tmp!=NULL; tmp=tmp->next)
+    {
+      /* Write 'noblankstr': extend the criteria written so far (if any)
+         with the test for this column. */
+      if(prevstr)
+        {
+          if( asprintf(&noblankstr, "%s AND %s IS NOT NULL",
+                       prevstr, tmp->v) < 0 )
+            error(EXIT_FAILURE, 0, "%s: asprintf allocation ('noblankstr')",
+                  __func__);
+          free(prevstr);
+        }
+      else
+        if( asprintf(&noblankstr, "%s IS NOT NULL", tmp->v) < 0 )
+          error(EXIT_FAILURE, 0, "%s: asprintf allocation ('noblankstr')",
+                __func__);

+      /* Keep 'noblankstr' as the previous string for the next round. */
+      prevstr=noblankstr;
+    }

+  /* Set the final output pointer. 'prevstr' is the last string that was
+     written, or the untouched input when the list was empty (so existing
+     criteria are never overwritten with NULL). */
+  *outstr=prevstr;
+}
+
+
+
+
+
+/* Append one "COLUMN>=MIN AND COLUMN<=MAX" test to the criteria in
+   '*outstr' for every node of the 'p->range' list, joining the tests with
+   " AND ". For each node, the column name is its 'name' and the two limits
+   are the first two elements of its 'array' (read as 'double'). '*outstr'
+   may point to NULL when no criteria exist yet. Any string it previously
+   pointed to is freed and replaced by a newly allocated one, which the
+   caller is then responsible for. */
+static void
+tap_query_construct_range(struct queryparams *p, char **outstr)
+{
+  double *darray;
+  gal_data_t *tmp;
+  char *rangestr=NULL, *prevstr=*outstr;

+  for(tmp=p->range; tmp!=NULL; tmp=tmp->next)
+    {
+      /* Write 'rangestr': extend the criteria written so far (if any)
+         with the inclusive interval of this column.
+         NOTE(review): '%g' prints at most six significant digits; confirm
+         that this is precise enough for the requested limits. */
+      darray=tmp->array;
+      if(prevstr)
+        {
+          if( asprintf(&rangestr, "%s AND %s>=%g AND %s<=%g", prevstr,
+                       tmp->name, darray[0], tmp->name, darray[1]) < 0 )
+            error(EXIT_FAILURE, 0, "%s: asprintf allocation ('rangestr')",
+                  __func__);
+          free(prevstr);
+        }
+      else
+        if( asprintf(&rangestr, "%s>=%g AND %s<=%g",
+                     tmp->name, darray[0], tmp->name, darray[1]) < 0 )
+          error(EXIT_FAILURE, 0, "%s: asprintf allocation ('rangestr')",
+                __func__);

+      /* Put the 'rangestr' in previous-range string for the next
+         round.*/
+      prevstr=rangestr;
+    }

+  /* Set the final output pointer. 'prevstr' is the last string that was
+     written, or the untouched input when the list was empty (so existing
+     criteria are never overwritten with NULL). */
+  *outstr=prevstr;
+}
+
+
+
+
+
 /* Construct the query for data download. */
 static char *
 tap_query_construct_data(struct queryparams *p)
 {
-  double *darray;
-  gal_data_t *tmp;
+  char *datasetstr, *valuelimitstr=NULL;
   char *headstr=NULL, allcols[]="*";
   char *querystr, *columns, *spatialstr=NULL;
-  char *datasetstr, *rangestr=NULL, *prevrange;
 
   /* If the dataset has special characters (like a slash) it needs to
      be quoted. */
@@ -246,31 +317,11 @@ tap_query_construct_data(struct queryparams *p)
   if(p->overlapwith || p->center)
     spatialstr=tap_query_construct_spatial(p);
 
-  /* Set the range criteria on the requested columns. */
-  prevrange=NULL;
-  if(p->range)
-    for(tmp=p->range; tmp!=NULL; tmp=tmp->next)
-      {
-        /* Write 'rangestr'. */
-        darray=tmp->array;
-        if(prevrange)
-          {
-            if( asprintf(&rangestr, "%s AND %s>=%g AND %s<=%g", prevrange,
-                         tmp->name, darray[0], tmp->name, darray[1]) < 0 )
-              error(EXIT_FAILURE, 0, "%s: asprintf allocation ('rangestr')",
-                    __func__);
-            free(prevrange);
-          }
-        else
-          if( asprintf(&rangestr, "%s>=%g AND %s<=%g",
-                       tmp->name, darray[0], tmp->name, darray[1]) < 0 )
-            error(EXIT_FAILURE, 0, "%s: asprintf allocation ('rangestr')",
-                  __func__);
-
-        /* Put the 'rangestr' in previous-range string for the next
-           round.*/
-        prevrange=rangestr;
-      }
+  /* Build the 'noblank' and 'range' criteria. No blank goes first because
+     it is easier to check (for the server), thus the more time-consuming
+     range check can be done on fewer rows. */
+  if(p->noblank) tap_query_construct_noblank(p, &valuelimitstr);
+  if(p->range) tap_query_construct_range(p, &valuelimitstr);
 
   /* Write the automatically generated query string.  */
   if( asprintf(&querystr,  "'SELECT %s %s FROM %s %s %s %s'",
@@ -278,10 +329,10 @@ tap_query_construct_data(struct queryparams *p)
                columns,
                datasetstr,
                spatialstr ? spatialstr : "",
-               ( rangestr && spatialstr
+               ( valuelimitstr && spatialstr
                  ? "AND"
-                 : rangestr ? "WHERE" : "" ),
-               rangestr ? rangestr : "")<0 )
+                 : valuelimitstr ? "WHERE" : "" ),
+               valuelimitstr ? valuelimitstr : "")<0 )
     error(EXIT_FAILURE, 0, "%s: asprintf allocation ('querystr')",
           __func__);
 
diff --git a/bin/query/ui.c b/bin/query/ui.c
index b57d0a5..852a91b 100644
--- a/bin/query/ui.c
+++ b/bin/query/ui.c
@@ -309,6 +309,11 @@ ui_read_check_only_options(struct queryparams *p)
       p->dec_name=p->ccol->next->v;
     }
 
+  /* If '--noblank' is given (possibly multiple times, each with multiple
+     column names) break it up into individual names. */
+  if(p->noblank)
+    gal_options_merge_list_of_csv(&p->noblank);
+
   /* Make sure that '--query' and '--center' are not called together. */
   if(p->query && (p->center || p->overlapwith) )
     error(EXIT_FAILURE, 0, "the '--query' option cannot be called together "
diff --git a/bin/query/ui.h b/bin/query/ui.h
index 0a71ea4..c317ea5 100644
--- a/bin/query/ui.h
+++ b/bin/query/ui.h
@@ -42,7 +42,7 @@ enum program_args_groups
 
 /* Available letters for short options:
 
-   a b e f j m n p t u x y z
+   a e f j m n p t u x y z
    A B E G J R W X Y
 */
 enum option_keys_enum
@@ -58,6 +58,7 @@ enum option_keys_enum
   UI_KEY_OVERLAPWITH     = 'v',
   UI_KEY_RADIUS          = 'r',
   UI_KEY_RANGE           = 'g',
+  UI_KEY_NOBLANK         = 'b',
   UI_KEY_COLUMN          = 'c',
   UI_KEY_WIDTH           = 'w',
   UI_KEY_HEAD            = 'H',
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 9f4a3a0..e1d22e7 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -10708,6 +10708,12 @@ For example, if you want objects with SDSS spectroscopic redshifts larger than 2
 
 If you want the interval to not be inclusive on both sides, you can run @code{astquery} once and get the command that it executes.
 Then you can edit it to be non-inclusive on your desired side.
+
+@item -b STR[,STR]
+@item --noblank=STR[,STR]
+Only ask for rows that don't have a blank value in the @code{STR} column.
+This option can be called many times, and each call can have multiple column names (separated by a comma or @key{,}).
+For example if you want the retrieved rows to not have a blank value in columns @code{A}, @code{B}, @code{C} and @code{D}, you can use @command{--noblank=A -bB,C,D}.
 @end table
 
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]