gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master c49b8b1: Query: new --keeprawdownload option


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master c49b8b1: Query: new --keeprawdownload option
Date: Tue, 12 Jan 2021 19:25:19 -0500 (EST)

branch: master
commit c49b8b1e9675cf90e8189d90d2d9c7c47f199f3a
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Query: new --keeprawdownload option
    
    Until now, the Query program would read the raw downloaded table into
    memory and write it into the output file with CFITSIO. This gives all
    output tables the same format, irrespective of which database they are
    downloaded from, and also un-compresses the data in case the server gives
    a compressed FITS file (like bug #59469).
    
    In the end (after internally processing the raw downloaded file), Query
    would delete the raw downloaded file. However, the raw downloaded file can
    possibly have important metadata that the user may need (for example the
    version of the database, etc).
    
    With this commit, the new '--keeprawdownload' option has been added to
    Query for this purpose: when given, the raw downloaded file will not be
    deleted and it will remain in the same directory as the output.
---
 bin/query/args.h  | 19 +++++++++++++++++++
 bin/query/gaia.c  |  1 -
 bin/query/main.h  |  2 +-
 bin/query/query.c | 23 ++++++++++-------------
 bin/query/ui.c    | 24 +++++++++++++++---------
 bin/query/ui.h    |  3 ++-
 doc/gnuastro.texi | 28 +++++++++++++++++++++-------
 7 files changed, 68 insertions(+), 32 deletions(-)

diff --git a/bin/query/args.h b/bin/query/args.h
index ce22d50..632d7e7 100644
--- a/bin/query/args.h
+++ b/bin/query/args.h
@@ -31,6 +31,25 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 /* Array of acceptable options. */
 struct argp_option program_options[] =
   {
+    /* Output related options. */
+    {
+      "keeprawdownload",
+      UI_KEY_KEEPRAWDOWNLOAD,
+      0,
+      0,
+      "Don't delete raw downloaded file.",
+      GAL_OPTIONS_GROUP_OUTPUT,
+      &p->keeprawdownload,
+      GAL_OPTIONS_NO_ARG_TYPE,
+      GAL_OPTIONS_RANGE_0_OR_1,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET
+    },
+
+
+
+
+
     /* Database and dataset. */
     {
       "database",
diff --git a/bin/query/gaia.c b/bin/query/gaia.c
index 10427dc..7031df2 100644
--- a/bin/query/gaia.c
+++ b/bin/query/gaia.c
@@ -111,7 +111,6 @@ gaia_query(struct queryparams *p)
   /* Make sure everything is fine. */
   gaia_sanitycheck(p);
 
-
   /* If the raw query has been given, use it. */
   if(p->query)
     querystr=p->query;
diff --git a/bin/query/main.h b/bin/query/main.h
index 0de0511..c18d9df 100644
--- a/bin/query/main.h
+++ b/bin/query/main.h
@@ -44,6 +44,7 @@ struct queryparams
 {
   /* From command-line */
   struct gal_options_common_params cp; /* Common parameters.           */
+  uint8_t      keeprawdownload;  /* Keep raw downloaded file.          */
   int                 database;  /* ID of database to use.             */
   char             *datasetstr;  /* ID of dataset in database to use.  */
   char            *overlapwith;  /* Image to use instead of center.    */
@@ -57,7 +58,6 @@ struct queryparams
   /* Internal variables. */
   char            *databasestr;  /* Name of input database.            */
   char           *downloadname;  /* Temporary output name.             */
-  char          *processedname;  /* Temporary output name.             */
   size_t       outtableinfo[2];  /* To print in output.                */
 
   /* Output: */
diff --git a/bin/query/query.c b/bin/query/query.c
index 54aed11..31f7cbe 100644
--- a/bin/query/query.c
+++ b/bin/query/query.c
@@ -62,21 +62,16 @@ query_check_download(struct queryparams *p)
                            GAL_TABLE_SEARCH_NAME, 1, p->cp.minmapsize,
                            p->cp.quietmmap, NULL);
       gal_table_write(table, NULL, NULL, p->cp.tableformat,
-                      p->cp.output ? p->cp.output : p->processedname,
+                      p->cp.output ? p->cp.output : p->cp.output,
                       "QUERY", 0);
 
-      /* Delete the raw downloaded file. */
-      remove(p->downloadname);
-      free(p->downloadname);
-
-      /* If no output name was specified, use the 'processedname'. */
-      if(p->cp.output==NULL)
-        p->cp.output=p->processedname;
-
       /* Get basic information about the table and free it. */
-      p->outtableinfo[0]=gal_list_data_number(table);
-      p->outtableinfo[1]=table->size;
+      p->outtableinfo[0]=table->size;
+      p->outtableinfo[1]=gal_list_data_number(table);
       gal_list_data_free(table);
+
+      /* Delete the raw downloaded file if necessary. */
+      if(p->keeprawdownload==0) remove(p->downloadname);
     }
   else
     {
@@ -125,8 +120,10 @@ query(struct queryparams *p)
   /* Let the user know that things went well. */
   if(p->cp.quiet==0)
     {
-      printf("Query resulted in %zu columns and %zu rows.\n",
+      printf("\nQuery resulted in %zu rows and %zu columns.\n",
              p->outtableinfo[0], p->outtableinfo[1]);
-      printf("Query output written to: %s\n", p->cp.output);
+      if(p->keeprawdownload)
+        printf("Query's raw downloaded file: %s\n", p->downloadname);
+      printf("Query's final output: %s\n", p->cp.output);
     }
 }
diff --git a/bin/query/ui.c b/bin/query/ui.c
index bebb73a..15d541b 100644
--- a/bin/query/ui.c
+++ b/bin/query/ui.c
@@ -334,16 +334,21 @@ ui_read_check_only_options(struct queryparams *p)
   gal_checkset_writable_remove(p->cp.output, p->cp.keep,
                                p->cp.dontdelete);
 
-  /* Set the name for the downloaded and processed files. These are due to
-     an internal low-level processing that will be done on the raw
+  /* Set the name for the downloaded and final output name. These are due
+     to an internal low-level processing that will be done on the raw
      downloaded file. */
-  basename=gal_checkset_malloc_cat(p->databasestr, ".fits");
-  p->processedname=gal_checkset_make_unique_suffix(p->cp.output
-                                                   ? p->cp.output
-                                                   : basename,
-                                                   ".fits");
-  p->downloadname=gal_checkset_make_unique_suffix(p->processedname, NULL);
-  free(basename);
+  if(p->cp.output==NULL)
+    {
+      basename=gal_checkset_malloc_cat(p->databasestr, ".fits");
+      p->cp.output=gal_checkset_make_unique_suffix(basename, ".fits");
+      free(basename);
+    }
+
+  /* Make sure the output name doesn't exist (and report an error if
+     '--dontdelete' is called). */
+  gal_checkset_writable_remove(p->cp.output, 0, p->cp.dontdelete);
+  p->downloadname=gal_checkset_automatic_output(&p->cp, p->cp.output,
+                                                "-raw-download.fits");
 }
 
 
@@ -493,4 +498,5 @@ ui_free_report(struct queryparams *p, struct timeval *t1)
   /* Free the allocated arrays: */
   free(p->cp.hdu);
   free(p->cp.output);
+  free(p->downloadname);
 }
diff --git a/bin/query/ui.h b/bin/query/ui.h
index 83f0e07..248bba0 100644
--- a/bin/query/ui.h
+++ b/bin/query/ui.h
@@ -42,12 +42,13 @@ enum program_args_groups
 
 /* Available letters for short options:
 
-   a b e f i j k m n p t u x y z
+   a b e f i j m n p t u x y z
    A B E G H J L R W X Y
 */
 enum option_keys_enum
 {
   /* With short-option version. */
+  UI_KEY_KEEPRAWDOWNLOAD = 'k',
   UI_KEY_DATABASE        = 'd',
   UI_KEY_QUERY           = 'Q',
   UI_KEY_DATASET         = 's',
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 9efe5cb..6f302be 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -10346,13 +10346,6 @@ $ astquery --database=gaia --overlapwith=image.fits
 $ astquery --database=gaia --query="XXXX YYYY" --output=my-gaia.fits
 @end example
 
-The name of the downloaded output file can be set with @option{--output}.
-The requested output format can any of the @ref{Recognized table formats} 
(currently @file{.txt} or @file{.fits}).
-Like all Gnuastro programs, if the output is a FITS file, the zero-th/first 
HDU of the output will contain all the command-line options given to Query.
-If @option{--output} is not set, the output name will be in the format of 
@file{NAME-STRING.fits}, where @file{NAME} is the name of the database (same 
value given to @option{--database}), and @file{STRING} is a randomly selected 
6-character set of numbers and alphabetic characters.
-With this feature, a second run of @command{astquery} that isn't called with 
@option{--output} will not over-write an already downloaded one.
-Generally, when calling Query more than once, it is recommended to set an 
output name for each call based on your project's context.
-
 Query doesn't take any input argument, because the main goal is to retreive 
data from external sources.
 The main input to Query is the @option{--database} option which specifies 
which database should be contacted for submitting the query.
 There are two methods to query the database, each is more fully discussed in 
its option's description below.
@@ -10368,12 +10361,33 @@ So @option{--query} is more powerful, however, in 
this mode, you don't need any
 When query is run, before contacting the server, it will print the full 
command that it executes which contains the raw server query that is 
constructed.
 @end itemize
 
+The name of the downloaded output file can be set with @option{--output}.
+The requested output format can be any of the @ref{Recognized table formats} 
(currently @file{.txt} or @file{.fits}).
+Like all Gnuastro programs, if the output is a FITS file, the zero-th/first 
HDU of the output will contain all the command-line options given to Query.
+If @option{--output} is not set, the output name will be in the format of 
@file{NAME-STRING.fits}, where @file{NAME} is the name of the database (same 
value given to @option{--database}), and @file{STRING} is a randomly selected 
6-character set of numbers and alphabetic characters.
+With this feature, a second run of @command{astquery} that isn't called with 
@option{--output} will not over-write an already downloaded one.
+Generally, when calling Query more than once, it is recommended to set an 
output name for each call based on your project's context.
+
+The outputs of Query will have a common output format, irrespective of the 
used database.
+To achieve this, Query will ask the databases to provide a FITS table output 
(for larger tables, FITS can consume much less download volume).
+After downloading is complete, the raw downloaded file will be read into 
memory once by Query, and written into the file given to @option{--output}.
+The raw downloaded file will be deleted by default, but can be preserved with 
the @option{--keeprawdownload} option.
+This strategy avoids unnecessary surprises depending on database.
+For example some databases can download a compressed FITS table, even though 
we ask for FITS.
+But with the strategy above, the final output will be an uncompressed FITS 
file, or, if requested, even plain text (@ref{Gnuastro text table format}).
+
 @strong{Under development, request for feedback:} Query is a new member of the 
Gnuastro family of programs.
 It currently requires that the @command{curl} executable (for the cURL 
downloading program) to be present on the host and the number of databases it 
supports is still limited, see the list under the @option{--database} option 
below.
 More downloader tools, and databases will be added in the near future as it is 
used more often, so please don't hesitate to suggest any that you may need.
 
 @table @option
 
+@item -k
+@itemx --keeprawdownload
+Don't delete the raw downloaded file from the database.
+The name of the raw download will have a @file{OUTPUT-raw-download.fits} 
format.
+Here, @file{OUTPUT} is the base-name of the final output file (without 
a suffix).
+
 @item -d STR
 @itemx --database=STR
 Identifer for the database for sending the query.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]