[bug #59900] Add truncated normal distribution

bug-gsl
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[bug #59900] Add truncated normal distribution

From:	Patrick Alken
Subject:	[bug #59900] Add truncated normal distribution
Date:	Sun, 17 Jan 2021 17:26:27 -0500 (EST)
User-agent:	Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0
URL:
  <https://savannah.gnu.org/bugs/?59900>

                 Summary: Add truncated normal distribution
                 Project: GNU Scientific Library
            Submitted by: psa
            Submitted on: Sun 17 Jan 2021 10:26:25 PM UTC
                Category: None
                Severity: 3 - Normal
        Operating System: 
                  Status: None
             Assigned to: None
             Open/Closed: Open
                 Release: 
         Discussion Lock: Any

    _______________________________________________________

Details:

from msunet =at= shellblade =dot= net

Please find attached an implementation of a truncated normal distribution
based on John Burkardt's presentation linked below.

Please note that I am neither a mathematician nor an expert in numerical
computing, so any feedback is appreciated. This is also my first contribution
to GSL (and a GNU project in general), so please excuse any inefficiencies.

Thank you,

Marc



Adds a truncated normal distribution as described in John Burkardt's
"The Truncated Normal Distribution":

https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
---
 cdf/gauss.c           | 22 ++++++++++++++++
 cdf/gaussinv.c        | 20 ++++++++++++++
 cdf/gsl_cdf.h         |  6 +++++
 cdf/test.c            | 61 +++++++++++++++++++++++++++++++++++++++++++
 filter/Makefile.am    |  2 +-
 movstat/Makefile.am   |  2 +-
 randist/gauss.c       | 19 ++++++++++++++
 randist/gsl_randist.h |  3 +++
 randist/test.c        | 20 ++++++++++++++
 rstat/Makefile.am     |  2 +-
 10 files changed, 154 insertions(+), 3 deletions(-)

diff --git a/cdf/gauss.c b/cdf/gauss.c
index cdc8b0a3..110e58f1 100644
--- a/cdf/gauss.c
+++ b/cdf/gauss.c
@@ -349,3 +349,25 @@ gsl_cdf_gaussian_Q (const double x, const double sigma)
 {
   return gsl_cdf_ugaussian_Q (x / sigma);
 }
+
+double
+gsl_cdf_tgaussian_P (const double x, const double a, const double b, const
double sigma)
+{
+  if (x <= a)
+    {
+      return 0.0;
+    }
+  if (x >= b)
+    {
+      return 1.0;
+    }
+  const double n = gsl_cdf_gaussian_P (x, sigma) - gsl_cdf_gaussian_P (a,
sigma);
+  const double d = gsl_cdf_gaussian_P (b, sigma) - gsl_cdf_gaussian_P (a,
sigma);
+  return n / d;
+}
+
+double
+gsl_cdf_tgaussian_Q (const double x, const double a, const double b, const
double sigma)
+{
+  return 1.0 - gsl_cdf_tgaussian_P (x, a, b, sigma);
+}
diff --git a/cdf/gaussinv.c b/cdf/gaussinv.c
index dcbdca1d..28c9866c 100644
--- a/cdf/gaussinv.c
+++ b/cdf/gaussinv.c
@@ -200,3 +200,23 @@ gsl_cdf_gaussian_Qinv (const double Q, const double
sigma)
 {
   return sigma * gsl_cdf_ugaussian_Qinv (Q);
 }
+
+double
+gsl_cdf_tgaussian_Pinv (const double P, const double a, const double b, const
double sigma)
+{
+  const double k
+    = gsl_cdf_gaussian_P (a, sigma)
+    + P * (gsl_cdf_gaussian_P (b, sigma) - gsl_cdf_gaussian_P (a, sigma));
+
+  return gsl_cdf_gaussian_Pinv (k, sigma);
+}
+
+double
+gsl_cdf_tgaussian_Qinv (const double Q, const double a, const double b, const
double sigma)
+{
+  const double k
+    = gsl_cdf_gaussian_P (b, sigma)
+    - Q * (gsl_cdf_gaussian_P (b, sigma) - gsl_cdf_gaussian_P (a, sigma));
+
+  return gsl_cdf_gaussian_Pinv (k, sigma);
+}
diff --git a/cdf/gsl_cdf.h b/cdf/gsl_cdf.h
index 2bc3fed5..61379a82 100644
--- a/cdf/gsl_cdf.h
+++ b/cdf/gsl_cdf.h
@@ -46,6 +46,12 @@ double gsl_cdf_gaussian_Q (const double x, const double
sigma);
 double gsl_cdf_gaussian_Pinv (const double P, const double sigma);
 double gsl_cdf_gaussian_Qinv (const double Q, const double sigma);
 
+double gsl_cdf_tgaussian_P (const double x, const double a, const double b,
const double sigma);
+double gsl_cdf_tgaussian_Q (const double x, const double a, const double b,
const double sigma);
+
+double gsl_cdf_tgaussian_Pinv (const double P, const double a, const double
b, const double sigma);
+double gsl_cdf_tgaussian_Qinv (const double Q, const double a, const double
b, const double sigma);
+
 double gsl_cdf_gamma_P (const double x, const double a, const double b);
 double gsl_cdf_gamma_Q (const double x, const double a, const double b);
 
diff --git a/cdf/test.c b/cdf/test.c
index 1b4cf777..65eeaa6e 100644
--- a/cdf/test.c
+++ b/cdf/test.c
@@ -39,6 +39,8 @@
 
 void test_ugaussian (void);
 void test_ugaussianinv (void);
+void test_tgaussian (void);
+void test_tgaussianinv (void);
 void test_exponential (void);
 void test_exponentialinv (void);
 void test_exppow (void);
@@ -398,6 +400,7 @@ main (void)
      Function values computed with PARI, 28 digits precision */
   
   test_ugaussian ();
+  test_tgaussian ();
   test_exponential ();
   test_exppow ();
   test_tdist (); 
@@ -407,6 +410,7 @@ main (void)
   test_beta (); 
 
   test_ugaussianinv ();
+  test_tgaussianinv ();
   test_exponentialinv ();
   test_gammainv (); 
   test_chisqinv (); 
@@ -535,6 +539,63 @@ void test_ugaussianinv (void) {
 }
 
 
+  /* Test values from John Burkardt, The Truncated Normal Distribution
+     Page 23. */
+
+void test_tgaussian (void)
+{
+  TEST (gsl_cdf_tgaussian_P, (-18.37, -50.0, 50.0, 25.0),
0.218418626765711416, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_P, (37.962, -50.0, 50.0, 25.0),
0.956315769095676504, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_P, (22.367, -50.0, 50.0, 25.0),
0.82951388212098065, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_P, (3.704, -50.0, 50.0, 25.0),
0.561699074822187949, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_P, (-5.1, -50.0, 50.0, 25.0), 0.415323967636885893,
TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_P, (-34.1674, -50.0, 50.0, 25.0),
0.0661185873044326383, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_P, (-15.4257, -50.0, 50.0, 25.0),
0.257577857199668192, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_P, (-28.4328, -50.0, 50.0, 25.0),
0.109956874884255498, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_P, (-37.9346, -50.0, 50.0, 25.0),
0.0438289787355652244, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_P, (8.155, -50.0, 50.0, 25.0),
0.633958632430714042, TEST_TOL0);
+
+  TEST (gsl_cdf_tgaussian_Q, (-18.37, -50.0, 50.0, 25.0), 0.7815813732342887,
TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Q, (37.962, -50.0, 50.0, 25.0),
0.043684230904323385, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Q, (22.367, -50.0, 50.0, 25.0),
0.17048611787901935, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Q, (3.704, -50.0, 50.0, 25.0), 0.43830092517781205,
TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Q, (-5.1, -50.0, 50.0, 25.0), 0.584676032363114162,
TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Q, (-34.1674, -50.0, 50.0, 25.0),
0.9338814126955673, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Q, (-15.4257, -50.0, 50.0, 25.0),
0.7424221428003318, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Q, (-28.4328, -50.0, 50.0, 25.0),
0.8900431251157445, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Q, (-37.9346, -50.0, 50.0, 25.0),
0.9561710212644348, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Q, (8.155, -50.0, 50.0, 25.0), 0.36604136756928596,
TEST_TOL0);
+}
+
+  /* Test values from John Burkardt, The Truncated Normal Distribution
+     Page 24. */
+
+void test_tgaussianinv (void)
+{
+  TEST (gsl_cdf_tgaussian_Pinv, (0.218418626765711305, -50.0, 50.0, 25.0),
-18.37, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Pinv, (0.956315769095676504, -50.0, 50.0, 25.0),
37.961999999999982, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Pinv, (0.82951388212098065, -50.0, 50.0, 25.0),
22.367, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Pinv, (0.561699074822187949, -50.0, 50.0, 25.0),
3.7039999999999913, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Pinv, (0.415307593603450431, -50.0, 50.0, 25.0),
-5.1009999999999982, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Pinv, (0.0661185873044326383, -50.0, 50.0, 25.0),
-34.1674, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Pinv, (0.257577857199668192, -50.0, 50.0, 25.0),
-15.4257, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Pinv, (0.109956874884255498, -50.0, 50.0, 25.0),
-28.4328, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Pinv, (0.0438289787355652244, -50.0, 50.0, 25.0),
-37.9346, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Pinv, (0.633958632430714042, -50.0, 50.0, 25.0),
8.155, TEST_TOL0);
+
+  TEST (gsl_cdf_tgaussian_Qinv, (0.7815813732342887, -50.0, 50.0, 25.0),
-18.37, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Qinv, (0.043684230904323496, -50.0, 50.0, 25.0),
37.961999999999982, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Qinv, (0.17048611787901935, -50.0, 50.0, 25.0),
22.3669999999999902, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Qinv, (0.43830092517781205, -50.0, 50.0, 25.0),
3.7039999999999913, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Qinv, (0.5846924063965495, -50.0, 50.0, 25.0),
-5.10099999999999554, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Qinv, (0.9338814126955673, -50.0, 50.0, 25.0),
-34.1674, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Qinv, (0.7424221428003318, -50.0, 50.0, 25.0),
-15.4257000000000151, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Qinv, (0.8900431251157445, -50.0, 50.0, 25.0),
-28.4328, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Qinv, (0.9561710212644348, -50.0, 50.0, 25.0),
-37.9345999999999819, TEST_TOL0);
+  TEST (gsl_cdf_tgaussian_Qinv, (0.36604136756928596, -50.0, 50.0, 25.0),
8.155, TEST_TOL0);
+}
+
+
   /* Tests for exponential cumulative distribution function
      Function values computed with PARI, 28 digits precision */
 
diff --git a/filter/Makefile.am b/filter/Makefile.am
index 03684916..fa426c5d 100644
--- a/filter/Makefile.am
+++ b/filter/Makefile.am
@@ -12,4 +12,4 @@ check_PROGRAMS = test
 TESTS = $(check_PROGRAMS)
 
 test_SOURCES = test.c
-test_LDADD = libgslfilter.la ../movstat/libgslmovstat.la
../statistics/libgslstatistics.la ../sort/libgslsort.la
../ieee-utils/libgslieeeutils.la ../randist/libgslrandist.la
../rng/libgslrng.la ../specfunc/libgslspecfunc.la ../complex/libgslcomplex.la
../err/libgslerr.la ../test/libgsltest.la ../vector/libgslvector.la
../blas/libgslblas.la ../cblas/libgslcblas.la ../block/libgslblock.la
../poly/libgslpoly.la ../sys/libgslsys.la ../utils/libutils.la
+test_LDADD = libgslfilter.la ../movstat/libgslmovstat.la
../statistics/libgslstatistics.la ../sort/libgslsort.la
../ieee-utils/libgslieeeutils.la ../randist/libgslrandist.la
../rng/libgslrng.la ../specfunc/libgslspecfunc.la ../complex/libgslcomplex.la
../err/libgslerr.la ../test/libgsltest.la ../vector/libgslvector.la
../blas/libgslblas.la ../cblas/libgslcblas.la ../block/libgslblock.la
../poly/libgslpoly.la ../cdf/libgslcdf.la ../sys/libgslsys.la
../utils/libutils.la
diff --git a/movstat/Makefile.am b/movstat/Makefile.am
index 851b241b..3abd7ead 100644
--- a/movstat/Makefile.am
+++ b/movstat/Makefile.am
@@ -33,4 +33,4 @@ check_PROGRAMS = test
 TESTS = $(check_PROGRAMS)
 
 test_SOURCES = test.c
-test_LDADD = libgslmovstat.la ../statistics/libgslstatistics.la
../sort/libgslsort.la ../ieee-utils/libgslieeeutils.la
../randist/libgslrandist.la ../rng/libgslrng.la ../specfunc/libgslspecfunc.la
../complex/libgslcomplex.la ../err/libgslerr.la ../test/libgsltest.la
../vector/libgslvector.la ../blas/libgslblas.la ../cblas/libgslcblas.la
../block/libgslblock.la ../sys/libgslsys.la ../utils/libutils.la
+test_LDADD = libgslmovstat.la ../statistics/libgslstatistics.la
../sort/libgslsort.la ../ieee-utils/libgslieeeutils.la
../randist/libgslrandist.la ../rng/libgslrng.la ../specfunc/libgslspecfunc.la
../complex/libgslcomplex.la ../err/libgslerr.la ../test/libgsltest.la
../vector/libgslvector.la ../blas/libgslblas.la ../cblas/libgslcblas.la
../block/libgslblock.la ../cdf/libgslcdf.la ../sys/libgslsys.la
../utils/libutils.la
diff --git a/randist/gauss.c b/randist/gauss.c
index 1b3442ea..1a072186 100644
--- a/randist/gauss.c
+++ b/randist/gauss.c
@@ -20,6 +20,7 @@
 
 #include <config.h>
 #include <math.h>
+#include <gsl/gsl_cdf.h>
 #include <gsl/gsl_math.h>
 #include <gsl/gsl_rng.h>
 #include <gsl/gsl_randist.h>
@@ -141,3 +142,21 @@ gsl_ran_ugaussian_pdf (const double x)
   return gsl_ran_gaussian_pdf (x, 1.0);
 }
 
+double
+gsl_ran_tgaussian (const gsl_rng * r, const double a, const double b, const
double sigma)
+{
+  const double p = gsl_rng_uniform_pos (r);
+  return gsl_cdf_tgaussian_Pinv (p, a, b, sigma);
+}
+
+double
+gsl_ran_tgaussian_pdf (const double x, const double a, const double b, const
double sigma)
+{
+  if (x <= a || x >= b)
+    {
+      return 0.0;
+    }
+  const double n = gsl_ran_gaussian_pdf (x, sigma);
+  const double d = gsl_cdf_gaussian_P (b, sigma) - gsl_cdf_gaussian_P (a,
sigma);
+  return n / d;
+}
diff --git a/randist/gsl_randist.h b/randist/gsl_randist.h
index d38ccb36..69027212 100644
--- a/randist/gsl_randist.h
+++ b/randist/gsl_randist.h
@@ -92,6 +92,9 @@ double gsl_ran_gaussian_tail_pdf (const double x, const
double a, const double s
 double gsl_ran_ugaussian_tail (const gsl_rng * r, const double a);
 double gsl_ran_ugaussian_tail_pdf (const double x, const double a);
 
+double gsl_ran_tgaussian (const gsl_rng * r, const double a, const double b,
const double sigma);
+double gsl_ran_tgaussian_pdf (const double x, const double a, const double b,
const double sigma);
+
 void gsl_ran_bivariate_gaussian (const gsl_rng * r, double sigma_x, double
sigma_y, double rho, double *x, double *y);
 double gsl_ran_bivariate_gaussian_pdf (const double x, const double y, const
double sigma_x, const double sigma_y, const double rho);
 
diff --git a/randist/test.c b/randist/test.c
index c841f8b9..1b644259 100644
--- a/randist/test.c
+++ b/randist/test.c
@@ -156,6 +156,8 @@ double test_ugaussian_ratio_method (void);
 double test_ugaussian_ratio_method_pdf (double x);
 double test_ugaussian_tail (void);
 double test_ugaussian_tail_pdf (double x);
+double test_tgaussian (void);
+double test_tgaussian_pdf (double x);
 double test_bivariate_gaussian1 (void);
 double test_bivariate_gaussian1_pdf (double x);
 double test_bivariate_gaussian2 (void);
@@ -282,6 +284,11 @@ main (void)
   testMoments (FUNC (ugaussian), -1.0, 1.0, 0.6826895);
   testMoments (FUNC (ugaussian), 3.0, 3.5, 0.0011172689);
   testMoments (FUNC (ugaussian_tail), 3.0, 3.5, 0.0011172689 /
0.0013498981);
+  testMoments (FUNC (tgaussian), -3.0, 3.0, 1.0);
+  testMoments (FUNC (tgaussian), -9.0, -3.1, 0.0);
+  testMoments (FUNC (tgaussian), 3.1, 9.0, 0.0);
+  testMoments (FUNC (tgaussian), 0.0, 3.0, 0.5);
+  testMoments (FUNC (tgaussian), -3.0, 0.0, 0.5);
   testMoments (FUNC (exponential), 0.0, 1.0, 1 - exp (-0.5));
   testMoments (FUNC (cauchy), 0.0, 10000.0, 0.5);
 
@@ -339,6 +346,7 @@ main (void)
   testPDF (FUNC2 (gaussian_tail1));
   testPDF (FUNC2 (gaussian_tail2));
   testPDF (FUNC2 (ugaussian_tail));
+  testPDF (FUNC2 (tgaussian));
 
   testPDF (FUNC2 (bivariate_gaussian1));
   testPDF (FUNC2 (bivariate_gaussian2));
@@ -1626,6 +1634,18 @@ test_ugaussian_tail_pdf (double x)
   return gsl_ran_ugaussian_tail_pdf (x, 3.0);
 }
 
+double
+test_tgaussian (void)
+{
+  return gsl_ran_tgaussian (r_global, -3.0, 3.0, 1.5);
+}
+
+double
+test_tgaussian_pdf (double x)
+{
+  return gsl_ran_tgaussian_pdf (x, -3.0, 3.0, 1.5);
+}
+
 double
 test_bivariate_gaussian1 (void)
 {
diff --git a/rstat/Makefile.am b/rstat/Makefile.am
index 85db2013..5dd18e7b 100644
--- a/rstat/Makefile.am
+++ b/rstat/Makefile.am
@@ -10,6 +10,6 @@ check_PROGRAMS = test
 TESTS = $(check_PROGRAMS)
 
 test_SOURCES = test.c
-test_LDADD = libgslrstat.la ../statistics/libgslstatistics.la
../sort/libgslsort.la ../ieee-utils/libgslieeeutils.la
../randist/libgslrandist.la ../rng/libgslrng.la ../specfunc/libgslspecfunc.la
../complex/libgslcomplex.la ../err/libgslerr.la ../test/libgsltest.la
../sys/libgslsys.la ../utils/libutils.la ../vector/libgslvector.la
+test_LDADD = libgslrstat.la ../statistics/libgslstatistics.la
../sort/libgslsort.la ../ieee-utils/libgslieeeutils.la
../randist/libgslrandist.la ../rng/libgslrng.la ../specfunc/libgslspecfunc.la
../complex/libgslcomplex.la ../err/libgslerr.la ../test/libgsltest.la
../sys/libgslsys.la ../utils/libutils.la ../vector/libgslvector.la
../cdf/libgslcdf.la
 
 
-- 
2.27.0






    _______________________________________________________

Reply to this item at:

  <https://savannah.gnu.org/bugs/?59900>

_______________________________________________
  Message sent via Savannah
  https://savannah.gnu.org/
[Prev in Thread]
Current Thread
[Next in Thread]
[bug #59900] Add truncated normal distribution, Patrick Alken <=
Prev by Date: [bug #59624] Buffer overflow in gsl_stats_quantile_from_sorted_data
Next by Date: [bug #59910] Update information for Python wrapper
Previous by thread: [bug #59624] Buffer overflow in gsl_stats_quantile_from_sorted_data
Next by thread: [bug #59910] Update information for Python wrapper
Index(es):
- Date
- Thread