pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Covariance Matrix


From: Jason Stover
Subject: Re: Covariance Matrix
Date: Tue, 6 Oct 2009 11:37:40 -0400
User-agent: Mutt/1.5.18 (2008-05-17)

On Sun, Oct 04, 2009 at 03:21:51PM +0000, John Darrington wrote:
> It's currently implemented as a single pass algorithm, but it will
> be straightforward to change that.  There are no categorical variables
> or interactions at present.
> 
...
> I'd be interested in any comments and suggestions on how to proceed
> with generalising the implementation to accept categorical variables.

I have a patch below to start this. Right now, it just allocates space. It does
not properly retrieve entries with categorical variables, but it is a start:

diff --git a/src/language/stats/correlations.c b/src/language/stats/correlations.c
index e397dae..65679f3 100644
--- a/src/language/stats/correlations.c
+++ b/src/language/stats/correlations.c
@@ -324,7 +324,7 @@ run_corr (struct casereader *r, const struct corr_opts *opts, const struct corr
   const gsl_matrix *var_matrix,  *samples_matrix, *mean_matrix;
   const gsl_matrix *cov_matrix;
   gsl_matrix *corr_matrix;
-  struct covariance *cov = covariance_create (corr->n_vars_total, corr->vars,
+  struct covariance *cov = covariance_create_pass1 (corr->n_vars_total, corr->vars,
                                              opts->wv, opts->exclude);
 
   for ( ; (c = casereader_read (r) ); case_unref (c))
diff --git a/src/math/covariance.c b/src/math/covariance.c
index ba0de0b..350cab4 100644
--- a/src/math/covariance.c
+++ b/src/math/covariance.c
@@ -31,6 +31,7 @@ struct covariance
 {
   /* The variables for which the covariance matrix is to be calculated */
   size_t n_vars;
+  size_t dim; /* This value equals n_vars if all variables are continuous. */
   const struct variable **vars;
   
   /* The weight variable (or NULL if none) */
@@ -64,11 +65,29 @@ covariance_moments (const struct covariance *cov, int m)
 }
 
 
+static void
+covariance_create_part2 (struct covariance *cov, enum mv_class exclude)
+{
+  size_t i;
+
+  cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
+
+  for (i = 0; i < n_MOMENTS; ++i)
+    cov->moments[i] = gsl_matrix_calloc (cov->dim, cov->dim);
 
-/* Create a covariance struct */
+  cov->exclude = exclude;
+
+  cov->n_cm = (cov->dim * (cov->dim - 1)  ) / 2;
+
+  cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
+}
+  
+/* Create a covariance struct to be computed in one data pass.
+   No categorical variables are allowed. 
+*/
 struct covariance *
-covariance_create (size_t n_vars, const struct variable **vars,
-                  const struct variable *weight, enum mv_class exclude)
+covariance_create_pass1 (size_t n_vars, const struct variable **vars,
+                        const struct variable *weight, enum mv_class exclude)
 {
   size_t i;
   struct covariance *cov = xmalloc (sizeof *cov);
@@ -76,20 +95,61 @@ covariance_create (size_t n_vars, const struct variable **vars,
 
   cov->wv = weight;
   cov->n_vars = n_vars;
+  cov->dim = n_vars; /* Only numeric variables are allowed in a single data pass,
+                       so these values are equal.
+                     */
 
   for (i = 0; i < n_vars; ++i)
-    cov->vars[i] = vars[i];
+    {
+      assert (var_is_numeric (vars[i]));
+      cov->vars[i] = vars[i];
+    }
 
-  cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
-  
-  for (i = 0; i < n_MOMENTS; ++i)
-    cov->moments[i] = gsl_matrix_calloc (n_vars, n_vars);
+  covariance_create_part2 (cov, exclude);
 
-  cov->exclude = exclude;
+  return cov;
+}
 
-  cov->n_cm = (n_vars * (n_vars - 1)  ) / 2;
+static size_t
+get_dim (size_t n_vars, struct variable **vars)
+{
+  size_t i;
+  size_t dim = 0;
 
-  cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
+  for (i = 0; i < n_vars; i++)
+    {
+      if (var_is_numeric (vars[i]))
+       {
+         dim++;
+       }
+      else
+       {
+         dim += cat_get_n_categories (vars[i]);
+       }
+    }
+  return dim;
+}
+/* Create a covariance struct with categorical variables.
+   Call this function after the first data pass.
+*/
+struct covariance *
+covariance_create_pass2 (size_t n_vars, const struct variable **vars,
+                        const struct variable *weight, enum mv_class exclude)
+{
+  size_t i;
+  struct covariance *cov = xmalloc (sizeof *cov);
+  cov->vars = xmalloc (sizeof *cov->vars * n_vars);
+
+  cov->wv = weight;
+  cov->n_vars = n_vars;
+
+  for (i = 0; i < n_vars; ++i)
+    cov->vars[i] = vars[i];
+
+  cov->dim = get_dim (n_vars, vars);
+
+  
+  covariance_create_part2 (cov, exclude);
 
   return cov;
 }
diff --git a/src/math/covariance.h b/src/math/covariance.h
index 8b8de88..7a13cd2 100644
--- a/src/math/covariance.h
+++ b/src/math/covariance.h
@@ -27,8 +27,14 @@ struct covariance;
 struct variable;
 struct ccase ;
 
-struct covariance * covariance_create (size_t n_vars, const struct variable **vars, 
-                                      const struct variable *wv, enum mv_class excl);
+struct covariance * covariance_create_pass1 (size_t n_vars, 
+                                            const struct variable **vars,
+                                            const struct variable *wv, 
+                                            enum mv_class excl);
+struct covariance * covariance_create_pass2 (size_t n_vars, 
+                                            const struct variable **vars,
+                                            const struct variable *wv, 
+                                            enum mv_class excl);
 
 void covariance_accumulate (struct covariance *, const struct ccase *);
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]