matthias muntwiler bbd16d0f94 add files for public distribution
based on internal repository 0a462b6 2017-11-22 14:41:39 +0100
2017-11-22 14:55:20 +01:00

328 lines
9.4 KiB
C

#include "S.h"
#include "loess.h"
static char *surf_stat;
void
loess_setup(x, y, n, p, lo)
double *x, *y;
int n, p;
struct loess_struct *lo;
{
int i, max_kd;
max_kd = n > 200 ? n : 200;
lo->in.y = (double *) malloc(n * sizeof(double));
lo->in.x = (double *) malloc(n * p * sizeof(double));
lo->in.weights = (double *) malloc(n * sizeof(double));
for(i = 0; i < (n * p); i++)
lo->in.x[i] = x[i];
for(i = 0; i < n; i++) {
lo->in.y[i] = y[i];
lo->in.weights[i] = 1;
}
lo->in.n = n;
lo->in.p = p;
lo->model.span = 0.75;
lo->model.degree = 2;
lo->model.normalize = TRUE;
for(i = 0; i < 8; i++)
lo->model.parametric[i] = lo->model.drop_square[i] = FALSE;
lo->model.family = "gaussian";
lo->control.surface = "interpolate";
lo->control.statistics = "approximate";
lo->control.cell = 0.2;
lo->control.trace_hat = "wait.to.decide";
lo->control.iterations = 4;
lo->out.fitted_values = (double *) malloc(n * sizeof(double));
lo->out.fitted_residuals = (double *) malloc(n * sizeof(double));
lo->out.pseudovalues = (double *) malloc(n * sizeof(double));
lo->out.diagonal = (double *) malloc(n * sizeof(double));
lo->out.robust = (double *) malloc(n * sizeof(double));
lo->out.divisor = (double *) malloc(p * sizeof(double));
lo->kd_tree.parameter = (int *) malloc(7 * sizeof(int));
lo->kd_tree.a = (int *) malloc(max_kd * sizeof(int));
lo->kd_tree.xi = (double *) malloc(max_kd * sizeof(double));
lo->kd_tree.vert = (double *) malloc(p * 2 * sizeof(double));
lo->kd_tree.vval = (double *) malloc((p + 1) * max_kd * sizeof(double));
}
void
loess(lo)
struct loess_struct *lo;
{
int size_info[2], iterations;
void loess_();
size_info[0] = lo->in.p;
size_info[1] = lo->in.n;
iterations = (!strcmp(lo->model.family, "gaussian")) ? 0 :
lo->control.iterations;
if(!strcmp(lo->control.trace_hat, "wait.to.decide")) {
if(!strcmp(lo->control.surface, "interpolate"))
lo->control.trace_hat = (lo->in.n < 500) ? "exact" : "approximate";
else
lo->control.trace_hat = "exact";
}
loess_(lo->in.y, lo->in.x, size_info, lo->in.weights,
&lo->model.span,
&lo->model.degree,
lo->model.parametric,
lo->model.drop_square,
&lo->model.normalize,
&lo->control.statistics,
&lo->control.surface,
&lo->control.cell,
&lo->control.trace_hat,
&iterations,
lo->out.fitted_values,
lo->out.fitted_residuals,
&lo->out.enp,
&lo->out.s,
&lo->out.one_delta,
&lo->out.two_delta,
lo->out.pseudovalues,
&lo->out.trace_hat,
lo->out.diagonal,
lo->out.robust,
lo->out.divisor,
lo->kd_tree.parameter,
lo->kd_tree.a,
lo->kd_tree.xi,
lo->kd_tree.vert,
lo->kd_tree.vval);
}
void
loess_(y, x_, size_info, weights, span, degree, parametric, drop_square,
normalize, statistics, surface, cell, trace_hat_in, iterations,
fitted_values, fitted_residuals, enp, s, one_delta, two_delta,
pseudovalues, trace_hat_out, diagonal, robust, divisor,
parameter, a, xi, vert, vval)
double *y, *x_, *weights, *span, *cell, *pseudovalues,
*fitted_values, *fitted_residuals, *enp, *s, *one_delta, *two_delta,
*trace_hat_out, *diagonal, *robust, *divisor, *xi, *vert, *vval;
int *size_info, *degree, *parametric, *drop_square, *normalize,
*iterations, *parameter, *a;
char **statistics, **surface, **trace_hat_in;
{
double *x, *x_tmp, new_cell, trL, delta1, delta2, sum_squares = 0,
*pseudo_resid, *temp, *xi_tmp, *vert_tmp, *vval_tmp,
*diag_tmp, trL_tmp = 0, d1_tmp = 0, d2_tmp = 0, sum, mean;
int i, j, k, p, N, D, sum_drop_sqr = 0, sum_parametric = 0,
setLf, nonparametric = 0, *order_parametric,
*order_drop_sqr, zero = 0, max_kd, *a_tmp, *param_tmp;
int cut, comp();
char *new_stat;
void condition();
D = size_info[0];
N = size_info[1];
max_kd = (N > 200 ? N : 200);
*one_delta = *two_delta = *trace_hat_out = 0;
x = (double *) malloc(D * N * sizeof(double));
x_tmp = (double *) malloc(D * N * sizeof(double));
temp = (double *) malloc(N * sizeof(double));
a_tmp = (int *) malloc(max_kd * sizeof(int));
xi_tmp = (double *) malloc(max_kd * sizeof(double));
vert_tmp = (double *) malloc(D * 2 * sizeof(double));
vval_tmp = (double *) malloc((D + 1) * max_kd * sizeof(double));
diag_tmp = (double *) malloc(N * sizeof(double));
param_tmp = (int *) malloc(N * sizeof(int));
order_parametric = (int *) malloc(D * sizeof(int));
order_drop_sqr = (int *) malloc(D * sizeof(int));
if((*iterations) > 0)
pseudo_resid = (double *) malloc(N * sizeof(double));
new_cell = (*span) * (*cell);
for(i = 0; i < N; i++)
robust[i] = 1;
for(i = 0; i < (N * D); i++)
x_tmp[i] = x_[i];
if((*normalize) && (D > 1)) {
cut = ceil(0.100000000000000000001 * N);
for(i = 0; i < D; i++) {
k = i * N;
for(j = 0; j < N; j++)
temp[j] = x_[k + j];
qsort(temp, N, sizeof(double), comp);
sum = 0;
for(j = cut; j <= (N - cut - 1); j++)
sum = sum + temp[j];
mean = sum / (N - 2 * cut);
sum = 0;
for(j = cut; j <= (N - cut - 1); j++) {
temp[j] = temp[j] - mean;
sum = sum + temp[j] * temp[j];
}
divisor[i] = sqrt(sum / (N - 2 * cut - 1));
for(j = 0; j < N; j++) {
p = k + j;
x_tmp[p] = x_[p] / divisor[i];
}
}
}
else
for(i = 0; i < D; i++) divisor[i] = 1;
j = D - 1;
for(i = 0; i < D; i++) {
sum_drop_sqr = sum_drop_sqr + drop_square[i];
sum_parametric = sum_parametric + parametric[i];
if(parametric[i])
order_parametric[j--] = i;
else
order_parametric[nonparametric++] = i;
}
for(i = 0; i < D; i++) {
order_drop_sqr[i] = 2 - drop_square[order_parametric[i]];
k = i * N;
p = order_parametric[i] * N;
for(j = 0; j < N; j++)
x[k + j] = x_tmp[p + j];
}
if((*degree) == 1 && sum_drop_sqr) {
fprintf(stderr, "Specified the square of a factor predictor to be dropped when degree = 1");
exit(1);
}
if(D == 1 && sum_drop_sqr) {
fprintf(stderr, "Specified the square of a predictor to be dropped with only one numeric predictor");
exit(1);
}
if(sum_parametric == D) {
fprintf(stderr, "Specified parametric for all predictors");
exit(1);
}
for(j = 0; j <= (*iterations); j++) {
new_stat = j ? "none" : *statistics;
for(i = 0; i < N; i++)
robust[i] = weights[i] * robust[i];
condition(surface, new_stat, trace_hat_in);
setLf = !strcmp(surf_stat, "interpolate/exact");
loess_raw(y, x, weights, robust, &D, &N, span, degree,
&nonparametric, order_drop_sqr, &sum_drop_sqr,
&new_cell, &surf_stat, fitted_values, parameter, a,
xi, vert, vval, diagonal, &trL, &delta1, &delta2,
&setLf);
if(j == 0) {
*trace_hat_out = trL;
*one_delta = delta1;
*two_delta = delta2;
}
for(i = 0; i < N; i++)
fitted_residuals[i] = y[i] - fitted_values[i];
if(j < (*iterations))
F77_SUB(lowesw)(fitted_residuals, &N, robust, temp);
}
if((*iterations) > 0) {
F77_SUB(lowesp)(&N, y, fitted_values, weights, robust, temp, pseudovalues);
loess_raw(pseudovalues, x, weights, weights, &D, &N, span,
degree, &nonparametric, order_drop_sqr, &sum_drop_sqr,
&new_cell, &surf_stat, temp, param_tmp, a_tmp, xi_tmp,
vert_tmp, vval_tmp, diag_tmp, &trL_tmp, &d1_tmp, &d2_tmp, &zero);
for(i = 0; i < N; i++)
pseudo_resid[i] = pseudovalues[i] - temp[i];
}
if((*iterations) == 0)
for(i = 0; i < N; i++)
sum_squares = sum_squares + weights[i] *
fitted_residuals[i] * fitted_residuals[i];
else
for(i = 0; i < N; i++)
sum_squares = sum_squares + weights[i] *
pseudo_resid[i] * pseudo_resid[i];
*enp = (*one_delta) + 2 * (*trace_hat_out) - N;
*s = sqrt(sum_squares / (*one_delta));
free(x);
free(x_tmp);
free(temp);
free(xi_tmp);
free(vert_tmp);
free(vval_tmp);
free(diag_tmp);
free(a_tmp);
free(param_tmp);
free(order_parametric);
free(order_drop_sqr);
if((*iterations) > 0)
free(pseudo_resid);
}
void
loess_free_mem(lo)
struct loess_struct *lo;
{
free(lo->in.x);
free(lo->in.y);
free(lo->in.weights);
free(lo->out.fitted_values);
free(lo->out.fitted_residuals);
free(lo->out.pseudovalues);
free(lo->out.diagonal);
free(lo->out.robust);
free(lo->out.divisor);
free(lo->kd_tree.parameter);
free(lo->kd_tree.a);
free(lo->kd_tree.xi);
free(lo->kd_tree.vert);
free(lo->kd_tree.vval);
}
void
loess_summary(lo)
struct loess_struct *lo;
{
printf("Number of Observations: %d\n", lo->in.n);
printf("Equivalent Number of Parameters: %.1f\n", lo->out.enp);
if(!strcmp(lo->model.family, "gaussian"))
printf("Residual Standard Error: ");
else
printf("Residual Scale Estimate: ");
printf("%.4f\n", lo->out.s);
}
void
condition(surface, new_stat, trace_hat_in)
char **surface, *new_stat, **trace_hat_in;
{
if(!strcmp(*surface, "interpolate")) {
if(!strcmp(new_stat, "none"))
surf_stat = "interpolate/none";
else if(!strcmp(new_stat, "exact"))
surf_stat = "interpolate/exact";
else if(!strcmp(new_stat, "approximate"))
{
if(!strcmp(*trace_hat_in, "approximate"))
surf_stat = "interpolate/2.approx";
else if(!strcmp(*trace_hat_in, "exact"))
surf_stat = "interpolate/1.approx";
}
}
else if(!strcmp(*surface, "direct")) {
if(!strcmp(new_stat, "none"))
surf_stat = "direct/none";
else if(!strcmp(new_stat, "exact"))
surf_stat = "direct/exact";
else if(!strcmp(new_stat, "approximate"))
surf_stat = "direct/approximate";
}
}
int
comp(d1, d2)
double *d1, *d2;
{
if(*d1 < *d2)
return(-1);
else if(*d1 == *d2)
return(0);
else
return(1);
}