GRASS GIS 8 Programmer's Manual  8.5.0dev(2025)-299a28a7d0
class.c
Go to the documentation of this file.
1 /* functions to classify sorted arrays of doubles and fill a vector of
2  * classbreaks */
3 
4 #include <stdbool.h>
5 
6 #include <grass/glocale.h>
7 #include <grass/arraystats.h>
8 
9 int AS_option_to_algorithm(const struct Option *option)
10 {
11  if (G_strcasecmp(option->answer, "int") == 0)
12  return CLASS_INTERVAL;
13  if (G_strcasecmp(option->answer, "std") == 0)
14  return CLASS_STDEV;
15  if (G_strcasecmp(option->answer, "qua") == 0)
16  return CLASS_QUANT;
17  if (G_strcasecmp(option->answer, "equ") == 0)
18  return CLASS_EQUIPROB;
19  if (G_strcasecmp(option->answer, "dis") == 0)
20  return CLASS_DISCONT;
21 
22  G_fatal_error(_("Unknown algorithm '%s'"), option->answer);
23 }
24 
25 double AS_class_apply_algorithm(int algo, const double data[], int nrec,
26  int *nbreaks, double classbreaks[])
27 {
28  double finfo = 0.0;
29 
30  switch (algo) {
31  case CLASS_INTERVAL:
32  finfo = AS_class_interval(data, nrec, *nbreaks, classbreaks);
33  break;
34  case CLASS_STDEV:
35  finfo = AS_class_stdev(data, nrec, *nbreaks, classbreaks);
36  break;
37  case CLASS_QUANT:
38  finfo = AS_class_quant(data, nrec, *nbreaks, classbreaks);
39  break;
40  case CLASS_EQUIPROB:
41  finfo = AS_class_equiprob(data, nrec, nbreaks, classbreaks);
42  break;
43  case CLASS_DISCONT:
44  finfo = AS_class_discont(data, nrec, *nbreaks, classbreaks);
45  break;
46  default:
47  break;
48  }
49 
50  if (finfo == 0)
51  G_fatal_error(_("Classification algorithm failed"));
52 
53  return finfo;
54 }
55 
56 int AS_class_interval(const double data[], int count, int nbreaks,
57  double classbreaks[])
58 {
59  double min, max;
60  double step;
61  int i = 0;
62 
63  min = data[0];
64  max = data[count - 1];
65 
66  step = (max - min) / (nbreaks + 1);
67 
68  for (i = 0; i < nbreaks; i++)
69  classbreaks[i] = min + (step * (i + 1));
70 
71  return (1);
72 }
73 
74 double AS_class_stdev(const double data[], int count, int nbreaks,
75  double classbreaks[])
76 {
77  struct GASTATS stats;
78  int i;
79  int nbclass;
80  double scale = 1.0;
81 
82  AS_basic_stats(data, count, &stats);
83 
84  nbclass = nbreaks + 1;
85 
86  if (nbclass % 2 ==
87  1) { /* number of classes is uneven so we center middle class on mean */
88 
89  /* find appropriate fraction of stdev for step */
90  i = 1;
91  while (i) {
92  if (((stats.mean + stats.stdev * scale / 2) +
93  (stats.stdev * scale * (nbclass / 2 - 1)) >
94  stats.max) ||
95  ((stats.mean - stats.stdev * scale / 2) -
96  (stats.stdev * scale * (nbclass / 2 - 1)) <
97  stats.min))
98  scale = scale / 2;
99  else
100  i = 0;
101  }
102 
103  /* classbreaks below the mean */
104  for (i = 0; i < nbreaks / 2; i++)
105  classbreaks[i] = (stats.mean - stats.stdev * scale / 2) -
106  stats.stdev * scale * (nbreaks / 2 - (i + 1));
107  /* classbreaks above the mean */
108  for (; i < nbreaks; i++)
109  classbreaks[i] = (stats.mean + stats.stdev * scale / 2) +
110  stats.stdev * scale * (i - nbreaks / 2);
111  }
112  else { /* number of classes is even so mean is a classbreak */
113 
114  /* decide whether to use 1*stdev or 0.5*stdev as step */
115  i = 1;
116  while (i) {
117  if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
118  stats.max) ||
119  ((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
120  stats.min))
121  scale = scale / 2;
122  else
123  i = 0;
124  }
125 
126  /* classbreaks below the mean and on the mean */
127  for (i = 0; i <= nbreaks / 2; i++)
128  classbreaks[i] =
129  stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
130  /* classbreaks above the mean */
131  for (; i < nbreaks; i++)
132  classbreaks[i] =
133  stats.mean + stats.stdev * scale * (i - nbreaks / 2);
134  }
135 
136  return (scale);
137 }
138 
139 int AS_class_quant(const double data[], int count, int nbreaks,
140  double classbreaks[])
141 {
142  int i, step;
143 
144  step = count / (nbreaks + 1);
145 
146  for (i = 0; i < nbreaks; i++)
147  classbreaks[i] = data[step * (i + 1)];
148 
149  return (1);
150 }
151 
152 int AS_class_equiprob(const double data[], int count, int *nbreaks,
153  double classbreaks[])
154 {
155  int i, j;
156  double *lequi; /*Vector of scale factors for probabilities of the normal
157  distribution */
158  struct GASTATS stats;
159  int nbclass;
160 
161  nbclass = *nbreaks + 1;
162 
163  lequi = G_malloc(*nbreaks * sizeof(double));
164 
165  /* The following values come from the normal distribution and will be used
166  * as: classbreak[i] = (lequi[i] * stdev) + mean;
167  */
168 
169  if (nbclass < 3) {
170  lequi[0] = 0;
171  }
172  else if (nbclass == 3) {
173  lequi[0] = -0.43076;
174  lequi[1] = 0.43076;
175  }
176  else if (nbclass == 4) {
177  lequi[0] = -0.6745;
178  lequi[1] = 0;
179  lequi[2] = 0.6745;
180  }
181  else if (nbclass == 5) {
182  lequi[0] = -0.8416;
183  lequi[1] = -0.2533;
184  lequi[2] = 0.2533;
185  lequi[3] = 0.8416;
186  }
187  else if (nbclass == 6) {
188  lequi[0] = -0.9676;
189  lequi[1] = -0.43076;
190  lequi[2] = 0;
191  lequi[3] = 0.43076;
192  lequi[4] = 0.9676;
193  }
194  else if (nbclass == 7) {
195  lequi[0] = -1.068;
196  lequi[1] = -0.566;
197  lequi[2] = -0.18;
198  lequi[3] = 0.18;
199  lequi[4] = 0.566;
200  lequi[5] = 1.068;
201  }
202  else if (nbclass == 8) {
203  lequi[0] = -1.1507;
204  lequi[1] = -0.6745;
205  lequi[2] = -0.3187;
206  lequi[3] = 0;
207  lequi[4] = 0.3187;
208  lequi[5] = 0.6745;
209  lequi[6] = 1.1507;
210  }
211  else if (nbclass == 9) {
212  lequi[0] = -1.2208;
213  lequi[1] = -0.7648;
214  lequi[2] = -0.4385;
215  lequi[3] = -0.1397;
216  lequi[4] = 0.1397;
217  lequi[5] = 0.4385;
218  lequi[6] = 0.7648;
219  lequi[7] = 1.2208;
220  }
221  else if (nbclass == 10) {
222  lequi[0] = -1.28155;
223  lequi[1] = -0.84162;
224  lequi[2] = -0.5244;
225  lequi[3] = -0.25335;
226  lequi[4] = 0;
227  lequi[5] = 0.25335;
228  lequi[6] = 0.5244;
229  lequi[7] = 0.84162;
230  lequi[8] = 1.28155;
231  }
232  else {
234  _("Equiprobable classbreaks currently limited to 10 classes"));
235  }
236 
237  AS_basic_stats(data, count, &stats);
238 
239  /* Check if any of the classbreaks would fall outside of the range min-max
240  */
241  j = 0;
242  for (i = 0; i < *nbreaks; i++) {
243  if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
244  (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
245  j++;
246  }
247  }
248 
249  if (j < (*nbreaks)) {
250  G_warning(
251  _("There are classbreaks outside the range min-max. Number of "
252  "classes reduced to %i, but using probabilities for %i classes."),
253  j + 1, *nbreaks + 1);
254  for (i = 0; i < j; i++)
255  classbreaks[i] = 0.0;
256  }
257 
258  j = 0;
259  for (i = 0; i < *nbreaks; i++) {
260  if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
261  (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
262  classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
263  j++;
264  }
265  }
266  *nbreaks = j;
267 
268  G_free(lequi);
269  return (1);
270 }
271 
272 double AS_class_discont(const double data[], int count, int nbreaks,
273  double classbreaks[])
274 {
275  int i, j;
276  double chi2 = 1000.0;
277 
278  /* get the number of values */
279  int n = count;
280 
281  int nbclass = nbreaks + 1;
282 
283  int *num = G_calloc((nbclass + 2), sizeof(int));
284  int *no = G_calloc((nbclass + 1), sizeof(int));
285  double *zz = G_malloc((nbclass + 1) * sizeof(double));
286  double *xn = G_malloc((n + 1) * sizeof(double));
287  double *co = G_malloc((nbclass + 1) * sizeof(double));
288 
289  /* We copy the array of values to x, in order to be able
290  to standardize it */
291  double *x = G_malloc((n + 1) * sizeof(double));
292  x[0] = 0.0;
293  xn[0] = 0.0;
294 
295  double min = data[0];
296  double max = data[count - 1];
297  for (i = 1; i <= n; i++)
298  x[i] = data[i - 1];
299 
300  double rangemax = max - min;
301  double rangemin = rangemax;
302 
303  for (i = 2; i <= n; i++) {
304  if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
305  rangemin = x[i] - x[i - 1]; /* rangemin = minimal distance */
306  }
307 
308  /* STANDARDIZATION
309  * and creation of the number vector (xn) */
310 
311  for (i = 1; i <= n; i++) {
312  x[i] = (x[i] - min) / rangemax;
313  xn[i] = i / (double)n;
314  }
315  double xlim = rangemin / rangemax;
316  rangemin = rangemin / 2.0;
317  /* Searching for the limits */
318  num[1] = n;
319 
320  /* Loop through possible solutions */
321  for (i = 1; i <= nbclass; i++) {
322  double dmax = 0.0;
323  int nmax = 0;
324  int nf = 0; /* End number */
325 
326  /* Loop through classes */
327  for (j = 1; j <= i; j++) {
328  double a = 0.0, b = 0.0, c = 0.0, d = 0.0;
329  int nd = nf; /* Start number */
330 
331  nf = num[j];
332  co[j] = 10e37;
333  AS_eqdrt(x, xn, nd, nf, &a, &b, &c);
334  double den = sqrt(pow(b, 2) + 1.0);
335  nd++;
336  /* Loop through observations */
337  for (int k = nd; k <= nf; k++) {
338  if (fabs(c) >= GRASS_EPSILON)
339  d = fabs(x[k] - c);
340  else
341  d = fabs((-1.0 * b * x[k]) + xn[k] - a) / den;
342 
343  if (x[k] - x[nd] < xlim)
344  continue;
345  if (x[nf] - x[k] < xlim)
346  continue;
347  if (d <= dmax)
348  continue;
349  dmax = d;
350  nmax = k;
351  }
352  nd--;
353  if (fabs(x[nf] - x[nd]) > GRASS_EPSILON)
354  co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
355  }
356  for (j = 1; j <= i; j++) {
357  no[j] = num[j];
358  zz[j] = x[num[j]] * rangemax + min;
359  if (j == i)
360  continue;
361  if (co[j] > co[j + 1]) {
362  zz[j] = zz[j] + rangemin;
363  continue;
364  }
365  else {
366  zz[j] = zz[j] - rangemin;
367  no[j] = no[j] - 1;
368  }
369  }
370  int im = i - 1;
371  if (im != 0) {
372  for (j = 1; j <= im; j++) {
373  int ji = i + 1 - j;
374  no[ji] -= no[ji - 1];
375  }
376  }
377  if (nmax == 0) {
378  break;
379  }
380 
381  int jj = 0;
382  int nff = i + 2;
383  bool do_reset = true;
384  for (j = 1; j <= i; j++) {
385  jj = nff - j;
386  if (num[jj - 1] < nmax) {
387  num[jj] = nmax;
388  do_reset = false;
389  break;
390  }
391  num[jj] = num[jj - 1];
392  }
393  if (do_reset) {
394  num[1] = nmax;
395  jj = 1;
396  }
397  int no1 = (int)((xn[num[jj]] - xn[num[jj - 1]]) * n);
398  int no2 = (int)((xn[num[jj + 1]] - xn[num[jj]]) * n);
399  double f = (xn[num[jj + 1]] - xn[num[jj - 1]]) /
400  (x[num[jj + 1]] - x[num[jj - 1]]);
401  f *= n;
402  double xt1 = (x[num[jj]] - x[num[jj - 1]]) * f;
403  double xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
404  if (fabs(xt1 * xt2) <= GRASS_EPSILON) {
405  if (fabs(xt2) > GRASS_EPSILON) {
406  xt2 = rangemin / 2.0 / rangemax * f;
407  xt1 = xt1 - xt2;
408  }
409  else {
410  xt1 = rangemin / 2.0 / rangemax * f;
411  xt2 = xt2 - xt1;
412  }
413  }
414 
415  /* calculate chi-square to indicate statistical significance of new
416  * class, i.e. how probable would it be that the new class could be the
417  * result of purely random choice */
418  double ch = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
419  if (chi2 > ch)
420  chi2 = ch;
421  }
422 
423  /* Fill up classbreaks of i <nbclass classes */
424  for (j = 1; j < nbclass; j++)
425  classbreaks[j - 1] = zz[j];
426 
427  G_free(co);
428  G_free(no);
429  G_free(num);
430  G_free(x);
431  G_free(xn);
432  G_free(zz);
433 
434  return (chi2);
435 }
436 
437 int AS_class_frequencies(const double data[], int count, int nbreaks,
438  double classbreaks[], int frequencies[])
439 {
440  int i, j;
441 
442  /* min = data[0];
443  max = data[count - 1]; */
444  /* count cases in all classes, except for last class */
445  i = 0;
446  for (j = 0; j < nbreaks; j++) {
447  while (data[i] <= classbreaks[j]) {
448  frequencies[j]++;
449  i++;
450  }
451  }
452 
453  /*Now count cases in last class */
454  for (; i < count; i++) {
455  frequencies[nbreaks]++;
456  }
457 
458  return (1);
459 }
#define CLASS_EQUIPROB
Definition: arraystats.h:27
#define CLASS_QUANT
Definition: arraystats.h:26
#define CLASS_INTERVAL
Definition: arraystats.h:24
#define CLASS_DISCONT
Definition: arraystats.h:28
#define CLASS_STDEV
Definition: arraystats.h:25
double AS_class_discont(const double data[], int count, int nbreaks, double classbreaks[])
Definition: class.c:272
double AS_class_apply_algorithm(int algo, const double data[], int nrec, int *nbreaks, double classbreaks[])
Definition: class.c:25
int AS_class_quant(const double data[], int count, int nbreaks, double classbreaks[])
Definition: class.c:139
int AS_class_frequencies(const double data[], int count, int nbreaks, double classbreaks[], int frequencies[])
Definition: class.c:437
int AS_class_equiprob(const double data[], int count, int *nbreaks, double classbreaks[])
Definition: class.c:152
int AS_class_interval(const double data[], int count, int nbreaks, double classbreaks[])
Definition: class.c:56
int AS_option_to_algorithm(const struct Option *option)
Definition: class.c:9
double AS_class_stdev(const double data[], int count, int nbreaks, double classbreaks[])
Definition: class.c:74
void AS_eqdrt(double[], double[], int, int, double *, double *, double *)
Definition: basic.c:39
void AS_basic_stats(const double[], int, struct GASTATS *)
Definition: basic.c:7
void G_free(void *)
Free allocated memory.
Definition: gis/alloc.c:150
#define G_calloc(m, n)
Definition: defs/gis.h:95
void void void void G_fatal_error(const char *,...) __attribute__((format(printf
void G_warning(const char *,...) __attribute__((format(printf
#define G_malloc(n)
Definition: defs/gis.h:94
int int G_strcasecmp(const char *, const char *)
String compare ignoring case (upper or lower)
Definition: strings.c:47
#define min(x, y)
Definition: draw2.c:29
#define max(x, y)
Definition: draw2.c:30
#define GRASS_EPSILON
Definition: gis.h:172
#define _(str)
Definition: glocale.h:10
GRASS_INTERPFL_EXPORT int count
double b
Definition: r_raster.c:39
double mean
Definition: arraystats.h:18
double max
Definition: arraystats.h:14
double stdev
Definition: arraystats.h:21
double min
Definition: arraystats.h:13
Structure that stores option information.
Definition: gis.h:557
char * answer
Definition: gis.h:571
#define x