GRASS GIS 8 Programmer's Manual  8.5.0dev(2025)-fbabf32052
remove_duplicates.c
Go to the documentation of this file.
1 /*!
2  \file lib/vector/Vlib/remove_duplicates.c
3 
4  \brief Vector library - clean geometry (remove duplicates)
5 
6  Higher level functions for reading/writing/manipulating vectors.
7 
8  (C) 2001-2009 by the GRASS Development Team
9 
10  This program is free software under the GNU General Public License
11  (>=v2). Read the file COPYING that comes with GRASS for details.
12 
13  \author Radim Blazek
14  */
15 
16 #include <stdlib.h>
17 #include <grass/vector.h>
18 #include <grass/glocale.h>
19 
20 static int cmp_int(const void *a, const void *b)
21 {
22  return (*(int *)a - *(int *)b);
23 }
24 
25 static int boxlist_add_sorted(struct boxlist *list, int id)
26 {
27  int i;
28 
29  if (list->n_values > 0) {
30  if (bsearch(&id, list->id, list->n_values, sizeof(int), cmp_int))
31  return 0;
32  }
33 
34  if (list->n_values == list->alloc_values) {
35  size_t size = (list->n_values + 100) * sizeof(int);
36 
37  list->id = (int *)G_realloc((void *)list->id, size);
38  list->alloc_values = list->n_values + 100;
39  }
40 
41  i = 0;
42  if (list->n_values > 0) {
43  for (i = list->n_values; i > 0; i--) {
44  if (list->id[i - 1] < id)
45  break;
46  list->id[i] = list->id[i - 1];
47  }
48  }
49  list->id[i] = id;
50  list->n_values++;
51 
52  return 1;
53 }
54 
55 /*!
56  \brief Remove duplicate features from vector map.
57 
58  Remove duplicate lines of given types from vector map. Duplicate
59  lines may be optionally written to error map. Input map must be
60  opened on level 2 for update. Categories are merged.
61  GV_BUILD_BASE is sufficient.
62 
63  \param[in,out] Map vector map where duplicate lines will be deleted
64  \param type type of line to be delete
65  \param[out] Err vector map where duplicate lines will be written or NULL
66 
67  \return void
68  */
69 void Vect_remove_duplicates(struct Map_info *Map, int type,
70  struct Map_info *Err)
71 {
72  struct line_pnts *APoints, *BPoints;
73  struct line_cats *ACats, *BCats;
74  int i, c, atype, aline, bline;
75  int nlines, nacats_orig, npoints;
76  int na1, na2, nb1, nb2, nodelines, nline;
77  struct bound_box ABox;
78  struct boxlist *List;
79  int ndupl, is_dupl;
80 
81  APoints = Vect_new_line_struct();
82  BPoints = Vect_new_line_struct();
83  ACats = Vect_new_cats_struct();
84  BCats = Vect_new_cats_struct();
85  List = Vect_new_boxlist(0);
86 
87  nlines = Vect_get_num_lines(Map);
88 
89  G_debug(1, "nlines = %d", nlines);
90  /* Go through all lines in vector, for each line select lines which
91  * overlap with the first vertex of this line and check if a
92  * selected line is identical. If yes, remove the selected line.
93  * If the line vertices are identical with those of any other line,
94  * merge categories and rewrite the current line.
95  */
96 
97  ndupl = 0;
98 
99  for (aline = 1; aline <= nlines; aline++) {
100  G_percent(aline, nlines, 1);
101  if (!Vect_line_alive(Map, aline))
102  continue;
103 
104  atype = Vect_read_line(Map, APoints, ACats, aline);
105  if (!(atype & type))
106  continue;
107 
108  npoints = APoints->n_points;
109  Vect_line_prune(APoints);
110 
111  if (npoints != APoints->n_points) {
112  G_debug(3, "Line %d pruned, %d vertices removed", aline,
113  npoints - APoints->n_points);
114  Vect_rewrite_line(Map, aline, atype, APoints, ACats);
115  nlines = Vect_get_num_lines(Map);
116  continue;
117  }
118 
119  na1 = na2 = -1;
120  if (atype & GV_LINES) {
121  /* faster than Vect_select_lines_by_box() */
122  Vect_reset_boxlist(List);
123  Vect_get_line_nodes(Map, aline, &na1, &na2);
124  nodelines = Vect_get_node_n_lines(Map, na1);
125 
126  for (i = 0; i < nodelines; i++) {
127  nline = abs(Vect_get_node_line(Map, na1, i));
128 
129  if (nline == aline)
130  continue;
131  if (Vect_get_line_type(Map, nline) != atype)
132  continue;
133 
134  boxlist_add_sorted(List, nline);
135  }
136  }
137  else {
138  /* select potential duplicates */
139  ABox.E = ABox.W = APoints->x[0];
140  ABox.N = ABox.S = APoints->y[0];
141  ABox.T = ABox.B = APoints->z[0];
142  Vect_select_lines_by_box(Map, &ABox, atype, List);
143  G_debug(3, " %d lines selected by box", List->n_values);
144  }
145 
146  is_dupl = 0;
147 
148  for (i = 0; i < List->n_values; i++) {
149  bline = List->id[i];
150  G_debug(3, " j = %d bline = %d", i, bline);
151 
152  /* compare aline and bline only once */
153  if (aline <= bline)
154  continue;
155 
156  nb1 = nb2 = -1;
157 
158  if (atype & GV_LINES) {
159  Vect_get_line_nodes(Map, bline, &nb1, &nb2);
160  if ((na1 == nb1 && na2 != nb2) || (na1 == nb2 && na2 != nb1))
161  continue;
162  }
163 
164  Vect_read_line(Map, BPoints, BCats, bline);
165  Vect_line_prune(BPoints);
166 
167  /* check for duplicate */
168  if (!Vect_line_check_duplicate(APoints, BPoints, Vect_is_3d(Map)))
169  continue;
170 
171  /* bline is identical to aline */
172  if (!is_dupl) {
173  if (Err) {
174  Vect_write_line(Err, atype, APoints, ACats);
175  }
176  is_dupl = 1;
177  }
178  Vect_delete_line(Map, bline);
179 
180  /* merge categories */
181  nacats_orig = ACats->n_cats;
182 
183  for (c = 0; c < BCats->n_cats; c++)
184  Vect_cat_set(ACats, BCats->field[c], BCats->cat[c]);
185 
186  if (ACats->n_cats > nacats_orig) {
187  G_debug(4, "cats merged: n_cats %d -> %d", nacats_orig,
188  ACats->n_cats);
189  }
190 
191  ndupl++;
192  }
193  if (is_dupl) {
194  Vect_rewrite_line(Map, aline, atype, APoints, ACats);
195  nlines = Vect_get_num_lines(Map);
196  G_debug(3, "nlines = %d\n", nlines);
197  }
198  }
199  G_verbose_message(_("Removed duplicates: %d"), ndupl);
200  Vect_destroy_line_struct(APoints);
201  Vect_destroy_line_struct(BPoints);
204  Vect_destroy_boxlist(List);
205 }
206 
207 /*!
208  \brief Check for duplicate lines
209 
210  Note that lines must be pruned with Vect_line_prune() before passed
211  to Vect_line_check_duplicate(), as done by Vect_remove_duplicates()
212 
213  \param APoints first line geometry
214  \param BPoints second line geometry
215 
216  \return 1 duplicate
217  \return 0 not duplicate
218  */
219 int Vect_line_check_duplicate(const struct line_pnts *APoints,
220  const struct line_pnts *BPoints, int with_z)
221 {
222  int k;
223  int npoints;
224  int forw, backw;
225 
226  if (APoints->n_points != BPoints->n_points)
227  return 0;
228 
229  npoints = APoints->n_points;
230 
231  /* Forward */
232  forw = 1;
233  for (k = 0; k < APoints->n_points; k++) {
234  if (APoints->x[k] != BPoints->x[k] || APoints->y[k] != BPoints->y[k] ||
235  (with_z && APoints->z[k] != BPoints->z[k])) {
236  forw = 0;
237  break;
238  }
239  }
240 
241  /* Backward */
242  backw = 1;
243  for (k = 0; k < APoints->n_points; k++) {
244  if (APoints->x[k] != BPoints->x[npoints - k - 1] ||
245  APoints->y[k] != BPoints->y[npoints - k - 1] ||
246  (with_z && APoints->z[k] != BPoints->z[npoints - k - 1])) {
247  backw = 0;
248  break;
249  }
250  }
251 
252  if (!forw && !backw)
253  return 0;
254 
255  return 1;
256 }
void G_percent(long, long, int)
Print percent complete messages.
Definition: percent.c:61
#define G_realloc(p, n)
Definition: defs/gis.h:96
void void G_verbose_message(const char *,...) __attribute__((format(printf
int G_debug(int, const char *,...) __attribute__((format(printf
void Vect_destroy_line_struct(struct line_pnts *)
Frees all memory associated with a line_pnts structure, including the structure itself.
Definition: line.c:77
off_t Vect_rewrite_line(struct Map_info *, off_t, int, const struct line_pnts *, const struct line_cats *)
Rewrites existing feature (topological level required)
int Vect_reset_boxlist(struct boxlist *)
Reset boxlist structure.
int Vect_get_line_nodes(struct Map_info *, int, int *, int *)
Get line nodes.
Definition: level_two.c:303
plus_t Vect_get_num_lines(struct Map_info *)
Fetch number of features (points, lines, boundaries, centroids) in vector map.
Definition: level_two.c:75
int Vect_cat_set(struct line_cats *, int, int)
Add new field/cat to category structure if doesn't exist yet.
int Vect_get_line_type(struct Map_info *, int)
Get line type.
Definition: level_two.c:254
struct boxlist * Vect_new_boxlist(int)
Creates and initializes a struct boxlist.
void Vect_destroy_boxlist(struct boxlist *)
Frees all memory associated with a struct boxlist, including the struct itself.
void Vect_destroy_cats_struct(struct line_cats *)
Frees all memory associated with line_cats structure, including the struct itself.
int Vect_read_line(struct Map_info *, struct line_pnts *, struct line_cats *, int)
Read vector feature (topological level required)
int Vect_line_alive(struct Map_info *, int)
Check if feature is alive or dead (topological level required)
int Vect_delete_line(struct Map_info *, off_t)
Delete existing feature (topological level required)
off_t Vect_write_line(struct Map_info *, int, const struct line_pnts *, const struct line_cats *)
Writes a new feature.
struct line_pnts * Vect_new_line_struct(void)
Creates and initializes a line_pnts structure.
Definition: line.c:45
int Vect_select_lines_by_box(struct Map_info *, const struct bound_box *, int, struct boxlist *)
Select lines with bounding boxes by box.
Definition: sindex.c:32
int Vect_get_node_n_lines(struct Map_info *, int)
Get number of lines for node.
Definition: level_two.c:380
int Vect_get_node_line(struct Map_info *, int, int)
Get line id for node line index.
Definition: level_two.c:396
int Vect_line_prune(struct line_pnts *)
Remove duplicate points, i.e. zero length segments.
Definition: line.c:279
struct line_cats * Vect_new_cats_struct(void)
Creates and initializes line_cats structure.
int Vect_is_3d(struct Map_info *)
Check if vector map is 3D.
#define GV_LINES
Definition: dig_defines.h:193
#define _(str)
Definition: glocale.h:10
double b
Definition: r_raster.c:39
int Vect_line_check_duplicate(const struct line_pnts *APoints, const struct line_pnts *BPoints, int with_z)
Check for duplicate lines.
void Vect_remove_duplicates(struct Map_info *Map, int type, struct Map_info *Err)
Remove duplicate features from vector map.
Vector map info.
Definition: dig_structs.h:1243
Bounding box.
Definition: dig_structs.h:64
double W
West.
Definition: dig_structs.h:80
double T
Top.
Definition: dig_structs.h:84
double S
South.
Definition: dig_structs.h:72
double N
North.
Definition: dig_structs.h:68
double E
East.
Definition: dig_structs.h:76
double B
Bottom.
Definition: dig_structs.h:88
List of bounding boxes with id.
Definition: dig_structs.h:1723
int * id
Array of ids.
Definition: dig_structs.h:1727
int n_values
Number of items in the list.
Definition: dig_structs.h:1739
Feature category info.
Definition: dig_structs.h:1677
int * field
Array of layers (fields)
Definition: dig_structs.h:1681
int * cat
Array of categories.
Definition: dig_structs.h:1685
int n_cats
Number of categories attached to element.
Definition: dig_structs.h:1689
Feature geometry info - coordinates.
Definition: dig_structs.h:1651
double * y
Array of Y coordinates.
Definition: dig_structs.h:1659
double * x
Array of X coordinates.
Definition: dig_structs.h:1655
int n_points
Number of points.
Definition: dig_structs.h:1667
double * z
Array of Z coordinates.
Definition: dig_structs.h:1663
Definition: manage.h:4