GRASS GIS 8 Programmer's Manual  8.5.0dev(2024)-d6dec75dd4
gis/token.c
Go to the documentation of this file.
1 /*!
2  \file lib/gis/token.c
3 
4  \brief GIS Library - Tokenize strings
5 
6  (C) 2001-2008, 2011-2013 by the GRASS Development Team
7 
8  This program is free software under the GNU General Public License
9  (>=v2). Read the file COPYING that comes with GRASS for details.
10 
11  \author USA CERL and others
12  */
13 
14 #include <stdlib.h>
15 #include <string.h>
16 #include <grass/gis.h>
17 #include <grass/glocale.h>
18 
19 static char **tokenize(const char *, const char *, const char *);
20 
/*!
  \brief Tokenize string

  Splits a copy of <em>buf</em> into tokens. Every character that
  occurs in <em>delim</em> acts as a separator, so <em>delim</em> may
  list more than one separator character. In the copy each separator
  is replaced by a terminating '\0' (NUL) and a pointer to the start
  of each token is stored in the returned array; <em>buf</em> itself
  is not modified. <em>buf</em> must not contain a new line (\n).

  G_free_tokens() must be called when finished with the tokens to
  release memory.

  Example:
  \code
  char **tokens;
  int ntok, i;
  tokens = G_tokenize(buf, " |:,");
  ntok = G_number_of_tokens(tokens);
  for (i=0; i < ntok; i++) {
     G_debug(1, "%d=[%s]", i, tokens[i]);
  }
  G_free_tokens(tokens);
  \endcode

  \param buf input string
  \param delim string of delimiter characters

  \return NULL-terminated array of pointers to the tokens
 */
char **G_tokenize(const char *buf, const char *delim)
{
    /* plain tokenizing: no character marks token borders */
    const char *no_valchar = NULL;

    return tokenize(buf, delim, no_valchar);
}
51 
/*!
  \brief Tokenize string, honouring a token-border character

  This function behaves similarly to G_tokenize().

  In addition it takes <em>valchar</em>, whose first character marks
  the borders of a token. Between two border characters the
  characters of <em>delim</em> are not treated as separators. The
  border characters themselves are not copied into the token.

  Example:
  \code
  char *str = "a,'b,c',d";

  char **tokens1, **tokens2;
  int ntok1, ntok2;

  tokens1 = G_tokenize(str, ",");
  ntok1 = G_number_of_tokens(tokens1);

  tokens2 = G_tokenize2(str, ",", "'");
  ntok2 = G_number_of_tokens(tokens2);
  \endcode

  In this example <em>ntok1</em> will be 4, <em>ntok2</em> only 3,
  i.e. { "a", "b,c", "d" }

  \param buf input string
  \param delim string of delimiter characters
  \param valchar character defining border of token

  \return NULL-terminated array of pointers to the tokens
 */
char **G_tokenize2(const char *buf, const char *delim, const char *valchar)
{
    char **result = tokenize(buf, delim, valchar);

    return result;
}
87 
/*!
  \brief Split a string into tokens (implementation shared by
  G_tokenize() and G_tokenize2())

  The input is copied with G_store() and the tokens are built in place
  inside that single copy: the first array entry points at the start
  of the copy and every further entry points into the same buffer.

  Rules implemented by the state machine:
  - outside quotes, any character found in <em>delim</em> ends the
    current token and starts a new (possibly empty) one;
  - the quote character is the first character of <em>inchar</em>;
    quote characters that open or close a quoted section are dropped;
  - inside a quoted section delimiters are ordinary characters;
  - a quote character directly following a closing quote re-enters the
    quoted section and is kept as a literal quote character;
  - after a closing quote only a quote character, a delimiter or the
    end of the string is accepted; anything else, or an unterminated
    quoted section, triggers a "parse error" warning and the tokens
    collected so far are returned.

  \param buf input string (not modified)
  \param delim string of delimiter characters
  \param inchar string whose first character is the quote character;
  NULL or an empty string disables quote handling

  \return NULL-terminated array of pointers to the tokens
 */
static char **tokenize(const char *buf, const char *delim, const char *inchar)
{
    enum { S_START, S_IN_QUOTE, S_AFTER_QUOTE };
    enum { A_NO_OP, A_ADD_CHAR, A_NEW_FIELD, A_END_RECORD, A_ERROR };

    /* Quoting is tracked with an explicit flag rather than an in-band
       sentinel value: a sentinel such as -1 can collide with a real
       (signed) char, and an empty inchar would make the terminating
       '\0' look like an opening quote and run the scan past the end
       of the buffer. */
    const int use_quote = (inchar != NULL && *inchar != '\0');
    const char quo = use_quote ? *inchar : '\0';
    char **tokens;
    const char *p; /* read position in the copy */
    char *q;       /* write position in the copy, never ahead of p */
    int state = S_START;
    int i = 0;

    /* do not modify buf, make a copy */
    p = q = G_store(buf);

    /* room for the first token plus the terminating NULL */
    tokens = G_malloc(2 * sizeof(char *));

    /* always one token */
    tokens[i++] = q;

    for (;; p++) {
        const char c = *p;
        /* never true for '\0' because quo is non-NUL when use_quote is set */
        const int is_quote = use_quote && c == quo;
        int action = A_NO_OP;

        switch (state) {
        case S_START:
            if (is_quote)
                state = S_IN_QUOTE;
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, c))
                action = A_NEW_FIELD;
            else
                action = A_ADD_CHAR;
            break;
        case S_IN_QUOTE:
            if (is_quote)
                state = S_AFTER_QUOTE;
            else if (c == '\0')
                action = A_ERROR; /* quoted section never closed */
            else
                action = A_ADD_CHAR;
            break;
        case S_AFTER_QUOTE:
            if (is_quote) {
                /* quote right after a closing quote: literal quote */
                state = S_IN_QUOTE;
                action = A_ADD_CHAR;
            }
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, c)) {
                state = S_START;
                action = A_NEW_FIELD;
            }
            else
                action = A_ERROR; /* text glued to a closing quote */
            break;
        }

        switch (action) {
        case A_NO_OP:
            break;
        case A_ADD_CHAR:
            *q++ = c;
            break;
        case A_NEW_FIELD:
            /* terminate the current token, start the next one */
            *q++ = '\0';
            /* i tokens stored: make room for one more plus the NULL */
            tokens = G_realloc(tokens, (i + 2) * sizeof(char *));
            tokens[i++] = q;
            break;
        case A_ERROR:
            G_warning(_("parse error"));
            /* fall through */
        case A_END_RECORD:
            *q = '\0';
            tokens[i] = NULL;
            return tokens;
        }
    }
}
170 
/*!
  \brief Return number of tokens

  Counts the entries of <em>tokens</em> up to, but not including, the
  terminating NULL pointer.

  \param tokens NULL-terminated array of tokens; a NULL pointer is
  treated as an empty array

  \return number of tokens
 */

int G_number_of_tokens(char **tokens)
{
    int n = 0;

    /* no array at all: nothing to count */
    if (tokens == NULL)
        return 0;

    while (tokens[n] != NULL)
        n++;

    return n;
}
189 
/*!
  \brief Free memory allocated to tokens.

  <b>Note:</b> <i>G_free_tokens()</i> must be called when finished with
  tokens to release memory.

  Releases the string storage referenced by the first entry and then
  the pointer array itself. Only the first entry is freed because the
  tokenizer in this file keeps all tokens inside one buffer whose
  start is <em>tokens[0]</em>. A NULL <em>tokens</em> pointer is
  ignored.

  \param[in] tokens NULL-terminated array of tokens to release
 */
void G_free_tokens(char **tokens)
{
    /* nothing was allocated, nothing to release */
    if (tokens == NULL)
        return;

    if (tokens[0] != NULL)
        G_free(tokens[0]);
    G_free(tokens);
}
#define NULL
Definition: ccmath.h:32
void G_free(void *)
Free allocated memory.
Definition: gis/alloc.c:150
#define G_realloc(p, n)
Definition: defs/gis.h:96
void G_warning(const char *,...) __attribute__((format(printf
#define G_malloc(n)
Definition: defs/gis.h:94
char * G_store(const char *)
Copy string to allocated memory.
Definition: strings.c:87
void G_free_tokens(char **tokens)
Free memory allocated to tokens.
Definition: gis/token.c:198
int G_number_of_tokens(char **tokens)
Return number of tokens.
Definition: gis/token.c:179
char ** G_tokenize2(const char *buf, const char *delim, const char *valchar)
Tokenize string.
Definition: gis/token.c:83
char ** G_tokenize(const char *buf, const char *delim)
Tokenize string.
Definition: gis/token.c:47
#define _(str)
Definition: glocale.h:10
struct state state
Definition: parser.c:103