HomePort
httpd_url_parser.c
Go to the documentation of this file.
1 /*
2  * Copyright 2011 Aalborg University. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are
5  * permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright notice, this list of
8  * conditions and the following disclaimer.
9  *
10  * 2. Redistributions in binary form must reproduce the above copyright notice, this list
11  * of conditions and the following disclaimer in the documentation and/or other materials
12  * provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY Aalborg University ''AS IS'' AND ANY EXPRESS OR IMPLIED
15  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
16  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Aalborg University OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
20  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
21  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
22  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  * The views and conclusions contained in the software and documentation are those of the
25  * authors and should not be interpreted as representing official policies, either expressed or implied, of Aalborg University.
26  */
27 
28 #include <string.h>
29 #include <stdlib.h>
30 
31 #include "httpd_url_parser.h"
32 #include "hpd/hpd_shared_api.h"
33 
/**
 * The possible states of the URL parser.
 *
 * The enumerators are exactly the states referenced by up_create(),
 * up_add_chunk() and up_complete() in this file.
 *
 * NOTE(review): the declaration order is not visible in this listing and is
 * presumed; the code in this file only assigns and switches on these values
 * and never relies on their numeric order.
 */
enum up_state {
    S_START,    /**< Initial state; the first char selects S_SEGMENT ('/') or S_PROTOCOL. */
    S_PROTOCOL, /**< Reading the protocol, up to the terminating ':'. */
    S_SLASH1,   /**< Expecting the first '/' after the protocol's ':'. */
    S_SLASH2,   /**< Expecting the second '/' after the protocol's ':'. */
    S_HOST,     /**< Reading the host, up to ':' or '/'. */
    S_PREPORT,  /**< Just after the ':' that follows the host; the next char starts the port. */
    S_PORT,     /**< Reading the port, up to '/'. */
    S_SEGMENT,  /**< Reading a path segment, up to '/' or '?'. */
    S_KEY,      /**< Reading the key of a key/value pair, up to '='. */
    S_VALUE,    /**< Reading the value of a key/value pair, up to '&'. */
    S_ERROR     /**< Error state; entered on invalid input or when a callback fails. */
};
48 
50 struct up {
53  void *data;
54 
55  enum up_state state;
56  char *buffer;
57 
58  size_t protocol;
59  size_t protocol_l;
60  size_t host;
61  size_t host_l;
62  size_t port;
63  size_t port_l;
64  size_t path;
65  size_t path_l;
66  size_t key_value;
67  size_t end;
68 
69  size_t last_key;
70  size_t last_key_l;
71  size_t last_value;
72  size_t last_value_l;
73  size_t last_path;
74  size_t last_path_l;
75 
76  size_t parser;
77  size_t insert;
78 };
79 
96 hpd_error_t up_create(struct up **instance, struct up_settings *settings, const hpd_module_t *context, void *data)
97 {
98  if (!context) return HPD_E_NULL;
99  if (!instance || !settings) HPD_LOG_RETURN_E_NULL(context);
100 
101  (*instance) = malloc(sizeof(struct up));
102  if (!(*instance)) return HPD_E_ALLOC;
103 
104  (*instance)->context = context;
105 
106  // Store settings
107  (*instance)->settings = malloc(sizeof(struct up_settings));
108  if (!(*instance)->settings) {
109  free(*instance);
110  HPD_LOG_RETURN_E_ALLOC(context);
111  }
112  memcpy((*instance)->settings, settings, sizeof(struct up_settings));
113 
114  // Set state
115  (*instance)->state = S_START;
116  (*instance)->buffer = NULL;
117 
118  // Set pointers
119  (*instance)->protocol = 0;
120  (*instance)->protocol_l = 0;
121  (*instance)->host = 0;
122  (*instance)->host_l = 0;
123  (*instance)->port = 0;
124  (*instance)->port_l = 0;
125  (*instance)->path = 0;
126  (*instance)->path_l = 0;
127  (*instance)->key_value = 0;
128  (*instance)->end = 0;
129  (*instance)->last_key = 0;
130  (*instance)->last_key_l = 0;
131  (*instance)->last_value = 0;
132  (*instance)->last_value_l = 0;
133  (*instance)->last_path = 0;
134  (*instance)->last_path_l = 0;
135  (*instance)->parser = 0;
136  (*instance)->insert = 0;
137 
138  // Store data
139  (*instance)->data = data;
140 
141  return HPD_E_SUCCESS;
142 }
143 
153 {
154  if (!instance) return HPD_E_NULL;
155 
156  if(instance->settings) free(instance->settings);
157  if(instance->buffer) free(instance->buffer);
158  free(instance);
159 
160  return HPD_E_SUCCESS;
161 }
162 
/**
 * Check if a given char is valid in a URL.
 *
 * Accepted are ASCII digits and letters plus the punctuation listed in the
 * table below (which includes '%', so percent-encoded input passes).
 *
 * @param c  The char to check.
 *
 * @return 1 if @a c is accepted, 0 otherwise.
 */
static int up_isLegalURLChar(char c)
{
    static const char punctuation[] = "-._~:/?#[]@!$&'()*+,;=%";

    const int is_digit = ('0' <= c && c <= '9');
    const int is_upper = ('A' <= c && c <= 'Z');
    const int is_lower = ('a' <= c && c <= 'z');

    if (is_digit || is_upper || is_lower)
        return 1;

    // Search only the listed characters, not the string terminator, so
    // that '\0' is rejected.
    return memchr(punctuation, c, sizeof punctuation - 1) != NULL;
}
185 
/*
 * Invoke the optional callback settings->X, passing instance->data followed
 * by any extra arguments. Nothing happens when the callback is NULL.
 *
 * If the callback returns a non-zero error code, the parser is put in
 * S_ERROR and the code is returned FROM THE ENCLOSING FUNCTION. The macro
 * therefore requires `settings` and `instance` to be in scope and may only
 * be used in functions returning hpd_error_t.
 */
#define UP_CALL(X, ...) do { \
    if (settings->X != NULL) { \
        hpd_error_t up_call_rc = settings->X(instance->data, ##__VA_ARGS__); \
        if (up_call_rc) { \
            instance->state = S_ERROR; \
            return up_call_rc; \
        } \
    } \
} while(0)
195 
209 hpd_error_t up_add_chunk(struct up *instance, const char *chunk, size_t len)
210 {
211  if (!instance || !chunk) return HPD_E_NULL;
212 
213  const struct up_settings *settings = instance->settings;
214  char *buffer;
215 
216  // Add chunk to buffer
217  instance->end += len;
218  buffer = realloc(instance->buffer, instance->end * sizeof(char));
219  if (!buffer) {
220  instance->state = S_ERROR;
221  HPD_LOG_RETURN_E_ALLOC(instance->context);
222  return HPD_E_ALLOC;
223  }
224  instance->buffer = buffer;
225  memcpy(&buffer[instance->insert], chunk, len);
226  instance->insert += len;
227 
228  // Parse the new chunk in buffer
229  for(; instance->parser < instance->end; instance->parser++)
230  {
231  char c = instance->buffer[instance->parser];
232 
233  // Check if it is a valid URL char. If not, print an error message
234  // and set parser to error state
235  if (!up_isLegalURLChar(c))
236  {
237  instance->state = S_ERROR;
238  HPD_LOG_RETURN(instance->context, HPD_E_ARGUMENT, "Invalid character ('%c') in URL.", c);
239  }
240 
241  switch(instance->state)
242  {
243  case S_START:
244  UP_CALL(on_begin);
245  if (c == '/') {
246  instance->state = S_SEGMENT;
247  instance->path = instance->parser;
248  instance->path_l++;
249  instance->last_path = instance->parser + 1;
250  } else {
251  instance->state = S_PROTOCOL;
252  instance->protocol = instance->parser;
253  instance->protocol_l++;
254  }
255  break;
256  case S_PROTOCOL:
257  if (c == ':') {
258  UP_CALL(on_protocol, &instance->buffer[instance->protocol], instance->parser - instance->protocol);
259  instance->state = S_SLASH1;
260  } else {
261  instance->protocol_l++;
262  }
263  break;
264  case S_SLASH1:
265  if(c == '/') {
266  instance->state = S_SLASH2;
267  } else {
268  instance->state = S_ERROR;
269  HPD_LOG_RETURN(instance->context, HPD_E_ARGUMENT, "URL Parse error.");
270  }
271  break;
272  case S_SLASH2:
273  if(c == '/') {
274  instance->state = S_HOST;
275  instance->host = instance->parser + 1;
276  } else {
277  instance->state = S_ERROR;
278  HPD_LOG_RETURN(instance->context, HPD_E_ARGUMENT, "URL Parse error.");
279  }
280  break;
281  case S_HOST:
282  if(c == ':' || c == '/') {
283  UP_CALL(on_host, &instance->buffer[instance->host], instance->host_l);
284  if (c == ':') instance->state = S_PREPORT;
285  if (c == '/') {
286  instance->state = S_SEGMENT;
287  instance->path = instance->parser;
288  instance->path_l++;
289  instance->last_path = instance->parser + 1;
290  }
291  break;
292  } else {
293  instance->host_l++;
294  }
295  break;
296  case S_PREPORT:
297  if(c == '/') {
298  instance->state = S_ERROR;
299  HPD_LOG_RETURN(instance->context, HPD_E_ARGUMENT, "URL Parse error.");
300  }
301  instance->state = S_PORT;
302  instance->port = instance->parser;
303  instance->port_l++;
304  break;
305  case S_PORT:
306  if (c == '/')
307  {
308  UP_CALL(on_port, &instance->buffer[instance->port], instance->port_l);
309  instance->state = S_SEGMENT;
310  instance->path = instance->parser;
311  instance->path_l++;
312  instance->last_path = instance->parser + 1;
313  } else {
314  instance->port_l++;
315  }
316  break;
317  case S_SEGMENT:
318  if (c == '/' || c == '?') {
319  UP_CALL(on_path_segment, &instance->buffer[instance->last_path], instance->last_path_l);
320  instance->last_path = instance->parser + 1;
321  instance->last_path_l = 0;
322  } else {
323  instance->last_path_l++;
324  }
325  if (c == '?') {
326  UP_CALL(on_path_complete, &instance->buffer[instance->path], instance->path_l);
327  instance->state = S_KEY;
328  instance->key_value = instance->parser + 1;
329  instance->last_key = instance->parser + 1;
330  } else {
331  instance->path_l++;
332  }
333  break;
334  case S_KEY:
335  if (c == '=') {
336  instance->state = S_VALUE;
337  instance->last_value = instance->parser + 1;
338  instance->last_value_l = 0;
339  } else {
340  instance->last_key_l++;
341  }
342  break;
343  case S_VALUE:
344  if (c == '&') {
345  UP_CALL (on_key_value, &instance->buffer[instance->last_key], instance->last_key_l, &instance->buffer[instance->last_value], instance->last_value_l);
346  instance->state = S_KEY;
347  instance->last_key = instance->parser + 1;
348  instance->last_key_l = 0;
349  } else {
350  instance->last_value_l++;
351  }
352  break;
353 
354  case S_ERROR:
355  HPD_LOG_RETURN(instance->context, HPD_E_STATE, "URL Parser in error state.");
356  default:
357  HPD_LOG_RETURN(instance->context, HPD_E_STATE, "Unexpected state.");
358  }
359  }
360 
361  return HPD_E_SUCCESS;
362 }
363 
378 {
379  if (!instance) return HPD_E_NULL;
380 
381  const struct up_settings *settings = instance->settings;
382 
383  // Check if we need to send a last chunk and that we are in a valid
384  // end state
385  switch(instance->state)
386  {
387  case S_SEGMENT:
388  UP_CALL(on_path_segment, &instance->buffer[instance->last_path], instance->last_path_l);
389  UP_CALL (on_path_complete, &instance->buffer[instance->path], instance->path_l);
390  break;
391  case S_VALUE:
392  UP_CALL (on_key_value, &instance->buffer[instance->last_key], instance->last_key_l, &instance->buffer[instance->last_value], instance->last_value_l);
393  break;
394  case S_HOST:
395  UP_CALL(on_host, &instance->buffer[instance->host], instance->host_l);
396  break;
397  case S_PORT:
398  UP_CALL(on_port, &instance->buffer[instance->port], instance->port_l);
399  break;
400  default:
401  HPD_LOG_RETURN(instance->context, HPD_E_STATE, "Unexpected state.");
402  }
403 
404  UP_CALL(on_complete, instance->buffer, instance->parser);
405  return HPD_E_SUCCESS;
406 }
407 
Used for ignoring the : before a port.
size_t path_l
Path length.
#define HPD_LOG_RETURN(CONTEXT, E, FMT,...)
struct up * instance
settings on_protocol
size_t last_key
Last seen key start.
size_t last_key_l
Last seen key length.
Used for ignoring the second slash after protocol.
size_t last_value
Last seen value start.
A URL parser instance.
up_state
The possible states of the URL Parser.
#define UP_CALL(X,...)
size_t protocol_l
Protocol length.
size_t host_l
Host length.
The parser is parsing a path segment of a URL.
free(data.url)
enum up_state state
State.
hpd_error_t up_add_chunk(struct up *instance, const char *chunk, size_t len)
Parse a chunk of a URL.
struct hp_settings settings
const hpd_module_t * context
size_t key_value
Arguments start.
struct up_settings * settings
Settings.
size_t path
Path start.
char * buffer
URL Buffer.
size_t protocol
Protocol start.
settings on_begin
settings on_path_segment
hpd_error_t up_destroy(struct up *instance)
Destroy URL parser instance.
The parser is parsing the port of a URL.
size_t port_l
Port length.
enum hpd_error hpd_error_t
Definition: hpd_types.h:167
#define HPD_LOG_RETURN_E_ALLOC(CONTEXT)
up_string_cb on_complete
#define HPD_LOG_RETURN_E_NULL(CONTEXT)
Here the parser is receiving the first part of a key/value pair.
settings on_key_value
size_t insert
Location to insert new chunks.
static int up_isLegalURLChar(char c)
Check if a given char is valid in a URL.
struct data data
The parser goes here after receiving a key, and is now expecting a value. It may go back to key if an...
The parser is parsing the host of a URL.
size_t port
Port start.
hpd_error_t up_complete(struct up *instance)
Informs the parser that the URL is complete.
size_t last_path
Last seen path segment start.
settings on_path_complete
size_t host
Host start.
Used for ignoring the first slash after protocol.
settings on_port
Settings struct for the URL Parser.
size_t end
Length of full URL.
The initial state. From here the parser can either go to S_PROTOCOL, S_SEGMENT or S_ERROR...
The error state. The parser will go here if the input char is not valid in an URL, or if it received an invalid char at some point.
hpd_error_t up_create(struct up **instance, struct up_settings *settings, const hpd_module_t *context, void *data)
Create URL parser instance.
void * data
User data.
size_t parser
Location of parser.
size_t last_value_l
Last seen value length.
settings on_host
In this state, the parser is parsing the protocol.
size_t last_path_l
Last seen path segment length.