From fe0954cd629c17dc03eba883ba7450b87910f1c5 Mon Sep 17 00:00:00 2001 From: Sebastien Lorquet Date: Fri, 26 Apr 2019 13:08:38 -0600 Subject: [PATCH] netutils/netlib/netlib_parseurl.c: Add a more flexible version of netlib_parsehttpurl. This one can parse any URL, and is extensible so future improvements keep the same API. --- include/netutils/netlib.h | 26 ++++ netutils/netlib/Kconfig | 8 ++ netutils/netlib/Makefile | 5 + netutils/netlib/netlib_parseurl.c | 199 ++++++++++++++++++++++++++++++ 4 files changed, 238 insertions(+) create mode 100644 netutils/netlib/netlib_parseurl.c diff --git a/include/netutils/netlib.h b/include/netutils/netlib.h index c390cf75b..a9cd9b109 100644 --- a/include/netutils/netlib.h +++ b/include/netutils/netlib.h @@ -185,6 +185,28 @@ struct netlib_ipv6_route_s #endif #endif /* HAVE_ROUTE_PROCFS */ +#ifdef CONFIG_NETUTILS_NETLIB_GENERICURLPARSER +/* Output descriptor for netlib_parseurl(). Each string member is a caller-provided buffer paired with its size in bytes (terminator included); the parser writes through the pointers and never allocates. */ +struct url_s +{ + FAR char *scheme; /* Receives the text found before the first ':' */ + int schemelen; /* Size of the scheme buffer */ + FAR char *user; /* Reserved: not filled in by netlib_parseurl() yet */ + int userlen; /* Size of the user buffer */ + FAR char *password; /* Reserved: not filled in by netlib_parseurl() yet */ + int passwordlen; /* Size of the password buffer */ + FAR char *host; /* Receives the host name that follows "://" */ + int hostlen; /* Size of the host buffer */ + int port; /* Written only when the host is followed by ':' */ + FAR char *path; /* Receives the rest of the URL, starting with '/' */ + int pathlen; /* Size of the path buffer */ + FAR char *parameters; /* Reserved: parameters are not split from the path */ + int parameterslen; /* Size of the parameters buffer */ + FAR char *bookmark; /* Reserved: the bookmark is not split from the path */ + int bookmarklen; /* Size of the bookmark buffer */ +}; +#endif + + /**************************************************************************** * Public Data ****************************************************************************/ @@ -323,6 +345,10 @@ int netlib_parsehttpurl(FAR const char *url, uint16_t *port, FAR char *hostname, int hostlen, FAR char *filename, int namelen); +#ifdef CONFIG_NETUTILS_NETLIB_GENERICURLPARSER +/* Split the URL string 'str' into the buffers described by 'url'. Returns OK, or a negated errno value (-EINVAL, -E2BIG) on failure. */ +int netlib_parseurl(FAR const char *str, FAR struct url_s *url); +#endif + /* Generic server logic */ int netlib_listenon(uint16_t portno); diff --git a/netutils/netlib/Kconfig b/netutils/netlib/Kconfig index 72414977c..20f157f8f 100644 --- a/netutils/netlib/Kconfig +++ b/netutils/netlib/Kconfig @@ -12,4 +12,12 @@ config NETUTILS_NETLIB Enable support for the network support library. 
if NETUTILS_NETLIB + +config NETUTILS_NETLIB_GENERICURLPARSER + bool "Build the generic URL parser" + default n + ---help--- + If this option is selected, a generic URL parser + is included in the build. It is more flexible than + the basic netlib_parsehttpurl routine. endif diff --git a/netutils/netlib/Makefile b/netutils/netlib/Makefile index f2ec67973..446b0703a 100644 --- a/netutils/netlib/Makefile +++ b/netutils/netlib/Makefile @@ -40,6 +40,11 @@ CSRCS = netlib_ipv4addrconv.c netlib_ethaddrconv.c netlib_parsehttpurl.c CSRCS += netlib_setifstatus.c netlib_getifstatus.c +# Generic URL parsing support +ifeq ($(CONFIG_NETUTILS_NETLIB_GENERICURLPARSER),y) +CSRCS += netlib_parseurl.c +endif + # IP address support ifeq ($(CONFIG_NET_IPv4),y) diff --git a/netutils/netlib/netlib_parseurl.c b/netutils/netlib/netlib_parseurl.c new file mode 100644 index 000000000..ee3299ad9 --- /dev/null +++ b/netutils/netlib/netlib_parseurl.c @@ -0,0 +1,199 @@ +/**************************************************************************** + * netutils/netlib/netlib_parseurl.c + * + * Copyright (C) 2019 Gregory Nutt. All rights reserved. + * Author: Sebastien Lorquet + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name NuttX nor the names of its contributors may be + * used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include + +#include +#include +#include + +#include "netutils/netlib.h" + +/**************************************************************************** + * Public Functions + ****************************************************************************/ + +/**************************************************************************** + * Name: netlib_parseurl + * + * Description: + * Parse a URL, not only HTTP ones. The parsing is according to this rule: + * SCHEME :// HOST [: PORT] / PATH + * - scheme is everything before the first colon + * - scheme must be followed by :// + * - host is everything until colon, slash or space + * - port is optional, parsed only if host ends with colon + * - path is everything after the host. + * This is noticeably simpler than the official URL parsing method, since + * - it does not take into account the user:pass@ part that can be present + * before the host. 
Support of these fields is planned in the url_s + * structure, but it is not parsed yet/ + * - it does not separate the URL parameters nor the bookmark + * Note: see here for the documentation of a complete URL parsing routine: + * https://www.php.net/manual/fr/function.parse-url.php + * + ****************************************************************************/ + +int netlib_parseurl(FAR const char *str, FAR struct url_s *url) +{ + FAR const char *src = str; + FAR char *dest; + int bytesleft; + int ret = OK; + + /* extract the protocol field, a set of a-z letters */ + + dest = url->scheme; + bytesleft = url->schemelen; + + while (*src != '\0' && *src != ':') + { + /* Make sure that there is space for another character in the + * scheme (reserving space for the null terminator). + */ + + if (bytesleft > 1) + { + /* Copy the byte */ + + *dest++ = *src++; + bytesleft--; + } + else + { + /* Note the error, but continue parsing until the end of the + * hostname + */ + + src++; + ret = -E2BIG; + } + } + + *dest = '\0'; + + /* Parse and skip the scheme separator */ + + if (*src != ':') + { + ret = -EINVAL; + } + + src++; + + if (*src != '/') + { + ret = -EINVAL; + } + + src++; + + if (*src != '/') + { + ret = -EINVAL; + } + + src++; + + /* Concatenate the hostname following http:// and up to the termnator */ + + dest = url->host; + bytesleft = url->hostlen; + + while (*src != '\0' && *src != '/' && *src != ' ' && *src != ':') + { + /* Make sure that there is space for another character in the + * hostname (reserving space for the null terminator). 
+ */ + + if (bytesleft > 1) + { + /* Copy the byte */ + + *dest++ = *src++; + bytesleft--; + } + else + { + /* Note the error, but continue parsing until the end of the + * hostname + */ + + src++; + ret = -E2BIG; + } + } + + *dest = '\0'; + + /* Check if the hostname is following by a port number */ + + if (*src == ':') + { + uint16_t accum = 0; + src++; /* Skip over the colon */ + + while (*src >= '0' && *src <= '9') + { + accum = 10*accum + *src - '0'; + src++; + } + + url->port = accum; + } + + /* Make sure the file name starts with exactly one '/' */ + + dest = url->path; + bytesleft = url->pathlen; + + while (*src == '/') + { + src++; + } + + *dest++ = '/'; + bytesleft--; + + /* The copy the rest of the file name to the user buffer */ + + strncpy(dest, src, bytesleft); + url->path[bytesleft - 1] = '\0'; + return ret; +}