nuttx-apps/netutils/netlib/netlib_parseurl.c

/****************************************************************************
 * netutils/netlib/netlib_parseurl.c
 *
 *   Copyright (C) 2019 Gregory Nutt. All rights reserved.
 *   Author: Sebastien Lorquet <sebastien@lorquet.fr>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name NuttX nor the names of its contributors may be
 *    used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 ****************************************************************************/

/****************************************************************************
 * Included Files
 ****************************************************************************/

#include <nuttx/config.h>

#include <stdint.h>
#include <string.h>
#include <errno.h>

#include "netutils/netlib.h"

/****************************************************************************
 * Public Functions
 ****************************************************************************/

/****************************************************************************
 * Name: netlib_parseurl
 *
 * Description:
 *   Parse a URL, not only HTTP ones. The parsing follows this rule:
 *
 *     SCHEME :// HOST [: PORT] / PATH
 *
 *   - scheme is everything before the first colon
 *   - scheme must be followed by ://
 *   - host is everything until a colon, a slash or a space
 *   - port is optional, parsed only if the host is followed by a colon
 *   - path is everything after the host (and port); it is always stored
 *     with exactly one leading '/'.
 *
 *   This is noticeably simpler than the official URL parsing method, since
 *   - it does not take into account the user:pass@ part that can be present
 *     before the host. Support of these fields is planned in the url_s
 *     structure, but it is not parsed yet.
 *   - it does not separate the URL parameters nor the bookmark
 *
 *   Parsing is best effort: when a problem is detected the error is noted
 *   and parsing continues, so that the output fields are always left
 *   NUL-terminated (provided their buffers are not zero-sized).
 *
 *   Note: see here for the documentation of a complete URL parsing routine:
 *   https://www.php.net/manual/fr/function.parse-url.php
 *
 * Input Parameters:
 *   str - The NUL-terminated URL string to parse.
 *   url - Caller-provided structure. On entry, scheme/host/path must point
 *         to writable buffers whose sizes in bytes are schemelen/hostlen/
 *         pathlen. On return these buffers hold the parsed fields. The
 *         port field is written only when the host is followed by ':' and
 *         the number fits in 16 bits; otherwise it is left untouched.
 *
 * Returned Value:
 *   OK on success; a negated errno value otherwise. If several problems
 *   are found, the code of the last one detected is returned:
 *
 *   -EINVAL - str or url is NULL, the "://" separator is missing or
 *             incomplete, or the port number exceeds 65535.
 *   -E2BIG  - the scheme, host or path did not fit in its buffer and was
 *             truncated.
 *
 ****************************************************************************/

int netlib_parseurl(FAR const char *str, FAR struct url_s *url)
{
  FAR const char *src = str;
  FAR char *dest;
  size_t len;
  int bytesleft;
  int ret = OK;

  if (str == NULL || url == NULL)
    {
      return -EINVAL;
    }

  /* Extract the scheme: everything up to the first colon */

  dest      = url->scheme;
  bytesleft = url->schemelen;

  while (*src != '\0' && *src != ':')
    {
      /* Make sure that there is space for another character in the
       * scheme (reserving space for the null terminator).
       */

      if (bytesleft > 1)
        {
          *dest++ = *src;
          bytesleft--;
        }
      else
        {
          /* Note the error, but continue parsing until the end of the
           * scheme
           */

          ret = -E2BIG;
        }

      src++;
    }

  /* A zero-sized buffer has no room even for the terminator */

  if (bytesleft > 0)
    {
      *dest = '\0';
    }

  /* Parse and skip the "://" scheme separator.  Only step over a
   * character that actually matches, so that a short or malformed string
   * can never move src beyond its NUL terminator.
   */

  if (*src == ':')
    {
      src++;
    }
  else
    {
      ret = -EINVAL;
    }

  if (*src == '/')
    {
      src++;
    }
  else
    {
      ret = -EINVAL;
    }

  if (*src == '/')
    {
      src++;
    }
  else
    {
      ret = -EINVAL;
    }

  /* Copy the hostname following the separator, up to its terminator */

  dest      = url->host;
  bytesleft = url->hostlen;

  while (*src != '\0' && *src != '/' && *src != ' ' && *src != ':')
    {
      /* Make sure that there is space for another character in the
       * hostname (reserving space for the null terminator).
       */

      if (bytesleft > 1)
        {
          *dest++ = *src;
          bytesleft--;
        }
      else
        {
          /* Note the error, but continue parsing until the end of the
           * hostname
           */

          ret = -E2BIG;
        }

      src++;
    }

  if (bytesleft > 0)
    {
      *dest = '\0';
    }

  /* Check if the hostname is followed by a port number */

  if (*src == ':')
    {
      /* Accumulate in a wider type so that an out-of-range port can be
       * detected instead of silently wrapping around.
       */

      uint32_t accum = 0;

      src++; /* Skip over the colon */

      while (*src >= '0' && *src <= '9')
        {
          /* Once the value is out of range stop accumulating (it stays
           * out of range), but keep consuming the digits.
           */

          if (accum <= UINT16_MAX)
            {
              accum = 10 * accum + (uint32_t)(*src - '0');
            }

          src++;
        }

      if (accum > UINT16_MAX)
        {
          ret = -EINVAL;
        }
      else
        {
          url->port = (uint16_t)accum;
        }
    }

  /* Make sure the file name starts with exactly one '/' */

  dest      = url->path;
  bytesleft = url->pathlen;

  while (*src == '/')
    {
      src++;
    }

  if (bytesleft < 2)
    {
      /* There is no room for the leading '/' plus the terminator */

      if (bytesleft == 1)
        {
          *dest = '\0';
        }

      return -E2BIG;
    }

  *dest++ = '/';
  bytesleft--;

  /* Then copy the rest of the file name to the user buffer, keeping the
   * last byte of the buffer for the null terminator.
   */

  len = strlen(src);
  if (len >= (size_t)bytesleft)
    {
      len = (size_t)bytesleft - 1;
      ret = -E2BIG;
    }

  memcpy(dest, src, len);
  dest[len] = '\0';
  return ret;
}
netutils/netlib/netlib_parseurl.c: Add a more flexible version of netlib_parsehttpurl. This one can parse any URL, and is extensible so future improvements keep the same API. 2019-04-26 21:08:38 +02:00			`/****************************************************************************`
			`* netutils/netlib/netlib_parseurl.c`
			`*`
			`* Copyright (C) 2019 Gregory Nutt. All rights reserved.`
			`* Author: Sebastien Lorquet <sebastien@lorquet.fr>`
			`*`
			`* Redistribution and use in source and binary forms, with or without`
			`* modification, are permitted provided that the following conditions`
			`* are met:`
			`*`
			`* 1. Redistributions of source code must retain the above copyright`
			`* notice, this list of conditions and the following disclaimer.`
			`* 2. Redistributions in binary form must reproduce the above copyright`
			`* notice, this list of conditions and the following disclaimer in`
			`* the documentation and/or other materials provided with the`
			`* distribution.`
			`* 3. Neither the name NuttX nor the names of its contributors may be`
			`* used to endorse or promote products derived from this software`
			`* without specific prior written permission.`
			`*`
			`* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS`
			`* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE`
			`* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,`
			`* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,`
			`* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS`
			`* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED`
			`* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT`
			`* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN`
			`* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`* POSSIBILITY OF SUCH DAMAGE.`
			`*`
			`****************************************************************************/`

			`/****************************************************************************`
			`* Included Files`
			`****************************************************************************/`

			`#include <nuttx/config.h>`

			`#include <stdint.h>`
			`#include <string.h>`
			`#include <errno.h>`

			`#include "netutils/netlib.h"`

			`/****************************************************************************`
			`* Public Functions`
			`****************************************************************************/`

			`/****************************************************************************`
			`* Name: netlib_parseurl`
			`*`
			`* Description:`
			`* Parse an URL, not only HTTP ones. The parsing is according to this rule:`
			`* SCHEME :// HOST [: PORT] / PATH`
			`* - scheme is everything before the first colon`
			`* - scheme must be followed by ://`
			`* - host is everything until colon or slash`
			`* - port is optional, parsed only if host ends with colon`
			`* - path is everything after the host.`
			`* This is noticeably simpler that the official URL parsing method, since`
			`* - it does not take into account the user:pass@ part that can be present`
			`* before the host. Support of these fields is planned in the url_s`
			`* structure, but it is not parsed yet/`
			`* - it does not separate the URL parameters nor the bookmark`
			`* Note: see here for the documentation of a complete URL parsing routine:`
			`* https://www.php.net/manual/fr/function.parse-url.php`
			`*`
			`****************************************************************************/`

			`int netlib_parseurl(FAR const char str, FAR struct url_s url)`
			`{`
			`FAR const char *src = str;`
			`FAR char *dest;`
			`int bytesleft;`
			`int ret = OK;`

			`/* extract the protocol field, a set of a-z letters */`

			`dest = url->scheme;`
			`bytesleft = url->schemelen;`

			`while (src != '\0' && src != ':')`
			`{`
			`/* Make sure that there is space for another character in the`
			`* scheme (reserving space for the null terminator).`
			`*/`

			`if (bytesleft > 1)`
			`{`
			`/* Copy the byte */`

			`dest++ = src++;`
			`bytesleft--;`
			`}`
			`else`
			`{`
			`/* Note the error, but continue parsing until the end of the`
			`* hostname`
			`*/`

			`src++;`
			`ret = -E2BIG;`
			`}`
			`}`

			`*dest = '\0';`

			`/* Parse and skip the scheme separator */`

			`if (*src != ':')`
			`{`
			`ret = -EINVAL;`
			`}`

			`src++;`

			`if (*src != '/')`
			`{`
			`ret = -EINVAL;`
			`}`

			`src++;`

			`if (*src != '/')`
			`{`
			`ret = -EINVAL;`
			`}`

			`src++;`

			`/* Concatenate the hostname following http:// and up to the termnator */`

			`dest = url->host;`
			`bytesleft = url->hostlen;`

			`while (src != '\0' && src != '/' && src != ' ' && src != ':')`
			`{`
			`/* Make sure that there is space for another character in the`
			`* hostname (reserving space for the null terminator).`
			`*/`

			`if (bytesleft > 1)`
			`{`
			`/* Copy the byte */`

			`dest++ = src++;`
			`bytesleft--;`
			`}`
			`else`
			`{`
			`/* Note the error, but continue parsing until the end of the`
			`* hostname`
			`*/`

			`src++;`
			`ret = -E2BIG;`
			`}`
			`}`

			`*dest = '\0';`

			`/* Check if the hostname is following by a port number */`

			`if (*src == ':')`
			`{`
			`uint16_t accum = 0;`
			`src++; /* Skip over the colon */`

			`while (src >= '0' && src <= '9')`
			`{`
			`accum = 10accum + src - '0';`
			`src++;`
			`}`

			`url->port = accum;`
			`}`

			`/* Make sure the file name starts with exactly one '/' */`

			`dest = url->path;`
			`bytesleft = url->pathlen;`

			`while (*src == '/')`
			`{`
			`src++;`
			`}`

			`*dest++ = '/';`
			`bytesleft--;`

			`/* The copy the rest of the file name to the user buffer */`

			`strncpy(dest, src, bytesleft);`
			`url->path[bytesleft - 1] = '\0';`
			`return ret;`
			`}`