nuttx-apps/netutils/netlib/netlib_parseurl.c

211 lines
5.7 KiB
C
Raw Normal View History

/****************************************************************************
* apps/netutils/netlib/netlib_parseurl.c
*
* Copyright (C) 2019 Gregory Nutt. All rights reserved.
* Author: Sebastien Lorquet <sebastien@lorquet.fr>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name NuttX nor the names of its contributors may be
* used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <nuttx/config.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include "netutils/netlib.h"
/****************************************************************************
* Public Functions
****************************************************************************/
/****************************************************************************
 * Name: netlib_parseurl
 *
 * Description:
 *   Parse a URL, not only HTTP ones.  The parsing is according to this rule:
 *     SCHEME :// HOST [: PORT] / PATH
 *   - scheme is everything before the first colon
 *   - scheme must be followed by ://
 *   - host is everything until colon, slash or space
 *   - port is optional, parsed only if host ends with colon
 *   - path is everything after the host.
 *   This is noticeably simpler than the official URL parsing method, since
 *   - it does not take into account the user:pass@ part that can be present
 *     before the host.  Support of these fields is planned in the url_s
 *     structure, but it is not parsed yet.
 *   - it does not separate the URL parameters nor the bookmark
 *   Note: see here for the documentation of a complete URL parsing routine:
 *     https://www.php.net/manual/fr/function.parse-url.php
 *
 * Input Parameters:
 *   str - The NUL-terminated URL string to parse.
 *   url - Caller-provided descriptor.  On entry the scheme/host/path
 *         members point to output buffers whose capacities (including the
 *         NUL terminator) are given by schemelen/hostlen/pathlen.  On
 *         return the buffers hold the parsed, NUL-terminated components.
 *         url->port is written only when the URL contains a ':' after the
 *         host; otherwise it keeps the value set by the caller.
 *
 * Returned Value:
 *   OK      - The URL was parsed and every component fit in its buffer.
 *   -EINVAL - An argument or output buffer is missing or empty, the "://"
 *             separator is absent, or the port number exceeds 65535.
 *   -E2BIG  - A component did not fit in its buffer.  The scheme and host
 *             are truncated; the path is reduced to "/" (or to an empty
 *             string if the path buffer cannot even hold "/").
 *
 ****************************************************************************/

int netlib_parseurl(FAR const char *str, FAR struct url_s *url)
{
  FAR const char *src = str;
  FAR char *dest;
  int bytesleft;
  int ret = OK;
  size_t pathlen;

  /* Every output buffer must be able to hold at least the NUL terminator,
   * otherwise the unconditional terminations below would write out of
   * bounds.
   */

  if (str == NULL || url == NULL ||
      url->scheme == NULL || url->schemelen < 1 ||
      url->host == NULL || url->hostlen < 1 ||
      url->path == NULL || url->pathlen < 1)
    {
      return -EINVAL;
    }

  /* Extract the scheme: everything up to the first colon */

  dest      = url->scheme;
  bytesleft = url->schemelen;

  while (*src != '\0' && *src != ':')
    {
      /* Make sure that there is space for another character in the
       * scheme (reserving space for the null terminator).
       */

      if (bytesleft > 1)
        {
          /* Copy the byte */

          *dest++ = *src++;
          bytesleft--;
        }
      else
        {
          /* Note the error, but continue parsing until the end of the
           * scheme
           */

          src++;
          ret = -E2BIG;
        }
    }

  *dest = '\0';

  /* Parse and skip the "://" separator.  The tests short-circuit, so a
   * character is only read once the previous one is known not to be the
   * terminating NUL.  A malformed URL is rejected immediately: continuing
   * would read past the end of the string.
   */

  if (src[0] != ':' || src[1] != '/' || src[2] != '/')
    {
      return -EINVAL;
    }

  src += 3;

  /* Copy the hostname following "://" and up to the terminator */

  dest      = url->host;
  bytesleft = url->hostlen;

  while (*src != '\0' && *src != '/' && *src != ' ' && *src != ':')
    {
      /* Make sure that there is space for another character in the
       * hostname (reserving space for the null terminator).
       */

      if (bytesleft > 1)
        {
          /* Copy the byte */

          *dest++ = *src++;
          bytesleft--;
        }
      else
        {
          /* Note the error, but continue parsing until the end of the
           * hostname
           */

          src++;
          ret = -E2BIG;
        }
    }

  *dest = '\0';

  /* Check if the hostname is followed by a port number */

  if (*src == ':')
    {
      /* Accumulate in a wider type so that an out-of-range port is detected
       * instead of silently wrapping around to a different, valid-looking
       * port number.
       */

      uint32_t accum = 0;

      src++; /* Skip over the colon */

      while (*src >= '0' && *src <= '9')
        {
          accum = 10 * accum + (uint32_t)(*src - '0');
          if (accum > UINT16_MAX)
            {
              return -EINVAL;
            }

          src++;
        }

      url->port = (uint16_t)accum;
    }

  /* Make sure the file name starts with exactly one '/' */

  while (*src == '/')
    {
      src++;
    }

  /* Then copy the rest of the file name to the user buffer.  The buffer
   * must hold the leading '/', the remaining text and the NUL terminator.
   * The capacity is checked as a signed value first so that a tiny buffer
   * can never turn into a huge unsigned size in the comparison.
   */

  dest      = url->path;
  bytesleft = url->pathlen;
  pathlen   = strlen(src);

  if (bytesleft >= 2 && pathlen <= (size_t)bytesleft - 2)
    {
      *dest++ = '/';
      memcpy(dest, src, pathlen);
      dest[pathlen] = '\0';
    }
  else
    {
      /* The path does not fit: report "/" alone when there is room for it,
       * otherwise an empty string.
       */

      if (bytesleft >= 2)
        {
          *dest++ = '/';
        }

      *dest = '\0';
      ret   = -E2BIG;
    }

  return ret;
}