/**************************************************************************** * netutils/webclient/webclient.c * Implementation of the HTTP client. * * Copyright (C) 2007, 2009, 2011-2012, 2014, 2020 Gregory Nutt. * All rights reserved. * Author: Gregory Nutt * * Based on uIP which also has a BSD style license: * * Author: Adam Dunkels * Copyright (c) 2002, Adam Dunkels. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ****************************************************************************/ /* This example shows a HTTP client that is able to download web pages * and files from web servers. 
It requires a number of callback * functions to be implemented by the module that utilizes the code: * webclient_datahandler(). */ /**************************************************************************** * Included Files ****************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "netutils/netlib.h" #include "netutils/webclient.h" #if defined(CONFIG_NETUTILS_CODECS) # if defined(CONFIG_CODECS_URLCODE) # define WGET_USE_URLENCODE 1 # include "netutils/urldecode.h" # endif # if defined(CONFIG_CODECS_BASE64) # include "netutils/base64.h" # endif #else # undef CONFIG_CODECS_URLCODE # undef CONFIG_CODECS_BASE64 #endif #ifndef CONFIG_NSH_WGET_USERAGENT # if CONFIG_VERSION_MAJOR != 0 || CONFIG_VERSION_MINOR != 0 # define CONFIG_NSH_WGET_USERAGENT \ "NuttX/" CONFIG_VERSION_STRING " (; http://www.nuttx.org/)" # else # define CONFIG_NSH_WGET_USERAGENT \ "NuttX/6.xx.x (; http://www.nuttx.org/)" # endif #endif /**************************************************************************** * Pre-processor Definitions ****************************************************************************/ #ifndef CONFIG_WEBCLIENT_TIMEOUT # define CONFIG_WEBCLIENT_TIMEOUT 10 #endif #ifndef CONFIG_WEBCLIENT_MAX_REDIRECT /* The default value 50 is taken from curl's --max-redirs option. 
*/

#  define CONFIG_WEBCLIENT_MAX_REDIRECT 50
#endif

/* Receive-side parser states, stored in wget_s.state.
 *
 *   STATUSLINE - set before each response is read; wget_parsestatus() runs
 *   HEADERS    - set by wget_parsestatus(); wget_parseheaders() runs
 *   DATA       - set by wget_parseheaders() at the blank line ending the
 *                headers; remaining bytes are payload
 *   CLOSE      - not referenced in the visible part of this file
 */

#define WEBCLIENT_STATE_STATUSLINE 0
#define WEBCLIENT_STATE_HEADERS    1
#define WEBCLIENT_STATE_DATA       2
#define WEBCLIENT_STATE_CLOSE      3

/* Classification of the HTTP status code, stored in wget_s.httpstatus.
 * OK is set for "200", MOVED for "301"/"302"; anything else stays NONE.
 * HTTPSTATUS_ERROR is not referenced in the visible part of this file.
 */

#define HTTPSTATUS_NONE            0
#define HTTPSTATUS_OK              1
#define HTTPSTATUS_MOVED           2
#define HTTPSTATUS_ERROR           3

/* ASCII codes for line feed, carriage return and space */

#define ISO_NL                     0x0a
#define ISO_CR                     0x0d
#define ISO_SPACE                  0x20

/* Values of the 'mode' argument of wget_base() */

#define WGET_MODE_GET              0
#define WGET_MODE_POST             1

/****************************************************************************
 * Private Types
 ****************************************************************************/

/* Per-request state.  One instance is allocated by wget_base() and lives for
 * the duration of the call (including any redirects).
 */

struct wget_s
{
  /* Internal status */

  uint8_t state;       /* One of WEBCLIENT_STATE_* */
  uint8_t httpstatus;  /* One of HTTPSTATUS_* */

  uint16_t port;       /* The port number to use in the connection */

  /* These describe the just-received buffer of data */

  FAR char *buffer;    /* user-provided buffer */
  int buflen;          /* Length of the user provided buffer */
  int offset;          /* Offset to the beginning of interesting data */
  int datend;          /* Offset+1 to the last valid byte of data in the
                        * buffer */

  /* Buffer HTTP header data and parse line at a time */

  char line[CONFIG_WEBCLIENT_MAXHTTPLINE];
  int  ndx;            /* Index in line[] where the next received byte is
                        * stored */

#ifdef CONFIG_WEBCLIENT_GETMIMETYPE
  char mimetype[CONFIG_WEBCLIENT_MAXMIMESIZE];
#endif

  /* URL components filled in by parseurl() */

  char scheme[sizeof("https") + 1];
  char hostname[CONFIG_WEBCLIENT_MAXHOSTNAME];
  char filename[CONFIG_WEBCLIENT_MAXFILENAME];
};

/****************************************************************************
 * Private Data
 ****************************************************************************/

/* Protocol strings.  The status-line prefixes are compared case-sensitively
 * with strncmp(); the lower-case header names are compared against received
 * headers with strncasecmp() and "host: " is also emitted verbatim in the
 * outgoing request.
 */

static const char g_http10[]        = "HTTP/1.0";
static const char g_http11[]        = "HTTP/1.1";
#ifdef CONFIG_WEBCLIENT_GETMIMETYPE
static const char g_httpcontenttype[] = "content-type: ";
#endif
static const char g_httphost[]      = "host: ";
static const char g_httplocation[]  = "location: ";
static const char g_httpget[]       = "GET ";
static const char g_httppost[]      = "POST ";

/* Trailing request header fields; ends with the blank line that terminates
 * the request header section.
 */

static const char g_httpuseragentfields[] =
  "Connection: close\r\n"
  "User-Agent: "
  CONFIG_NSH_WGET_USERAGENT
  "\r\n\r\n";

static const char g_http200[]
= "200 "; static const char g_http301[] = "301 "; static const char g_http302[] = "302 "; static const char g_httpcrnl[] = "\r\n"; static const char g_httpform[] = "Content-Type: " "application/x-www-form-urlencoded"; static const char g_httpcontsize[] = "Content-Length: "; #if 0 static const char g_httpconn[] = "Connection: Keep-Alive"; static const char g_httpcache[] = "Cache-Control: no-cache"; #endif /**************************************************************************** * Private Functions ****************************************************************************/ /**************************************************************************** * Name: wget_strcpy ****************************************************************************/ static char *wget_strcpy(char *dest, const char *src) { int len = strlen(src); memcpy(dest, src, len); dest[len] = '\0'; return dest + len; } /**************************************************************************** * Name: wget_urlencode_strcpy ****************************************************************************/ #ifdef WGET_USE_URLENCODE static char *wget_urlencode_strcpy(char *dest, const char *src) { int len = strlen(src); int d_len; d_len = urlencode_len(src, len); urlencode(src, len, dest, &d_len); return dest + d_len; } #endif /**************************************************************************** * Name: wget_parsestatus ****************************************************************************/ static inline int wget_parsestatus(struct wget_s *ws) { int offset; int ndx; char *dest; offset = ws->offset; ndx = ws->ndx; while (offset < ws->datend) { ws->line[ndx] = ws->buffer[offset]; if (ws->line[ndx] == ISO_NL) { ws->line[ndx] = '\0'; if ((strncmp(ws->line, g_http10, strlen(g_http10)) == 0) || (strncmp(ws->line, g_http11, strlen(g_http11)) == 0)) { dest = &(ws->line[9]); ws->httpstatus = HTTPSTATUS_NONE; /* Check for 200 OK */ if (strncmp(dest, g_http200, strlen(g_http200)) == 0) { 
ws->httpstatus = HTTPSTATUS_OK; } /* Check for 301 Moved permanently or 302 Found. * Location: header line will contain the new location. */ else if (strncmp(dest, g_http301, strlen(g_http301)) == 0 || strncmp(dest, g_http302, strlen(g_http302)) == 0) { ws->httpstatus = HTTPSTATUS_MOVED; } } else { return - ECONNABORTED; } /* We're done parsing the status line, so start parsing * the HTTP headers. */ ws->state = WEBCLIENT_STATE_HEADERS; break; } else { offset++; ndx++; } } ws->offset = offset; ws->ndx = ndx; return OK; } /**************************************************************************** * Name: parseurl ****************************************************************************/ static int parseurl(FAR const char *url, FAR struct wget_s *ws) { struct url_s url_s; int ret; memset(&url_s, 0, sizeof(url_s)); url_s.scheme = ws->scheme; url_s.schemelen = sizeof(ws->scheme); url_s.host = ws->hostname; url_s.hostlen = sizeof(ws->hostname); url_s.path = ws->filename; url_s.pathlen = sizeof(ws->filename); ret = netlib_parseurl(url, &url_s); if (ret < 0) { return ret; } if (url_s.port == 0) { ws->port = 80; } else { ws->port = url_s.port; } return 0; } /**************************************************************************** * Name: wget_parseheaders ****************************************************************************/ static inline int wget_parseheaders(struct wget_s *ws) { int offset; int ndx; int ret = OK; offset = ws->offset; ndx = ws->ndx; while (offset < ws->datend) { ws->line[ndx] = ws->buffer[offset]; if (ws->line[ndx] == ISO_NL) { /* We have an entire HTTP header line in s.line, so * we parse it. */ if (ndx > 0) /* Should always be true */ { if (ws->line[0] == ISO_CR) { /* This was the last header line (i.e., and empty "\r\n"), * so we are done with the headers and proceed with the * actual data. 
*/ ws->state = WEBCLIENT_STATE_DATA; goto exit; } /* Truncate the trailing \r\n */ ws->line[ndx - 1] = '\0'; /* Check for specific HTTP header fields. */ #ifdef CONFIG_WEBCLIENT_GETMIMETYPE if (strncasecmp(ws->line, g_httpcontenttype, strlen(g_httpcontenttype)) == 0) { /* Found Content-type field. */ char *dest = strchr(ws->line, ';'); if (dest != NULL) { *dest = 0; } strncpy(ws->mimetype, ws->line + strlen(g_httpcontenttype), sizeof(ws->mimetype)); } else #endif if (strncasecmp(ws->line, g_httplocation, strlen(g_httplocation)) == 0) { /* Parse the new host and filename from the URL. */ ninfo("Redirect to location: '%s'\n", ws->line + strlen(g_httplocation)); ret = parseurl(ws->line + strlen(g_httplocation), ws); ninfo("New hostname='%s' filename='%s'\n", ws->hostname, ws->filename); } } /* We're done parsing this line, so we reset the index to the start * of the next line. */ ndx = 0; } else { ndx++; } offset++; } exit: ws->offset = ++offset; ws->ndx = ndx; return ret; } /**************************************************************************** * Name: wget_gethostip * * Description: * Call getaddrinfo() to get the IPv4 address associated with a hostname. * * Input Parameters * hostname - The host name to use in the nslookup. * * Output Parameters * dest - The location to return the IPv4 address. * * Returned Value: * Zero (OK) on success; ERROR on failure. 
* ****************************************************************************/ static int wget_gethostip(FAR char *hostname, FAR struct in_addr *dest) { #ifdef CONFIG_LIBC_NETDB FAR struct addrinfo hint; FAR struct addrinfo *info; FAR struct sockaddr_in *addr; memset(&hint, 0, sizeof(hint)); hint.ai_family = AF_INET; if (getaddrinfo(hostname, NULL, &hint, &info) != OK) { return ERROR; } addr = (FAR struct sockaddr_in *)info->ai_addr; memcpy(dest, &addr->sin_addr, sizeof(struct in_addr)); freeaddrinfo(info); return OK; #else /* No host name support */ /* Convert strings to numeric IPv4 address */ int ret = inet_pton(AF_INET, hostname, dest); /* The inet_pton() function returns 1 if the conversion succeeds. It will * return 0 if the input is not a valid IPv4 dotted-decimal string or -1 * with errno set to EAFNOSUPPORT if the address family argument is * unsupported. */ return (ret > 0) ? OK : ERROR; #endif } /**************************************************************************** * Name: wget_base * * Description: * Obtain the requested file from an HTTP server using the GET method. * * Input Parameters * url - A pointer to a string containing either the full URL to * the file to get (e.g., http://www.nutt.org/index.html, or * http://192.168.23.1:80/index.html). * buffer - A user provided buffer to receive the file data (also * used for the outgoing GET request * buflen - The size of the user provided buffer * callback - As data is obtained from the host, this function is * to dispose of each block of file data as it is received. * arg - User argument passed to callback. 
* mode - Indicates GET or POST modes * * Returned Value: * 0: if the GET operation completed successfully; * -1: On a failure with errno set appropriately * ****************************************************************************/ static int wget_base(FAR const char *url, FAR char *buffer, int buflen, wget_callback_t callback, FAR void *arg, FAR const char *posts, uint8_t mode) { struct sockaddr_in server; struct wget_s *ws; struct timeval tv; bool redirected; unsigned int nredirect = 0; char *dest; int sockfd; int len; int ret; /* Initialize the state structure */ ws = calloc(1, sizeof(struct wget_s)); if (!ws) { return ERROR; } ws->buffer = buffer; ws->buflen = buflen; /* Parse the hostname (with optional port number) and filename * from the URL. */ ret = parseurl(url, ws); if (ret != 0) { nwarn("WARNING: Malformed URL: %s\n", url); free(ws); errno = -ret; return ERROR; } ninfo("hostname='%s' filename='%s'\n", ws->hostname, ws->filename); /* The following sequence may repeat indefinitely if we are redirected */ do { if (strcmp(ws->scheme, "http")) { nerr("ERROR: unsupported scheme: %s\n", ws->scheme); free(ws); return ERROR; } /* Re-initialize portions of the state structure that could have * been left from the previous time through the loop and should not * persist with the new connection. */ ws->httpstatus = HTTPSTATUS_NONE; ws->offset = 0; ws->datend = 0; ws->ndx = 0; /* Create a socket */ sockfd = socket(AF_INET, SOCK_STREAM, 0); if (sockfd < 0) { /* socket failed. 
It will set the errno appropriately */ nerr("ERROR: socket failed: %d\n", errno); free(ws); return ERROR; } /* Set send and receive timeout values */ tv.tv_sec = CONFIG_WEBCLIENT_TIMEOUT; tv.tv_usec = 0; setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, (FAR const void *)&tv, sizeof(struct timeval)); setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, (FAR const void *)&tv, sizeof(struct timeval)); /* Get the server address from the host name */ server.sin_family = AF_INET; server.sin_port = htons(ws->port); ret = wget_gethostip(ws->hostname, &server.sin_addr); if (ret < 0) { /* Could not resolve host (or malformed IP address) */ nwarn("WARNING: Failed to resolve hostname\n"); ret = -EHOSTUNREACH; goto errout_with_errno; } /* Connect to server. First we have to set some fields in the * 'server' address structure. The system will assign me an arbitrary * local port that is not in use. */ ret = connect(sockfd, (struct sockaddr *)&server, sizeof(struct sockaddr_in)); if (ret < 0) { nerr("ERROR: connect failed: %d\n", errno); goto errout; } /* Send the GET request */ dest = ws->buffer; if (mode == WGET_MODE_POST) { dest = wget_strcpy(dest, g_httppost); } else { dest = wget_strcpy(dest, g_httpget); } #ifndef WGET_USE_URLENCODE dest = wget_strcpy(dest, ws->filename); #else /* TODO: should we use wget_urlencode_strcpy? 
*/ dest = wget_strcpy(dest, ws->filename); #endif *dest++ = ISO_SPACE; dest = wget_strcpy(dest, g_http10); dest = wget_strcpy(dest, g_httpcrnl); dest = wget_strcpy(dest, g_httphost); dest = wget_strcpy(dest, ws->hostname); dest = wget_strcpy(dest, g_httpcrnl); if (mode == WGET_MODE_POST) { int post_len; char post_size[8]; dest = wget_strcpy(dest, g_httpform); dest = wget_strcpy(dest, g_httpcrnl); dest = wget_strcpy(dest, g_httpcontsize); /* Post content size */ post_len = strlen((char *)posts); sprintf(post_size, "%d", post_len); dest = wget_strcpy(dest, post_size); dest = wget_strcpy(dest, g_httpcrnl); } dest = wget_strcpy(dest, g_httpuseragentfields); if (mode == WGET_MODE_POST) { dest = wget_strcpy(dest, (char *)posts); } len = dest - buffer; ret = send(sockfd, buffer, len, 0); if (ret < 0) { nerr("ERROR: send failed: %d\n", errno); goto errout; } /* Now loop to get the file sent in response to the GET. This * loop continues until either we read the end of file (nbytes == 0) * or until we detect that we have been redirected. */ ws->state = WEBCLIENT_STATE_STATUSLINE; redirected = false; for (; ; ) { ws->datend = recv(sockfd, ws->buffer, ws->buflen, 0); if (ws->datend < 0) { nerr("ERROR: recv failed: %d\n", errno); goto errout; } else if (ws->datend == 0) { ninfo("Connection lost\n"); close(sockfd); break; } /* Handle initial parsing of the status line */ ws->offset = 0; if (ws->state == WEBCLIENT_STATE_STATUSLINE) { ret = wget_parsestatus(ws); if (ret < 0) { goto errout_with_errno; } } /* Parse the HTTP data */ if (ws->state == WEBCLIENT_STATE_HEADERS) { ret = wget_parseheaders(ws); if (ret < 0) { goto errout_with_errno; } } /* Dispose of the data payload */ if (ws->state == WEBCLIENT_STATE_DATA) { if (ws->httpstatus != HTTPSTATUS_MOVED) { /* Let the client decide what to do with the * received file. 
*/ callback(&ws->buffer, ws->offset, ws->datend, &buflen, arg); } else { redirected = true; nredirect++; if (nredirect > CONFIG_WEBCLIENT_MAX_REDIRECT) { nerr("ERROR: too many redirects (%u)\n", nredirect); goto errout; } close(sockfd); break; } } } } while (redirected); free(ws); return OK; errout_with_errno: errno = -ret; errout: close(sockfd); free(ws); return ERROR; } /**************************************************************************** * Public Functions ****************************************************************************/ /**************************************************************************** * Name: web_post_str ****************************************************************************/ #ifdef WGET_USE_URLENCODE char *web_post_str(FAR char *buffer, int *size, FAR char *name, FAR char *value) { char *dst = buffer; buffer = wget_strcpy(buffer, name); buffer = wget_strcpy(buffer, "="); buffer = wget_urlencode_strcpy(buffer, value); *size = buffer - dst; return dst; } #endif /**************************************************************************** * Name: web_post_strlen ****************************************************************************/ #ifdef WGET_USE_URLENCODE int web_post_strlen(FAR char *name, FAR char *value) { return strlen(name) + urlencode_len(value, strlen(value)) + 1; } #endif /**************************************************************************** * Name: web_posts_str ****************************************************************************/ #ifdef WGET_USE_URLENCODE char *web_posts_str(FAR char *buffer, int *size, FAR char **name, FAR char **value, int len) { char *dst = buffer; int wlen; int i; for (i = 0; i < len; i++) { if (i > 0) { buffer = wget_strcpy(buffer, "&"); } wlen = *size; buffer = web_post_str(buffer, &wlen, name[i], value[i]); buffer += wlen; } *size = buffer - dst; return dst; } #endif /**************************************************************************** * Name: web_posts_strlen 
****************************************************************************/ #ifdef WGET_USE_URLENCODE int web_posts_strlen(FAR char **name, FAR char **value, int len) { int wlen = 0; int i; for (i = 0; i < len; i++) { wlen += web_post_strlen(name[i], value[i]); } return wlen + len - 1; } #endif /**************************************************************************** * Name: wget * * Description: * Obtain the requested file from an HTTP server using the GET method. * * Input Parameters * url - A pointer to a string containing either the full URL to * the file to get (e.g., http://www.nutt.org/index.html, or * http://192.168.23.1:80/index.html). * buffer - A user provided buffer to receive the file data (also * used for the outgoing GET request * buflen - The size of the user provided buffer * callback - As data is obtained from the host, this function is * to dispose of each block of file data as it is received. * arg - User argument passed to callback. * * Returned Value: * 0: if the GET operation completed successfully; * -1: On a failure with errno set appropriately * ****************************************************************************/ int wget(FAR const char *url, FAR char *buffer, int buflen, wget_callback_t callback, FAR void *arg) { return wget_base(url, buffer, buflen, callback, arg, NULL, WGET_MODE_GET); } /**************************************************************************** * Name: wget_post ****************************************************************************/ int wget_post(FAR const char *url, FAR const char *posts, FAR char *buffer, int buflen, wget_callback_t callback, FAR void *arg) { return wget_base(url, buffer, buflen, callback, arg, posts, WGET_MODE_POST); }