webclient: Implement chunked transfer (receiving side)

This is a requirement for HTTP 1.1.

Tested against:

* http://httpbin.org/stream/10

* Docker API
This commit is contained in:
YAMAMOTO Takashi 2022-03-07 12:48:59 +09:00 committed by Xiang Xiao
parent 092ce81444
commit 44e80ac972
2 changed files with 406 additions and 48 deletions

View File

@ -133,6 +133,9 @@
* buflen - A pointer to the length of the buffer. If the callee wishes
* to change the size of the buffer, it may write to buflen.
* arg - User argument passed to callback.
*
* Note: changing buffer address and/or size is only allowed for HTTP 1.0.
* It's not allowed for HTTP 1.1.
*/
typedef void (*wget_callback_t)(FAR char **buffer, int offset,

View File

@ -154,6 +154,11 @@ enum webclient_state_e
WEBCLIENT_STATE_STATUSLINE,
WEBCLIENT_STATE_HEADERS,
WEBCLIENT_STATE_DATA,
WEBCLIENT_STATE_CHUNKED_HEADER,
WEBCLIENT_STATE_CHUNKED_DATA,
WEBCLIENT_STATE_CHUNKED_ENDDATA,
WEBCLIENT_STATE_CHUNKED_TRAILER,
WEBCLIENT_STATE_WAIT_CLOSE,
WEBCLIENT_STATE_CLOSE,
WEBCLIENT_STATE_DONE,
};
@ -174,7 +179,8 @@ struct conn_s
/* flags for wget_s::internal_flags */
#define WGET_FLAG_GOT_CONTENT_LENGTH 1
#define WGET_FLAG_GOT_CONTENT_LENGTH 1U
#define WGET_FLAG_CHUNKED 2U
struct wget_s
{
@ -202,6 +208,9 @@ struct wget_s
uintmax_t expected_resp_body_len;
uintmax_t received_body_len;
uintmax_t chunk_len;
uintmax_t chunk_received;
#ifdef CONFIG_WEBCLIENT_GETMIMETYPE
char mimetype[CONFIG_WEBCLIENT_MAXMIMESIZE];
#endif
@ -226,13 +235,14 @@ struct wget_s
* Private Data
****************************************************************************/
static const char g_http10[] = "HTTP/1.0";
static const char g_http11[] = "HTTP/1.1";
static const char g_http10[] = "HTTP/1.0";
static const char g_http11[] = "HTTP/1.1";
#ifdef CONFIG_WEBCLIENT_GETMIMETYPE
static const char g_httpcontenttype[] = "content-type: ";
static const char g_httpcontenttype[] = "content-type: ";
#endif
static const char g_httphost[] = "host: ";
static const char g_httplocation[] = "location: ";
static const char g_httphost[] = "host: ";
static const char g_httplocation[] = "location: ";
static const char g_httptransferencoding[] = "transfer-encoding: ";
static const char g_httpuseragentfields[] =
"Connection: close\r\n"
@ -413,13 +423,13 @@ static char *wget_urlencode_strcpy(char *dest, const char *src)
* Name: wget_parseint
****************************************************************************/
static int wget_parseint(const char *cp, uintmax_t *resultp)
static int wget_parseint(const char *cp, uintmax_t *resultp, int base)
{
char *ep;
uintmax_t val;
errno = 0;
val = strtoumax(cp, &ep, 10);
val = strtoumax(cp, &ep, base);
if (cp == ep)
{
return -EINVAL; /* not a number */
@ -459,7 +469,7 @@ static inline int wget_parsestatus(struct webclient_context *ctx,
{
bool got_nl;
ws->line[ndx] = ws->buffer[offset];
ws->line[ndx] = ws->buffer[offset++];
got_nl = ws->line[ndx] == ISO_NL;
if (got_nl || ndx == CONFIG_WEBCLIENT_MAXHTTPLINE - 1)
{
@ -540,12 +550,12 @@ static inline int wget_parsestatus(struct webclient_context *ctx,
*/
ws->state = WEBCLIENT_STATE_HEADERS;
ws->internal_flags &= ~WGET_FLAG_CHUNKED;
ndx = 0;
break;
}
else
{
offset++;
ndx++;
}
}
@ -614,7 +624,7 @@ static inline int wget_parseheaders(struct webclient_context *ctx,
{
bool got_nl;
ws->line[ndx] = ws->buffer[offset];
ws->line[ndx] = ws->buffer[offset++];
got_nl = ws->line[ndx] == ISO_NL;
if (got_nl || ndx == CONFIG_WEBCLIENT_MAXHTTPLINE - 1)
{
@ -649,7 +659,16 @@ static inline int wget_parseheaders(struct webclient_context *ctx,
* actual data.
*/
ws->state = WEBCLIENT_STATE_DATA;
if ((ws->internal_flags & WGET_FLAG_CHUNKED) != 0)
{
ws->state = WEBCLIENT_STATE_CHUNKED_HEADER;
ndx = 0;
}
else
{
ws->state = WEBCLIENT_STATE_DATA;
}
goto exit;
}
@ -730,7 +749,7 @@ static inline int wget_parseheaders(struct webclient_context *ctx,
if (got_nl)
{
ret = wget_parseint(ws->line + strlen(g_httpcontsize),
&ws->expected_resp_body_len);
&ws->expected_resp_body_len, 10);
if (ret != 0)
{
goto exit;
@ -742,6 +761,24 @@ static inline int wget_parseheaders(struct webclient_context *ctx,
ws->expected_resp_body_len);
}
}
else if (strncasecmp(ws->line, g_httptransferencoding,
strlen(g_httptransferencoding)) == 0)
{
/* Only the "chunked" transfer coding is supported.  Any other value
 * is rejected with -EPROTO.
 */
FAR const char *encodings =
ws->line + strlen(g_httptransferencoding);
if (strcasecmp(encodings, "chunked"))
{
nerr("unknown encodings: '%s'\n", encodings);
return -EPROTO;
}
ninfo("transfer encodings: '%s'\n", encodings);
ws->internal_flags |= WGET_FLAG_CHUNKED;
}
}
if (found && !got_nl)
@ -769,12 +806,229 @@ static inline int wget_parseheaders(struct webclient_context *ctx,
{
ndx++;
}
offset++;
}
exit:
ws->offset = ++offset;
ws->offset = offset;
ws->ndx = ndx;
return ret;
}
/****************************************************************************
 * Name: wget_parsechunkheader
 *
 * Description:
 *   Consume bytes from ws->buffer, starting at ws->offset and stopping at
 *   ws->datend, accumulating them in ws->line until one complete chunk
 *   header line ("<hex-size>[;extensions]\r\n") has been collected.
 *   The size is parsed as a hexadecimal number into ws->chunk_len and
 *   any chunk extensions are ignored.
 *
 *   On success with a complete line, ws->state becomes
 *   WEBCLIENT_STATE_CHUNKED_DATA (non-zero size, ws->chunk_received is
 *   reset) or WEBCLIENT_STATE_CHUNKED_TRAILER (zero size).  If the input
 *   runs out before the end of the line, the state is left unchanged and
 *   the partial line is kept in ws->line / ws->ndx for the next call.
 *
 * Input Parameters:
 *   ctx - Not used by this function.
 *   ws  - Parser state; offset, ndx, line, chunk_len, chunk_received and
 *         state are updated.
 *
 * Returned Value:
 *   OK on success (including "need more data").
 *   -EPROTO if the line is empty or is not terminated by CR LF.
 *   -E2BIG if the line does not fit in CONFIG_WEBCLIENT_MAXHTTPLINE.
 *   Otherwise, the error returned by wget_parseint().
 *
 ****************************************************************************/

static inline int wget_parsechunkheader(struct webclient_context *ctx,
                                        struct wget_s *ws)
{
  int offset;
  int ndx;
  int ret = OK;

  offset = ws->offset;
  ndx = ws->ndx;

  while (offset < ws->datend)
    {
      FAR char *semicolon;
      bool got_nl;

      ws->line[ndx] = ws->buffer[offset++];
      got_nl = ws->line[ndx] == ISO_NL;
      if (!got_nl && ndx != CONFIG_WEBCLIENT_MAXHTTPLINE - 1)
        {
          /* Keep collecting the line. */

          ndx++;
          continue;
        }

      /* We have an entire header line in ws->line, or
       * our buffer is already full, so we start parsing it.
       */

      if (ndx == 0)
        {
          /* A line consisting of a single byte (e.g. a bare LF) carries
           * no chunk size.  Reject it here: ws->line is not terminated
           * at this point, so parsing it would read stale bytes left
           * over from a previous line.
           */

          nerr("ERROR: empty chunk header\n");
          ret = -EPROTO;
          break;
        }

      ninfo("Got chunk header line%s: %.*s\n",
            got_nl ? "" : " (truncated)",
            ndx - 1, &ws->line[0]);

      if (ws->line[0] == ISO_CR)
        {
          nerr("ERROR: empty chunk header\n");
          ret = -EPROTO;
          break;
        }

      if (!got_nl)
        {
          /* The line did not fit in ws->line.  Even if the size itself
           * is complete (i.e. only extensions were cut off), the rest of
           * the line is still pending in the input stream and we have no
           * way to skip it.  Accepting the line here would make the
           * remaining extension bytes be treated as chunk payload.
           */

          nerr("ERROR: truncated a header due to "
               "small CONFIG_WEBCLIENT_MAXHTTPLINE\n");
          ret = -E2BIG;
          break;
        }

      /* Truncate the trailing \r\n */

      ndx--;
      if (ws->line[ndx] != ISO_CR)
        {
          nerr("ERROR: unexpected EOL from the server\n");
          ret = -EPROTO;
          break;
        }

      ws->line[ndx] = '\0';

      /* Cut the line at the first ';' so that only the size remains. */

      semicolon = strchr(ws->line, ';');
      if (semicolon != NULL)
        {
          ninfo("Ignoring extentions in chunk header\n");
          *semicolon = 0;
        }

      /* The chunk size is a hexadecimal number. */

      ret = wget_parseint(ws->line, &ws->chunk_len, 16);
      if (ret != 0)
        {
          break;
        }

      if (ws->chunk_len != 0)
        {
          ninfo("Receiving a chunk with %ju bytes\n", ws->chunk_len);
          ws->state = WEBCLIENT_STATE_CHUNKED_DATA;
          ws->chunk_received = 0;
        }
      else
        {
          /* A zero-sized chunk marks the end of the body. */

          ws->state = WEBCLIENT_STATE_CHUNKED_TRAILER;
        }

      ndx = 0;
      break;
    }

  ws->offset = offset;
  ws->ndx = ndx;
  return ret;
}
/****************************************************************************
 * Name: wget_parsechunkenddata
 *
 * Description:
 *   Consume the CR LF that must follow the payload of a chunk.  Bytes are
 *   taken from ws->buffer, starting at ws->offset and stopping at
 *   ws->datend.  ws->ndx records how many bytes of the terminator have
 *   been seen so far, so the CR and the LF may arrive in separate calls.
 *
 *   Once the terminator is complete, ws->state becomes
 *   WEBCLIENT_STATE_CHUNKED_TRAILER if ws->chunk_len is zero, or
 *   WEBCLIENT_STATE_CHUNKED_HEADER otherwise, and ws->ndx is reset.
 *
 * Input Parameters:
 *   ctx - Not used by this function.
 *   ws  - Parser state; offset, ndx, line and state are updated.
 *
 * Returned Value:
 *   OK on success (including "need more data").
 *   -EPROTO if anything other than exactly CR LF is received.
 *
 ****************************************************************************/

static inline int wget_parsechunkenddata(struct webclient_context *ctx,
                                         struct wget_s *ws)
{
  int offset;
  int ndx;
  int ret = OK;

  offset = ws->offset;
  ndx = ws->ndx;

  while (offset < ws->datend)
    {
      char ch = ws->buffer[offset++];

      if (ndx == 0)
        {
          /* The first byte must be CR.  Anything else can never become
           * a valid terminator, so fail right away instead of buffering
           * an unbounded amount of data in ws->line while waiting for
           * an LF.
           */

          if (ch != ISO_CR)
            {
              ret = -EPROTO;
              break;
            }

          ws->line[ndx++] = ch;
          continue;
        }

      /* CR has been seen.  The only acceptable next byte is LF. */

      if (ch != ISO_NL)
        {
          ret = -EPROTO;
          break;
        }

      if (ws->chunk_len == 0)
        {
          ws->state = WEBCLIENT_STATE_CHUNKED_TRAILER;
        }
      else
        {
          ws->state = WEBCLIENT_STATE_CHUNKED_HEADER;
        }

      ndx = 0;
      break;
    }

  ws->offset = offset;
  ws->ndx = ndx;
  return ret;
}
/****************************************************************************
 * Name: wget_parsechunktrailer
 *
 * Description:
 *   Consume the trailer section that follows the last (zero-sized) chunk.
 *   Bytes are taken from ws->buffer, starting at ws->offset and stopping
 *   at ws->datend.  Every non-empty trailer line is ignored.  The empty
 *   line (CR LF alone) ends the trailer and moves ws->state to
 *   WEBCLIENT_STATE_WAIT_CLOSE.
 *
 *   The contents of trailer lines are not stored.  Only the previous byte
 *   is kept (in ws->line[0]), and ws->ndx is used as a line length
 *   counter that saturates at 2.  That is all the checks below need, and
 *   it keeps the function within ws->line no matter how long a trailer
 *   line is.
 *
 * Input Parameters:
 *   ctx - Not used by this function.
 *   ws  - Parser state; offset, ndx, line and state are updated.
 *
 * Returned Value:
 *   OK on success (including "need more data").
 *   -EPROTO if a line is terminated by a bare LF instead of CR LF.
 *
 ****************************************************************************/

static inline int wget_parsechunktrailer(struct webclient_context *ctx,
                                         struct wget_s *ws)
{
  int offset;
  int ndx;
  int ret = OK;

  offset = ws->offset;
  ndx = ws->ndx;

  while (offset < ws->datend)
    {
      char ch = ws->buffer[offset++];

      if (ch != ISO_NL)
        {
          /* Remember the byte so that the CR check below can see it and
           * count it, without ever indexing past the start of ws->line.
           */

          ws->line[0] = ch;
          if (ndx < 2)
            {
              ndx++;
            }

          continue;
        }

      /* End of line.  It must have been preceded by a CR. */

      if (ndx == 0 || ws->line[0] != ISO_CR)
        {
          ret = -EPROTO;
          break;
        }

      if (ndx != 1)
        {
          /* Ignore all non empty lines. */

          ndx = 0;
          continue;
        }

      /* An empty line: the end of the trailer section. */

      ws->state = WEBCLIENT_STATE_WAIT_CLOSE;
      break;
    }

  ws->offset = offset;
  ws->ndx = ndx;
  return ret;
}
@ -1293,46 +1547,60 @@ int webclient_perform(FAR struct webclient_context *ctx)
if (ws->state == WEBCLIENT_STATE_STATUSLINE ||
ws->state == WEBCLIENT_STATE_HEADERS ||
ws->state == WEBCLIENT_STATE_DATA)
ws->state == WEBCLIENT_STATE_DATA ||
ws->state == WEBCLIENT_STATE_CHUNKED_HEADER ||
ws->state == WEBCLIENT_STATE_CHUNKED_DATA)
{
for (; ; )
{
ws->datend = conn_recv(ctx, conn, ws->buffer, ws->buflen);
if (ws->datend < 0)
if (ws->datend - ws->offset == 0)
{
ret = ws->datend;
nerr("ERROR: recv failed: %d\n", -ret);
goto errout_with_errno;
}
else if (ws->datend == 0)
{
if (ws->state != WEBCLIENT_STATE_DATA)
ssize_t ssz;
ninfo("Reading new data\n");
ssz = conn_recv(ctx, conn, ws->buffer, ws->buflen);
if (ssz < 0)
{
nerr("Connection lost unexpectedly\n");
ret = -ECONNABORTED;
ret = ssz;
nerr("ERROR: recv failed: %d\n", -ret);
goto errout_with_errno;
}
if ((ws->internal_flags &
WGET_FLAG_GOT_CONTENT_LENGTH) != 0 &&
ws->expected_resp_body_len != ws->received_body_len)
else if (ssz == 0)
{
nerr("Unexpected response body length: %ju != %ju\n",
ws->expected_resp_body_len,
ws->received_body_len);
ret = -EPROTO;
goto errout_with_errno;
if (ws->state != WEBCLIENT_STATE_DATA &&
ws->state != WEBCLIENT_STATE_WAIT_CLOSE)
{
nerr("Connection lost unexpectedly\n");
ret = -ECONNABORTED;
goto errout_with_errno;
}
if ((ws->internal_flags &
WGET_FLAG_GOT_CONTENT_LENGTH) != 0 &&
ws->expected_resp_body_len !=
ws->received_body_len)
{
nerr("Unexpected response body length: "
"%ju != %ju\n",
ws->expected_resp_body_len,
ws->received_body_len);
ret = -EPROTO;
goto errout_with_errno;
}
ninfo("Connection lost\n");
ws->state = WEBCLIENT_STATE_CLOSE;
ws->redirected = 0;
break;
}
ninfo("Connection lost\n");
ws->state = WEBCLIENT_STATE_CLOSE;
ws->redirected = 0;
break;
ninfo("Got %zd bytes data\n", ssz);
ws->offset = 0;
ws->datend = ssz;
}
/* Handle initial parsing of the status line */
ws->offset = 0;
if (ws->state == WEBCLIENT_STATE_STATUSLINE)
{
ret = wget_parsestatus(ctx, ws);
@ -1353,29 +1621,88 @@ int webclient_perform(FAR struct webclient_context *ctx)
}
}
/* Parse the chunk header */
if (ws->state == WEBCLIENT_STATE_CHUNKED_HEADER)
{
ret = wget_parsechunkheader(ctx, ws);
if (ret < 0)
{
goto errout_with_errno;
}
}
if (ws->state == WEBCLIENT_STATE_CHUNKED_ENDDATA)
{
ret = wget_parsechunkenddata(ctx, ws);
if (ret < 0)
{
goto errout_with_errno;
}
}
if (ws->state == WEBCLIENT_STATE_CHUNKED_TRAILER)
{
ret = wget_parsechunktrailer(ctx, ws);
if (ret < 0)
{
goto errout_with_errno;
}
}
if (ws->state == WEBCLIENT_STATE_WAIT_CLOSE)
{
uintmax_t received = ws->datend - ws->offset;
if (received != 0)
{
nerr("Unexpected %ju bytes data received", received);
ret = -EPROTO;
goto errout_with_errno;
}
}
/* Dispose of the data payload */
if (ws->state == WEBCLIENT_STATE_DATA)
if (ws->state == WEBCLIENT_STATE_DATA ||
ws->state == WEBCLIENT_STATE_CHUNKED_DATA)
{
if (ws->httpstatus != HTTPSTATUS_MOVED)
{
uintmax_t received = ws->datend - ws->offset;
FAR char *orig_buffer = ws->buffer;
int orig_buflen = ws->buflen;
if (ws->state == WEBCLIENT_STATE_CHUNKED_DATA)
{
uintmax_t chunk_left =
ws->chunk_len - ws->chunk_received;
if (received > chunk_left)
{
received = chunk_left;
}
ws->chunk_received += received;
}
ninfo("Processing resp body %ju - %ju\n",
ws->received_body_len,
ws->received_body_len + ws->datend - ws->offset);
ws->received_body_len += ws->datend - ws->offset;
ws->received_body_len + received);
ws->received_body_len += received;
/* Let the client decide what to do with the
* received file.
*/
if (ws->offset == ws->datend)
if (received == 0)
{
/* We don't have data to give to the client yet. */
}
else if (ctx->sink_callback)
{
ret = ctx->sink_callback(&ws->buffer, ws->offset,
ws->datend, &ws->buflen,
ws->offset + received,
&ws->buflen,
ctx->sink_callback_arg);
if (ret != 0)
{
@ -1384,9 +1711,37 @@ int webclient_perform(FAR struct webclient_context *ctx)
}
else
{
ctx->callback(&ws->buffer, ws->offset, ws->datend,
ctx->callback(&ws->buffer, ws->offset,
ws->offset + received,
&ws->buflen, ctx->sink_callback_arg);
}
ws->offset += received;
/* The buffer swapping API doesn't work for
* HTTP 1.1 chunked transfer because the buffer here
* might already contain the next chunk header.
*/
if (ctx->protocol_version ==
WEBCLIENT_PROTOCOL_VERSION_HTTP_1_1)
{
if (orig_buffer != ws->buffer ||
orig_buflen != ws->buflen)
{
ret = -EINVAL;
goto errout_with_errno;
}
}
if (ws->state == WEBCLIENT_STATE_CHUNKED_DATA)
{
if (ws->chunk_len == ws->chunk_received)
{
ws->state = WEBCLIENT_STATE_CHUNKED_ENDDATA;
ws->ndx = 0;
}
}
}
else
{