nuttx/net/ipfrag/ipfrag.c
wangchen 2e6d0815b2 nuttx/net:Support to PMTUD
RFC 1191
When a router is unable to forward a datagram because it exceeds the
   MTU of the next-hop network and its Don't Fragment bit is set, the
   router is required to return an ICMP Destination Unreachable message
   to the source of the datagram, with the Code indicating
   "fragmentation needed and DF set".  To support the Path MTU Discovery
   technique specified in this memo, the router MUST include the MTU of
   that next-hop network in the low-order 16 bits of the ICMP header
   field that is labelled "unused" in the ICMP specification [7].  The
   high-order 16 bits remain unused, and MUST be set to zero.  Thus, the
   message has the following format:

       0                   1                   2                   3
       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |   Type = 3    |   Code = 4    |           Checksum            |
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |           unused = 0          |         Next-Hop MTU          |
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |      Internet Header + 64 bits of Original Datagram Data      |
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
RFC 1185
Packet Too Big Message

       0                   1                   2                   3
       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |     Type      |     Code      |          Checksum             |
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |                             MTU                               |
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |                    As much of invoking packet                 |
      +               as will fit without the ICMPv6 packet           +
      |                       exceeding 576 octets                    |

   IPv6 Fields:

   Destination Address

                  Copied from the Source Address field of the invoking
                  packet.

   ICMPv6 Fields:

   Type           2

   Code           0

   MTU            The Maximum Transmission Unit of the next-hop link.

   Description

   A Packet Too Big MUST be sent by a router in response to a packet
   that it cannot forward because the packet is larger than the MTU of
   the outgoing link.  The information in this message is used as part
   of the Path MTU Discovery process [RFC-1191].

Signed-off-by: wangchen <wangchen41@xiaomi.com>
2023-05-12 10:21:52 -03:00

1284 lines
34 KiB
C

/****************************************************************************
* net/ipfrag/ipfrag.c
* Handling incoming IPv4 and IPv6 fragment input
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <nuttx/config.h>
#if (defined(CONFIG_NET_IPv4) || defined(CONFIG_NET_IPv6)) && \
defined(CONFIG_NET_IPFRAG)
#include <sys/ioctl.h>
#include <stdint.h>
#include <stdlib.h>
#include <debug.h>
#include <string.h>
#include <errno.h>
#include <netinet/in.h>
#include <net/if.h>
#include <nuttx/nuttx.h>
#include <nuttx/wqueue.h>
#include <nuttx/kmalloc.h>
#include <nuttx/net/netconfig.h>
#include <nuttx/net/netdev.h>
#include <nuttx/net/netstats.h>
#include <nuttx/net/ip.h>
#include <nuttx/net/ipv6ext.h>
#include "netdev/netdev.h"
#include "inet/inet.h"
#include "icmp/icmp.h"
#include "icmpv6/icmpv6.h"
#include "ipfrag.h"
/****************************************************************************
* Pre-processor Definitions
****************************************************************************/
#define GOTO_IF(expression, to) \
if (expression) \
{ \
goto to; \
}
#define UPDATE_IOB(iob, off, len) \
do \
{ \
(iob)->io_offset = (off); \
(iob)->io_len = (len); \
(iob)->io_pktlen = (len); \
} while (0);
/* Defined the minimal timeout interval to avoid triggering timeout timer
* too frequently, default: 0.5 seconds.
*/
#define REASSEMBLY_TIMEOUT_MINIMAL 5
#if CONFIG_NET_IPFRAG_REASS_MAXAGE < REASSEMBLY_TIMEOUT_MINIMAL
# define REASSEMBLY_TIMEOUT REASSEMBLY_TIMEOUT_MINIMAL
#else
# define REASSEMBLY_TIMEOUT CONFIG_NET_IPFRAG_REASS_MAXAGE
#endif
#define REASSEMBLY_TIMEOUT_MINIMALTICKS DSEC2TICK(REASSEMBLY_TIMEOUT_MINIMAL)
#define REASSEMBLY_TIMEOUT_TICKS DSEC2TICK(REASSEMBLY_TIMEOUT)
#define IPFRAGWORK LPWORK
/* Helper macro to count I/O buffer count for a given I/O buffer chain */
#define IOBUF_CNT(ptr) (((ptr)->io_pktlen + CONFIG_IOB_BUFSIZE - 1)/ \
CONFIG_IOB_BUFSIZE)
/* The maximum I/O buffer occupied by fragment reassembly cache */
#define REASSEMBLY_MAXOCCUPYIOB CONFIG_IOB_NBUFFERS / 5
/* Deciding whether to fragment outgoing packets which target is to ourself */
#define LOOPBACK_IPFRAME_NOFRAGMENT 0
/****************************************************************************
* Private Data
****************************************************************************/
/* A timeout timer used to start a worker which is used to check
* whether the assembly time of those fragments within one node is expired,
* if so, free all resources of this node.
*/
static struct wdog_s g_wdfragtimeout;
/* Reassembly timeout work */
static struct work_s g_wkfragtimeout;
/* Remember the number of I/O buffers currently in reassembly cache */
static uint8_t g_bufoccupy;
/* Queue header definition, it links all fragments of all NICs by ascending
* ipid.
*/
static sq_queue_t g_assemblyhead_ipid;
/* Queue header definition, which connects all fragments of all NICs in order
* of addition time.
*/
static sq_queue_t g_assemblyhead_time;
/****************************************************************************
* Public Data
****************************************************************************/
/* Only one thread can access g_assemblyhead_ipid and g_assemblyhead_time
* at a time.
*/
mutex_t g_ipfrag_lock = NXMUTEX_INITIALIZER;
/****************************************************************************
* Private Function Prototypes
****************************************************************************/
static void ip_fragin_timerout_expiry(wdparm_t arg);
static void ip_fragin_timerwork(FAR void *arg);
static inline FAR struct ip_fraglink_s *
ip_fragin_freelink(FAR struct ip_fraglink_s *fraglink);
static void ip_fragin_check(FAR struct ip_fragsnode_s *fragsnode);
static void ip_fragin_cachemonitor(FAR struct ip_fragsnode_s *curnode);
static inline FAR struct iob_s *
ip_fragout_allocfragbuf(FAR struct iob_queue_s *fragq);
/****************************************************************************
* Private Functions
****************************************************************************/
/****************************************************************************
* Name: ip_fragin_timerout_expiry
*
* Description:
* Schedule the timeout checking and handling on the low priority work
* queue.
*
* Returned Value:
* None
*
****************************************************************************/
static void ip_fragin_timerout_expiry(wdparm_t arg)
{
assert(g_wkfragtimeout.worker == NULL);
work_queue(IPFRAGWORK, &g_wkfragtimeout, ip_fragin_timerwork, NULL, 0);
}
/****************************************************************************
* Name: ip_fragin_timerwork
*
* Description:
* The really work of fragment timeout checking and handling.
*
* Returned Value:
* None
*
****************************************************************************/
static void ip_fragin_timerwork(FAR void *arg)
{
clock_t curtick = clock_systime_ticks();
sclock_t interval;
FAR sq_entry_t *entry;
FAR sq_entry_t *entrynext;
FAR struct ip_fragsnode_s *node;
ninfo("Start reassembly work queue\n");
nxmutex_lock(&g_ipfrag_lock);
/* Walk through the list, check the timetout and calculate the next timer
* interval
*/
entry = sq_peek(&g_assemblyhead_time);
while (entry != NULL)
{
entrynext = sq_next(entry);
node = (FAR struct ip_fragsnode_s *)
container_of(entry, FAR struct ip_fragsnode_s, flinkat);
/* Check for timeout, be careful with the calculation formula,
* the tick counter may overflow
*/
interval = curtick - node->tick;
if (interval >= REASSEMBLY_TIMEOUT_TICKS)
{
/* If this timeout expires, the partially-reassembled datagram
* MUST be discarded and an ICMP Time Exceeded message sent to
* the source host (if fragment zero has been received).
*/
ninfo("Reassembly timeout occurs!");
#if defined(CONFIG_NET_ICMP) && !defined(CONFIG_NET_ICMP_NO_STACK)
if ((node->verifyflag & IP_FRAGVERIFY_RECVDZEROFRAG) != 0)
{
FAR struct net_driver_s *dev = node->dev;
net_lock();
netdev_iob_replace(dev, node->frags->frag);
node->frags->frag = NULL;
#ifdef CONFIG_NET_IPv4
if (node->frags->isipv4)
{
icmp_reply(dev, ICMP_TIME_EXCEEDED,
ICMP_EXC_FRAGTIME);
}
#endif
#ifdef CONFIG_NET_IPv6
if (!node->frags->isipv4)
{
icmpv6_reply(dev, ICMPv6_PACKET_TIME_EXCEEDED,
ICMPV6_EXC_FRAGTIME, 0);
}
#endif
if (iob_tryadd_queue(dev->d_iob, &dev->d_fragout) == 0)
{
netdev_iob_clear(dev);
/* Send ICMP Time Exceeded message via dev->d_fragout
* queue
*/
ninfo("Send Time Exceeded ICMP%s Message to source "
"host\n", node->frags->isipv4 ? "v4" : "v6");
netdev_txnotify_dev(dev);
}
net_unlock();
}
#endif
/* Remove fragments of this node */
if (node->frags != NULL)
{
FAR struct ip_fraglink_s *fraglink = node->frags;
while (fraglink)
{
fraglink = ip_fragin_freelink(fraglink);
}
}
/* Remove node from single-list and free node memory */
ip_frag_remnode(node);
kmm_free(node);
}
else
{
/* Because fragment nodes have been sorted to g_assemblyhead_time
* according to the added time, so enter here, we can get the
* 'interval' of the earliest time node that has not timed out.
* There is no need to continue the loop here, and use time
* REASSEMBLY_TIMEOUT_TICKS - 'interval' as the input for the next
* Timer starting.
*/
break;
}
entry = entrynext;
}
/* Be sure to start the timer, if there are nodes in the linked list */
if (sq_peek(&g_assemblyhead_time) != NULL)
{
clock_t delay = REASSEMBLY_TIMEOUT_MINIMALTICKS;
/* The interval for the next timer is REASSEMBLY_TIMEOUT_TICKS -
* interval, if it is less than the minimum timeout interval,
* fix it to REASSEMBLY_TIMEOUT_MINIMALTICKS
*/
if (delay < REASSEMBLY_TIMEOUT_TICKS - interval)
{
delay = REASSEMBLY_TIMEOUT_TICKS - interval;
}
ninfo("Reschedule reassembly work queue\n");
wd_start(&g_wdfragtimeout, delay, ip_fragin_timerout_expiry,
(wdparm_t)NULL);
}
else
{
ninfo("Stop reassembly work queue\n");
}
nxmutex_unlock(&g_ipfrag_lock);
}
/****************************************************************************
* Name: ip_fragin_freelink
*
* Description:
* Free the I/O buffer and ip_fraglink_s buffer at the head of a
* ip_fraglink_s chain.
*
* Input Parameters:
* fraglink - node of the lower-level linked list, it maintains
* information of one fragment
*
* Returned Value:
* The link to the next ip_fraglink_s buffer in the chain.
*
****************************************************************************/
static inline FAR struct ip_fraglink_s *
ip_fragin_freelink(FAR struct ip_fraglink_s *fraglink)
{
FAR struct ip_fraglink_s *next = fraglink->flink;
if (fraglink->frag != NULL)
{
iob_free_chain(fraglink->frag);
}
kmm_free(fraglink);
return next;
}
/****************************************************************************
* Name: ip_fragin_check
*
* Description:
* Check whether the zero fragment has been received or all fragments have
* been received.
*
* Input Parameters:
* fragsnode - node of the upper-level linked list, it maintains
* information bout all fragments belonging to an IP datagram
*
* Returned Value:
* None
*
****************************************************************************/
static void ip_fragin_check(FAR struct ip_fragsnode_s *fragsnode)
{
uint16_t formerlen = 0;
FAR struct ip_fraglink_s *entry;
if (fragsnode->verifyflag & IP_FRAGVERIFY_RECVDTAILFRAG)
{
entry = fragsnode->frags;
while (entry != NULL)
{
if (entry->morefrags)
{
formerlen += entry->fraglen;
}
else
{
/* Only the last entry has a 0 morefrags flag */
if (entry->fragoff == formerlen)
{
fragsnode->verifyflag |= IP_FRAGVERIFY_RECVDALLFRAGS;
}
}
entry = entry->flink;
}
}
}
/****************************************************************************
* Name: ip_fragin_cachemonitor
*
* Description:
* Check the reassembly cache buffer size, if it exceeds the configured
* threshold, some I/O buffers need to be freed
*
* Input Parameters:
* curnode - node of the upper-level linked list, it maintains information
* about all fragments belonging to an IP datagram
*
* Returned Value:
* none
*
****************************************************************************/
static void ip_fragin_cachemonitor(FAR struct ip_fragsnode_s *curnode)
{
uint32_t cleancnt = 0;
uint32_t bufcnt;
FAR sq_entry_t *entry;
FAR sq_entry_t *entrynext;
FAR struct ip_fragsnode_s *node;
/* Start cache cleaning if g_bufoccupy exceeds the cache threshold */
if (g_bufoccupy > REASSEMBLY_MAXOCCUPYIOB)
{
cleancnt = g_bufoccupy - REASSEMBLY_MAXOCCUPYIOB;
entry = sq_peek(&g_assemblyhead_time);
while (entry != NULL && cleancnt > 0)
{
entrynext = sq_next(entry);
node = (FAR struct ip_fragsnode_s *)
container_of(entry, FAR struct ip_fragsnode_s, flinkat);
/* Skip specified node */
if (node != curnode)
{
/* Remove fragments of this node */
if (node->frags != NULL)
{
FAR struct ip_fraglink_s *fraglink = node->frags;
while (fraglink != NULL)
{
fraglink = ip_fragin_freelink(fraglink);
}
}
/* Remove node from single-list and free node memory */
bufcnt = ip_frag_remnode(node);
kmm_free(node);
cleancnt = cleancnt > bufcnt ? cleancnt - bufcnt : 0;
}
entry = entrynext;
}
}
}
/****************************************************************************
* Name: ip_fragout_allocfragbuf
*
* Description:
* Prepare one I/O buffer and enqueue it to a specified queue
*
* Input Parameters:
* fragq - the queue head
*
* Returned Value:
* The pointer to I/O buffer
*
****************************************************************************/
static inline FAR struct iob_s *
ip_fragout_allocfragbuf(FAR struct iob_queue_s *fragq)
{
FAR struct iob_s *iob;
iob = iob_tryalloc(false);
if (iob != NULL)
{
if (iob_tryadd_queue(iob, fragq) < 0)
{
iob_free(iob);
iob = NULL;
}
}
return iob;
}
/****************************************************************************
* Public Functions
****************************************************************************/
/****************************************************************************
* Name: ip_frag_remnode
*
* Description:
* free ip_fragsnode_s
*
* Input Parameters:
* node - node of the upper-level linked list, it maintains
* information about all fragments belonging to an IP datagram
*
* Returned Value:
* I/O buffer count of this node
*
****************************************************************************/
uint32_t ip_frag_remnode(FAR struct ip_fragsnode_s *node)
{
g_bufoccupy -= node->bufcnt;
assert(g_bufoccupy < CONFIG_IOB_NBUFFERS);
sq_rem((FAR sq_entry_t *)node, &g_assemblyhead_ipid);
sq_rem((FAR sq_entry_t *)&node->flinkat, &g_assemblyhead_time);
return node->bufcnt;
}
/****************************************************************************
* Name: ip_fragin_enqueue
*
* Description:
* Enqueue one fragment.
* All fragments belonging to one IP frame are organized in a linked list
* form, that is a ip_fragsnode_s node. All ip_fragsnode_s nodes are also
* organized in an upper-level linked list.
*
* Input Parameters:
* dev - NIC Device instance
* curfraglink - node of the lower-level linked list, it maintains
* information of one fragment
*
* Returned Value:
* Whether queue is empty before enqueue the new node
*
****************************************************************************/
bool ip_fragin_enqueue(FAR struct net_driver_s *dev,
FAR struct ip_fraglink_s *curfraglink)
{
FAR struct ip_fragsnode_s *node;
FAR sq_entry_t *entry;
FAR sq_entry_t *entrylast = NULL;
bool empty;
/* The linked list is ordered by IP ID value, walk through it and try to
* find a node that has the same IP ID value, otherwise need to create a
* new node and insert it into the linked list.
*/
entry = sq_peek(&g_assemblyhead_ipid);
empty = (entry == NULL) ? true : false;
while (entry != NULL)
{
node = (struct ip_fragsnode_s *)entry;
if (dev == node->dev && curfraglink->ipid <= node->ipid)
{
break;
}
entrylast = entry;
entry = sq_next(entry);
}
node = (FAR struct ip_fragsnode_s *)entry;
if (node != NULL && curfraglink->ipid == node->ipid)
{
FAR struct ip_fraglink_s *fraglink;
FAR struct ip_fraglink_s *lastlink = NULL;
/* Found a previously created ip_fragsnode_s, insert this new
* ip_fraglink_s to the subchain of this node.
*/
fraglink = node->frags;
/* An ip_fragsnode_s must have an ip_fraglink_s because we allocate a
* new ip_fraglink_s when caching a new ip_fraglink_s with a new IP ID
*/
while (fraglink != NULL)
{
/* The fragment list is ordered by fragment offset value */
if (curfraglink->fragoff <= fraglink->fragoff)
{
break;
}
lastlink = fraglink;
fraglink = fraglink->flink;
}
if (fraglink == NULL)
{
/* This fragment offset is greater than the previous fragments,
* added to the last position
*/
lastlink->flink = curfraglink;
/* Remember I/O buffer count */
node->bufcnt += IOBUF_CNT(curfraglink->frag);
g_bufoccupy += IOBUF_CNT(curfraglink->frag);
}
else if (curfraglink->fragoff == fraglink->fragoff)
{
/* Fragments with same offset value contain the same data, use the
* more recently arrived copy. Refer to RFC791, Section3.2, Page29.
* Replace and removed the old packet from the fragment list
*/
curfraglink->flink = fraglink->flink;
if (lastlink == NULL)
{
node->frags = curfraglink;
}
else
{
lastlink->flink = curfraglink;
}
iob_free_chain(fraglink->frag);
kmm_free(fraglink);
}
else
{
/* Insert into the fragment list */
if (lastlink == NULL)
{
/* Insert before the first node */
curfraglink->flink = node->frags;
node->frags = curfraglink;
}
else
{
/* Insert this node after lastlink */
curfraglink->flink = lastlink->flink;
lastlink->flink = curfraglink;
}
/* Remember I/O buffer count */
node->bufcnt += IOBUF_CNT(curfraglink->frag);
g_bufoccupy += IOBUF_CNT(curfraglink->frag);
}
}
else
{
/* It's a new IP ID fragment, malloc a new node and insert it into the
* linked list
*/
node = kmm_malloc(sizeof(struct ip_fragsnode_s));
if (node == NULL)
{
nerr("ERROR: Failed to allocate buffer.\n");
return -ENOMEM;
}
node->flink = NULL;
node->flinkat = NULL;
node->dev = dev;
node->ipid = curfraglink->ipid;
node->frags = curfraglink;
node->tick = clock_systime_ticks();
node->bufcnt = IOBUF_CNT(curfraglink->frag);
g_bufoccupy += IOBUF_CNT(curfraglink->frag);
node->verifyflag = 0;
node->outgoframe = NULL;
/* Insert this new node into linked list identified by
* g_assemblyhead_ipid with correct position
*/
if (sq_peek(&g_assemblyhead_ipid) == NULL || entrylast == NULL)
{
sq_addfirst((FAR sq_entry_t *)node, &g_assemblyhead_ipid);
}
else
{
sq_addafter(entrylast, (FAR sq_entry_t *)node,
&g_assemblyhead_ipid);
}
/* Add this new node to the tail of linked list identified by
* g_assemblyhead_time
*/
sq_addlast((FAR sq_entry_t *)&node->flinkat, &g_assemblyhead_time);
}
if (curfraglink->fragoff == 0)
{
/* Have received the zero fragment */
node->verifyflag |= IP_FRAGVERIFY_RECVDZEROFRAG;
}
else if (!curfraglink->morefrags)
{
/* Have received the tail fragment */
node->verifyflag |= IP_FRAGVERIFY_RECVDTAILFRAG;
}
/* For indexing convenience */
curfraglink->fragsnode = node;
/* Check receiving status */
ip_fragin_check(node);
/* Buffer is take away, clear original pointers in NIC */
netdev_iob_clear(dev);
/* Perform cache cleaning when reassembly cache size exceeds the configured
* threshold
*/
ip_fragin_cachemonitor(node);
return empty;
}
/****************************************************************************
* Name: ip_fragout_slice
*
* Description:
* According to the MTU of a given NIC, split the original data into
* multiple data pieces, and the space for filling the L3 header is
* reserved at the forefront of each piece. Each piece is stored in
* independent I/O buffer(s) and eventually forms an I/O buffer queue.
* Note:
* 1.About the 'piece' above
* 1).If MTU < CONFIG_IOB_BUFSIZE, a piece consists of an I/O buffer;
* 2).If MTU >= CONFIG_IOB_BUFSIZE, a piece consists of multiple I/O
* buffers.
* 2.This function split and gathers the incoming data into outgoing
* I/O buffers according to the MTU, but is not responsible for
* building the L3 header related to the fragmentation.
*
* Input Parameters:
* iob - The data comes from
* domain - PF_INET or PF_INET6
* mtu - MTU of current NIC
* unfraglen - The starting position to fragmentation processing
* fragq - Those output slices
*
* Returned Value:
* Number of fragments
*
* Assumptions:
* Data length(iob->io_pktlen) is grater than the MTU of current NIC
*
****************************************************************************/
int32_t ip_fragout_slice(FAR struct iob_s *iob, uint8_t domain, uint16_t mtu,
uint16_t unfraglen, FAR struct iob_queue_s *fragq)
{
FAR uint8_t *leftstart;
uint16_t leftlen = 0;
uint16_t ncopy;
uint16_t navail;
uint32_t nfrags = 0;
bool expand = false;
FAR struct iob_s *orig = NULL;
FAR struct iob_s *reorg = NULL;
FAR struct iob_s *head = NULL;
if (iob == NULL || fragq == NULL)
{
nerr("ERROR: Invalid parameters! iob: %p, fragq: %p\n", iob, fragq);
return 0;
}
assert(iob->io_pktlen > mtu);
#ifdef CONFIG_NET_IPv4
if (domain == PF_INET)
{
uint16_t nreside;
/* Fragmentation requires that the data length after the IP header
* must be a multiple of 8
*/
mtu = ((mtu - IPv4_HDRLEN) & ~0x7) + IPv4_HDRLEN;
/* Remember the number of resident bytes */
nreside = mtu;
/* For IPv4, fragmented frames and non-fragmented frames have the
* same length L3 header. So process it as follows:
* the zero fragment use the original I/O buffer and reorganize
* the non-zero fragments (copy to new I/O buffers), space for the
* L3 IP header must be reserved for all fragments
*/
head = iob;
while (iob != NULL && nreside > iob->io_len)
{
nreside -= iob->io_len;
iob = iob->io_flink;
}
leftstart = iob->io_data + iob->io_offset + nreside;
leftlen = iob->io_len - nreside;
orig = iob->io_flink;
if (orig != NULL)
{
orig->io_pktlen = head->io_pktlen - (mtu + leftlen);
iob->io_flink = NULL;
}
head->io_pktlen = mtu;
iob->io_len = nreside;
if (iob_tryadd_queue(head, fragq) < 0)
{
goto allocfail;
}
head = NULL;
nfrags++;
if (leftlen == 0 && orig != NULL)
{
reorg = ip_fragout_allocfragbuf(fragq);
GOTO_IF(reorg == NULL, allocfail);
nfrags++;
/* This is a new fragment buffer, reserve L2&L3 header space
* in the front of this buffer
*/
UPDATE_IOB(reorg, CONFIG_NET_LL_GUARDSIZE, unfraglen);
}
else
{
/* If the MTU is relatively small, the remaining data of the first
* I/O buffer may need to be fragmented multiple times.
* For IPv4, the first I/O Buffer is reused, which have reserved
* the L4 header space, the following fragmentation flow is only
* for non-zero fragments, so following flow does not need to
* consider the L4 header
*/
while (leftlen > 0)
{
reorg = ip_fragout_allocfragbuf(fragq);
GOTO_IF(reorg == NULL, allocfail);
nfrags++;
if (leftlen + unfraglen > mtu)
{
ncopy = mtu - unfraglen;
}
else
{
ncopy = leftlen;
}
/* This is a new fragment buffer, reserve L2&L3 header space
* in the front of this buffer
*/
UPDATE_IOB(reorg, CONFIG_NET_LL_GUARDSIZE, unfraglen);
/* Then copy L4 data */
GOTO_IF(iob_trycopyin(reorg, leftstart, ncopy,
reorg->io_pktlen, false) < 0, allocfail);
leftstart += ncopy;
leftlen -= ncopy;
}
}
}
#ifdef CONFIG_NET_IPv6
else
#endif
#endif
#ifdef CONFIG_NET_IPv6
if (domain == PF_INET6)
{
unfraglen += EXTHDR_FRAG_LEN;
/* Fragmentation requires the length field to be a multiples of 8,
* and the length of the IPv6 basic header and all extended headers
* is a multiples of 8, so here directly fix the MTU to 8-byte
* alignment.
*/
mtu = mtu & ~0x7;
/* For IPv6 fragment, a fragment header needs to be inserted before
* the l4 header, so all data must be reorganized, a space for IPv6
* header and fragment external header is reserved before l4 header
* for all fragments
*/
reorg = ip_fragout_allocfragbuf(fragq);
GOTO_IF(reorg == NULL, allocfail);
nfrags++;
/* Reserve L3 header space */
UPDATE_IOB(reorg, CONFIG_NET_LL_GUARDSIZE, unfraglen);
/* Copy L3 header(include unfragmentable extention header if present)
* from original I/O buffer
*/
orig = iob;
memcpy(reorg->io_data + reorg->io_offset,
orig->io_data + orig->io_offset, unfraglen - EXTHDR_FRAG_LEN);
iob_trimhead(orig, unfraglen - EXTHDR_FRAG_LEN);
}
#endif
/* Copy data from original I/O buffer chain 'orig' to new reorganized
* I/O buffer chain 'reorg'
*/
while (orig != NULL)
{
leftstart = orig->io_data + orig->io_offset;
leftlen = orig->io_len;
/* For each I/O buffer data of the 'orig' chain */
while (leftlen > 0)
{
/* Calculate target area size */
navail = mtu - reorg->io_pktlen;
if (navail > 0)
{
if (leftlen > navail)
{
/* Target area is too small, need expand the destination
* chain
*/
expand = true;
ncopy = navail;
}
else
{
ncopy = leftlen;
}
if (iob_trycopyin(reorg, leftstart, ncopy,
reorg->io_pktlen, false) < 0)
{
goto allocfail;
}
leftlen -= ncopy;
leftstart += ncopy;
}
else
{
expand = true;
}
if (expand)
{
reorg = ip_fragout_allocfragbuf(fragq);
GOTO_IF(reorg == NULL, allocfail);
nfrags++;
/* This is a new fragment buffer, reserve L2&L3 header space
* in the front of this buffer
*/
UPDATE_IOB(reorg, CONFIG_NET_LL_GUARDSIZE, unfraglen);
expand = false;
}
}
orig = iob_free(orig);
}
return nfrags;
allocfail:
nerr("ERROR: Fragout fail! No I/O buffer available!");
iob_free_chain(head);
iob_free_chain(orig);
iob_free_chain(reorg);
iob_free_queue(fragq);
return 0;
}
/****************************************************************************
* Name: ip_frag_startwdog
*
* Description:
* Start the reassembly timeout timer
*
* Returned Value:
* None
*
****************************************************************************/
void ip_frag_startwdog(void)
{
if (g_wdfragtimeout.func == NULL)
{
wd_start(&g_wdfragtimeout, REASSEMBLY_TIMEOUT_TICKS,
ip_fragin_timerout_expiry, (wdparm_t)NULL);
}
}
/****************************************************************************
* Name: ip_frag_uninit
*
* Description:
* Uninitialize the fragment processing module.
*
* Returned Value:
* None
*
****************************************************************************/
int32_t ip_frag_uninit(void)
{
FAR struct net_driver_s *dev;
ninfo("Uninitialize frag proccessing module\n");
/* Stop work queue */
if (!work_available(&g_wkfragtimeout))
{
ninfo("Cancel reassembly work queue\n");
work_cancel(IPFRAGWORK, &g_wkfragtimeout);
}
/* Release frag processing resources of each NIC */
net_lock();
for (dev = g_netdevices; dev; dev = dev->flink)
{
/* Is the interface in the "up" state? */
if ((dev->d_flags & IFF_UP) != 0)
{
ip_frag_stop(dev);
}
}
net_unlock();
return OK;
}
/****************************************************************************
* Name: ip_frag_stop
*
* Description:
* Stop the fragment process function for the specified NIC.
*
* Input Parameters:
* dev - NIC Device instance which will be bring down
*
* Returned Value:
* None
*
****************************************************************************/
void ip_frag_stop(FAR struct net_driver_s *dev)
{
FAR sq_entry_t *entry = NULL;
FAR sq_entry_t *entrynext;
ninfo("Stop frag processing for NIC:%p\n", dev);
nxmutex_lock(&g_ipfrag_lock);
entry = sq_peek(&g_assemblyhead_ipid);
/* Drop those unassembled incoming fragments belonging to this NIC */
while (entry != NULL)
{
FAR struct ip_fragsnode_s *node = (FAR struct ip_fragsnode_s *)entry;
entrynext = sq_next(entry);
if (dev == node->dev)
{
if (node->frags != NULL)
{
FAR struct ip_fraglink_s *fraglink = node->frags;
while (fraglink != NULL)
{
fraglink = ip_fragin_freelink(fraglink);
}
}
ip_frag_remnode(node);
kmm_free(entry);
}
entry = entrynext;
}
nxmutex_unlock(&g_ipfrag_lock);
/* Drop those unsent outgoing fragments belonging to this NIC */
iob_free_queue(&dev->d_fragout);
}
/****************************************************************************
* Name: ip_frag_remallfrags
*
* Description:
* Release all I/O Buffers used by fragment processing module when
* I/O Buffer resources are exhausted.
*
* Returned Value:
* None
*
****************************************************************************/
void ip_frag_remallfrags(void)
{
FAR sq_entry_t *entry = NULL;
FAR sq_entry_t *entrynext;
FAR struct net_driver_s *dev;
nxmutex_lock(&g_ipfrag_lock);
entry = sq_peek(&g_assemblyhead_ipid);
/* Drop all unassembled incoming fragments */
while (entry != NULL)
{
FAR struct ip_fragsnode_s *node = (FAR struct ip_fragsnode_s *)entry;
entrynext = sq_next(entry);
if (node->frags != NULL)
{
FAR struct ip_fraglink_s *fraglink = node->frags;
while (fraglink != NULL)
{
fraglink = ip_fragin_freelink(fraglink);
}
}
/* Because nodes managed by the two queues are the same,
* and g_assemblyhead_ipid will be cleared after this loop ends,
* so only reset g_assemblyhead_time is needed after this loop ends
*/
sq_rem(entry, &g_assemblyhead_ipid);
kmm_free(entry);
entry = entrynext;
}
sq_init(&g_assemblyhead_time);
g_bufoccupy = 0;
nxmutex_unlock(&g_ipfrag_lock);
/* Drop all unsent outgoing fragments */
net_lock();
for (dev = g_netdevices; dev; dev = dev->flink)
{
/* Is the interface in the "up" state? */
if ((dev->d_flags & IFF_UP) != 0)
{
iob_free_queue(&dev->d_fragout);
}
}
net_unlock();
}
/****************************************************************************
* Name: ip_fragout
*
* Description:
* Fragout processing
*
* Input Parameters:
* dev - The NIC device
*
* Returned Value:
* A non-negative value is returned on success; negative value on failure.
*
****************************************************************************/
int32_t ip_fragout(FAR struct net_driver_s *dev)
{
uint16_t mtu = devif_get_mtu(dev);
if (dev->d_iob == NULL || dev->d_len == 0)
{
return -EINVAL;
}
if (dev->d_iob->io_pktlen <= mtu)
{
return OK;
}
#ifdef CONFIG_NET_6LOWPAN
if (dev->d_lltype == NET_LL_IEEE802154 ||
dev->d_lltype == NET_LL_PKTRADIO)
{
return -EINVAL;
}
#endif
if (devif_is_loopback(dev))
{
return OK;
}
ninfo("pkt size: %d, MTU: %d\n", dev->d_iob->io_pktlen, mtu);
#ifdef CONFIG_NET_IPv4
if (IFF_IS_IPv4(dev->d_flags))
{
return ipv4_fragout(dev, mtu);
}
#endif
#ifdef CONFIG_NET_IPv6
if (IFF_IS_IPv6(dev->d_flags))
{
return ipv6_fragout(dev, mtu);
}
#endif
return -EINVAL;
}
#endif /* (CONFIG_NET_IPv4 || CONFIG_NET_IPv6) && CONFIG_NET_IPFRAG */