nuttx/net/icmp/icmp_reply.c
chao an 34d2cde8a8 net/l2/l3/l4: add support of iob offload
1. Add new config CONFIG_NET_LL_GUARDSIZE to isolate the l2 stack,
   which will benefit the l3(IP) layer for multi-MAC(l2) implementations,
   especially for some NICs such as cellular net drivers.

new configuration options: CONFIG_NET_LL_GUARDSIZE

CONFIG_NET_LL_GUARDSIZE reserves room for the l2 header at the start of
the network buffer to isolate the L2/L3 (MAC/IP) data in the network layer,
which benefits transparent transmission and forwarding by L3 network
layer protocols.

------------------------------------------------------------
Layout of first iob entry:

        iob_data (aligned by CONFIG_IOB_ALIGNMENT)
            |
            |                  io_offset(CONFIG_NET_LL_GUARDSIZE)
            |                                |
            -------------------------------------------------
      iob   |            Reserved            |    io_len    |
            -------------------------------------------------

-------------------------------------------------------------
Layout of different NICs implementation:

        iob_data (aligned by CONFIG_IOB_ALIGNMENT)
            |
            |                 io_offset(CONFIG_NET_LL_GUARDSIZE)
            |                                |
            -------------------------------------------------
 Ethernet   |       Reserved    | ETH_HDRLEN |    io_len    |
            ---------------------------------|---------------
 8021Q      |   Reserved  | ETH_8021Q_HDRLEN |    io_len    |
            ---------------------------------|---------------
 ipforward  |            Reserved            |    io_len    |
            -------------------------------------------------

--------------------------------------------------------------------

2. Support iob offload to l2 driver to avoid unnecessary memory copy

Support send/receive iob vectors directly between the NICs and l3/l4
stack to avoid unnecessary memory copies, especially on hardware that
supports Scatter/gather, which can greatly improve performance.

new interface to support iob offload:

  ------------------------------------------
  |    IOB version     |     original      |
  |----------------------------------------|
  |  devif_iob_poll()  |   devif_poll()    |
  |       ...          |       ...         |
  ------------------------------------------

--------------------------------------------------------------------

1> NIC hardware supports Scatter/gather transfer

TX:

                tcp_poll()/udp_poll()/pkt_poll()/...(l3|l4)
                           /              \
                          /                \
devif_poll_[l3|l4]_connections()     devif_iob_send() (nocopy:udp/icmp/...)
           /                                   \      (copy:tcp)
          /                                     \
  devif_iob_poll("NIC"_txpoll)                callback() // "NIC"_txpoll
                                                  |
                            dev->d_iob:           |
                                                ---------------         ---------------
                             io_data       iob1 |  |          |    iob3 |  |          |
                                    \           ---------------         ---------------
                                  ---------------  |       --------------- |
                             iob0 |  |          |  |  iob2 |  |          | |
                                  ---------------  |       --------------- |
                                     \             |          /           /
                                        \          |       /           /
                                   ----------------------------------------------
                    NICs io vector |    |    |    |    |    |    |    |    |    |
                                   ----------------------------------------------

RX:

  [tcp|udp|icmp|...]ipv[4|6]_data_handler()(iob_concat/append to readahead)
                    |
                    |
      [tcp|udp|icmp|...]_ipv[4|6]_in()/...
                    |
                    |
          pkt/ipv[4/6]_input()/...
                    |
                    |
     NICs io vector receive(iov_base to each iobs)

--------------------------------------------------------------------

2> CONFIG_IOB_BUFSIZE is greater than MTU:

TX:

"(CONFIG_IOB_BUFSIZE) > (MAX_NETDEV_PKTSIZE + CONFIG_NET_GUARDSIZE + CONFIG_NET_LL_GUARDSIZE)"

                tcp_poll()/udp_poll()/pkt_poll()/...(l3|l4)
                           /              \
                          /                \
devif_poll_[l3|l4]_connections()     devif_iob_send() (nocopy:udp/icmp/...)
           /                                   \      (copy:tcp)
          /                                     \
  devif_iob_poll("NIC"_txpoll)                callback() // "NIC"_txpoll
                                                  |
                                             "NIC"_send()
                          (dev->d_iob->io_data[CONFIG_NET_LL_GUARDSIZE - NET_LL_HDRLEN(dev)])

RX:

  [tcp|udp|icmp|...]ipv[4|6]_data_handler()(iob_concat/append to readahead)
                    |
                    |
      [tcp|udp|icmp|...]_ipv[4|6]_in()/...
                    |
                    |
          pkt/ipv[4/6]_input()/...
                    |
                    |
     NICs io vector receive(iov_base to io_data)

--------------------------------------------------------------------

3> Compatible with all old flat buffer NICs

TX:
                tcp_poll()/udp_poll()/pkt_poll()/...(l3|l4)
                           /              \
                          /                \
devif_poll_[l3|l4]_connections()     devif_iob_send() (nocopy:udp/icmp/...)
           /                                   \      (copy:tcp)
          /                                     \
  devif_iob_poll(devif_poll_callback())  devif_poll_callback() /* new interface, gather iobs to flat buffer */
       /                                           \
      /                                             \
 devif_poll("NIC"_txpoll)                     "NIC"_send()(dev->d_buf)

RX:

  [tcp|udp|icmp|...]ipv[4|6]_data_handler()(iob_concat/append to readahead)
                    |
                    |
      [tcp|udp|icmp|...]_ipv[4|6]_in()/...
                    |
                    |
               netdev_input()  /* new interface, Scatter/gather flat/iob buffer */
                    |
                    |
          pkt/ipv[4|6]_input()/...
                    |
                    |
    NICs io vector receive(Original flat buffer)

3. Iperf throughput on NuttX simulator:

  -------------------------------------------------
  |  Protocol      | Server | Client |            |
  |-----------------------------------------------|
  |  TCP           |  813   |   834  |  Mbits/sec |
  |  TCP(Offload)  | 1720   |  1100  |  Mbits/sec |
  |  UDP           |   22   |   757  |  Mbits/sec |
  |  UDP(Offload)  |   25   |  1250  |  Mbits/sec |
  -------------------------------------------------

Signed-off-by: chao an <anchao@xiaomi.com>
2022-12-03 11:47:04 +08:00

209 lines
6.0 KiB
C

/****************************************************************************
* net/icmp/icmp_reply.c
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <nuttx/config.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <stdint.h>
#include <string.h>
#include <debug.h>
#include <netinet/in.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <nuttx/net/net.h>
#include <nuttx/net/netdev.h>
#include <nuttx/net/ip.h>
#include <nuttx/net/icmp.h>
#include "utils/utils.h"
#include "netdev/netdev.h"
#include "devif/devif.h"
#include "inet/inet.h"
#include "icmp/icmp.h"
#ifdef CONFIG_NET_ICMP
/****************************************************************************
* Pre-processor Definitions
****************************************************************************/
/* Upper bound, in bytes, on the total length (IPv4 header + ICMP header +
 * quoted original data) of the ICMP message built by icmp_reply().  The
 * quoted data is cut down so that this limit is not exceeded.
 *
 * RFC 1812:
 *   4.3.2.3, Original Message Header
 *   ...
 *   The ICMP datagram SHOULD contain as much of the original datagram as
 *   possible without the length of the ICMP datagram exceeding 576 bytes.
 *   ...
 */
#define ICMP_MAXMSGLEN 576
/****************************************************************************
* Public Functions
****************************************************************************/
/****************************************************************************
 * Name: icmp_reply
 *
 * Description:
 *   Rebuild the IPv4 packet currently held by the device into an ICMP
 *   message of the requested type and code.  The outgoing message is made
 *   of a new IPv4 header, an ICMP header and the leading part of the
 *   original datagram (starting at its IPv4 header), cut so that the whole
 *   message does not exceed ICMP_MAXMSGLEN bytes.  The device address and
 *   the source address of the original packet are handed to
 *   ipv4_build_header() for the new IPv4 header.
 *
 *   Rules quoted from RFC 1122, 3.2.2:
 *     - MUST send at least the IP header and 8 bytes of header.
 *       MAY send more (we do).
 *     - MUST NOT change this header information.
 *     - MUST NOT reply to a multicast/broadcast IP address.
 *     - MUST NOT reply to a multicast/broadcast MAC address.
 *     - MUST reply to only the first fragment.
 *
 *   NOTE(review): of the "MUST NOT reply" rules, only the destination IP
 *   checks (INADDR_ANY and, with CONFIG_NET_BROADCAST, the broadcast
 *   addresses) are performed inside this function.
 *
 * Input Parameters:
 *   dev  - The device driver structure containing the received packet
 *   type - ICMP Message Type, eg. ICMP_DEST_UNREACHABLE
 *   code - ICMP Message Code, eg. ICMP_PORT_UNREACH
 *
 * Returned Value:
 *   None.  On return dev->d_len holds the length of the outgoing ICMP
 *   message, or 0 when no reply has been generated.
 *
 ****************************************************************************/

void icmp_reply(FAR struct net_driver_s *dev, int type, int code)
{
  int hdrlen = IPv4_HDRLEN + sizeof(struct icmp_hdr_s);
  FAR struct ipv4_hdr_s *iphdr = IPv4BUF;
  FAR struct icmp_hdr_s *icmphdr;
  uint16_t quotelen;
  const in_addr_t any = INADDR_ANY;
#ifdef CONFIG_NET_BROADCAST
  const in_addr_t bcast = INADDR_BROADCAST;
#endif

  /* No reply for a packet whose destination is INADDR_ANY */

  if (net_ipv4addr_hdrcmp(iphdr->destipaddr, &any))
    {
      dev->d_len = 0;
      return;
    }

#ifdef CONFIG_NET_BROADCAST
  /* No reply for a packet whose destination is a broadcast address */

  if (net_ipv4addr_hdrcmp(iphdr->destipaddr, &bcast) ||
      net_ipv4addr_broadcast(net_ip4addr_conv32(iphdr->destipaddr),
                             dev->d_netmask))
    {
      dev->d_len = 0;
      return;
    }
#endif

  /* Total length of the original datagram, taken from its IPv4 header */

  quotelen = (iphdr->len[0] << 8) + iphdr->len[1];

  /* Quote only as much as fits into an ICMP_MAXMSGLEN sized message and
   * drop the excess from the tail of the iob chain.
   */

  if (quotelen > ICMP_MAXMSGLEN - hdrlen)
    {
      quotelen = ICMP_MAXMSGLEN - hdrlen;
      iob_trimtail(dev->d_iob, dev->d_iob->io_pktlen - quotelen);
    }

  /* Make room for the new IPv4 + ICMP headers in front of the quote */

  if (quotelen + hdrlen + CONFIG_NET_LL_GUARDSIZE > CONFIG_IOB_BUFSIZE)
    {
      /* Headers and quote do not fit into one iob: keep the original
       * datagram aside and put the headers into a new device buffer.
       */

      FAR struct iob_s *saved = dev->d_iob;

      dev->d_iob = NULL;

      if (netdev_iob_prepare(dev, false, 0) != OK)
        {
          /* No new buffer: hand the original back and release it */

          dev->d_len = 0;
          dev->d_iob = saved;
          netdev_iob_release(dev);
          return;
        }

      /* Seed the new buffer with the original IPv4 header; iphdr still
       * points into the saved chain here.
       */

      if (iob_trycopyin(dev->d_iob, (FAR void *)iphdr,
                        IPv4_HDRLEN, 0, false) != IPv4_HDRLEN)
        {
          dev->d_len = 0;
          netdev_iob_release(dev);
          iob_free_chain(saved);
          return;
        }

      /* Account for the ICMP header that is filled in further below */

      iob_update_pktlen(dev->d_iob, dev->d_iob->io_pktlen +
                                    sizeof(struct icmp_hdr_s));

      /* Append the original datagram behind the new headers */

      iob_concat(dev->d_iob, saved);

      /* From now on work on the IPv4 header in the new buffer */

      iphdr = IPBUF(0);
    }
  else
    {
      /* Everything fits into the current iob: shift the original
       * datagram back by the size of the new headers (regions overlap,
       * hence memmove) and extend the packet length accordingly.
       */

      memmove((FAR char *)iphdr + hdrlen, iphdr, quotelen);
      iob_update_pktlen(dev->d_iob, quotelen + hdrlen);
    }

  /* Build the outgoing IPv4 header */

  dev->d_len = hdrlen + quotelen;
  ipv4_build_header(IPv4BUF, dev->d_len, IP_PROTO_ICMP,
                    &dev->d_ipaddr, (FAR in_addr_t *)iphdr->srcipaddr,
                    IP_TTL_DEFAULT, NULL);

  /* The ICMP header directly follows the IPv4 header */

  icmphdr          = (FAR struct icmp_hdr_s *)(iphdr + 1);
  icmphdr->type    = type;
  icmphdr->icode   = code;
  icmphdr->data[0] = 0;
  icmphdr->data[1] = 0;

  /* The checksum field must be zero while the checksum is computed; a
   * result of zero is stored as 0xffff.
   */

  icmphdr->icmpchksum = 0;
  icmphdr->icmpchksum = ~icmp_chksum_iob(dev->d_iob);
  if (icmphdr->icmpchksum == 0)
    {
      icmphdr->icmpchksum = 0xffff;
    }

  ninfo("Outgoing ICMP packet length: %d\n", dev->d_len);
}
#endif /* CONFIG_NET_ICMP */