nuttx/net/neighbor/neighbor_ethernet_out.c
chao an 34d2cde8a8 net/l2/l3/l4: add support of iob offload
1. Add new config CONFIG_NET_LL_GUARDSIZE to isolate the l2 stack,
   which will benefit the l3(IP) layer for multi-MAC(l2) implementations,
   especially in some NICs such as cellular net drivers.

new configuration options: CONFIG_NET_LL_GUARDSIZE

CONFIG_NET_LL_GUARDSIZE reserves the l2 buffer header size in the
network buffer to isolate the L2/L3 (MAC/IP) data on the network layer,
which is beneficial to transparent transmission and forwarding of
L3 network layer protocols.

------------------------------------------------------------
Layout of first iob entry:

        iob_data (aligned by CONFIG_IOB_ALIGNMENT)
            |
            |                  io_offset(CONFIG_NET_LL_GUARDSIZE)
            |                                |
            -------------------------------------------------
      iob   |            Reserved            |    io_len    |
            -------------------------------------------------

-------------------------------------------------------------
Layout of different NICs implementation:

        iob_data (aligned by CONFIG_IOB_ALIGNMENT)
            |
            |                 io_offset(CONFIG_NET_LL_GUARDSIZE)
            |                                |
            -------------------------------------------------
 Ethernet   |       Reserved    | ETH_HDRLEN |    io_len    |
            ---------------------------------|---------------
 8021Q      |   Reserved  | ETH_8021Q_HDRLEN |    io_len    |
            ---------------------------------|---------------
 ipforward  |            Reserved            |    io_len    |
            -------------------------------------------------

--------------------------------------------------------------------

2. Support iob offload to l2 driver to avoid unnecessary memory copy

Support send/receive iob vectors directly between the NICs and l3/l4
stack to avoid unnecessary memory copies, especially on hardware that
supports Scatter/gather, which can greatly improve performance.

new interface to support iob offload:

  ------------------------------------------
  |    IOB version     |     original      |
  |----------------------------------------|
  |  devif_iob_poll()  |   devif_poll()    |
  |       ...          |       ...         |
  ------------------------------------------

--------------------------------------------------------------------

1> NIC hardware support Scatter/gather transfer

TX:

                tcp_poll()/udp_poll()/pkt_poll()/...(l3|l4)
                           /              \
                          /                \
devif_poll_[l3|l4]_connections()     devif_iob_send() (nocopy:udp/icmp/...)
           /                                   \      (copy:tcp)
          /                                     \
  devif_iob_poll("NIC"_txpoll)                callback() // "NIC"_txpoll
                                                  |
                            dev->d_iob:           |
                                                ---------------         ---------------
                             io_data       iob1 |  |          |    iob3 |  |          |
                                    \           ---------------         ---------------
                                  ---------------  |       --------------- |
                             iob0 |  |          |  |  iob2 |  |          | |
                                  ---------------  |       --------------- |
                                     \             |          /           /
                                        \          |       /           /
                                   ----------------------------------------------
                    NICs io vector |    |    |    |    |    |    |    |    |    |
                                   ----------------------------------------------

RX:

  [tcp|udp|icmp|...]ipv[4|6]_data_handler()(iob_concat/append to readahead)
                    |
                    |
      [tcp|udp|icmp|...]_ipv[4|6]_in()/...
                    |
                    |
          pkt/ipv[4/6]_input()/...
                    |
                    |
     NICs io vector receive(iov_base to each iobs)

--------------------------------------------------------------------

2> CONFIG_IOB_BUFSIZE is greater than MTU:

TX:

"(CONFIG_IOB_BUFSIZE) > (MAX_NETDEV_PKTSIZE + CONFIG_NET_GUARDSIZE + CONFIG_NET_LL_GUARDSIZE)"

                tcp_poll()/udp_poll()/pkt_poll()/...(l3|l4)
                           /              \
                          /                \
devif_poll_[l3|l4]_connections()     devif_iob_send() (nocopy:udp/icmp/...)
           /                                   \      (copy:tcp)
          /                                     \
  devif_iob_poll("NIC"_txpoll)                callback() // "NIC"_txpoll
                                                  |
                                             "NIC"_send()
                          (dev->d_iob->io_data[CONFIG_NET_LL_GUARDSIZE - NET_LL_HDRLEN(dev)])

RX:

  [tcp|udp|icmp|...]ipv[4|6]_data_handler()(iob_concat/append to readahead)
                    |
                    |
      [tcp|udp|icmp|...]_ipv[4|6]_in()/...
                    |
                    |
          pkt/ipv[4/6]_input()/...
                    |
                    |
     NICs io vector receive(iov_base to io_data)

--------------------------------------------------------------------

3> Compatible with all old flat buffer NICs

TX:
                tcp_poll()/udp_poll()/pkt_poll()/...(l3|l4)
                           /              \
                          /                \
devif_poll_[l3|l4]_connections()     devif_iob_send() (nocopy:udp/icmp/...)
           /                                   \      (copy:tcp)
          /                                     \
  devif_iob_poll(devif_poll_callback())  devif_poll_callback() /* new interface, gather iobs to flat buffer */
       /                                           \
      /                                             \
 devif_poll("NIC"_txpoll)                     "NIC"_send()(dev->d_buf)

RX:

  [tcp|udp|icmp|...]ipv[4|6]_data_handler()(iob_concat/append to readahead)
                    |
                    |
      [tcp|udp|icmp|...]_ipv[4|6]_in()/...
                    |
                    |
               netdev_input()  /* new interface, Scatter/gather flat/iob buffer */
                    |
                    |
          pkt/ipv[4|6]_input()/...
                    |
                    |
    NICs io vector receive(Original flat buffer)

3. Iperf throughput on NuttX simulator:

  -------------------------------------------------
  |  Protocol      | Server | Client |            |
  |-----------------------------------------------|
  |  TCP           |  813   |   834  |  Mbits/sec |
  |  TCP(Offload)  | 1720   |  1100  |  Mbits/sec |
  |  UDP           |   22   |   757  |  Mbits/sec |
  |  UDP(Offload)  |   25   |  1250  |  Mbits/sec |
  -------------------------------------------------

Signed-off-by: chao an <anchao@xiaomi.com>
2022-12-03 11:47:04 +08:00

211 lines
7.3 KiB
C

/****************************************************************************
* net/neighbor/neighbor_ethernet_out.c
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <nuttx/config.h>
#include <string.h>
#include <assert.h>
#include <debug.h>
#include <nuttx/net/ethernet.h>
#include <nuttx/net/ip.h>
#include <nuttx/net/netdev.h>
#include <nuttx/net/neighbor.h>
#include "route/route.h"
#include "icmpv6/icmpv6.h"
#include "neighbor/neighbor.h"
/****************************************************************************
* Private Data
****************************************************************************/
/* Support for MLD multicast addresses.
*
* Well-known ethernet multicast address:
*
* ADDRESS TYPE USAGE
* 01-00-0c-cc-cc-cc 0x0802 CDP (Cisco Discovery Protocol), VTP (Virtual
* Trunking Protocol)
* 01-00-0c-cc-cc-cd 0x0802 Cisco Shared Spanning Tree Protocol Address
* 01-80-c2-00-00-00 0x0802 Spanning Tree Protocol (for bridges) IEEE 802.1D
* 01-80-c2-00-00-02 0x0809 Ethernet OAM Protocol IEEE 802.3ah
* 01-00-5e-xx-xx-xx 0x0800 IPv4 IGMP Multicast Address
* 33-33-00-00-00-00 0x86DD IPv6 Neighbor Discovery
* 33-33-xx-xx-xx-xx 0x86DD IPv6 Multicast Address (RFC3307)
*/
/****************************************************************************
* Private Functions
****************************************************************************/
/****************************************************************************
* Public Functions
****************************************************************************/
/****************************************************************************
 * Name: neighbor_ethernet_out
 *
 * Description:
 *   This function should be called before sending out an IPv6 packet.  The
 *   function checks the destination IPv6 address of the IPv6 packet to see
 *   what Ethernet MAC address should be used as the destination MAC
 *   address on the Ethernet.
 *
 *   If the device's NOARP flag is set, the flag is cleared and the packet
 *   is left untouched.
 *
 *   If the destination IPv6 address is a multicast address, the Ethernet
 *   destination is built as 33-33 followed by the last four bytes of the
 *   IPv6 destination address.
 *
 *   Otherwise, if the destination IPv6 address is in the local network
 *   (determined by logical ANDing of netmask and our IPv6 address), the
 *   function checks the Neighbor Table to see if an entry for the
 *   destination IPv6 address is found.  If the destination is not on the
 *   local network, the router's IPv6 address is looked up instead.  If an
 *   entry is found, an Ethernet header is pre-pended at the beginning of
 *   the packet and the function returns.
 *
 *   If no Neighbor Table entry is found, the packet in the device buffer
 *   is replaced by an ICMPv6 Neighbor Solicit request packet for the IPv6
 *   address and the function returns immediately.  The IPv6 packet is
 *   dropped and it is assumed that the higher level protocols (e.g., TCP)
 *   eventually will retransmit the dropped packet.
 *
 *   Upon return in either case, a packet to be sent is present in the
 *   device buffer and the d_len field holds the length of the Ethernet
 *   frame that should be transmitted.
 *
 * Input Parameters:
 *   dev - The device driver structure holding the outgoing IPv6 packet
 *
 * Returned Value:
 *   None
 *
 ****************************************************************************/

void neighbor_ethernet_out(FAR struct net_driver_s *dev)
{
  FAR struct eth_hdr_s *eth = ETHBUF;
  FAR struct ipv6_hdr_s *ip = IPv6BUF;
  struct neighbor_addr_s laddr;

  /* Skip sending Neighbor Solicitations when the frame to be transmitted was
   * written into a packet socket.
   */

  if (IFF_IS_NOARP(dev->d_flags))
    {
      /* Clear the indication and let the packet continue on its way. */

      IFF_CLR_NOARP(dev->d_flags);
      return;
    }

  /* Find the destination IPv6 address in the Neighbor Table and construct
   * the Ethernet header.  If the destination IPv6 address isn't on the local
   * network, we use the default router's IPv6 address instead.
   *
   * If no Neighbor Table entry is found, we overwrite the original IPv6
   * packet with a Neighbor Solicitation Request for the IPv6 address.
   */

  /* First check if destination isn't IPv6 multicast address. */

  if (!net_is_addr_mcast(ip->destipaddr))
    {
      net_ipv6addr_t ipaddr;

      /* Check if the destination address is on the local network. */

      if (!net_ipv6addr_maskcmp(ip->destipaddr, dev->d_ipv6addr,
                                dev->d_ipv6netmask))
        {
          /* Destination address is not on the local network */

#ifdef CONFIG_NET_ROUTE
          /* We have a routing table.. find the correct router to use in
           * this case (or, as a fall-back, use the device's default router
           * address).  We will use the router IPv6 address instead of the
           * destination address when determining the MAC address.
           */

          netdev_ipv6_router(dev, ip->destipaddr, ipaddr);
#else
          /* Use the device's default router IPv6 address instead of the
           * destination address when determining the MAC address.
           */

          net_ipv6addr_copy(ipaddr, dev->d_ipv6draddr);
#endif
        }
      else
        {
          /* Else, we use the destination IPv6 address. */

          net_ipv6addr_copy(ipaddr, ip->destipaddr);
        }

      /* Check if we already have this destination address in the
       * Neighbor Table.
       */

      if (neighbor_lookup(ipaddr, &laddr) < 0)
        {
#ifdef CONFIG_NET_ICMPv6
          ninfo("IPv6 Neighbor solicitation for IPv6\n");

          /* The destination address was not in our Neighbor Table, so we
           * overwrite the IPv6 packet with an ICMPv6 Neighbor Solicitation
           * message.
           */

          icmpv6_solicit(dev, ipaddr);
#else
          /* What to do here?  We need the laddr, but no way to get it. */

          nerr("ERROR: IPv6 needs link layer address for ethernet.\n");
          DEBUGPANIC();
#endif

          /* In both configurations we must stop here.  The lookup failed,
           * so there is no link layer address in 'laddr' to build a
           * unicast Ethernet header from.
           *
           * NOTE(review): this relies on icmpv6_solicit() leaving a
           * complete, ready-to-send frame (link layer header and d_len
           * included) in the device buffer, as the function description
           * above states.  Falling through would rewrite the header and
           * adjust d_len a second time -- confirm against
           * icmpv6_solicit().
           */

          return;
        }
    }

  /* Build an Ethernet header. */

  if (net_is_addr_mcast(ip->destipaddr))
    {
      /* IPv6 multicast maps to the Ethernet address 33-33-xx-xx-xx-xx,
       * where xx-xx-xx-xx are the last four bytes of the IPv6 address.
       */

      eth->dest[0] = eth->dest[1] = 0x33;
      memcpy(&eth->dest[2], &ip->destipaddr[6], 4);
    }
  else
    {
      /* Unicast:  use the link layer address found in the Neighbor Table */

      memcpy(eth->dest, laddr.u.na_ethernet.ether_addr_octet,
             ETHER_ADDR_LEN);
    }

  /* Finish populating the Ethernet header */

  memcpy(eth->src, dev->d_mac.ether.ether_addr_octet, ETHER_ADDR_LEN);
  eth->type = HTONS(ETHTYPE_IP6);

  /* Update device buffer length.  This is done before the link layer
   * header size is added to d_len below.
   */

  iob_update_pktlen(dev->d_iob, dev->d_len);

  /* Add the size of the link layer header to the total size of the
   * outgoing packet.
   */

  dev->d_len += netdev_ipv6_hdrlen(dev);
  ninfo("Outgoing IPv6 Packet length: %d (%d)\n",
        dev->d_len, (ip->len[0] << 8) | ip->len[1]);
}