nuttx/net/udp/udp_callback.c
chao an 34d2cde8a8 net/l2/l3/l4: add support of iob offload
1. Add new config CONFIG_NET_LL_GUARDSIZE to isolate the l2 stack,
   which will benefit the l3(IP) layer for multi-MAC(l2) implementations,
   especially in some NICs such as cellular net drivers.

new configuration options: CONFIG_NET_LL_GUARDSIZE

CONFIG_NET_LL_GUARDSIZE reserves space for the l2 header at the start of
the network buffer to isolate the L2/L3 (MAC/IP) data in the network layer,
which is beneficial to transparent transmission and forwarding of L3
network layer protocols

------------------------------------------------------------
Layout of first iob entry:

        iob_data (aligned by CONFIG_IOB_ALIGNMENT)
            |
            |                  io_offset(CONFIG_NET_LL_GUARDSIZE)
            |                                |
            -------------------------------------------------
      iob   |            Reserved            |    io_len    |
            -------------------------------------------------

-------------------------------------------------------------
Layout of different NICs implementation:

        iob_data (aligned by CONFIG_IOB_ALIGNMENT)
            |
            |                 io_offset(CONFIG_NET_LL_GUARDSIZE)
            |                                |
            -------------------------------------------------
 Ethernet   |       Reserved    | ETH_HDRLEN |    io_len    |
            ---------------------------------|---------------
 8021Q      |   Reserved  | ETH_8021Q_HDRLEN |    io_len    |
            ---------------------------------|---------------
 ipforward  |            Reserved            |    io_len    |
            -------------------------------------------------

--------------------------------------------------------------------

2. Support iob offload to l2 driver to avoid unnecessary memory copy

Support send/receive iob vectors directly between the NICs and l3/l4
stack to avoid unnecessary memory copies, especially on hardware that
supports Scatter/gather, which can greatly improve performance.

new interface to support iob offload:

  ------------------------------------------
  |    IOB version     |     original      |
  |----------------------------------------|
  |  devif_iob_poll()  |   devif_poll()    |
  |       ...          |       ...         |
  ------------------------------------------

--------------------------------------------------------------------

1> NIC hardware support Scatter/gather transfer

TX:

                tcp_poll()/udp_poll()/pkt_poll()/...(l3|l4)
                           /              \
                          /                \
devif_poll_[l3|l4]_connections()     devif_iob_send() (nocopy:udp/icmp/...)
           /                                   \      (copy:tcp)
          /                                     \
  devif_iob_poll("NIC"_txpoll)                callback() // "NIC"_txpoll
                                                  |
                            dev->d_iob:           |
                                                ---------------         ---------------
                             io_data       iob1 |  |          |    iob3 |  |          |
                                    \           ---------------         ---------------
                                  ---------------  |       --------------- |
                             iob0 |  |          |  |  iob2 |  |          | |
                                  ---------------  |       --------------- |
                                     \             |          /           /
                                        \          |       /           /
                                   ----------------------------------------------
                    NICs io vector |    |    |    |    |    |    |    |    |    |
                                   ----------------------------------------------

RX:

  [tcp|udp|icmp|...]ipv[4|6]_data_handler()(iob_concat/append to readahead)
                    |
                    |
      [tcp|udp|icmp|...]_ipv[4|6]_in()/...
                    |
                    |
          pkt/ipv[4/6]_input()/...
                    |
                    |
     NICs io vector receive(iov_base to each iobs)

--------------------------------------------------------------------

2> CONFIG_IOB_BUFSIZE is greater than MTU:

TX:

"(CONFIG_IOB_BUFSIZE) > (MAX_NETDEV_PKTSIZE + CONFIG_NET_GUARDSIZE + CONFIG_NET_LL_GUARDSIZE)"

                tcp_poll()/udp_poll()/pkt_poll()/...(l3|l4)
                           /              \
                          /                \
devif_poll_[l3|l4]_connections()     devif_iob_send() (nocopy:udp/icmp/...)
           /                                   \      (copy:tcp)
          /                                     \
  devif_iob_poll("NIC"_txpoll)                callback() // "NIC"_txpoll
                                                  |
                                             "NIC"_send()
                          (dev->d_iob->io_data[CONFIG_NET_LL_GUARDSIZE - NET_LL_HDRLEN(dev)])

RX:

  [tcp|udp|icmp|...]ipv[4|6]_data_handler()(iob_concat/append to readahead)
                    |
                    |
      [tcp|udp|icmp|...]_ipv[4|6]_in()/...
                    |
                    |
          pkt/ipv[4/6]_input()/...
                    |
                    |
     NICs io vector receive(iov_base to io_data)

--------------------------------------------------------------------

3> Compatible with all old flat buffer NICs

TX:
                tcp_poll()/udp_poll()/pkt_poll()/...(l3|l4)
                           /              \
                          /                \
devif_poll_[l3|l4]_connections()     devif_iob_send() (nocopy:udp/icmp/...)
           /                                   \      (copy:tcp)
          /                                     \
  devif_iob_poll(devif_poll_callback())  devif_poll_callback() /* new interface, gather iobs to flat buffer */
       /                                           \
      /                                             \
 devif_poll("NIC"_txpoll)                     "NIC"_send()(dev->d_buf)

RX:

  [tcp|udp|icmp|...]ipv[4|6]_data_handler()(iob_concat/append to readahead)
                    |
                    |
      [tcp|udp|icmp|...]_ipv[4|6]_in()/...
                    |
                    |
               netdev_input()  /* new interface, Scatter/gather flat/iob buffer */
                    |
                    |
          pkt/ipv[4|6]_input()/...
                    |
                    |
    NICs io vector receive(Original flat buffer)

3. Iperf throughput on NuttX simulator:

  -------------------------------------------------
  |  Protocol      | Server | Client |            |
  |-----------------------------------------------|
  |  TCP           |  813   |   834  |  Mbits/sec |
  |  TCP(Offload)  | 1720   |  1100  |  Mbits/sec |
  |  UDP           |   22   |   757  |  Mbits/sec |
  |  UDP(Offload)  |   25   |  1250  |  Mbits/sec |
  -------------------------------------------------

Signed-off-by: chao an <anchao@xiaomi.com>
2022-12-03 11:47:04 +08:00

283 lines
7.6 KiB
C

/****************************************************************************
* net/udp/udp_callback.c
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <nuttx/config.h>
#if defined(CONFIG_NET) && defined(CONFIG_NET_UDP)
#include <stdint.h>
#include <string.h>
#include <debug.h>
#include <nuttx/net/netconfig.h>
#include <nuttx/net/netdev.h>
#include <nuttx/net/netstats.h>
#include <nuttx/net/udp.h>
#include "devif/devif.h"
#include "udp/udp.h"
/****************************************************************************
* Private Functions
****************************************************************************/
/****************************************************************************
 * Name: udp_datahandler
 *
 * Description:
 *   Handle the receipt of UDP data by adding the newly received packet to
 *   the UDP read-ahead buffer.
 *
 *   The sender's socket address (optionally preceded by the index of the
 *   receiving interface) is written at the very start of the first I/O
 *   buffer's io_data.  The l3/l4 headers are then trimmed from the chain
 *   and the chain is appended to conn->readahead.
 *
 * Input Parameters:
 *   dev    - The device that received the packet (dev->d_iob holds it)
 *   conn   - The UDP connection whose read-ahead queue takes the packet
 *   buffer - Not referenced by this function
 *   buflen - The number of bytes of UDP payload
 *
 * Returned Value:
 *   buflen if the I/O buffer chain was queued; zero if queuing failed, in
 *   which case the I/O buffer chain has been freed.
 *
 ****************************************************************************/

static uint16_t udp_datahandler(FAR struct net_driver_s *dev,
                                FAR struct udp_conn_s *conn,
                                FAR uint8_t *buffer, uint16_t buflen)
{
  FAR struct iob_s *iob;
  int ret;
#ifdef CONFIG_NET_IPv6
  FAR struct sockaddr_in6 src_addr6 =
  {
    0
  };
#endif

#ifdef CONFIG_NET_IPv4
  FAR struct sockaddr_in src_addr4 =
  {
    0
  };
#endif

  uint8_t src_addr_size;
  FAR void *src_addr;
  uint8_t offset = 0;

#if CONFIG_NET_RECV_BUFSIZE > 0
  /* Release queued packets for as long as the size of the read-ahead queue
   * exceeds the limit configured on the connection.
   */

  while (iob_get_queue_size(&conn->readahead) > conn->rcvbufs)
    {
      iob = iob_remove_queue(&conn->readahead);
      iob_free_chain(iob);
    }
#endif

  /* The received packet lives in the I/O buffer chain owned by the device */

  iob = dev->d_iob;

#ifdef CONFIG_NET_IPv6
#ifdef CONFIG_NET_IPv4
  if (IFF_IS_IPv6(dev->d_flags))
#endif
    {
      FAR struct udp_hdr_s *udp   = UDPIPv6BUF;
      FAR struct ipv6_hdr_s *ipv6 = IPv6BUF;

      src_addr6.sin6_family = AF_INET6;
      src_addr6.sin6_port   = udp->srcport;

      net_ipv6addr_copy(src_addr6.sin6_addr.s6_addr, ipv6->srcipaddr);

      src_addr_size = sizeof(src_addr6);
      src_addr      = &src_addr6;
    }
#endif /* CONFIG_NET_IPv6 */

#ifdef CONFIG_NET_IPv4
#ifdef CONFIG_NET_IPv6
  else
#endif
    {
#ifdef CONFIG_NET_IPv6
      /* Hybrid dual-stack IPv6/IPv4 implementations recognize a special
       * class of addresses, the IPv4-mapped IPv6 addresses.
       */

      if (conn->domain == PF_INET6)
        {
          /* The packet itself is IPv4 (the device is not flagged as IPv6),
           * so the source port and address must be read through the IPv4
           * header layout even though the socket is an IPv6 socket.
           */

          FAR struct udp_hdr_s *udp   = UDPIPv4BUF;
          FAR struct ipv4_hdr_s *ipv4 = IPv4BUF;
          in_addr_t ipv4addr;

          /* Encode the IPv4 address as an IPv4-mapped IPv6 address */

          src_addr6.sin6_family = AF_INET6;
          src_addr6.sin6_port   = udp->srcport;

          ipv4addr = net_ip4addr_conv32(ipv4->srcipaddr);
          ip6_map_ipv4addr(ipv4addr, src_addr6.sin6_addr.s6_addr16);

          src_addr_size = sizeof(src_addr6);
          src_addr      = &src_addr6;
        }
      else
#endif
        {
          FAR struct udp_hdr_s *udp   = UDPIPv4BUF;
          FAR struct ipv4_hdr_s *ipv4 = IPv4BUF;

          src_addr4.sin_family = AF_INET;
          src_addr4.sin_port   = udp->srcport;

          net_ipv4addr_copy(src_addr4.sin_addr.s_addr,
                            net_ip4addr_conv32(ipv4->srcipaddr));
          memset(src_addr4.sin_zero, 0, sizeof(src_addr4.sin_zero));

          src_addr_size = sizeof(src_addr4);
          src_addr      = &src_addr4;
        }
    }
#endif /* CONFIG_NET_IPv4 */

  /* Overwrite the beginning of io_data with the address information:
   * [interface index,] address size, address.
   */

#ifdef CONFIG_NETDEV_IFINDEX
  iob->io_data[offset++] = dev->d_ifindex;
#endif
  iob->io_data[offset++] = src_addr_size;
  memcpy(&iob->io_data[offset], src_addr, src_addr_size);

  /* Trim the l3/l4 headers: everything between io_offset and d_appdata */

  iob = iob_trimhead(iob, (dev->d_appdata - iob->io_data) -
                     iob->io_offset);

  /* Add the new I/O buffer chain to the tail of the read-ahead queue */

  ret = iob_tryadd_queue(iob, &conn->readahead);
  if (ret < 0)
    {
      nerr("ERROR: Failed to queue the I/O buffer chain: %d\n", ret);

      /* The chain could not be queued; free it and report nothing taken */

      iob_free_chain(iob);
      buflen = 0;
    }
#ifdef CONFIG_NET_UDP_NOTIFIER
  else
    {
      ninfo("Buffered %d bytes\n", buflen);

      /* Provided notification(s) that additional UDP read-ahead data is
       * available.
       */

      udp_readahead_signal(conn);
    }
#endif

  /* The chain has been queued or freed; clear the device's I/O buffer
   * reference in either case.
   */

  netdev_iob_clear(dev);
  return buflen;
}
/****************************************************************************
 * Name: net_dataevent
 *
 * Description:
 *   Handling the network UDP_NEWDATA event.  The packet currently held by
 *   the device is handed to udp_datahandler() for read-ahead buffering; if
 *   fewer bytes than dev->d_len were accepted, the packet is counted as
 *   dropped.
 *
 * Input Parameters:
 *   dev   - The device holding the received packet
 *   conn  - The UDP connection the packet belongs to
 *   flags - The event flags as left by the connection callbacks
 *
 * Returned Value:
 *   The incoming flags with UDP_NEWDATA cleared.
 *
 ****************************************************************************/

static inline uint16_t
net_dataevent(FAR struct net_driver_s *dev, FAR struct udp_conn_s *conn,
              uint16_t flags)
{
  uint8_t *appdata = dev->d_appdata;
  int pktlen       = dev->d_len;
  uint16_t queued;

  /* No connection callback consumed the data, so it goes to read-ahead */

  ninfo("No receive on connection\n");

  /* Try to retain the whole packet in the read-ahead buffer.  Partial
   * packets are never buffered.
   */

  queued = udp_datahandler(dev, conn, appdata, pktlen);
  if (queued < pktlen)
    {
      /* Nobody took the data and it could not be retained in read-ahead
       * either -- the packet is dropped.
       */

      ninfo("Dropped %d bytes\n", dev->d_len);
#ifdef CONFIG_NET_STATISTICS
      g_netstats.udp.drop++;
#endif
    }

  /* Buffered or dropped, the new data has been dealt with */

  dev->d_len = 0;
  return (uint16_t)(flags & ~UDP_NEWDATA);
}
/****************************************************************************
* Public Functions
****************************************************************************/
/****************************************************************************
 * Name: udp_callback
 *
 * Description:
 *   Inform the application holding the UDP socket of a change in state.
 *   If UDP_NEWDATA is still set after the connection callbacks have run,
 *   the data is passed on to net_dataevent().
 *
 * Input Parameters:
 *   dev   - The device associated with the event
 *   conn  - The UDP connection; may be NULL, in which case nothing is done
 *   flags - The event flags
 *
 * Returned Value:
 *   The event flags as updated by the connection callbacks and, when new
 *   data remained, by net_dataevent().  With a NULL conn the flags are
 *   returned unchanged.
 *
 * Assumptions:
 *   This function must be called with the network locked.
 *
 ****************************************************************************/

uint16_t udp_callback(FAR struct net_driver_s *dev,
                      FAR struct udp_conn_s *conn, uint16_t flags)
{
  ninfo("flags: %04x\n", flags);

  /* Without a connection there is nobody to notify */

  if (!conn)
    {
      return flags;
    }

  /* Run the callbacks registered on the connection */

  flags = devif_conn_event(dev, flags, conn->sconn.list);
  if ((flags & UDP_NEWDATA) == 0)
    {
      return flags;
    }

  /* The data was not consumed by any callback; dispose of it */

  return net_dataevent(dev, conn, flags);
}
#endif /* CONFIG_NET && CONFIG_NET_UDP */