nuttx/fs/nfs/rpc_clnt.c
patacongo 8105db36c1 NFS client update
git-svn-id: svn://svn.code.sf.net/p/nuttx/code/trunk@4605 42af7a65-404d-4744-a932-0658087f49c3
2012-04-14 00:27:44 +00:00

1525 lines
40 KiB
C

/****************************************************************************
* fs/nfs/rpc_clnt.c
*
* Copyright (C) 2012 Gregory Nutt. All rights reserved.
* Copyright (C) 2012 Jose Pablo Rojas Vargas. All rights reserved.
* Author: Jose Pablo Rojas Vargas <jrojas@nx-engineering.com>
*
* Leveraged from OpenBSD:
*
* Copyright (c) 2004 The Regents of the University of Michigan.
* All rights reserved.
*
* Copyright (c) 2004 Weston Andros Adamson <muzzle@umich.edu>.
* Copyright (c) 2004 Marius Aamodt Eriksen <marius@umich.edu>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of
* California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by Rick Macklem at
* The University of Guelph.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer. 2.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution. 3. All advertising
* materials mentioning features or use of this software must display the
* following acknowledgement: This product includes software developed by the
* University of California, Berkeley and its contributors. 4. Neither the
* name of the University nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
****************************************************************************/
#ifndef __FS_NFS_NFS_SOCKET_H
#define __FS_NFS_NFS_SOCKET_H
/****************************************************************************
* Included Files
****************************************************************************/
#include <sys/socket.h>
#include <queue.h>
#include <time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <debug.h>
#include "xdr_subs.h"
#include "nfs_proto.h"
#include "nfs.h"
#include "nfs_args.h"
#include "rpc.h"
#include "rpc_clnt_private.h"
#include "rpc_v2.h"
/****************************************************************************
* Pre-processor Definitions
****************************************************************************/
#define RPC_RETURN(X) do { dbg("returning %d", X); return X; }while(0)
/* Estimate rto for an nfs rpc sent via. an unreliable datagram. Use the mean
* and mean deviation of rtt for the appropriate type of rpc for the frequent
* rpcs and a default for the others. The justification for doing "other"
* this way is that these rpcs happen so infrequently that timer est. would
* probably be stale. Also, since many of these rpcs are non-idempotent, a
* conservative timeout is desired. getattr, lookup - A+2D read, write -
* A+4D other - nm_timeo
*/
#define RPC_RTO(n, t) \
((t) == 0 ? (n)->rc_timeo : \
((t) < 3 ? \
(((((n)->rc_srtt[t-1] + 3) >> 2) + (n)->rc_sdrtt[t-1] + 1) >> 1) : \
((((n)->rc_srtt[t-1] + 7) >> 3) + (n)->rc_sdrtt[t-1] + 1)))
#define RPC_SRTT(s,r) (r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s),\
(r)->r_procnum) - 1]
#define RPC_SDRTT(s,r) (r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s),\
(r)->r_procnum) - 1]
/* There is a congestion window for outstanding rpcs maintained per mount
* point. The cwnd size is adjusted in roughly the way that: Van Jacobson,
* Congestion avoidance and Control, In "Proceedings of SIGCOMM '88". ACM,
* August 1988. describes for TCP. The cwnd size is chopped in half on a
* retransmit timeout and incremented by 1/cwnd when each rpc reply is
* received and a full cwnd of rpcs is in progress. (The sent count and cwnd
* are scaled for integer arith.) Variants of "slow start" were tried and
* were found to be too much of a performance hit (ave. rtt 3 times larger),
* I suspect due to the large rtt that nfs rpcs have.
*/
#define RPC_CWNDSCALE 256
#define RPC_MAXCWND (RPC_CWNDSCALE * 32)
#define RPC_ERRSTR_ACCEPTED_SIZE 6
#define RPC_ERRSTR_AUTH_SIZE 6
/****************************************************************************
* Public Data
****************************************************************************/
char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] =
{
"", /* no good message... */
"remote server hasn't exported program.",
"remote server can't support version number.",
"program can't support procedure.",
"procedure can't decode params.",
"remote error. remote side memory allocation failure?"
};
char *rpc_errstr_denied[2] =
{
"remote server doesnt support rpc version 2!",
"remote server authentication error."
};
char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] =
{
"",
"auth error: bad credential (seal broken).",
"auth error: client must begin new session.",
"auth error: bad verifier (seal broken).",
"auth error: verifier expired or replayed.",
"auth error: rejected for security reasons.",
};
/****************************************************************************
* Private Data
****************************************************************************/
static int rpcclnt_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
/* Static data, mostly RPC constants in XDR form */
static uint32_t rpc_reply, rpc_call, rpc_vers, rpc_msgdenied,
rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_autherr, rpc_auth_null;
static uint32_t rpcclnt_xid = 0;
static uint32_t rpcclnt_xid_touched = 0;
int rpcclnt_ticks;
struct rpcstats rpcstats;
struct rpc_call *callmgs;
struct rpc_reply *replymsg;
/* Queue head for rpctask's */
static dq_queue_t *rpctask_q;
//struct callout_handle rpcclnt_timer_handle;
/****************************************************************************
* Private Function Prototypes
****************************************************************************/
static int rpcclnt_send(struct socket *, struct sockaddr *, struct rpc_call *,
struct rpctask *);
static int rpcclnt_receive(struct rpctask *, struct sockaddr *,
struct rpc_reply *, struct rpc_call *);
static int rpcclnt_reply(struct rpctask *, struct rpc_call *,
struct rpc_reply *);
static void rpcclnt_timer(void *, struct rpc_call *);
#ifdef CONFIG_NFS_TCPIP
static int rpcclnt_sndlock(int *, struct rpctask *);
static void rpcclnt_sndunlock(int *);
static int rpcclnt_rcvlock(struct rpctask *);
static void rpcclnt_rcvunlock(int *);
static int rpcclnt_sigintr(struct rpcclnt *, struct rpctask *, cthread_t *);
#endif
static void rpcclnt_softterm(struct rpctask *task);
static uint32_t rpcclnt_proct(struct rpcclnt *, uint32_t);
static int rpcclnt_buildheader(struct rpcclnt *, int, int, void *, struct rpc_call *);
/****************************************************************************
* Private Functions
****************************************************************************/
/* This is the nfs send routine. For connection based socket types, it must
* be called with an nfs_sndlock() on the socket. "rep == NULL" indicates
* that it has been called from a server. For the client side: - return EINTR
* if the RPC is terminated, 0 otherwise - set TASK_MUSTRESEND if the send fails
* for any reason - do any cleanup required by recoverable socket errors
* (???) For the server side: - return EINTR or ERESTART if interrupted by a
* signal - return EPIPE if a connection is lost for connection based sockets
* (TCP...) - do any cleanup required by recoverable socket errors (???)
*/
static int
rpcclnt_send(struct socket *so, struct sockaddr *nam, struct rpc_call *call,
struct rpctask *rep)
{
struct sockaddr *sendnam;
int error, soflags, flags;
if (rep != NULL)
{
if (rep->r_flags & TASK_SOFTTERM)
{
RPC_RETURN(EINTR);
}
if ((so = rep->r_rpcclnt->rc_so) == NULL)
{
rep->r_flags |= TASK_MUSTRESEND;
RPC_RETURN(0);
}
rep->r_flags &= ~TASK_MUSTRESEND;
soflags = rep->r_rpcclnt->rc_soflags;
}
else
{
soflags = so->s_flags;
}
#ifdef CONFIG_NFS_TCPIP
if ((soflags & PR_CONNREQUIRED))
{
sendnam = NULL;
{
else
#endif
{
sendnam = nam;
}
if (so->s_type == SOCK_SEQPACKET)
{
flags = MSG_EOR;
}
else
{
flags = 0;
}
error =
psock_sendto(so, call, sizeof(*call), flags, sendnam, sizeof(*sendnam));
if (error != 0)
{
if (rep != NULL)
{
ndbg("rpc send error %d for service %s\n", error,
rep->r_rpcclnt->rc_prog->prog_name);
/* Deal with errors for the client side. */
if (rep->r_flags & TASK_SOFTTERM)
{
error = EINTR;
}
else
{
rep->r_flags |= TASK_MUSTRESEND;
}
}
else
{
ndbg("rpc service send error %d\n", error);
}
/* Handle any recoverable (soft) socket errors here. */
if (error != EINTR && error != ERESTART &&
error != EWOULDBLOCK && error != EPIPE)
{
error = 0;
}
}
RPC_RETURN(error);
}
/* Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
* done by soreceive().For SOCK_STREAM, first get the
* Record Mark to find out how much more there is to get. We must
* lock the socket against other receivers until we have an entire
* rpc request/reply.
*/
static int rpcclnt_receive(struct rpctask *rep, struct sockaddr *aname,
struct rpc_reply *reply, struct rpc_call *call)
{
struct socket *so;
#ifdef CONFIG_NFS_TCPIP
uint32_t len;
#endif
int error, sotype, rcvflg;
/* Set up arguments for soreceive() */
sotype = rep->r_rpcclnt->rc_sotype;
/* For reliable protocols, lock against other senders/receivers in
* case a reconnect is necessary. For SOCK_STREAM, first get the
* Record Mark to find out how much more there is to get. We must
* lock the socket against other receivers until we have an entire
* rpc request/reply.
*/
#ifdef CONFIG_NFS_TCPIP
if (sotype != SOCK_DGRAM)
{
error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
if (error != 0)
{
return error;
}
tryagain:
/* Check for fatal errors and resending request.
*
* Ugh: If a reconnect attempt just happened, rc_so would
* have changed. NULL indicates a failed attempt that has
* essentially shut down this mount point.
*/
if (rep->r_flags & TASK_SOFTTERM)
{
rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
return EINTR;
}
so = rep->r_rpcclnt->rc_so;
if (so == NULL)
{
error = rpcclnt_reconnect(rep);
if (error)
{
rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
return error;
}
goto tryagain;
}
while (rep->r_flags & TASK_MUSTRESEND)
{
rpcstats.rpcretries++;
error = rpcclnt_send(so, rep->r_rpcclnt->rc_name, call, rep);
if (error)
{
if (error == EINTR || error == ERESTART ||
(error = rpcclnt_reconnect(rep)) != 0)
{
rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
return error;
}
goto tryagain;
}
}
rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
if (sotype == SOCK_STREAM)
{
do
{
rcvflg = MSG_WAITALL;
error = psock_recvfrom(so, reply, sizeof(*reply),
&rcvflg, rep->r_rpcclnt->rc_name,
sizeof(*rep->r_rpcclnt->rc_name));
if (error == EWOULDBLOCK && rep && (rep->r_flags & TASK_SOFTTERM))
{
RPC_RETURN(EINTR);
}
}
while (error == EWOULDBLOCK);
if (error == 0)
{
ndbg("short receive from rpc server %s\n",
rep->r_rpcclnt->rc_prog->prog_name);
error = EPIPE;
}
len = ntohl(len) & ~0x80000000;
/* This is SERIOUS! We are out of sync with the
* sender and forcing a disconnect/reconnect is all I
* can do.
*/
if (len > RPC_MAXPACKET)
{
ndbg("%s (%d) from rpc server %s\n",
"impossible packet length",
len, rep->r_rpcclnt->rc_prog->prog_name);
error = EFBIG;
goto errout;
}
do
{
rcvflg = MSG_WAITALL;
error = psock_recvfrom(so, reply, sizeof(*reply),
&rcvflg, rep->r_rpcclnt->rc_name,
sizeof(*rep->r_rpcclnt->rc_name));
}
while (error == EWOULDBLOCK || error == EINTR || error == ERESTART);
if (error == 0)
{
ndbg("short receive from rpc server %s\n",
rep->r_rpcclnt->rc_prog->prog_name);
error = EPIPE;
}
if (error != 0)
{
goto errout;
}
}
else
{
/* NB: Since uio_resid is big, MSG_WAITALL is ignored
* and soreceive() will return when it has either a
* control msg or a data msg. We have no use for
* control msg., but must grab them and then throw
* them away so we know what is going on.
*/
do
{
rcvflg = 0;
error = psock_recvfrom(so, reply, sizeof(*reply),
&rcvflg, rep->r_rpcclnt->rc_name,
sizeof(*rep->r_rpcclnt->rc_name));
if (error == EWOULDBLOCK && rep)
{
if (rep->r_flags & TASK_SOFTTERM)
{
return EINTR;
}
}
}
while (error == EWOULDBLOCK || (!error));
if ((rcvflg & MSG_EOR) == 0)
{
ndbg("Egad!!\n");
}
if (error == 0)
{
error = EPIPE;
}
}
errout:
if (error != 0 && error != EINTR && error != ERESTART)
{
if (error != EPIPE)
{
ndbg("receive error %d from rpc server %s\n",
error, rep->r_rpcclnt->rc_prog->prog_name);
}
error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
if (error == 0)
{
error = rpcclnt_reconnect(rep);
}
if (error == 0)
{
goto tryagain;
}
}
}
else
{
#endif
if ((so = rep->r_rpcclnt->rc_so) == NULL)
{
RPC_RETURN(EACCES);
}
do
{
rcvflg = 0;
error =
psock_recvfrom(so, reply, sizeof(*reply), rcvflg, aname,
(socklen_t *) sizeof(*aname));
dbg("psock_recvfrom returns %d", error);
if (error == EWOULDBLOCK && (rep->r_flags & TASK_SOFTTERM))
{
dbg("wouldblock && softerm -> EINTR");
RPC_RETURN(EINTR);
}
}
while (error == EWOULDBLOCK);
#ifdef CONFIG_NFS_TCPIP
}
#endif
RPC_RETURN(error);
}
/* Implement receipt of reply on a socket. We must search through the list of
* received datagrams matching them with outstanding requests using the xid,
* until ours is found.
*/
/* ARGSUSED */
static int
rpcclnt_reply(struct rpctask *myrep, struct rpc_call *call,
struct rpc_reply *reply)
{
struct rpctask *rep;
struct rpcclnt *rpc = myrep->r_rpcclnt;
int32_t t1;
struct sockaddr *nam = NULL;
uint32_t rxid;
int error;
/* Loop around until we get our own reply */
for (;;)
{
/* Lock against other receivers so that I don't get stuck in
* sbwait() after someone else has received my reply for me.
* Also necessary for connection based protocols to avoid
* race conditions during a reconnect.
*/
#ifdef CONFIG_NFS_TCPIP
error = rpcclnt_rcvlock(myrep);
if (error)
{
return error;
}
#endif
/*
* Get the next Rpc reply off the socket
*/
error = rpcclnt_receive(myrep, nam, reply, call);
#ifdef CONFIG_NFS_TCPIP
rpcclnt_rcvunlock(&rpc->rc_flag);
#endif
if (error != 0)
{
/* Ignore routing errors on connectionless
* protocols??
*/
if (RPCIGNORE_SOERROR(rpc->rc_soflags, error))
{
if (myrep->r_flags & TASK_GETONEREP)
{
RPC_RETURN(0);
}
ndbg("ingoring routing error on connectionless protocol.");
continue;
}
RPC_RETURN(error);
}
/* Get the xid and check that it is an rpc reply */
rxid = reply->rp_xid;
if (reply->rp_direction != rpc_reply)
{
rpcstats.rpcinvalid++;
if (myrep->r_flags & TASK_GETONEREP)
{
RPC_RETURN(0);
}
continue;
}
/* Loop through the request list to match up the reply Iff no
* match, just drop the datagram
*/
for (rep = (struct rpctask *)rpctask_q->head; rep;
rep = (struct rpctask *)rep->r_chain.flink)
{
if (rxid == rep->r_xid)
{
/* Update congestion window. Do the additive
* increase of one rpc/rtt.
*/
if (rpc->rc_cwnd <= rpc->rc_sent)
{
rpc->rc_cwnd +=
(RPC_CWNDSCALE * RPC_CWNDSCALE +
(rpc->rc_cwnd >> 1)) / rpc->rc_cwnd;
if (rpc->rc_cwnd > RPC_MAXCWND)
{
rpc->rc_cwnd = RPC_MAXCWND;
}
}
rep->r_flags &= ~TASK_SENT;
rpc->rc_sent -= RPC_CWNDSCALE;
/* Update rtt using a gain of 0.125 on the
* mean and a gain of 0.25 on the deviation.
*/
if (rep->r_flags & TASK_TIMING)
{
/* Since the timer resolution of
* NFS_HZ is so course, it can often
* result in r_rtt == 0. Since r_rtt
* == N means that the actual rtt is
* between N+dt and N+2-dt ticks, add
* 1.
*/
t1 = rep->r_rtt + 1;
t1 -= (RPC_SRTT(rpc, rep) >> 3);
RPC_SRTT(rpc, rep) += t1;
if (t1 < 0)
{
t1 = -t1;
}
t1 -= (RPC_SDRTT(rpc, rep) >> 2);
RPC_SDRTT(rpc, rep) += t1;
}
rpc->rc_timeouts = 0;
break;
}
}
/* If not matched to a request, drop it. If it's mine, get
* out.
*/
if (rep == 0)
{
rpcstats.rpcunexpected++;
dbg("rpc reply not matched\n");
}
else if (rep == myrep)
{
RPC_RETURN(0);
}
if (myrep->r_flags & TASK_GETONEREP)
{
RPC_RETURN(0);
}
}
}
#ifdef CONFIG_NFS_TCPIP
static int
rpcclnt_sigintr( struct rpcclnt *rpc, struct rpctask *task, cthread_t *td)
{
struct proc *p;
sigset_t tmpset;
if (rpc == NULL)
{
return EFAULT;
}
if (ISSET(rpc->rc_flag, RPCCLNT_REDIRECT))
{
return 0;
}
/* XXX deal with forced unmounts */
if (task && ISSET(task->r_flags, TASK_SOFTTERM))
{
RPC_RETURN(EINTR);
}
if (!ISSET(rpc->rc_flag, RPCCLNT_INT))
{
RPC_RETURN(0);
}
if (td == NULL)
{
return 0;
}
p = cthread_proc(td);
PROC_LOCK(p);
tmpset = p->p_siglist;
SIGSETNAND(tmpset, td->td_sigmask);
mtx_lock(&p->p_sigacts->ps_mtx);
SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
mtx_unlock(&p->p_sigacts->ps_mtx);
if (SIGNOTEMPTY(p->p_siglist) && RPCCLNTINT_SIGMASK(tmpset))
{
PROC_UNLOCK(p);
RPC_RETURN(EINTR);
}
PROC_UNLOCK(p);
RPC_RETURN(0);
}
/* Lock a socket against others. Necessary for STREAM sockets to ensure you
* get an entire rpc request/reply and also to avoid race conditions between
* the processes with nfs requests in progress when a reconnect is necessary.
*/
static int rpcclnt_sndlock(int *flagp, struct rpctask *task)
{
int slpflag = 0, slptimeo = 0;
if (task)
{
if (task->r_rpcclnt->rc_flag & RPCCLNT_INT)
slpflag = PCATCH;
}
while (*flagp & RPCCLNT_SNDLOCK)
{
if (rpcclnt_sigintr(task->r_rpcclnt, task, p))
{
return EINTR;
}
*flagp |= RPCCLNT_WANTSND;
if (slpflag == PCATCH)
{
slpflag = 0;
slptimeo = 2 * CLOCKS_PER_SEC;
}
}
*flagp |= RPCCLNT_SNDLOCK;
return 0;
}
/* Unlock the stream socket for others. */
static void rpcclnt_sndunlock(int *flagp)
{
if ((*flagp & RPCCLNT_SNDLOCK) == 0)
{
panic("rpc sndunlock");
}
*flagp &= ~RPCCLNT_SNDLOCK;
if (*flagp & RPCCLNT_WANTSND)
{
*flagp &= ~RPCCLNT_WANTSND;
}
}
static int rpcclnt_rcvlock(struct rpctask *task)
{
int *flagp = &task->r_rpcclnt->rc_flag;
int slpflag, slptimeo = 0;
if (*flagp & RPCCLNT_INT)
{
slpflag = PCATCH;
}
else
{
slpflag = 0;
}
while (*flagp & RPCCLNT_RCVLOCK)
{
if (rpcclnt_sigintr(task->r_rpcclnt, task, task->r_td))
{
return EINTR;
}
*flagp |= RPCCLNT_WANTRCV;
tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcrcvlk", slptimeo);
if (slpflag == PCATCH)
{
slpflag = 0;
slptimeo = 2 * CLOCKS_PER_SEC;
}
}
*flagp |= RPCCLNT_RCVLOCK;
return 0;
}
/* Unlock the stream socket for others. */
static void rpcclnt_rcvunlock(int *flagp)
{
if ((*flagp & RPCCLNT_RCVLOCK) == 0)
{
panic("nfs rcvunlock");
}
*flagp &= ~RPCCLNT_RCVLOCK;
if (*flagp & RPCCLNT_WANTRCV)
{
*flagp &= ~RPCCLNT_WANTRCV;
wakeup((caddr_t) flagp);
}
}
#endif
static uint32_t rpcclnt_proct(struct rpcclnt *rpc, uint32_t procid)
{
if (rpc->rc_proctlen != 0 && rpc->rc_proct != NULL &&
procid < rpc->rc_proctlen)
{
return rpc->rc_proct[procid];
}
return 0;
}
static void rpcclnt_softterm(struct rpctask *task)
{
task->r_flags |= TASK_SOFTTERM;
if (task->r_flags & TASK_SENT)
{
task->r_rpcclnt->rc_sent -= RPC_CWNDSCALE;
task->r_flags &= ~TASK_SENT;
}
}
/****************************************************************************
* Public Functions
****************************************************************************/
void rpcclnt_init(void)
{
rpcclnt_ticks = (CLOCKS_PER_SEC * RPC_TICKINTVL + 500) / 1000;
if (rpcclnt_ticks < 1)
{
rpcclnt_ticks = 1;
}
rpcstats.rpcretries = 0;
rpcstats.rpcrequests = 0;
rpcstats.rpctimeouts = 0;
rpcstats.rpcunexpected = 0;
rpcstats.rpcinvalid = 0;
/* RPC constants how about actually using more than one of these! */
rpc_reply = txdr_unsigned(RPC_REPLY);
rpc_vers = txdr_unsigned(RPC_VER2);
rpc_call = txdr_unsigned(RPC_CALL);
rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
rpc_autherr = txdr_unsigned(RPC_AUTHERR);
rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
rpc_auth_null = txdr_unsigned(RPCAUTH_NULL);
/* Initialize rpctask queue */
dq_init(rpctask_q);
rpcclnt_timer(NULL, callmgs);
nvdbg("rpc initialed");
return;
}
/*
void
rpcclnt_uninit(void)
{
nvdbg("uninit");
untimeout(rpcclnt_timer, (void *)NULL, rpcclnt_timer_handle);
}
*/
/* Initialize sockets and congestion for a new RPC connection. We do not free
* the sockaddr if error.
*/
int rpcclnt_connect(struct rpcclnt *rpc)
{
struct socket *so;
int error;
struct sockaddr *saddr;
struct sockaddr_in *sin = NULL;
struct timeval *tv = NULL;
uint16_t tport;
/* Create the socket */
rpc->rc_so = NULL;
saddr = rpc->rc_name;
rpc->rc_sotype = SOCK_DGRAM;
error =
psock_socket(saddr->sa_family, rpc->rc_sotype, rpc->rc_soproto, rpc->rc_so);
if (error != 0)
{
ndbg("error %d in psock_socket()", error);
RPC_RETURN(error);
}
so = rpc->rc_so;
rpc->rc_soflags = so->s_flags;
/* Some servers require that the client port be a reserved port
* number. We always allocate a reserved port, as this prevents
* filehandle disclosure through UDP port capture.
*/
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = INADDR_ANY;
tport = 1024;
do
{
tport--;
sin->sin_port = htons(tport);
error = psock_bind(so, (struct sockaddr *)sin, sizeof(*sin));
}
while (error == EADDRINUSE && tport > 1024 / 2);
if (error)
{
ndbg("bind failed\n");
goto bad;
}
/* Protocols that do not require connections may be optionally left
* unconnected for servers that reply from a port other than
* NFS_PORT.
*/
#ifdef CONFIG_NFS_TCPIP
if (rpc->rc_soflags == PR_CONNREQUIRED)
{
error = ENOTCONN;
goto bad;
}
else
{
#endif
error = psock_connect(so, saddr, sizeof(*saddr));
if (error)
{
dbg("psock_connect returns %d", error);
goto bad;
}
#ifdef CONFIG_NFS_TCPIP
}
#endif
/* Always set receive timeout to detect server crash and reconnect.
* Otherwise, we can get stuck in psock_receive forever.
*/
tv->tv_sec = 1;
tv->tv_usec = 0;
if ((error =
psock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, (const void *)tv,
sizeof(*tv))))
{
goto bad;
}
/* Initialize other non-zero congestion variables */
rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] = rpc->rc_srtt[3] = (RPC_TIMEO << 3);
rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] = rpc->rc_sdrtt[3] = 0;
rpc->rc_cwnd = RPC_MAXCWND / 2; /* Initial send window */
rpc->rc_sent = 0;
rpc->rc_timeouts = 0;
RPC_RETURN(0);
bad:
rpcclnt_disconnect(rpc);
RPC_RETURN(error);
}
/* Reconnect routine: Called when a connection is broken on a reliable
* protocol. - clean up the old socket - nfs_connect() again - set
* TASK_MUSTRESEND for all outstanding requests on mount point If this
* fails the mount point is DEAD! nb: Must be called with the
* nfs_sndlock() set on the mount point.
*/
int rpcclnt_reconnect(struct rpctask *rep)
{
struct rpctask *rp;
struct rpcclnt *rpc = rep->r_rpcclnt;
int error;
rpcclnt_disconnect(rpc);
while ((error = rpcclnt_connect(rpc)) != 0)
{
if (error == EINTR || error == ERESTART)
{
return EINTR;
}
}
/* Loop through outstanding request list and fix up all
* requests on old socket.
*/
for (rp = (struct rpctask *)rpctask_q->head; rp != NULL;
rp = (struct rpctask *)rp->r_chain.blink)
{
if (rp->r_rpcclnt == rpc)
{
rp->r_flags |= TASK_MUSTRESEND;
}
}
return 0;
}
void rpcclnt_disconnect(struct rpcclnt *rpc)
{
struct socket *so;
if (rpc->rc_so != NULL)
{
so = rpc->rc_so;
rpc->rc_so = NULL;
(void)psock_close(so);
}
}
#ifdef CONFIG_NFS_TCPIP
void rpcclnt_safedisconnect(struct rpcclnt *rpc)
{
struct rpctask dummytask;
memset((void *)dummytask, 0, sizeof(*call));
dummytask.r_rpcclnt = rpc;
rpcclnt_rcvlock(&dummytask);
rpcclnt_disconnect(rpc);
rpcclnt_rcvunlock(&rpc->rc_flag);
}
#endif
/* XXX: ignores tryagain! */
/* Code from nfs_request - goes something like this - fill in task struct -
* links task into list - calls nfs_send() for first transmit - calls
* nfs_receive() to get reply - fills in reply (which should be initialized
* prior to calling), which is valid when 0 is returned and is NEVER freed in
* this function
*
* always frees the request header, but NEVER frees 'mrest'
*
*
* note that reply->result_* are invalid unless reply->type ==
* RPC_MSGACCEPTED and reply->status == RPC_SUCCESS and that reply->verf_*
* are invalid unless reply->type == RPC_MSGACCEPTED
*/
int rpcclnt_request(struct rpcclnt *rpc, int procnum, struct rpc_reply *reply, void *datain)
{
struct rpc_call *call = NULL;
struct rpc_reply *replysvr = NULL;
struct rpctask *task, _task;
int error = 0;
int xid = 0;
task = &_task;
memset(task, 0, sizeof(*task));
task->r_rpcclnt = rpc;
task->r_procnum = procnum;
error = rpcclnt_buildheader(rpc, procnum, xid, datain, call);
if (error)
{
ndbg("building call header error");
goto rpcmout;
}
task->r_xid = fxdr_unsigned(uint32_t, xid);
if (rpc->rc_flag & RPCCLNT_SOFT)
{
task->r_retry = rpc->rc_retry;
}
else
{
task->r_retry = RPC_MAXREXMIT + 1; /* past clip limit */
}
task->r_rtt = task->r_rexmit = 0;
if (rpcclnt_proct(rpc, procnum) > 0)
{
task->r_flags = TASK_TIMING;
}
else
{
task->r_flags = 0;
}
/* Do the client side RPC. */
rpcstats.rpcrequests++;
/* Chain request into list of outstanding requests. Be sure to put it
* LAST so timer finds oldest requests first.
*/
dq_addlast(&task->r_chain, rpctask_q);
/* If backing off another request or avoiding congestion, don't send
* this one now but let timer do it. If not timing a request, do it
* now.
*/
if (rpc->rc_so && (rpc->rc_sotype != SOCK_DGRAM ||
(rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
rpc->rc_sent < rpc->rc_cwnd))
{
#ifdef CONFIG_NFS_TCPIP
if (rpc->rc_soflags & PR_CONNREQUIRED)
{
error = rpcclnt_sndlock(&rpc->rc_flag, task);
}
#endif
if (error == 0)
{
error = rpcclnt_send(rpc->rc_so, rpc->rc_name, call, task);
#ifdef CONFIG_NFS_TCPIP
if (rpc->rc_soflags & PR_CONNREQUIRED)
{
rpcclnt_sndunlock(&rpc->rc_flag);
}
#endif
}
if (error == 0 && (task->r_flags & TASK_MUSTRESEND) == 0)
{
rpc->rc_sent += RPC_CWNDSCALE;
task->r_flags |= TASK_SENT;
}
}
else
{
task->r_rtt = -1;
}
/* Wait for the reply from our send. */
if (error == 0 || error == EPIPE)
{
error = rpcclnt_reply(task, call, replysvr);
}
/* RPC done, unlink the request. */
dq_rem(&task->r_chain, rpctask_q);
/* Decrement the outstanding request count. */
if (task->r_flags & TASK_SENT)
{
task->r_flags &= ~TASK_SENT; /* paranoia */
rpc->rc_sent -= RPC_CWNDSCALE;
}
if (error != 0)
{
goto rpcmout;
}
/* Break down the rpc header and check if ok */
reply->stat.type = fxdr_unsigned(uint32_t, replysvr->stat.type);
if (reply->stat.type == RPC_MSGDENIED)
{
reply->stat.status = fxdr_unsigned(uint32_t, replysvr->stat.status);
switch (reply->stat.status)
{
case RPC_MISMATCH:
reply->stat.mismatch_info.low =
fxdr_unsigned(uint32_t, replysvr->stat.mismatch_info.low);
reply->stat.mismatch_info.high =
fxdr_unsigned(uint32_t, replysvr->stat.mismatch_info.high);
ndbg("RPC_MSGDENIED: RPC_MISMATCH error");
error = EOPNOTSUPP;
break;
case RPC_AUTHERR:
reply->stat.autherr = fxdr_unsigned(uint32_t, replysvr->stat.autherr);
ndbg("RPC_MSGDENIED: RPC_AUTHERR error");
error = EACCES;
break;
default:
error = EOPNOTSUPP;
break;
}
goto rpcmout;
}
else if (reply->stat.type != RPC_MSGACCEPTED)
{
error = EOPNOTSUPP;
goto rpcmout;
}
/* Verifier */
reply->rpc_verfi.authtype =
fxdr_unsigned(uint32_t, replysvr->rpc_verfi.authtype);
reply->rpc_verfi.authlen =
fxdr_unsigned(uint32_t, replysvr->rpc_verfi.authlen);
if (reply->stat.status == RPC_SUCCESS)
{
nvdbg("RPC_SUCCESS");
reply->stat.where = replysvr->stat.where;
}
else if (reply->stat.status == RPC_PROGMISMATCH)
{
reply->stat.mismatch_info.low =
fxdr_unsigned(uint32_t, replysvr->stat.mismatch_info.low);
reply->stat.mismatch_info.high =
fxdr_unsigned(uint32_t, replysvr->stat.mismatch_info.high);
ndbg("RPC_MSGACCEPTED: RPC_PROGMISMATCH error");
error = EOPNOTSUPP; /* XXXMARIUS */
}
else if (reply->stat.status > 5)
{
error = EOPNOTSUPP;
goto rpcmout;
}
rpcmout:
RPC_RETURN(error);
}
/* Nfs timer routine Scan the nfsreq list and retranmit any requests that
* have timed out To avoid retransmission attempts on STREAM sockets (in the
* future) make sure to set the r_retry field to 0 (implies nm_retry == 0).
*/
void rpcclnt_timer(void *arg, struct rpc_call *call)
{
struct rpctask *rep;
struct socket *so;
struct rpcclnt *rpc;
int timeo, error;
for (rep = (struct rpctask *)rpctask_q->head; rep;
rep = (struct rpctask *)rep->r_chain.flink)
{
rpc = rep->r_rpcclnt;
if (rep->r_flags & TASK_SOFTTERM)
{
continue;
}
if (rep->r_rtt >= 0)
{
rep->r_rtt++;
if (rpc->rc_flag & RPCCLNT_DUMBTIMR)
{
timeo = rpc->rc_timeo;
}
else
{
timeo = RPC_RTO(rpc, rpcclnt_proct(rep->r_rpcclnt, rep->r_procnum));
}
if (rpc->rc_timeouts > 0)
{
timeo *= rpcclnt_backoff[rpc->rc_timeouts - 1];
}
if (rep->r_rtt <= timeo)
{
continue;
}
if (rpc->rc_timeouts < 8)
{
rpc->rc_timeouts++;
}
}
/* Check for server not responding */
if ((rep->r_flags & TASK_TPRINTFMSG) == 0 &&
rep->r_rexmit > rpc->rc_deadthresh)
{
ndbg("Server is not responding");
rep->r_flags |= TASK_TPRINTFMSG;
}
if (rep->r_rexmit >= rep->r_retry)
{ /* too many */
rpcstats.rpctimeouts++;
rep->r_flags |= TASK_SOFTTERM;
continue;
}
if (rpc->rc_sotype != SOCK_DGRAM)
{
if (++rep->r_rexmit > RPC_MAXREXMIT)
{
rep->r_rexmit = RPC_MAXREXMIT;
}
continue;
}
if ((so = rpc->rc_so) == NULL)
{
continue;
}
/* If there is enough space and the window allows.. Resend it
* Set r_rtt to -1 in case we fail to send it now.
*/
rep->r_rtt = -1;
if ((rpc->rc_flag & RPCCLNT_DUMBTIMR) || (rep->r_flags & TASK_SENT) ||
rpc->rc_sent < rpc->rc_cwnd)
{
if ((rpc->rc_flag & RPCCLNT_NOCONN) == 0)
{
error = psock_sendto(so, call, sizeof(*call), 0, NULL, 0);
}
else
{
error =
psock_sendto(so, call, sizeof(*call), 0, rpc->rc_name,
sizeof(*rpc->rc_name));
}
if (!error)
{
/* Iff first send, start timing else turn
* timing off, backoff timer and divide
* congestion window by 2.
*/
if (rep->r_flags & TASK_SENT)
{
rep->r_flags &= ~TASK_TIMING;
if (++rep->r_rexmit > RPC_MAXREXMIT)
{
rep->r_rexmit = RPC_MAXREXMIT;
}
rpc->rc_cwnd >>= 1;
if (rpc->rc_cwnd < RPC_CWNDSCALE)
{
rpc->rc_cwnd = RPC_CWNDSCALE;
}
rpcstats.rpcretries++;
}
else
{
rep->r_flags |= TASK_SENT;
rpc->rc_sent += RPC_CWNDSCALE;
}
rep->r_rtt = 0;
}
}
}
// rpcclnt_timer_handle = timeout(rpcclnt_timer, NULL, rpcclnt_ticks);
}
/* Build the RPC header and fill in the authorization info. */
int rpcclnt_buildheader(struct rpcclnt *rc, int procid,
int xidp, void *datain, struct rpc_call *call)
{
struct timeval *tv = NULL;
srand(time(NULL));
/* The RPC header.*/
/* Get a new (non-zero) xid */
if ((rpcclnt_xid == 0) && (rpcclnt_xid_touched == 0))
{
rpcclnt_xid = rand();
rpcclnt_xid_touched = 1;
}
else
{
do
{
xidp = rand();
}
while ((xidp % 256) == 0);
rpcclnt_xid += xidp;
}
call->rp_xid = xidp = txdr_unsigned(rpcclnt_xid);
call->rp_direction = rpc_call;
call->rp_rpcvers = rpc_vers;
call->rp_prog = txdr_unsigned(rc->rc_prog->prog_id);
call->rp_vers = txdr_unsigned(rc->rc_prog->prog_version);
call->rp_proc = txdr_unsigned(procid);
call->data = datain;
/* rpc_auth part (auth_unix as root) */
call->rpc_auth.authtype = rpc_auth_null;
call->rpc_auth.authlen = txdr_unsigned(sizeof(NULL));
tv->tv_sec = 1;
tv->tv_usec = 0;
#ifdef CONFIG_NFS_UNIX_AUTH
call->rpc_unix.ua_time = txdr_unsigned(tv->tv_sec);
call->rpc_unix.ua_hostname = 0;
call->rpc_unix.ua_uid = geteuid();
call->rpc_unix.ua_gid = getegid();
call->rpc_unix.ua_gidlist = 0;
#endif
/* rpc_verf part (auth_null) */
call->rpc_verf.authtype = 0;
call->rpc_verf.authlen = 0;
return 0;
}
int rpcclnt_cancelreqs(struct rpcclnt *rpc)
{
struct rpctask *task;
int i;
for (task = (struct rpctask *)rpctask_q->head; task;
task = (struct rpctask *)task->r_chain.flink)
{
if (rpc != task->r_rpcclnt || (task->r_flags & TASK_SOFTTERM))
{
continue;
}
rpcclnt_softterm(task);
}
for (i = 0; i < 30; i++)
{
for (task = (struct rpctask *)rpctask_q->head; task;
task = (struct rpctask *)task->r_chain.flink)
{
if (rpc == task->r_rpcclnt)
{
break;
}
}
if (task == NULL)
{
return 0;
}
}
return EBUSY;
}
#endif