[email protected]
[Top] [All Lists]

PERFORCE change 113893 for review

Subject: PERFORCE change 113893 for review
From: Roman Divacky
Date: Fri, 2 Feb 2007 09:19:42 GMT
http://perforce.freebsd.org/chv.cgi?CH=113893

Change 113893 by [email protected]_witten on 2007/02/02 09:18:56

        IFC

Affected files ...

.. //depot/projects/linuxolator/src/sys/arm/xscale/ixp425/avila_machdep.c#3 
integrate
.. //depot/projects/linuxolator/src/sys/compat/linux/linux_emul.c#32 integrate
.. //depot/projects/linuxolator/src/sys/dev/iwi/if_iwi.c#6 integrate
.. //depot/projects/linuxolator/src/sys/kern/sched_4bsd.c#10 integrate
.. //depot/projects/linuxolator/src/sys/kern/subr_witness.c#6 integrate
.. //depot/projects/linuxolator/src/sys/kern/uipc_socket.c#10 integrate
.. //depot/projects/linuxolator/src/sys/net80211/_ieee80211.h#4 integrate
.. //depot/projects/linuxolator/src/sys/netinet/tcp_input.c#7 integrate
.. //depot/projects/linuxolator/src/sys/netinet/tcp_output.c#5 integrate
.. //depot/projects/linuxolator/src/sys/netinet/tcp_syncache.c#5 integrate
.. //depot/projects/linuxolator/src/sys/netinet/tcp_usrreq.c#4 integrate
.. //depot/projects/linuxolator/src/sys/netinet/tcp_var.h#3 integrate
.. //depot/projects/linuxolator/src/sys/sun4v/include/intr_machdep.h#3 integrate
.. //depot/projects/linuxolator/src/sys/sun4v/include/smp.h#4 integrate
.. //depot/projects/linuxolator/src/sys/sun4v/sun4v/intr_machdep.c#4 integrate
.. //depot/projects/linuxolator/src/sys/sun4v/sun4v/mp_machdep.c#5 integrate
.. //depot/projects/linuxolator/src/sys/sun4v/sun4v/tte.c#4 integrate
.. //depot/projects/linuxolator/src/sys/sys/socketvar.h#2 integrate

Differences ...

==== //depot/projects/linuxolator/src/sys/arm/xscale/ixp425/avila_machdep.c#3 
(text+ko) ====

@@ -49,7 +49,7 @@
 #include "opt_ddb.h"
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/arm/xscale/ixp425/avila_machdep.c,v 1.2 2006/12/06 
06:34:54 julian Exp $");
+__FBSDID("$FreeBSD: src/sys/arm/xscale/ixp425/avila_machdep.c,v 1.3 2007/02/02 
05:14:21 kevlo Exp $");
 
 #define _ARM32_BUS_DMA_PRIVATE
 #include <sys/param.h>
@@ -274,7 +274,7 @@
 #ifdef DDB
        vm_offset_t zstart = 0, zend = 0;
 #endif
-       int i = 0;
+       int i;
        uint32_t fake_preload[35];
        uint32_t memsize;
 

==== //depot/projects/linuxolator/src/sys/compat/linux/linux_emul.c#32 
(text+ko) ====

@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/compat/linux/linux_emul.c,v 1.14 2007/02/01 
13:29:27 kib Exp $");
+__FBSDID("$FreeBSD: src/sys/compat/linux/linux_emul.c,v 1.15 2007/02/02 
08:58:16 kib Exp $");
 
 #include "opt_compat.h"
 

==== //depot/projects/linuxolator/src/sys/dev/iwi/if_iwi.c#6 (text+ko) ====

@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/dev/iwi/if_iwi.c,v 1.44 2006/12/07 15:24:38 kevlo 
Exp $");
+__FBSDID("$FreeBSD: src/sys/dev/iwi/if_iwi.c,v 1.45 2007/02/02 05:17:18 kevlo 
Exp $");
 
 /*-
  * Intel(R) PRO/Wireless 2200BG/2225BG/2915ABG driver
@@ -545,9 +545,10 @@
        ring->queued = 0;
        ring->cur = ring->next = 0;
 
-       error = bus_dma_tag_create(NULL, 4, 0, BUS_SPACE_MAXADDR_32BIT,
-           BUS_SPACE_MAXADDR, NULL, NULL, count * IWI_CMD_DESC_SIZE, 1,
-           count * IWI_CMD_DESC_SIZE, 0, NULL, NULL, &ring->desc_dmat);
+       error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0,
+           BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+           count * IWI_CMD_DESC_SIZE, 1, count * IWI_CMD_DESC_SIZE, 0, 
+           NULL, NULL, &ring->desc_dmat);
        if (error != 0) {
                device_printf(sc->sc_dev, "could not create desc DMA tag\n");
                goto fail;
@@ -606,9 +607,10 @@
        ring->csr_ridx = csr_ridx;
        ring->csr_widx = csr_widx;
 
-       error = bus_dma_tag_create(NULL, 4, 0, BUS_SPACE_MAXADDR_32BIT,
-           BUS_SPACE_MAXADDR, NULL, NULL, count * IWI_TX_DESC_SIZE, 1,
-           count * IWI_TX_DESC_SIZE, 0, NULL, NULL, &ring->desc_dmat);
+       error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0,
+           BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+           count * IWI_TX_DESC_SIZE, 1, count * IWI_TX_DESC_SIZE, 0, NULL, 
+           NULL, &ring->desc_dmat);
        if (error != 0) {
                device_printf(sc->sc_dev, "could not create desc DMA tag\n");
                goto fail;
@@ -636,9 +638,9 @@
                goto fail;
        }
 
-       error = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR_32BIT,
-           BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, IWI_MAX_NSEG,
-           MCLBYTES, 0, NULL, NULL, &ring->data_dmat);
+       error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0,
+       BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES,
+       IWI_MAX_NSEG, MCLBYTES, 0, NULL, NULL, &ring->data_dmat);
        if (error != 0) {
                device_printf(sc->sc_dev, "could not create data DMA tag\n");
                goto fail;
@@ -744,9 +746,9 @@
                goto fail;
        }
 
-       error = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR_32BIT,
-           BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL,
-           NULL, &ring->data_dmat);
+       error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0,
+           BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES,
+           1, MCLBYTES, 0, NULL, NULL, &ring->data_dmat);
        if (error != 0) {
                device_printf(sc->sc_dev, "could not create data DMA tag\n");
                goto fail;
@@ -3111,9 +3113,10 @@
        if (sc->fw_uc.size > sc->fw_dma_size)
                sc->fw_dma_size = sc->fw_uc.size;
 
-       if (bus_dma_tag_create(NULL, 4, 0, BUS_SPACE_MAXADDR_32BIT,
-           BUS_SPACE_MAXADDR, NULL, NULL, sc->fw_dma_size, 1, sc->fw_dma_size,
-           0, NULL, NULL, &sc->fw_dmat) != 0) {
+       if (bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0, 
+           BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, 
+           sc->fw_dma_size, 1, sc->fw_dma_size, 0, NULL, NULL, 
+           &sc->fw_dmat) != 0) {
                device_printf(sc->sc_dev,
                    "could not create firmware DMA tag\n");
                IWI_LOCK(sc);

==== //depot/projects/linuxolator/src/sys/kern/sched_4bsd.c#10 (text+ko) ====

@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.95 2007/01/23 08:46:50 jeff 
Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.96 2007/02/02 05:14:21 
julian Exp $");
 
 #include "opt_hwpmc_hooks.h"
 
@@ -866,9 +866,12 @@
         * or stopped or any thing else similar.  We never put the idle
         * threads on the run queue, however.
         */
-       if (td == PCPU_GET(idlethread))
+       if (td->td_flags & TDF_IDLETD) {
                TD_SET_CAN_RUN(td);
-       else {
+#ifdef SMP
+               idle_cpus_mask &= ~PCPU_GET(cpumask);
+#endif
+       } else {
                if (TD_IS_RUNNING(td)) {
                        /* Put us back on the run queue. */
                        sched_add(td, (flags & SW_PREEMPT) ?
@@ -901,13 +904,33 @@
                        PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
 #endif
 
+                /* I feel sleepy */
                cpu_switch(td, newtd);
+               /*
+                * Where am I?  What year is it?
+                * We are in the same thread that went to sleep above,
+                * but any amount of time may have passed. All our context
+                * will still be available as will local variables.
+                * PCPU values however may have changed as we may have
+                * changed CPU so don't trust cached values of them.
+                * New threads will go to fork_exit() instead of here
+                * so if you change things here you may need to change
+                * things there too.
+                * If the thread above was exiting it will never wake
+                * up again here, so either it has saved everything it
+                * needed to, or the thread_wait() or wait() will
+                * need to reap it.
+                */
 #ifdef HWPMC_HOOKS
                if (PMC_PROC_IS_USING_PMCS(td->td_proc))
                        PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
 #endif
        }
 
+#ifdef SMP
+       if (td->td_flags & TDF_IDLETD)
+               idle_cpus_mask |= PCPU_GET(cpumask);
+#endif
        sched_lock.mtx_lock = (uintptr_t)td;
        td->td_oncpu = PCPU_GET(cpuid);
 }
@@ -1326,18 +1349,9 @@
 {
        struct proc *p;
        struct thread *td;
-#ifdef SMP
-       cpumask_t mycpu;
-#endif
 
        td = curthread;
        p = td->td_proc;
-#ifdef SMP
-       mycpu = PCPU_GET(cpumask);
-       mtx_lock_spin(&sched_lock);
-       idle_cpus_mask |= mycpu;
-       mtx_unlock_spin(&sched_lock);
-#endif
        for (;;) {
                mtx_assert(&Giant, MA_NOTOWNED);
 
@@ -1345,13 +1359,7 @@
                        cpu_idle();
 
                mtx_lock_spin(&sched_lock);
-#ifdef SMP
-               idle_cpus_mask &= ~mycpu;
-#endif
                mi_switch(SW_VOL, NULL);
-#ifdef SMP
-               idle_cpus_mask |= mycpu;
-#endif
                mtx_unlock_spin(&sched_lock);
        }
 }

==== //depot/projects/linuxolator/src/sys/kern/subr_witness.c#6 (text+ko) ====

@@ -82,7 +82,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/subr_witness.c,v 1.221 2007/01/16 22:56:28 
ssouhlal Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/subr_witness.c,v 1.222 2007/02/02 09:02:18 
kib Exp $");
 
 #include "opt_ddb.h"
 #include "opt_witness.h"
@@ -370,6 +370,13 @@
        { "cdev", &lock_class_mtx_sleep },
        { NULL, NULL },
        /*
+        * kqueue/VFS interaction
+        */
+       { "kqueue", &lock_class_mtx_sleep },
+       { "struct mount mtx", &lock_class_mtx_sleep },
+       { "vnode interlock", &lock_class_mtx_sleep },
+       { NULL, NULL },
+       /*
         * spin locks
         */
 #ifdef SMP

==== //depot/projects/linuxolator/src/sys/kern/uipc_socket.c#10 (text+ko) ====

@@ -95,7 +95,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/uipc_socket.c,v 1.289 2007/01/22 14:50:28 
andre Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/uipc_socket.c,v 1.290 2007/02/01 17:53:40 
andre Exp $");
 
 #include "opt_inet.h"
 #include "opt_mac.h"
@@ -368,6 +368,10 @@
        knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
            NULL, NULL, NULL);
        so->so_count = 1;
+       /*
+        * Auto-sizing of socket buffers is managed by the protocols and
+        * the appropriate flags must be set in the pru_attach function.
+        */
        error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
        if (error) {
                KASSERT(so->so_count == 1, ("socreate: so_count %d",
@@ -442,6 +446,8 @@
        so->so_snd.sb_lowat = head->so_snd.sb_lowat;
        so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
        so->so_snd.sb_timeo = head->so_snd.sb_timeo;
+       so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
+       so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
        so->so_state |= connstatus;
        ACCEPT_LOCK();
        if (connstatus) {
@@ -2116,6 +2122,8 @@
                                        error = ENOBUFS;
                                        goto bad;
                                }
+                               (sopt->sopt_name == SO_SNDBUF ? &so->so_snd :
+                                   &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE;
                                break;
 
                        /*

==== //depot/projects/linuxolator/src/sys/net80211/_ieee80211.h#4 (text+ko) ====

@@ -29,7 +29,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/net80211/_ieee80211.h,v 1.6 2007/01/15 01:12:28 sam Exp $
+ * $FreeBSD: src/sys/net80211/_ieee80211.h,v 1.7 2007/02/02 02:45:33 sam Exp $
  */
 #ifndef _NET80211__IEEE80211_H_
 #define _NET80211__IEEE80211_H_
@@ -186,6 +186,8 @@
        (((_c)->ic_flags & (IEEE80211_CHAN_QUARTER | IEEE80211_CHAN_HALF)) == 0)
 #define        IEEE80211_IS_CHAN_GSM(_c) \
        (((_c)->ic_flags & IEEE80211_CHAN_GSM) != 0)
+#define        IEEE80211_IS_CHAN_PASSIVE(_c) \
+       (((_c)->ic_flags & IEEE80211_CHAN_PASSIVE) != 0)
 
 /* ni_chan encoding for FH phy */
 #define        IEEE80211_FH_CHANMOD    80

==== //depot/projects/linuxolator/src/sys/netinet/tcp_input.c#7 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *     @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.311 2006/12/12 12:17:56 bz Exp $
+ * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.312 2007/02/01 18:32:13 andre Exp 
$
  */
 
 #include "opt_ipfw.h"          /* for ipfw_fwd         */
@@ -161,6 +161,18 @@
           &tcp_reass_overflows, 0,
           "Global number of TCP Segment Reassembly Queue Overflows");
 
+int    tcp_do_autorcvbuf = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
+          &tcp_do_autorcvbuf, 0, "Enable automatic receive buffer sizing");
+
+int    tcp_autorcvbuf_inc = 16*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW,
+          &tcp_autorcvbuf_inc, 0, "Incrementor step size of automatic receive 
buffer");
+
+int    tcp_autorcvbuf_max = 256*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
+          &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer");
+
 struct inpcbhead tcb;
 #define        tcb6    tcb  /* for KAME src sync over BSD*'s */
 struct inpcbinfo tcbinfo;
@@ -1295,6 +1307,8 @@
                } else if (th->th_ack == tp->snd_una &&
                    LIST_EMPTY(&tp->t_segq) &&
                    tlen <= sbspace(&so->so_rcv)) {
+                       int newsize = 0;        /* automatic sockbuf scaling */
+
                        KASSERT(headlocked, ("headlocked"));
                        INP_INFO_WUNLOCK(&tcbinfo);
                        headlocked = 0;
@@ -1321,18 +1335,78 @@
                        tcpstat.tcps_rcvpack++;
                        tcpstat.tcps_rcvbyte += tlen;
                        ND6_HINT(tp);   /* some progress has been done */
-                       /*
 #ifdef TCPDEBUG
                        if (so->so_options & SO_DEBUG)
                                tcp_trace(TA_INPUT, ostate, tp,
                                    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
-                        * Add data to socket buffer.
-                        */
+               /*
+                * Automatic sizing of receive socket buffer.  Often the receive
+                * buffer size is not optimally adjusted to the actual network
+                * conditions at hand (delay bandwidth product).  Setting the
+                * buffer size too small limits throughput on links with high
+                * bandwidth and high delay (eg. trans-continental/oceanic links).
+                *
+                * On the receive side the socket buffer memory is only rarely
+                * used to any significant extent.  This allows us to be much
+                * more aggressive in scaling the receive socket buffer.  For
+                * the case that the buffer space is actually used to a large
+                * extent and we run out of kernel memory we can simply drop
+                * the new segments; TCP on the sender will just retransmit it
+                * later.  Setting the buffer size too big may only consume too
+                * much kernel memory if the application doesn't read() from
+                * the socket or packet loss or reordering makes use of the
+                * reassembly queue.
+                *
+                * The criteria to step up the receive buffer one notch are:
+                *  1. the number of bytes received during the time it takes
+                *     one timestamp to be reflected back to us (the RTT);
+                *  2. received bytes per RTT is within seven eighths of the
+                *     current socket buffer size;
+                *  3. receive buffer size has not hit maximal automatic size;
+                *
+                * This algorithm does one step per RTT at most and only if
+                * we receive a bulk stream w/o packet losses or reorderings.
+                * Shrinking the buffer during idle times is not necessary as
+                * it doesn't consume any memory when idle.
+                *
+                * TODO: Only step up if the application is actually serving
+                * the buffer to better manage the socket buffer resources.
+                */
+                       if (tcp_do_autorcvbuf &&
+                           to.to_tsecr &&
+                           (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
+                               if (to.to_tsecr > tp->rfbuf_ts &&
+                                   to.to_tsecr - tp->rfbuf_ts < hz) {
+                                       if (tp->rfbuf_cnt >
+                                           (so->so_rcv.sb_hiwat / 8 * 7) &&
+                                           so->so_rcv.sb_hiwat <
+                                           tcp_autorcvbuf_max) {
+                                               newsize =
+                                                   min(so->so_rcv.sb_hiwat +
+                                                   tcp_autorcvbuf_inc,
+                                                   tcp_autorcvbuf_max);
+                                       }
+                                       /* Start over with next RTT. */
+                                       tp->rfbuf_ts = 0;
+                                       tp->rfbuf_cnt = 0;
+                               } else
+                                       tp->rfbuf_cnt += tlen;  /* add up */
+                       }
+
+                       /* Add data to socket buffer. */
                        SOCKBUF_LOCK(&so->so_rcv);
                        if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
                                m_freem(m);
                        } else {
+                               /*
+                                * Set new socket buffer size.
+                                * Give up when limit is reached.
+                                */
+                               if (newsize)
+                                       if (!sbreserve_locked(&so->so_rcv,
+                                           newsize, so, curthread))
+                                               so->so_rcv.sb_flags &= 
~SB_AUTOSIZE;
                                m_adj(m, drop_hdrlen);  /* delayed header drop 
*/
                                sbappendstream_locked(&so->so_rcv, m);
                        }
@@ -1361,6 +1435,10 @@
        tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
        }
 
+       /* Reset receive buffer auto scaling when not in bulk receive mode. */
+       tp->rfbuf_ts = 0;
+       tp->rfbuf_cnt = 0;
+
        switch (tp->t_state) {
 
        /*

==== //depot/projects/linuxolator/src/sys/netinet/tcp_output.c#5 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *     @(#)tcp_output.c        8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.121 2006/10/22 11:52:16 rwatson 
Exp $
+ * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.122 2007/02/01 18:32:13 andre 
Exp $
  */
 
 #include "opt_inet.h"
@@ -110,6 +110,19 @@
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
        &tcp_do_tso, 0, "Enable TCP Segmentation Offload");
 
+int    tcp_do_autosndbuf = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW,
+       &tcp_do_autosndbuf, 0, "Enable automatic send buffer sizing");
+
+int    tcp_autosndbuf_inc = 8*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_RW,
+       &tcp_autosndbuf_inc, 0, "Incrementor step size of automatic send 
buffer");
+
+int    tcp_autosndbuf_max = 256*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW,
+       &tcp_autosndbuf_max, 0, "Max size of automatic send buffer");
+
+
 /*
  * Tcp output routine: figure out what should be sent and send it.
  */
@@ -380,11 +393,60 @@
                }
        }
 
+       /* len will be >= 0 after this point. */
+       KASSERT(len >= 0, ("%s: len < 0", __func__));
+
+       /*
+        * Automatic sizing of send socket buffer.  Often the send buffer
+        * size is not optimally adjusted to the actual network conditions
+        * at hand (delay bandwidth product).  Setting the buffer size too
+        * small limits throughput on links with high bandwidth and high
+        * delay (eg. trans-continental/oceanic links).  Setting the
+        * buffer size too big consumes too much real kernel memory,
+        * especially with many connections on busy servers.
+        *
+        * The criteria to step up the send buffer one notch are:
+        *  1. receive window of remote host is larger than send buffer
+        *     (with a fudge factor of 5/4th);
+        *  2. send buffer is filled to 7/8th with data (so we actually
+        *     have data to make use of it);
+        *  3. send buffer fill has not hit maximal automatic size;
+        *  4. our send window (slow start and congestion controlled) is
+        *     larger than sent but unacknowledged data in send buffer.
+        *
+        * The remote host receive window scaling factor may limit the
+        * growing of the send buffer before it reaches its allowed
+        * maximum.
+        *
+        * It scales directly with slow start or congestion window
+        * and does at most one step per received ACK.  This fast
+        * scaling has the drawback of growing the send buffer beyond
+        * what is strictly necessary to make full use of a given
+        * delay*bandwidth product.  However testing has shown this not
+        * to be much of a problem.  At worst we are trading wasting
+        * of available bandwidth (the non-use of it) for wasting some
+        * socket buffer memory.
+        *
+        * TODO: Shrink send buffer during idle periods together
+        * with congestion window.  Requires another timer.  Has to
+        * wait for upcoming tcp timer rewrite.
+        */
+       if (tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
+               if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
+                   so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) &&
+                   so->so_snd.sb_cc < tcp_autosndbuf_max &&
+                   sendwin >= (so->so_snd.sb_cc - (tp->snd_nxt - 
tp->snd_una))) {
+                       if (!sbreserve_locked(&so->so_snd,
+                           min(so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
+                            tcp_autosndbuf_max), so, curthread))
+                               so->so_snd.sb_flags &= ~SB_AUTOSIZE;
+               }
+       }
+
        /*
-        * len will be >= 0 after this point.  Truncate to the maximum
-        * segment length or enable TCP Segmentation Offloading (if supported
-        * by hardware) and ensure that FIN is removed if the length no longer
-        * contains the last data byte.
+        * Truncate to the maximum segment length or enable TCP Segmentation
+        * Offloading (if supported by hardware) and ensure that FIN is removed
+        * if the length no longer contains the last data byte.
         *
         * TSO may only be used if we are in a pure bulk sending state.  The
         * presence of TCP-MD5, SACK retransmits, SACK advertizements and
@@ -606,6 +668,10 @@
                optlen += TCPOLEN_TSTAMP_APPA;
        }
 
+       /* Set receive buffer autosizing timestamp. */
+       if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE))
+               tp->rfbuf_ts = ticks;
+
 #ifdef TCP_SIGNATURE
 #ifdef INET6
        if (!isipv6)

==== //depot/projects/linuxolator/src/sys/netinet/tcp_syncache.c#5 (text+ko) 
====

@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/netinet/tcp_syncache.c,v 1.103 2006/12/13 06:00:56 csjp 
Exp $
+ * $FreeBSD: src/sys/netinet/tcp_syncache.c,v 1.104 2007/02/01 17:39:18 andre 
Exp $
  */
 
 #include "opt_inet.h"
@@ -1014,9 +1014,15 @@
                if (to->to_flags & TOF_SCALE) {
                        int wscale = 0;
 
-                       /* Compute proper scaling value from buffer space */
+                       /*
+                        * Compute proper scaling value from buffer space.
+                        * Leave enough room for the socket buffer to grow
+                        * with auto sizing.  This allows us to scale the
+                        * receive buffer over a wide range while not losing
+                        * any efficiency or fine granularity.
+                        */
                        while (wscale < TCP_MAX_WINSHIFT &&
-                           (TCP_MAXWIN << wscale) < sb_hiwat)
+                           (0x1 << wscale) < tcp_minmss)
                                wscale++;
                        sc->sc_requested_r_scale = wscale;
                        sc->sc_requested_s_scale = to->to_requested_s_scale;

==== //depot/projects/linuxolator/src/sys/netinet/tcp_usrreq.c#4 (text+ko) ====

@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *     From: @(#)tcp_usrreq.c  8.2 (Berkeley) 1/3/94
- * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.142 2006/11/22 17:16:54 sam Exp $
+ * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.144 2007/02/01 18:32:13 andre 
Exp $
  */
 
 #include "opt_inet.h"
@@ -1131,9 +1131,14 @@
        inp->inp_laddr = laddr;
        in_pcbrehash(inp);
 
-       /* Compute window scaling to request.  */
+       /*
+        * Compute window scaling to request:
+        * Scale to fit into sweet spot.  See tcp_syncache.c.
+        * XXX: This should move to tcp_output().
+        * XXX: This should be based on the actual MSS.
+        */
        while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
-           (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
+           (0x1 << tp->request_r_scale) < tcp_minmss)
                tp->request_r_scale++;
 
        soisconnecting(so);
@@ -1441,6 +1446,8 @@
                if (error)
                        return (error);
        }
+       so->so_rcv.sb_flags |= SB_AUTOSIZE;
+       so->so_snd.sb_flags |= SB_AUTOSIZE;
        INP_INFO_WLOCK(&tcbinfo);
        error = in_pcballoc(so, &tcbinfo);
        if (error) {

==== //depot/projects/linuxolator/src/sys/netinet/tcp_var.h#3 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *     @(#)tcp_var.h   8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.137 2006/09/13 13:08:27 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.138 2007/02/01 18:32:13 andre Exp $
  */
 
 #ifndef _NETINET_TCP_VAR_H_
@@ -202,6 +202,8 @@
                                           episode starts at this seq number */
        struct sackhint sackhint;       /* SACK scoreboard hint */
        int     t_rttlow;               /* smallest observerved RTT */
+       u_int32_t       rfbuf_ts;       /* recv buffer autoscaling timestamp */
+       int     rfbuf_cnt;              /* recv buffer autoscaling byte count */
 };
 
 #define IN_FASTRECOVERY(tp)    (tp->t_flags & TF_FASTRECOVERY)

==== //depot/projects/linuxolator/src/sys/sun4v/include/intr_machdep.h#3 
(text+ko) ====

@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/sun4v/include/intr_machdep.h,v 1.2 2007/01/19 11:15:33 
marius Exp $
+ * $FreeBSD: src/sys/sun4v/include/intr_machdep.h,v 1.3 2007/02/02 05:00:21 
kmacy Exp $
  */
 
 #ifndef        _MACHINE_INTR_MACHDEP_H_
@@ -46,6 +46,7 @@
 #define        PIL_RENDEZVOUS  3       /* smp rendezvous ipi */
 #define        PIL_AST         4       /* ast ipi */
 #define        PIL_STOP        5       /* stop cpu ipi */
+#define        PIL_PREEMPT     6       /* preempt idle thread cpu ipi */
 #define        PIL_FAST        13      /* fast interrupts */
 #define        PIL_TICK        14
 

==== //depot/projects/linuxolator/src/sys/sun4v/include/smp.h#4 (text+ko) ====

@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/sun4v/include/smp.h,v 1.3 2006/12/25 02:05:52 kmacy Exp $
+ * $FreeBSD: src/sys/sun4v/include/smp.h,v 1.4 2007/02/02 05:00:21 kmacy Exp $
  */
 
 #ifndef        _MACHINE_SMP_H_
@@ -44,7 +44,9 @@
 #define        IPI_AST         PIL_AST
 #define        IPI_RENDEZVOUS  PIL_RENDEZVOUS
 #define        IPI_STOP        PIL_STOP
+#define IPI_PREEMPT     PIL_PREEMPT
 
+
 #define        IPI_RETRIES     5000
 
 struct cpu_start_args {
@@ -79,6 +81,7 @@
 
 void cpu_ipi_ast(struct trapframe *tf);
 void cpu_ipi_stop(struct trapframe *tf);
+void cpu_ipi_preempt(struct trapframe *tf);
 
 void   ipi_selected(u_int cpus, u_int ipi);
 void   ipi_all(u_int ipi);

==== //depot/projects/linuxolator/src/sys/sun4v/sun4v/intr_machdep.c#4 
(text+ko) ====

@@ -59,7 +59,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/intr_machdep.c,v 1.3 2006/11/24 
05:27:49 kmacy Exp $");
+__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/intr_machdep.c,v 1.4 2007/02/02 
05:00:21 kmacy Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -114,7 +114,8 @@
        "rndzvs",       /* PIL_RENDEZVOUS */
        "ast",          /* PIL_AST */
        "stop",         /* PIL_STOP */
-       "stray", "stray", "stray", "stray", "stray", "stray", "stray",
+       "preempt",      /* PIL_PREEMPT */
+       "stray", "stray", "stray", "stray", "stray", "stray",
        "fast",         /* PIL_FAST */
        "tick",         /* PIL_TICK */
 };
@@ -266,6 +267,7 @@
        intr_handlers[PIL_AST] = cpu_ipi_ast;
        intr_handlers[PIL_RENDEZVOUS] = (ih_func_t *)smp_rendezvous_action;
        intr_handlers[PIL_STOP]= cpu_ipi_stop;
+       intr_handlers[PIL_PREEMPT]= cpu_ipi_preempt;
 #endif
        mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN);
        cpu_intrq_alloc();

==== //depot/projects/linuxolator/src/sys/sun4v/sun4v/mp_machdep.c#5 (text+ko) 
====

@@ -55,7 +55,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/mp_machdep.c,v 1.5 2006/12/17 01:31:56 
kmacy Exp $");
+__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/mp_machdep.c,v 1.6 2007/02/02 05:00:21 
kmacy Exp $");
 
 #include "opt_trap_trace.h"
 
@@ -456,6 +456,20 @@
 }
 
 void
+cpu_ipi_preempt(struct trapframe *tf)
+{
+       struct thread *running_thread = curthread;
+
+       mtx_lock_spin(&sched_lock);
+       if (running_thread->td_critnest > 1)
+               running_thread->td_owepreempt = 1;
+       else
+               mi_switch(SW_INVOL | SW_PREEMPT, NULL);
+       mtx_unlock_spin(&sched_lock);
+
+}
+
+void
 cpu_ipi_selected(int cpu_count, uint16_t *cpulist, u_long d0, u_long d1, 
u_long d2, uint64_t *ackmask)
 {
 

==== //depot/projects/linuxolator/src/sys/sun4v/sun4v/tte.c#4 (text+ko) ====

@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/tte.c,v 1.3 2006/12/24 08:03:27 kmacy 
Exp $");
+__FBSDID("$FreeBSD: src/sys/sun4v/sun4v/tte.c,v 1.4 2007/02/02 04:57:11 kmacy 
Exp $");
 
 #include "opt_ddb.h"
 #include "opt_pmap.h"
@@ -74,7 +74,7 @@
                PMAP_LOCK(pmap);
                otte_data = tte_hash_clear_bits(pmap->pm_hash, pv->pv_va, 
flags);
                if ((matchbits = (otte_data & active_flags)) != 0) {
-                       if (matchbits == VTD_W) 
+                       if ((otte_data & (VTD_SW_W|VTD_W)) == (VTD_SW_W|VTD_W)) 
                                vm_page_dirty(m);
                        pmap_invalidate_page(pmap, pv->pv_va, TRUE);
                }

==== //depot/projects/linuxolator/src/sys/sys/socketvar.h#2 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *     @(#)socketvar.h 8.3 (Berkeley) 2/19/95
- * $FreeBSD: src/sys/sys/socketvar.h,v 1.154 2006/08/01 10:30:26 rwatson Exp $
+ * $FreeBSD: src/sys/sys/socketvar.h,v 1.155 2007/02/01 17:53:41 andre Exp $
  */
 
 #ifndef _SYS_SOCKETVAR_H_
@@ -128,6 +128,7 @@
 #define        SB_NOINTR       0x40            /* operations not interruptible 
*/
 #define SB_AIO         0x80            /* AIO operations queued */
 #define SB_KNOTE       0x100           /* kernel note attached */
+#define        SB_AUTOSIZE     0x800           /* automatically size socket 
buffer */
 
        void    (*so_upcall)(struct socket *, void *, int);
        void    *so_upcallarg;
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/p4-projects
To unsubscribe, send any mail to "[email protected]"

<Prev in Thread] Current Thread [Next in Thread>
  • PERFORCE change 113893 for review, Roman Divacky <=