Subject: PERFORCE change 113838 for review
From: Robert Watson
Date: Thu, 1 Feb 2007 15:23:34 GMT
http://perforce.freebsd.org/chv.cgi?CH=113838

Change 113838 by [email protected]_cinnamon on 2007/02/01 15:23:29

        Add bpf_buffree() buffer provider function: with zero-copy, user
        space ACKs a buffer via shared memory rather than a system call,
        so BPF needs to detect when the ACK has happened in order to
        rotate the buffers, making the held buffer free.  We call this in
        catchpacket() so that the free buffer is checked for whenever a
        new packet arrives.  We might want to revise the placement.  Only
        zero-copy buffers implement this function.
        
        Add bpf_bufheld() buffer provider function: this is used by BPF to
        notify a buffer provider that a buffer has become "held" -- i.e.,
        that a store buffer has completed.  The buffer provider can then do
        whatever it wants to -- in the case of the zero-copy provider, it
        will update the buffer header.
        
        Modify the zero-copy provider to place a struct bpf_zbuf_header at
        the front of each of the two shared buffers.  This currently
        contains three fields: a kernel generation number, a kernel buffer
        length value, and a user generation number (plus padding out to 32
        bytes).  When the two generation numbers are equal, the buffer is
        owned by the kernel.  When the kernel completes a buffer, it
        increments the kernel generation number, which can be detected by
        user space.  When user space is done with the buffer, it
        increments its own generation number.  This requires refinement to
        use volatile and memory barriers.  The current buffer length is
        written by the kernel at rotation time so that user space knows
        how much data is in the buffer.  As with the explicit ioctl-based
        ACKing, buffers owned by user space are static until acknowledged.
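
        As an illustration of the protocol (a hypothetical user-space
        sketch, not part of this change; zbuf_addr and process_packets()
        are made-up names), user space might poll a mapped buffer roughly
        like this:

                #include <sys/types.h>
                #include <net/bpf.h>

                /*
                 * Poll one mapped zero-copy buffer.  If the kernel and
                 * user generation numbers differ, the buffer is owned by
                 * user space: consume the data that follows the header,
                 * then ACK by making the generations equal again.
                 */
                static void
                zbuf_poll(void *zbuf_addr)
                {
                        volatile struct bpf_zbuf_header *bzh = zbuf_addr;
                        u_int kgen;

                        kgen = bzh->bzh_kernel_gen;
                        if (kgen != bzh->bzh_user_gen) {
                                process_packets((char *)zbuf_addr +
                                    sizeof(struct bpf_zbuf_header),
                                    bzh->bzh_kernel_len);
                                bzh->bzh_user_gen = kgen;
                        }
                }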
        
        Add descriptor locking assertions at several points.
        
        NOTE: This changes the ABI/binary layout of shared memory buffers.
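
        Since the header layout is now part of the ABI, a compile-time
        size check might be worth adding (a suggestion, not part of this
        change), using the kernel's existing CTASSERT macro:

                CTASSERT(sizeof(struct bpf_zbuf_header) == 32);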

Affected files ...

.. //depot/projects/zcopybpf/src/sys/net/bpf.c#9 edit
.. //depot/projects/zcopybpf/src/sys/net/bpf.h#5 edit
.. //depot/projects/zcopybpf/src/sys/net/bpf_zerocopy.c#8 edit
.. //depot/projects/zcopybpf/src/sys/net/bpf_zerocopy.h#2 edit

Differences ...

==== //depot/projects/zcopybpf/src/sys/net/bpf.c#9 (text+ko) ====

@@ -165,6 +165,8 @@
     u_int len)
 {
 
+       BPFD_LOCK_ASSERT(d);
+
        switch (d->bd_bufmode) {
        case BPF_BUFMODE_BUFFER:
                return (bpf_buffer_append_bytes(d, buf, offset, src, len));
@@ -185,6 +187,8 @@
     u_int len)
 {
 
+       BPFD_LOCK_ASSERT(d);
+
        switch (d->bd_bufmode) {
        case BPF_BUFMODE_BUFFER:
                return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
@@ -200,6 +204,41 @@
        }
 }
 
+/*
+ * If the buffer mechanism has a way to decide that a held buffer can be made
+ * free, then it is exposed via the bpf_buffree() interface.  (1) is returned
+ * if the buffer can be discarded, (0) is returned if it cannot.
+ */
+static int
+bpf_buffree(struct bpf_d *d)
+{
+
+       BPFD_LOCK_ASSERT(d);
+
+       switch (d->bd_bufmode) {
+#ifdef BPF_ZEROCOPY
+       case BPF_BUFMODE_ZBUF:
+               return (bpf_zerocopy_buffree(d));
+#endif
+       }
+       return (0);
+}
+
+void
+bpf_bufheld(struct bpf_d *d)
+{
+
+       BPFD_LOCK_ASSERT(d);
+
+       switch (d->bd_bufmode) {
+#ifdef BPF_ZEROCOPY
+       case BPF_BUFMODE_ZBUF:
+               bpf_zerocopy_bufheld(d);
+               break;
+#endif
+       }
+}
+
 static void
 bpf_free(struct bpf_d *d)
 {
@@ -1634,6 +1673,20 @@
        int do_wakeup = 0;
 
        BPFD_LOCK_ASSERT(d);
+
+       /*
+        * Detect whether user space has released a buffer back to us, and if
+        * so, move it from being a hold buffer to a free buffer.  This may
+        * not be the best place to do it (for example, we might only want to
+        * run this check if we need the space), but for now it's a reliable
+        * spot to do it.
+        */
+       if (bpf_buffree(d)) {
+               d->bd_fbuf = d->bd_hbuf;
+               d->bd_hbuf = NULL;
+               d->bd_hlen = 0;
+       }
+
        /*
         * Figure out how many bytes to move.  If the packet is
         * greater or equal to the snapshot length, transfer that

==== //depot/projects/zcopybpf/src/sys/net/bpf.h#5 (text+ko) ====

@@ -175,6 +175,21 @@
 #endif
 
 /*
+ * When using zero-copy BPF buffers, a shared memory header is present
+ * allowing the kernel BPF implementation and user process to synchronize
+ * without using system calls.  This structure defines that header.
+ *
+ * The layout of this structure is critical, and must not be changed; it
+ * must fit in a single page on all architectures.
+ */
+struct bpf_zbuf_header {
+       u_int   bzh_kernel_gen;         /* Kernel generation number. */
+       u_int   bzh_kernel_len;         /* Length of buffer. */
+       u_int   bzh_user_gen;           /* User generation number. */
+       u_int   _bzh_pad[5];            /* Padding out to 32-byte boundary. */
+};
+
+/*
  * Data-link level type codes.
  */
 #define DLT_NULL       0       /* BSD loopback encapsulation */
@@ -673,6 +688,7 @@
        (d)->bd_sbuf = (d)->bd_fbuf;                                    \
        (d)->bd_slen = 0;                                               \
        (d)->bd_fbuf = NULL;                                            \
+       bpf_bufheld(d);                                                 \
 } while (0)
 
 /*
@@ -687,6 +703,7 @@
        struct mtx      bif_mtx;        /* mutex for interface */
 };
 
+void    bpf_bufheld(struct bpf_d *d);
 int     bpf_validate(const struct bpf_insn *, int);
 void    bpf_tap(struct bpf_if *, u_char *, u_int);
 void    bpf_mtap(struct bpf_if *, struct mbuf *);

==== //depot/projects/zcopybpf/src/sys/net/bpf_zerocopy.c#8 (text+ko) ====

@@ -75,12 +75,18 @@
  * be mapped contiguously in the kernel (i.e., a set of physically
  * non-contiguous pages in the direct map region) so we must implement
  * scatter-gather copying.
+ *
+ * At the front of the shared memory region is a bpf_zbuf_header, which
+ * contains shared control data to allow user space and the kernel to
+ * synchronize; this is included in zb_size, but not in bd_bufsize, so
+ * that BPF knows that the space is not available for packet data.
  */
 struct zbuf {
        vm_offset_t      zb_uaddr;      /* User address, may be stale. */
-       u_int            zb_size;       /* Size in bytes. */
+       u_int            zb_size;       /* Size of buffer, incl. header. */
        u_int            zb_numpages;   /* Number of pages. */
        struct sf_buf   **zb_pages;     /* Pages themselves. */
+       struct bpf_zbuf_header  *zb_header;     /* Shared header. */
 };
 
 /*
@@ -219,6 +225,9 @@
                        goto error;
                }
        }
+       zb->zb_header =
+           (struct bpf_zbuf_header *)sf_buf_kva(zb->zb_pages[0]);
+       bzero(zb->zb_header, sizeof(*zb->zb_header));
        *zbp = zb;
        return (0);
 
@@ -253,6 +262,7 @@
         * Scatter-gather copy to user pages mapped into kernel address space
         * using sf_bufs: copy up to a page at a time.
         */
+       offset += sizeof(struct bpf_zbuf_header);
        page = offset / PAGE_SIZE;
        poffset = offset % PAGE_SIZE;
        while (len > 0) {
@@ -298,6 +308,9 @@
        // printf("bpf_zerocopy_append_mbuf(d: %p, buf: %p, offset: %d, src "
        //     "%p, len: %d)\n", d, buf, offset, src, len);
 
+       m = (struct mbuf *)src;
+       zb = (struct zbuf *)buf;
+
        /*
         * Scatter gather both from an mbuf chain and to a user page set
         * mapped into kernel address space using sf_bufs.  If we're lucky,
@@ -305,11 +318,10 @@
         * mbuf alignment work out less well, we'll be doing two copies per
         * mbuf.
         */
+       offset += sizeof(struct bpf_zbuf_header);
        page = offset / PAGE_SIZE;
        poffset = offset % PAGE_SIZE;
        moffset = 0;
-       m = (struct mbuf *)src;
-       zb = (struct zbuf *)buf;
        while (len > 0) {
                KASSERT(page < zb->zb_numpages,
                    ("bpf_zerocopy_append_mbuf: page overflow (%d p %d "
@@ -341,6 +353,54 @@
 }
 
 /*
+ * Notification from the BPF framework that a buffer has moved into the held
+ * slot on a descriptor.  Zero-copy BPF will update the shared page to let
+ * the user process know.
+ *
+ * XXXRW: Do we need to use a memory barrier, atomic operation, or the like
+ * to make sure that the generation update is the last write to become
+ * visible after any packet data, so that user space sees the generation
+ * increase only at or after the last packet data change?
+ */
+void
+bpf_zerocopy_bufheld(struct bpf_d *d)
+{
+       struct zbuf *zb;
+
+       KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
+           ("bpf_zerocopy_bufheld: not in zbuf mode"));
+
+       zb = (struct zbuf *)d->bd_hbuf;
+       KASSERT(zb != NULL, ("bpf_zerocopy_bufheld: zb == NULL"));
+       zb->zb_header->bzh_kernel_gen++;
+       zb->zb_header->bzh_kernel_len = d->bd_hlen;
+}
+
+/*
+ * Query from the BPF framework regarding whether the buffer currently in
+ * the held position can be moved to the free position, which the user
+ * process indicates by making its generation number equal to the kernel
+ * generation number.
+ *
+ * XXXRW: Is memory ordering also an issue here?
+ */
+int
+bpf_zerocopy_buffree(struct bpf_d *d)
+{
+       struct zbuf *zb;
+
+       KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
+           ("bpf_zerocopy_buffree: not in zbuf mode"));
+
+       zb = (struct zbuf *)d->bd_hbuf;
+       if (zb == NULL)
+               return (0);
+       if (zb->zb_header->bzh_kernel_gen == zb->zb_header->bzh_user_gen)
+               return (1);
+       return (0);
+}
+
+/*
  * Free zero copy buffers at request of descriptor.
  */
 void
@@ -416,6 +476,7 @@
                BPFD_UNLOCK(d);
                return (EINVAL);
        }
+       zb->zb_header->bzh_user_gen = zb->zb_header->bzh_kernel_gen;
        d->bd_fbuf = d->bd_hbuf;
        d->bd_hbuf = NULL;
        d->bd_hlen = 0;
@@ -586,7 +647,12 @@
        d->bd_sbuf = (caddr_t)zba;
        d->bd_slen = 0;
        d->bd_hlen = 0;
-       d->bd_bufsize = bz->bz_buflen;
+
+       /*
+        * We expose only the space left in the buffer after the size of the
+        * shared management region.
+        */
+       d->bd_bufsize = bz->bz_buflen - sizeof(struct bpf_zbuf_header);
        BPFD_UNLOCK(d);
        return (0);
 }

==== //depot/projects/zcopybpf/src/sys/net/bpf_zerocopy.h#2 (text+ko) ====

@@ -40,6 +40,8 @@
            void *src, u_int len);
 void   bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
            void *src, u_int len);
+void   bpf_zerocopy_bufheld(struct bpf_d *);
+int    bpf_zerocopy_buffree(struct bpf_d *);
 void   bpf_zerocopy_free(struct bpf_d *d);
 int    bpf_zerocopy_ioctl_ackzbuf(struct thread *td, struct bpf_d *d,
            struct bpf_zbuf *bz);