netdev
[Top] [All Lists]

[Patch] fwmark on locally-originated packets

To: netdev@xxxxxxxxxxx
Subject: [Patch] fwmark on locally-originated packets
From: rochberg+l@xxxxxxxxxxxxxxx
Date: Tue, 11 Dec 2001 15:51:33 -0500
Sender: owner-netdev@xxxxxxxxxxx
This patch lets you set the fwmark for locally-originated packets on a
per-socket basis.  This means that with a little application tweaking
(add an ioctl call) you can control packet routing on a per-socket
basis.  Select QoS on each connection, load-balance by hand, slice,
dice!


I've written a patch to do this.  It does:

 1.  Add ioctls (SIOCSFWMARK, SIOCGFWMARK) to set and read back the
     fwmark for a socket

 2.  Make sure that the fwmark is passed to the routing functions

  2a.  Add new route function ip_route_output_sk which fetches necessary
       data out of sk (currently sk->bound_dev_if and sk->nfmark) and
       stuffs it into a route key

  2b.  Convert relevant calls to ip_route_output to use
       ip_route_output_sk

  2c.  Convert ip_route_connect to use ip_route_output_sk

 3.  Change ip_queue_xmit to copy sk->nfmark into skb->nfmark on
     outgoing packets (when sk->nfmark is non-zero)

 4.  (unrelated bonus patch) Initialize key correctly in fib_frontend;
     the old "key.foo = bar...." lines didn't initialize fwmark.  The
     new initializer zeros all unused fields

Question 1:  Do I want to hook in at ip_queue_xmit, or is there a better
place?

Question 2:  Do I want to send this to some other mailing list (like
linux-net@vger)?

                                -david
patches follow

diff -X ~/dontdiff -Naur linux-2.4.16/include/linux/sockios.h 
linux-2.4.16-fwmark/include/linux/sockios.h
--- linux-2.4.16/include/linux/sockios.h        Tue Dec 11 14:47:10 2001
+++ linux-2.4.16-fwmark/include/linux/sockios.h Sat Dec  8 14:48:17 2001
@@ -105,6 +105,13 @@
 #define SIOCGIFVLAN    0x8982          /* 802.1Q VLAN support          */
 #define SIOCSIFVLAN    0x8983          /* Set 802.1Q VLAN options      */
 
+/* Set netfilter fwmark on packets for this connection  */ 
+#define SIOCSFWMARK     0x8984          /* Set netfilter fwmark on packets 
from this cxn  */
+#define SIOCGFWMARK     0x8985
+
+
+
+
 /* bonding calls */
 
 #define SIOCBONDENSLAVE        0x8990          /* enslave a device to the bond 
*/
diff -X ~/dontdiff -Naur linux-2.4.16/include/net/route.h 
linux-2.4.16-fwmark/include/net/route.h
--- linux-2.4.16/include/net/route.h    Tue Dec 11 14:47:27 2001
+++ linux-2.4.16-fwmark/include/net/route.h     Tue Dec 11 12:17:18 2001
@@ -27,6 +27,7 @@
 #include <linux/config.h>
 #include <net/dst.h>
 #include <net/inetpeer.h>
+#include <net/sock.h>
 #include <linux/in_route.h>
 #include <linux/rtnetlink.h>
 #include <linux/route.h>
@@ -140,6 +141,17 @@
        return ip_route_output_key(rp, &key);
 }
 
+static inline int ip_route_output_sk(struct rtable **rp,
+                                     u32 daddr, u32 saddr, u32 tos, const 
struct sock *sk) 
+{
+        struct rt_key key = { dst:daddr, src:saddr, oif:sk->bound_dev_if, 
tos:tos,
+#if defined(CONFIG_NETFILTER) || defined(CONFIG_NETFILTER_MODULE)
+                              fwmark:sk->nfmark,
+#endif 
+        };
+        return ip_route_output_key(rp, &key);
+}
+
 
 static inline void ip_rt_put(struct rtable * rt)
 {
@@ -156,17 +168,17 @@
        return ip_tos2prio[IPTOS_TOS(tos)>>1];
 }
 
-static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 
tos, int oif)
+static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 
tos, const struct sock *sk)
 {
        int err;
-       err = ip_route_output(rp, dst, src, tos, oif);
+       err = ip_route_output_sk(rp, dst, src, tos, sk);
        if (err || (dst && src))
                return err;
        dst = (*rp)->rt_dst;
        src = (*rp)->rt_src;
        ip_rt_put(*rp);
        *rp = NULL;
-       return ip_route_output(rp, dst, src, tos, oif);
+       return ip_route_output_sk(rp, dst, src, tos, sk);
 }
 
 extern void rt_bind_peer(struct rtable *rt, int create);
diff -X ~/dontdiff -Naur linux-2.4.16/include/net/sock.h 
linux-2.4.16-fwmark/include/net/sock.h
--- linux-2.4.16/include/net/sock.h     Tue Dec 11 14:47:28 2001
+++ linux-2.4.16-fwmark/include/net/sock.h      Tue Dec 11 12:17:05 2001
@@ -602,6 +602,10 @@
        long                    rcvtimeo;
        long                    sndtimeo;
 
+#if defined(CONFIG_NETFILTER) || defined(CONFIG_NETFILTER_MODULE)
+        int                     nfmark;         /* Set nfmark on outgoing 
packets if non-zero */
+#endif 
+
 #ifdef CONFIG_FILTER
        /* Socket Filtering Instructions */
        struct sk_filter        *filter;
diff -X ~/dontdiff -Naur linux-2.4.16/net/ipv4/af_inet.c 
linux-2.4.16-fwmark/net/ipv4/af_inet.c
--- linux-2.4.16/net/ipv4/af_inet.c     Tue Dec 11 14:48:09 2001
+++ linux-2.4.16-fwmark/net/ipv4/af_inet.c      Mon Dec 10 16:50:07 2001
@@ -931,6 +931,23 @@
 #endif
                        return -ENOPKG;
 
+#if defined(CONFIG_NETFILTER) || defined(CONFIG_NETFILTER_MODULE)
+                case SIOCSFWMARK:
+                        err = get_user(sk->nfmark,(int *) arg);
+                        if (err) {
+                                return err;
+                        }
+                        sk_dst_reset(sk);
+                        break;
+                case SIOCGFWMARK:
+                        err = put_user(sk->nfmark,(int *) arg);
+                        if (err) {
+                                return err;
+                        }
+                        break;
+#endif 
+
+
                default:
                        if ((cmd >= SIOCDEVPRIVATE) &&
                            (cmd <= (SIOCDEVPRIVATE + 15)))
diff -X ~/dontdiff -Naur linux-2.4.16/net/ipv4/fib_frontend.c 
linux-2.4.16-fwmark/net/ipv4/fib_frontend.c
--- linux-2.4.16/net/ipv4/fib_frontend.c        Tue Dec 11 14:48:10 2001
+++ linux-2.4.16-fwmark/net/ipv4/fib_frontend.c Sat Dec  8 14:47:25 2001
@@ -207,17 +207,10 @@
                        struct net_device *dev, u32 *spec_dst, u32 *itag)
 {
        struct in_device *in_dev;
-       struct rt_key key;
+       struct rt_key key = { dst:src, src:dst, tos:tos, 
oif:0,iif:oif,scope:RT_SCOPE_UNIVERSE};
        struct fib_result res;
        int no_addr, rpf;
        int ret;
-
-       key.dst = src;
-       key.src = dst;
-       key.tos = tos;
-       key.oif = 0;
-       key.iif = oif;
-       key.scope = RT_SCOPE_UNIVERSE;
 
        no_addr = rpf = 0;
        read_lock(&inetdev_lock);
diff -X ~/dontdiff -Naur linux-2.4.16/net/ipv4/ip_output.c 
linux-2.4.16-fwmark/net/ipv4/ip_output.c
--- linux-2.4.16/net/ipv4/ip_output.c   Tue Dec 11 14:48:14 2001
+++ linux-2.4.16-fwmark/net/ipv4/ip_output.c    Sat Dec  8 14:47:25 2001
@@ -345,6 +345,12 @@
        struct rtable *rt;
        struct iphdr *iph;
 
+#if defined(CONFIG_NETFILTER) || defined(CONFIG_NETFILTER_MODULE)
+        if (sk->nfmark) {
+                skb->nfmark=sk->nfmark;
+        }
+#endif 
+
        /* Skip all of this if the packet is already routed,
         * f.e. by something like SCTP.
         */
@@ -366,9 +372,9 @@
                 * keep trying until route appears or the connection times 
itself
                 * out.
                 */
-               if (ip_route_output(&rt, daddr, sk->saddr,
+               if (ip_route_output_sk(&rt, daddr, sk->saddr,
                                    RT_CONN_FLAGS(sk),
-                                   sk->bound_dev_if))
+                                   sk))
                        goto no_route;
                __sk_dst_set(sk, &rt->u.dst);
                sk->route_caps = rt->u.dst.dev->features;
@@ -964,6 +970,7 @@
                        daddr = replyopts.opt.faddr;
        }
 
+        /* XXX should this use sk->oif ? */
        if (ip_route_output(&rt, daddr, rt->rt_spec_dst, 
RT_TOS(skb->nh.iph->tos), 0))
                return;
 
diff -X ~/dontdiff -Naur linux-2.4.16/net/ipv4/tcp_ipv4.c 
linux-2.4.16-fwmark/net/ipv4/tcp_ipv4.c
--- linux-2.4.16/net/ipv4/tcp_ipv4.c    Tue Dec 11 14:48:27 2001
+++ linux-2.4.16-fwmark/net/ipv4/tcp_ipv4.c     Sat Dec  8 14:47:25 2001
@@ -667,7 +667,7 @@
        }
 
        tmp = ip_route_connect(&rt, nexthop, sk->saddr,
-                              RT_CONN_FLAGS(sk), sk->bound_dev_if);
+                              RT_CONN_FLAGS(sk), sk);
        if (tmp < 0)
                return tmp;
 
@@ -1150,11 +1150,11 @@
        struct ip_options *opt;
 
        opt = req->af.v4_req.opt;
-       if(ip_route_output(&rt, ((opt && opt->srr) ?
+       if(ip_route_output_sk(&rt, ((opt && opt->srr) ?
                                 opt->faddr :
                                 req->af.v4_req.rmt_addr),
                           req->af.v4_req.loc_addr,
-                          RT_CONN_FLAGS(sk), sk->bound_dev_if)) {
+                          RT_CONN_FLAGS(sk), sk)) {
                IP_INC_STATS_BH(IpOutNoRoutes);
                return NULL;
        }
@@ -1733,7 +1733,7 @@
        /* Query new route. */
        err = ip_route_connect(&rt, daddr, 0,
                               RT_TOS(sk->protinfo.af_inet.tos)|sk->localroute,
-                              sk->bound_dev_if);
+                              sk);
        if (err)
                return err;
 
@@ -1781,8 +1781,8 @@
        if(sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr)
                daddr = sk->protinfo.af_inet.opt->faddr;
 
-       err = ip_route_output(&rt, daddr, sk->saddr,
-                             RT_CONN_FLAGS(sk), sk->bound_dev_if);
+       err = ip_route_output_sk(&rt, daddr, sk->saddr,
+                             RT_CONN_FLAGS(sk), sk);
        if (!err) {
                __sk_dst_set(sk, &rt->u.dst);
                sk->route_caps = rt->u.dst.dev->features;
diff -X ~/dontdiff -Naur linux-2.4.16/net/ipv4/udp.c 
linux-2.4.16-fwmark/net/ipv4/udp.c
--- linux-2.4.16/net/ipv4/udp.c Tue Dec 11 14:48:29 2001
+++ linux-2.4.16-fwmark/net/ipv4/udp.c  Sat Dec  8 14:47:25 2001
@@ -724,7 +724,7 @@
        sk_dst_reset(sk);
 
        err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
-                              RT_CONN_FLAGS(sk), sk->bound_dev_if);
+                              RT_CONN_FLAGS(sk), sk);
        if (err)
                return err;
        if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {

<Prev in Thread] Current Thread [Next in Thread>
  • [Patch] fwmark on locally-originated packets, rochberg+l <=