[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

(usagi-users 04088) Re: Source address/interface selection patch



Hi YOSHIFUJI-san

I'm writing about what we talked about yesterday.
This mail is a reminder.


I found some strange behavior in the bonding driver with IPv6
while I was testing it.

In my test, the bonding device (bond0) consists of two
e1000 NICs (eth0, eth1).

I specified the GA of bond0 with ping6's -I option,
but the ping6 packet was sent out from "eth0".

When I used IPv6 on kernel 2.6.17 or later:

  # ping6 -I2001:0:0:15::c ff02::2
  # tcpdump -ibond0

     < nothing is shown... >

On the other hand, when I used IPv4, the ping packet was sent from bond0.

  # ping -I192.168.0.10 224.0.0.2
  # tcpdump -i bond0
  listening on bond0, link-type EN10MB (Ethernet), capture size 96 bytes
  06:29:20.682640 IP 192.168.0.10 > ALL-ROUTERS.MCAST.NET: ICMP echo request, id 61964, seq 150, length 64


And when I used IPv6 on kernel 2.6.16 or earlier,
the ping6 packet was also sent out from bond0.


I think the correct behavior is for the ping6 packet to be sent out from bond0.
I have attached a patch that sends the packet from bond0.

In particular, could you please check rt6_check_dev()?
I am worried that the calculation of the score is wrong.


Regards,
Naohiro Ooiwa


Naohiro Ooiwa wrote::
> Hi YOSHIFUJI-san
> 
> I haven't received your reply yet.
> I know you are busy, but I would greatly appreciate a prompt reply.
> 
> If there are any problems with my patch,
> please let me know.
> 
> 
> Regards,
> Naohiro Ooiwa
> 
> 
> Naohiro Ooiwa wrote::
>> Hi YOSHIFUJI-san
>>
>> Thank you for creating the patch,
>> and I am sorry for the late reply.
>>
>> I tested your patch on 2.6.27 kernel.
>> I found some mistakes in it, so I fixed them.
>>
>> Although I haven't understood everything yet,
>> this fix works well when I don't separate iif and oif.
>>
>> If there is a reason you separate the arguments (i.e. iif/oif) in ip6_pol_route(),
>> could you please explain it to me?
>>
>> If my patch is reasonable,
>> I'll test it w/ TAHI test suite on 2.6.27.
>>
>>
>> Regards,
>> Naohiro Ooiwa


Signed-off-by: Naohiro Ooiwa <nooiwa@xxxxxxxxxxxxxxxx>
---
 net/ipv6/route.c |  119 +++++++++++++++++++++++++----------------------------
 1 files changed, 56 insertions(+), 63 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d69fa46..a4cfb8a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -232,52 +232,54 @@ static inline int rt6_need_strict(struct in6_addr *daddr)
 }

 /*
+ * Default Router Selection (RFC 2461 6.3.6)
+ */
+static inline int rt6_check_dev(struct rt6_info *rt, int dif,
+				struct in6_addr *saddr,
+				int flags)
+{
+	struct net_device *dev = rt->rt6i_dev;
+	int score = 0;
+
+	if (!dif || dev->ifindex == dif)
+		score |= 1;
+	if ((dev->flags & IFF_LOOPBACK) && rt->rt6i_idev &&
+	    (!dif || rt->rt6i_idev->dev->ifindex == dif))
+		score |= 2;
+
+	if (!dif && saddr && ipv6_chk_addr(dev_net(dev), saddr, dev,
+					   flags & RT6_LOOKUP_F_IFACE))
+		score |= 4;
+
+	return score;
+}
+
+/*
  *	Route lookup. Any table->tb6_lock is implied.
  */

 static inline struct rt6_info *rt6_device_match(struct net *net,
-						    struct rt6_info *rt,
-						    struct in6_addr *saddr,
-						    int oif,
-						    int flags)
+						struct rt6_info *rt,
+						struct in6_addr *saddr,
+						int oif,
+						int flags)
 {
-	struct rt6_info *local = NULL;
+	struct rt6_info *best = net->ipv6.ip6_null_entry;
 	struct rt6_info *sprt;
+	int score, hiscore = -1;

 	if (!oif && ipv6_addr_any(saddr))
 		goto out;

 	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
-		struct net_device *dev = sprt->rt6i_dev;
-
-		if (oif) {
-			if (dev->ifindex == oif)
-				return sprt;
-			if (dev->flags & IFF_LOOPBACK) {
-				if (sprt->rt6i_idev == NULL ||
-				    sprt->rt6i_idev->dev->ifindex != oif) {
-					if (flags & RT6_LOOKUP_F_IFACE && oif)
-						continue;
-					if (local && (!oif ||
-						      local->rt6i_idev->dev->ifindex == oif))
-						continue;
-				}
-				local = sprt;
-			}
-		} else {
-			if (ipv6_chk_addr(net, saddr, dev,
-					  flags & RT6_LOOKUP_F_IFACE))
-				return sprt;
+		score = rt6_check_dev(sprt, oif, saddr, flags);
+		if (score == 3)
+			break;
+		if (score > hiscore) {
+			hiscore = score;
+			best = sprt;
 		}
 	}
-
-	if (oif) {
-		if (local)
-			return local;
-
-		if (flags & RT6_LOOKUP_F_IFACE)
-			return net->ipv6.ip6_null_entry;
-	}
 out:
 	return rt;
 }
@@ -318,20 +320,6 @@ static inline void rt6_probe(struct rt6_info *rt)
 }
 #endif

-/*
- * Default Router Selection (RFC 2461 6.3.6)
- */
-static inline int rt6_check_dev(struct rt6_info *rt, int oif)
-{
-	struct net_device *dev = rt->rt6i_dev;
-	if (!oif || dev->ifindex == oif)
-		return 2;
-	if ((dev->flags & IFF_LOOPBACK) &&
-	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
-		return 1;
-	return 0;
-}
-
 static inline int rt6_check_neigh(struct rt6_info *rt)
 {
 	struct neighbour *neigh = rt->rt6i_nexthop;
@@ -355,12 +343,12 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
 	return m;
 }

-static int rt6_score_route(struct rt6_info *rt, int oif,
-			   int strict)
+static int rt6_score_route(struct rt6_info *rt, int dif,
+			   struct in6_addr *saddr, int strict)
 {
 	int m, n;

-	m = rt6_check_dev(rt, oif);
+	m = rt6_check_dev(rt, dif, saddr, strict);
 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 		return -1;
 #ifdef CONFIG_IPV6_ROUTER_PREF
@@ -372,7 +360,8 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 	return m;
 }

-static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
+static struct rt6_info *find_match(struct rt6_info *rt, int dif,
+				   struct in6_addr *saddr, int strict,
 				   int *mpri, struct rt6_info *match)
 {
 	int m;
@@ -380,7 +369,7 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 	if (rt6_check_expired(rt))
 		goto out;

-	m = rt6_score_route(rt, oif, strict);
+	m = rt6_score_route(rt, dif, saddr, strict);
 	if (m < 0)
 		goto out;

@@ -399,7 +388,10 @@ out:

 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 				     struct rt6_info *rr_head,
-				     u32 metric, int oif, int strict)
+				     u32 metric,
+				     int dif,
+				     struct in6_addr *saddr,
+				     int strict)
 {
 	struct rt6_info *rt, *match;
 	int mpri = -1;
@@ -407,15 +399,16 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 	match = NULL;
 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 	     rt = rt->u.dst.rt6_next)
-		match = find_match(rt, oif, strict, &mpri, match);
+		match = find_match(rt, dif, saddr, strict, &mpri, match);
 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 	     rt = rt->u.dst.rt6_next)
-		match = find_match(rt, oif, strict, &mpri, match);
+		match = find_match(rt, dif, saddr, strict, &mpri, match);

 	return match;
 }

-static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+static struct rt6_info *rt6_select(struct fib6_node *fn, int dif,
+				   struct in6_addr *saddr, int strict)
 {
 	struct rt6_info *match, *rt0;
 	struct net *net;
@@ -427,8 +420,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 	if (!rt0)
 		fn->rr_ptr = rt0 = fn->leaf;

-	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
-
+	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, dif, saddr, strict);
 	if (!match &&
 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
 		struct rt6_info *next = rt0->u.dst.rt6_next;
@@ -668,8 +660,9 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 	return rt;
 }

-static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
-				      struct flowi *fl, int flags)
+static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
+				      struct flowi *fl,
+				      int dif, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt, *nrt;
@@ -687,7 +680,7 @@ restart_2:
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);

 restart:
-	rt = rt6_select(fn, oif, strict | reachable);
+	rt = rt6_select(fn, dif, &fl->fl6_src, strict | reachable);

 	BACKTRACK(net, &fl->fl6_src);
 	if (rt == net->ipv6.ip6_null_entry ||
@@ -744,7 +737,7 @@ out2:
 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 					    struct flowi *fl, int flags)
 {
-	return ip6_pol_route(net, table, fl->iif, fl, flags);
+	return ip6_pol_route(net, table, fl, fl->iif, flags);
 }

 void ip6_route_input(struct sk_buff *skb)
@@ -774,7 +767,7 @@ void ip6_route_input(struct sk_buff *skb)
 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 					     struct flowi *fl, int flags)
 {
-	return ip6_pol_route(net, table, fl->oif, fl, flags);
+	return ip6_pol_route(net, table, fl, fl->oif, flags);
 }

 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
-- 1.5.4.1