[PATCH 1/2] Babel: Add option to support ecmp routes

dxld at darkboxed.org dxld at darkboxed.org
Mon May 9 14:33:58 CEST 2022


Hi Toke,

On Sun, May 08, 2022 at 09:06:43PM +0200, Toke Høiland-Jørgensen wrote:
> Daniel Gröber <dxld at darkboxed.org> writes:
> > ---
> > The FIB walks in babel_reconfigure_iface are a bit ugly, I tried to
> > loop over ifa->neigh_list instead but couldn't get it to work. I'm
> > open to suggestions :)
> >
> >  doc/bird.sgml        |  8 +++++
> >  proto/babel/babel.c  | 70 +++++++++++++++++++++++++++++++++++++-------
> >  proto/babel/babel.h  |  3 ++
> >  proto/babel/config.Y |  3 ++
> >  4 files changed, 73 insertions(+), 11 deletions(-)
> >
> > diff --git a/doc/bird.sgml b/doc/bird.sgml
> > index 1580facd..5e85d8ec 100644
> > --- a/doc/bird.sgml
> > +++ b/doc/bird.sgml
> > @@ -1879,6 +1879,7 @@ protocol babel [<name>] {
> >  		check link <switch>;
> >  		next hop ipv4 <address>;
> >  		next hop ipv6 <address>;
> > +		ecmp <switch> [limit <num>];
> >  		authentication none|mac [permissive];
> >  		password "<text>";
> >  		password "<text>" {
> > @@ -1983,6 +1984,13 @@ protocol babel [<name>] {
> >        source for Babel packets will be used. In normal operation, it should not
> >        be necessary to set this option.
> >  
> > +      <tag><label id="babel-ecmp">ecmp <m>switch</m> [limit <m>number</m>]</tag>
> > +
> > +      Determines whether babel will emit ECMP (equal-cost multipath)
> > +      routes, allowing to load-balancing traffic across multiple paths. If
> > +      enabled the maximum number of next-hops to allow can be specified,
> > +      defaulting to 16.
> 
> This should probably explain the constraints: i.e., that only
> equal-weight routes are added (which in practice limits it to interfaces
> of type 'wired', or wireless interfaces with no packet loss, I suppose?)
> and that the routes all have to be feasible (though I'm not sure if we
> explain feasibility in the docs anywhere).

Right, I have this now:

      When neibours are using a dynamic link-quality metric this is
      unlikely to be useful. For best results <ref id="babel-type"
      name="type wired"> should be used throughout the network to get what
      amounts to a hop count metric.

Once RTT metric are merged I'm going to add another paragraph touting the
virtues of using (the equivalents of rtt-min/max from babeld) to kick
bufferbloated or weirdly re-routed paths out of the ECMP set. That's my
main use-case anyway.

> >        <tag><label id="babel-authentication">authentication none|mac [permissive]</tag>
> >        Selects authentication method to be used. <cf/none/ means that packets
> >        are not authenticated at all, <cf/mac/ means MAC authentication is
> > diff --git a/proto/babel/babel.c b/proto/babel/babel.c
> > index 4a7d550f..4d9c657c 100644
> > --- a/proto/babel/babel.c
> > +++ b/proto/babel/babel.c
> > @@ -623,6 +623,34 @@ done:
> >    }
> >  }
> >  
> > +/**
> > + * babel_nexthop_insert - add next_hop of route to nexthop list
> > + * @p: Babel protocol instance
> > + * @r: Babel route
> > + * @nhs: nexthop list head to append onto
> > + * @nh: freshly allocated buffer to fill
> > + */
> > +static void
> > +babel_nexthop_insert(
> > +  struct babel_proto *p,
> > +  struct babel_route *r,
> > +  struct nexthop **nhs,
> > +  struct nexthop *nh)
> > +{
> > +  nh->gw = r->next_hop;
> > +  nh->iface = r->neigh->ifa->iface;
> > +
> > +  /*
> > +   * If we cannot find a reachable neighbour, set the entry to be onlink. This
> > +   * makes it possible to, e.g., assign /32 addresses on a mesh interface and
> > +   * have routing work.
> > +   */
> > +  if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0))
> > +    nh->flags = RNF_ONLINK;
> > +
> > +  nexthop_insert(nhs, nh);
> > +}
> > +
> >  /**
> >   * babel_announce_rte - announce selected route to the core
> >   * @p: Babel protocol instance
> > @@ -635,6 +663,7 @@ done:
> >  static void
> >  babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
> >  {
> > +  struct babel_config *cf = (void *) p->p.cf;
> >    struct babel_route *r = e->selected;
> >    struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
> >  
> > @@ -645,18 +674,24 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
> >        .source = RTS_BABEL,
> >        .scope = SCOPE_UNIVERSE,
> >        .dest = RTD_UNICAST,
> > -      .from = r->neigh->addr,
> > -      .nh.gw = r->next_hop,
> > -      .nh.iface = r->neigh->ifa->iface,
> >      };
> >  
> > -    /*
> > -     * If we cannot find a reachable neighbour, set the entry to be onlink. This
> > -     * makes it possible to, e.g., assign /32 addresses on a mesh interface and
> > -     * have routing work.
> > -     */
> > -    if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0))
> > -      a0.nh.flags = RNF_ONLINK;
> > +    struct nexthop *nhs = NULL;
> > +    babel_nexthop_insert(p, r, &nhs, allocz(sizeof(struct nexthop)));
> > +    int num_nexthops = 1;
> > +
> > +    struct babel_route *cr;
> > +    WALK_LIST(cr, e->routes) {
> > +      if (num_nexthops++ >= cf->ecmp)
> > +	break;
> > +
> > +      if (cr == r || !cr->feasible || cr->metric != r->metric)
> > +	continue;
> 
> You should probably switch the order of those two checks? You're
> increasing the counter even if the route is subsequently discarded...

Whoops, last minute change and brainfart.

> > +
> > +      babel_nexthop_insert(p, cr, &nhs, allocz(sizeof(struct nexthop)));
> > +    }
> > +
> > +    a0.nh = *nhs;
> >  
> >      rta *a = rta_lookup(&a0);
> >      rte *rte = rte_get_temp(a);
> > @@ -736,6 +771,7 @@ babel_announce_retraction(struct babel_proto *p, struct babel_entry *e)
> >  static void
> >  babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod)
> >  {
> > +  struct babel_config *cf = (void *) p->p.cf;
> >    struct babel_route *r, *best = e->selected;
> >  
> >    /* Shortcut if only non-best was modified */
> > @@ -744,8 +780,10 @@ babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_ro
> >      /* Either select modified route, or keep old best route */
> >      if ((mod->metric < (best ? best->metric : BABEL_INFINITY)) && mod->feasible)
> >        best = mod;
> > -    else
> > +    else if (cf->ecmp==1)
> 
> This looks like you're returning if ECMP is enabled; I think finding a
> better name for that config variable would be good; maybe just
> "max_nexthops"?

Yeah that's neater. Fixed.

> >        return;
> > +    /* With ecmp one of the non-selected but equal metric routes might have
> > +     * changed so contine on with the announcement in that case. */
> >    }
> >    else
> >    {
> > @@ -1879,6 +1917,16 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b
> >    if (ifa->up)
> >      babel_iface_kick_timer(ifa);
> >  
> > +  /* Update all routes to refresh ecmp settings. */
> > +  FIB_WALK(&p->ip6_rtable, struct babel_entry, e) {
> > +    babel_select_route(p, e, NULL);
> > +  }
> > +  FIB_WALK_END;
> > +  FIB_WALK(&p->ip4_rtable, struct babel_entry, e) {
> > +    babel_select_route(p, e, NULL);
> > +  }
> > +  FIB_WALK_END;
> > +
> 
> I don't think this is the right thing to do; you should probably just
> refuse the reconfiguration if ECMP settings changed (see the 0-return at
> the top when TOS changes)?

Why not? Works fine AFAICT. I tested the reload stuff and the nexthops get
updated when the limit changes just as I would expect. If I refuse the
reconfiguration the user would have to restart bird to get this to apply,
right? I don't want that.

> >    return 1;
> >  }
> >  
> > diff --git a/proto/babel/babel.h b/proto/babel/babel.h
> > index 84feb085..3868dcb2 100644
> > --- a/proto/babel/babel.h
> > +++ b/proto/babel/babel.h
> > @@ -62,6 +62,8 @@
> >  #define BABEL_OVERHEAD		(IP6_HEADER_LENGTH+UDP_HEADER_LENGTH)
> >  #define BABEL_MIN_MTU		(512 + BABEL_OVERHEAD)
> >  
> > +#define BABEL_DEFAULT_ECMP_LIMIT	16
> > +
> >  #define BABEL_AUTH_NONE			0
> >  #define BABEL_AUTH_MAC			1
> >  
> > @@ -120,6 +122,7 @@ struct babel_config {
> >    list iface_list;			/* List of iface configs (struct babel_iface_config) */
> >    uint hold_time;			/* Time to hold stale entries and unreachable routes */
> >    u8 randomize_router_id;
> > +  u8 ecmp;                              /* Maximum number of nexthops to install */
> 
> See above re: the naming of this; 'max_nexthops'?
> 
> >    struct channel_config *ip4_channel;
> >    struct channel_config *ip6_channel;
> > diff --git a/proto/babel/config.Y b/proto/babel/config.Y
> > index 05210fa4..8f18c790 100644
> > --- a/proto/babel/config.Y
> > +++ b/proto/babel/config.Y
> > @@ -36,6 +36,7 @@ babel_proto_start: proto_start BABEL
> >    this_proto = proto_config_new(&proto_babel, $1);
> >    init_list(&BABEL_CFG->iface_list);
> >    BABEL_CFG->hold_time = 1 S_;
> > +  BABEL_CFG->ecmp = 1;
> >  };
> >  
> >  babel_proto_item:
> > @@ -43,6 +44,8 @@ babel_proto_item:
> >   | proto_channel
> >   | INTERFACE babel_iface
> >   | RANDOMIZE ROUTER ID bool { BABEL_CFG->randomize_router_id = $4; }
> > + | ECMP bool { BABEL_CFG->ecmp = $2 ? BABEL_DEFAULT_ECMP_LIMIT : 1; }
> > + | ECMP bool LIMIT expr { BABEL_CFG->ecmp = $2 ? $4 : 1; }
> >   ;
> >  
> >  babel_proto_opts:
> > -- 
> > 2.30.2

--Daniel


More information about the Bird-users mailing list