[PATCH v3] Babel: Replace internal route selection by bird's nest
Toke Høiland-Jørgensen
toke at toke.dk
Tue Jan 31 12:38:25 CET 2023
Daniel Gröber <dxld at darkboxed.org> writes:
> This allows for filtering routes from specific interfaces and
> neighbours. With the current internal route selection proto babel exports
> only up to one route and an admin cannot do fine-grained filtering.
>
> To fix this we rip out the internal route selection entirely and put them
> all into the bird's nest instead.
I like the idea of enabling route filtering of incoming routes!
> This appears to not actually be a breaking change as route announcement was
> already based on which routes bird would send back to babel_rt_notify
> filters shouldn't be able to tell the difference between a single and
> multiple routes AFAIK.
Can we be a bit more sure than "appears"? :)
Just to make sure I'm understanding correctly: this relies on the nest
using babel_rte_better() to select the route with the lowest metric from
all the ones we announce, right? And if there's no filter, functionality
will be equivalent to before this patch?
Couple of comments on the code below:
> Changes in v3:
> - Subsume FIB_RESTART v2 patch: instead of restarting FIB iteration
> we keep lists of actions to perform after FIB iteration is finished.
>
> proto/babel/babel.c | 269 +++++++++++++++++++-------------------------
> proto/babel/babel.h | 8 +-
> 2 files changed, 120 insertions(+), 157 deletions(-)
>
> diff --git a/proto/babel/babel.c b/proto/babel/babel.c
> index ff8b6b52..087fd95b 100644
> --- a/proto/babel/babel.c
> +++ b/proto/babel/babel.c
> @@ -29,10 +29,11 @@
> * possible routes for the prefix are tracked as &babel_route entries and the
> * feasibility distance is maintained through &babel_source structures.
> *
> - * The main route selection is done in babel_select_route(). This is called when
> - * an entry is updated by receiving updates from the network or when modified by
> - * internal timers. The function selects from feasible and reachable routes the
> - * one with the lowest metric to be announced to the core.
> + * The main route selection is done by the bird nest (heh). For each prefix we
> + * export all feasible routes to the core with a distinct source (rte_src) per
> + * neihbour. Bird will then notify us of the optimal one by calling
> + * babel_rt_notify and we remember which neighbour it selected in
> + * babel_entry.selected_nbr.
> */
>
> #include <stdlib.h>
> @@ -50,7 +51,7 @@ static inline int ge_mod64k(uint a, uint b)
> { return (u16)(a - b) < 0x8000; }
>
> static void babel_expire_requests(struct babel_proto *p, struct babel_entry *e);
> -static void babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod);
> +static void babel_announce_rte(struct babel_proto *p, struct babel_entry *e, struct babel_route *r);
> static inline void babel_announce_retraction(struct babel_proto *p, struct babel_entry *e);
> static void babel_send_route_request(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *n);
> static void babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr, struct babel_neighbor *n);
> @@ -164,13 +165,19 @@ babel_get_route(struct babel_proto *p, struct babel_entry *e, struct babel_neigh
> return r;
> }
>
> +static struct babel_route *
> +babel_get_selected_route(struct babel_proto *p, struct babel_entry *e)
> +{
> + if (e->selected_nbr)
> + return babel_get_route(p, e, e->selected_nbr);
> + return NULL;
> +}
Why the switch to storing the neighbour and resolving that to a route
here? Couldn't we just keep the existing e->selected route and populate
that where e->selected_nbr is currently set?
> static inline void
> babel_retract_route(struct babel_proto *p, struct babel_route *r)
> {
> r->metric = r->advert_metric = BABEL_INFINITY;
> -
> - if (r == r->e->selected)
> - babel_select_route(p, r->e, r);
> + babel_announce_rte(p, r->e, r);
> }
>
> static void
> @@ -182,9 +189,6 @@ babel_flush_route(struct babel_proto *p UNUSED, struct babel_route *r)
> rem_node(NODE r);
> rem_node(&r->neigh_route);
>
> - if (r->e->selected == r)
> - r->e->selected = NULL;
> -
> sl_free(r);
> }
>
> @@ -200,6 +204,7 @@ babel_expire_route(struct babel_proto *p, struct babel_route *r)
> {
> r->metric = r->advert_metric = BABEL_INFINITY;
> r->expires = current_time() + cf->hold_time;
> + babel_announce_rte(p, r->e, r);
> }
> else
> {
> @@ -210,7 +215,7 @@ babel_expire_route(struct babel_proto *p, struct babel_route *r)
> static void
> babel_refresh_route(struct babel_proto *p, struct babel_route *r)
> {
> - if (r == r->e->selected)
> + if (r == babel_get_selected_route(p, r->e))
> babel_send_route_request(p, r->e, r->neigh);
>
> r->refresh_time = 0;
> @@ -221,16 +226,15 @@ babel_expire_routes_(struct babel_proto *p, struct fib *rtable)
> {
> struct babel_config *cf = (void *) p->p.cf;
> struct babel_route *r, *rx;
> + struct babel_entry *retract_head = NULL, *delete_head = NULL;
> + struct babel_route *expire_head = NULL;
> struct fib_iterator fit;
> btime now_ = current_time();
>
> - FIB_ITERATE_INIT(&fit, rtable);
>
> -loop:
> + FIB_ITERATE_INIT(&fit, rtable);
> FIB_ITERATE_START(rtable, &fit, struct babel_entry, e)
> {
> - int changed = 0;
> -
> WALK_LIST_DELSAFE(r, rx, e->routes)
> {
> if (r->refresh_time && r->refresh_time <= now_)
> @@ -238,23 +242,11 @@ loop:
>
> if (r->expires && r->expires <= now_)
> {
> - changed = changed || (r == e->selected);
> - babel_expire_route(p, r);
> + r->expire_next = expire_head;
> + expire_head = r;
> }
> }
>
> - if (changed)
> - {
> - /*
> - * We have to restart the iteration because there may be a cascade of
> - * synchronous events babel_select_route() -> nest table change ->
> - * babel_rt_notify() -> rtable change, invalidating hidden variables.
> - */
> - FIB_ITERATE_PUT(&fit);
> - babel_select_route(p, e, NULL);
> - goto loop;
> - }
> -
> /* Clean up stale entries */
> if ((e->valid == BABEL_ENTRY_STALE) && ((e->updated + cf->hold_time) <= now_))
> e->valid = BABEL_ENTRY_DUMMY;
> @@ -262,9 +254,8 @@ loop:
> /* Clean up unreachable route */
> if (e->unreachable && (!e->valid || (e->router_id == p->router_id)))
> {
> - FIB_ITERATE_PUT(&fit);
> - babel_announce_retraction(p, e);
> - goto loop;
> + e->retract_next = retract_head;
> + retract_head = e;
> }
>
> babel_expire_sources(p, e);
> @@ -273,12 +264,25 @@ loop:
> /* Remove empty entries */
> if (!e->valid && EMPTY_LIST(e->routes) && EMPTY_LIST(e->sources) && EMPTY_LIST(e->requests))
> {
> - FIB_ITERATE_PUT(&fit);
> - fib_delete(rtable, e);
> - goto loop;
> + e->delete_next = delete_head;
> + delete_head = e;
> }
> }
> FIB_ITERATE_END;
> +
> + for (struct babel_route *r_next, *r = expire_head; r ; r = r_next) {
> + r_next = r->expire_next;
> + babel_expire_route(p, r);
> + }
> +
> + for (struct babel_entry *e = retract_head; e ; e = e->retract_next) {
> + babel_announce_retraction(p, e);
> + }
> +
> + for (struct babel_entry *e_next, *e = delete_head; e ; e = e_next) {
> + e_next = e->delete_next;
> + fib_delete(rtable, e);
> + }
These also be clearing the ->{expire,delete,rectract}_next pointers? I
know they're not used anywhere else, but they'll be left pointing to
random memory in some cases here...
> }
>
> static void
> @@ -447,6 +451,7 @@ babel_get_neighbor(struct babel_iface *ifa, ip_addr addr)
>
> nbr = mb_allocz(ifa->pool, sizeof(struct babel_neighbor));
> nbr->ifa = ifa;
> + nbr->src = rt_get_source(&p->p, idm_alloc(&p->src_ids));
> nbr->addr = addr;
> nbr->rxcost = BABEL_INFINITY;
> nbr->txcost = BABEL_INFINITY;
> @@ -474,6 +479,8 @@ babel_flush_neighbor(struct babel_proto *p, struct babel_neighbor *nbr)
> }
>
> nbr->ifa = NULL;
> +
> + idm_free(&p->src_ids, nbr->src->private_id);
> rem_node(NODE nbr);
> mb_free(nbr);
> }
> @@ -634,11 +641,40 @@ done:
> WALK_LIST2(r, n, nbr->routes, neigh_route)
> {
> r->metric = babel_compute_metric(nbr, r->advert_metric);
> - babel_select_route(p, r->e, r);
> + babel_announce_rte(p, r->e, r);
> }
> }
> }
>
> +static void
> +babel_announce_rte_unreachable(struct babel_proto *p, struct babel_entry *e, u8 unreachable)
> +{
> + struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
> + rte *rte = NULL;
> +
> + if (unreachable) {
> + /* Unreachable */
> + rta a0 = {
> + .source = RTS_BABEL,
> + .scope = SCOPE_UNIVERSE,
> + .dest = RTD_UNREACHABLE,
> + .pref = 1,
> + };
> +
> + rta *a = rta_lookup(&a0);
> + rte = rte_get_temp(a, p->p.main_source);
> + }
> +
> + e->unreachable = unreachable;
> +
> + /* Unlike the many neighbour routes we only want one unreachable route, for
> + * one it's ugly to have lots of unreachable routes (if we just did this right
> + * in babel_announce_rte) but we also loose access to the neibour rte_src id
> + * when the neighbour is deallocated. This happens on a different schedule
> + * than unreachable route removal. */
> + rte_update2(c, e->n.addr, rte, p->p.main_source);
> +}
OK, so this function took me a while to understand: There's a special
"unreachable" route we announce whenever we lose a route, and this
function announces or retract that particular route based on the
"unreachable" argument. Could you please add a comment to the top of the
function explaining all this (the comment at the bottom is only a
partial explanation).
Also s/loose/lose/
> /**
> * babel_announce_rte - announce selected route to the core
> * @p: Babel protocol instance
> @@ -649,12 +685,11 @@ done:
> * the entry is valid and ours, the unreachable route is announced instead.
> */
> static void
> -babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
> +babel_announce_rte(struct babel_proto *p, struct babel_entry *e, struct babel_route *r)
> {
> - struct babel_route *r = e->selected;
> struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
>
> - if (r)
> + if (r->metric != BABEL_INFINITY && r->feasible)
> {
> rta a0 = {
> .source = RTS_BABEL,
> @@ -698,124 +733,27 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
> a0.nh.flags = RNF_ONLINK;
>
> rta *a = rta_lookup(&a0);
> - rte *rte = rte_get_temp(a, p->p.main_source);
> + rte *rte = rte_get_temp(a, r->neigh->src);
>
> - e->unreachable = 0;
> - rte_update2(c, e->n.addr, rte, p->p.main_source);
> - }
> - else if (e->valid && (e->router_id != p->router_id))
> - {
> - /* Unreachable */
> - rta a0 = {
> - .source = RTS_BABEL,
> - .scope = SCOPE_UNIVERSE,
> - .dest = RTD_UNREACHABLE,
> - .pref = 1,
> - };
> -
> - rta *a = rta_lookup(&a0);
> - rte *rte = rte_get_temp(a, p->p.main_source);
> -
> - e->unreachable = 1;
> - rte_update2(c, e->n.addr, rte, p->p.main_source);
> + rte_update2(c, e->n.addr, rte, r->neigh->src);
> + babel_announce_rte_unreachable(p, e, 0);
> }
> else
> - {
> - /* Retraction */
> - e->unreachable = 0;
> - rte_update2(c, e->n.addr, NULL, p->p.main_source);
> - }
> + rte_update2(c, e->n.addr, NULL, r->neigh->src);
> }
>
> /* Special case of babel_announce_rte() just for retraction */
> static inline void
> babel_announce_retraction(struct babel_proto *p, struct babel_entry *e)
> {
> + TRACE(D_EVENTS, "Retract unreachable route for %N",
> + e->n.addr);
> +
> struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
> e->unreachable = 0;
> rte_update2(c, e->n.addr, NULL, p->p.main_source);
> }
>
> -
> -/**
> - * babel_select_route - select best route for given route entry
> - * @p: Babel protocol instance
> - * @e: Babel entry to select the best route for
> - * @mod: Babel route that was modified or NULL if unspecified
> - *
> - * Select the best reachable and feasible route for a given prefix among the
> - * routes received from peers, and propagate it to the nest. This just selects
> - * the reachable and feasible route with the lowest metric, but keeps selected
> - * the old one in case of tie.
> - *
> - * If no feasible route is available for a prefix that previously had a route
> - * selected, a seqno request is sent to try to get a valid route. If the entry
> - * is valid and not owned by us, the unreachable route is announced to the nest
> - * (to blackhole packets going to it, as per section 2.8). It is later removed
> - * by babel_expire_routes(). Otherwise, the route is just removed from the nest.
> - *
> - * Argument @mod is used to optimize best route calculation. When specified, the
> - * function can assume that only the @mod route was modified to avoid full best
> - * route selection and announcement when non-best route was modified in minor
> - * way. The caller is advised to not call babel_select_route() when no change is
> - * done (e.g. periodic route updates) to avoid unnecessary announcements of the
> - * same best route. The caller is not required to call the function in case of a
> - * retraction of a non-best route.
> - *
> - * Note that the function does not active triggered updates. That is done by
> - * babel_rt_notify() when the change is propagated back to Babel.
> - */
> -static void
> -babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod)
> -{
> - struct babel_route *r, *best = e->selected;
> -
> - /* Shortcut if only non-best was modified */
> - if (mod && (mod != best))
> - {
> - /* Either select modified route, or keep old best route */
> - if ((mod->metric < (best ? best->metric : BABEL_INFINITY)) && mod->feasible)
> - best = mod;
> - else
> - return;
> - }
> - else
> - {
> - /* Selected route may be modified and no longer admissible */
> - if (!best || (best->metric == BABEL_INFINITY) || !best->feasible)
> - best = NULL;
> -
> - /* Find the best feasible route from all routes */
> - WALK_LIST(r, e->routes)
> - if ((r->metric < (best ? best->metric : BABEL_INFINITY)) && r->feasible)
> - best = r;
> - }
> -
> - if (best)
> - {
> - if (best != e->selected)
> - TRACE(D_EVENTS, "Picked new route for prefix %N: router-id %lR metric %d",
> - e->n.addr, best->router_id, best->metric);
> - }
> - else if (e->selected)
> - {
> - /*
> - * We have lost all feasible routes. We have to broadcast seqno request
> - * (Section 3.8.2.1) and keep unreachable route for a while (section 2.8).
> - * The later is done automatically by babel_announce_rte().
> - */
> -
> - TRACE(D_EVENTS, "Lost feasible route for prefix %N", e->n.addr);
> - if (e->valid && (e->selected->router_id == e->router_id))
> - babel_add_seqno_request(p, e, e->selected->router_id, e->selected->seqno + 1, 0, NULL);
With your patch we no longer send seqno requests. That seems bad? :)
> - }
> - else
> - return;
> -
> - e->selected = best;
> - babel_announce_rte(p, e);
> -}
> -
> /*
> * Functions to send replies
> */
> @@ -1300,7 +1238,7 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa)
> s = babel_find_source(e, msg->router_id); /* for feasibility */
> feasible = babel_is_feasible(s, msg->seqno, msg->metric);
> metric = babel_compute_metric(nbr, msg->metric);
> - best = e->selected;
> + best = babel_get_selected_route(p, e);
>
> /*
> * RFC section 3.8.2.2 - Dealing with unfeasible updates. Generate a one-off
> @@ -1338,7 +1276,7 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa)
> e->updated = current_time();
> }
>
> - babel_select_route(p, e, r);
> + babel_announce_rte(p, e, r);
> }
>
> void
> @@ -2003,10 +1941,10 @@ babel_dump_route(struct babel_route *r)
> }
>
> static void
> -babel_dump_entry(struct babel_entry *e)
> +babel_dump_entry(struct babel_proto *p, struct babel_entry *e)
> {
> struct babel_source *s;
> - struct babel_route *r;
> + struct babel_route *r, *best = babel_get_selected_route(p, e);
>
> debug("Babel: Entry %N:\n", e->n.addr);
>
> @@ -2016,7 +1954,7 @@ babel_dump_entry(struct babel_entry *e)
> WALK_LIST(r,e->routes)
> {
> debug(" ");
> - if (r == e->selected) debug("*");
> + if (r == best) debug("*");
> babel_dump_route(r);
> }
> }
> @@ -2057,12 +1995,12 @@ babel_dump(struct proto *P)
>
> FIB_WALK(&p->ip4_rtable, struct babel_entry, e)
> {
> - babel_dump_entry(e);
> + babel_dump_entry(p, e);
> }
> FIB_WALK_END;
> FIB_WALK(&p->ip6_rtable, struct babel_entry, e)
> {
> - babel_dump_entry(e);
> + babel_dump_entry(p, e);
> }
> FIB_WALK_END;
> }
> @@ -2186,7 +2124,7 @@ babel_show_entries_(struct babel_proto *p, struct fib *rtable)
>
> FIB_WALK(rtable, struct babel_entry, e)
> {
> - struct babel_route *r = NULL;
> + struct babel_route *r = NULL, *best = babel_get_selected_route(p, e);
> uint rts = 0, srcs = 0;
> node *n;
>
> @@ -2199,7 +2137,7 @@ babel_show_entries_(struct babel_proto *p, struct fib *rtable)
> if (e->valid)
> cli_msg(-1025, "%-*N %-23lR %6u %5u %7u %7u", width,
> e->n.addr, e->router_id, e->metric, e->seqno, rts, srcs);
> - else if (r = e->selected)
> + else if (r = best)
> cli_msg(-1025, "%-*N %-23lR %6u %5u %7u %7u", width,
> e->n.addr, r->router_id, r->metric, r->seqno, rts, srcs);
> else
> @@ -2236,10 +2174,10 @@ babel_show_routes_(struct babel_proto *p, struct fib *rtable)
>
> FIB_WALK(rtable, struct babel_entry, e)
> {
> - struct babel_route *r;
> + struct babel_route *r, *best = babel_get_selected_route(p, e);
> WALK_LIST(r, e->routes)
> {
> - char c = (r == e->selected) ? '*' : (r->feasible ? '+' : ' ');
> + char c = (r == best) ? '*' : (r->feasible ? '+' : ' ');
> btime time = r->expires ? r->expires - current_time() : 0;
> cli_msg(-1025, "%-*N %-25I %-10s %5u %c %5u %7t", width,
> e->n.addr, r->next_hop, r->neigh->ifa->ifname,
> @@ -2320,15 +2258,18 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
> {
> struct babel_proto *p = (void *) P;
> struct babel_entry *e;
> + struct babel_iface *ifa;
> + struct babel_neighbor *n;
>
> if (new)
> {
> /* Update */
> + uint internal = new->src->proto == P;
> uint rt_seqno;
> uint rt_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, 0);
> u64 rt_router_id = 0;
>
> - if (new->src->proto == P)
> + if (internal)
> {
> rt_seqno = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data;
> eattr *e = ea_find(new->attrs->eattrs, EA_BABEL_ROUTER_ID);
> @@ -2350,6 +2291,18 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
>
> e = babel_get_entry(p, net->n.addr);
>
> + while (internal) {
> + ifa = babel_find_iface(p, new->attrs->nh.iface);
> + if (!ifa) break;
> + n = babel_find_neighbor(ifa, new->attrs->from);
> + if (!n) break;
> + e->selected_nbr = n;
> + break;
> + }
> + if (!internal) {
> + e->selected_nbr = NULL;
> + }
Using a loop+break statements as some kind of weird 'goto'? Yuck!
Maybe just make this a helper function?
babel_entry_update_selected(p, e, new->attrs->nh.iface, internal ? new->attrs->from : NULL)
Also, if we can't find the iface or neighbour object (can that ever
happen) shouldn't we be resetting e->selected_nbr to NULL instead of
keeping whatever old value is there?
> +
> /* Activate triggered updates */
> if ((e->valid != BABEL_ENTRY_VALID) ||
> (e->router_id != rt_router_id))
> @@ -2373,6 +2326,9 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
>
> e->valid = BABEL_ENTRY_STALE;
> e->metric = BABEL_INFINITY;
> + e->selected_nbr = NULL;
> +
> + babel_announce_rte_unreachable(p, e, 1);
>
> babel_trigger_update(p);
> e->updated = current_time();
> @@ -2464,6 +2420,7 @@ babel_start(struct proto *P)
> p->source_slab = sl_new(P->pool, sizeof(struct babel_source));
> p->msg_slab = sl_new(P->pool, sizeof(struct babel_msg_node));
> p->seqno_slab = sl_new(P->pool, sizeof(struct babel_seqno_request));
> + idm_init(&p->src_ids, P->pool, 8);
>
> p->log_pkt_tbf = (struct tbf){ .rate = 1, .burst = 5 };
>
> diff --git a/proto/babel/babel.h b/proto/babel/babel.h
> index da8386b3..1d7e19f0 100644
> --- a/proto/babel/babel.h
> +++ b/proto/babel/babel.h
> @@ -25,6 +25,7 @@
> #include "lib/socket.h"
> #include "lib/string.h"
> #include "lib/timer.h"
> +#include "lib/idm.h"
>
> #define EA_BABEL_METRIC EA_CODE(PROTOCOL_BABEL, 0)
> #define EA_BABEL_ROUTER_ID EA_CODE(PROTOCOL_BABEL, 1)
> @@ -174,6 +175,7 @@ struct babel_proto {
> slab *source_slab;
> slab *msg_slab;
> slab *seqno_slab;
> + struct idm src_ids;
>
> struct tbf log_pkt_tbf; /* TBF for packet messages */
> };
> @@ -216,6 +218,7 @@ struct babel_iface {
> struct babel_neighbor {
> node n;
> struct babel_iface *ifa;
> + struct rte_src *src;
>
> ip_addr addr;
> u16 rxcost; /* Sent in last IHU */
> @@ -256,6 +259,7 @@ struct babel_source {
> struct babel_route {
> node n;
> node neigh_route;
> + struct babel_route *expire_next;
> struct babel_entry *e;
> struct babel_neighbor *neigh;
>
> @@ -281,7 +285,9 @@ struct babel_seqno_request {
> };
>
> struct babel_entry {
> - struct babel_route *selected;
> + struct babel_neighbor *selected_nbr;
> +
> + struct babel_entry *retract_next, *delete_next;
>
> list routes; /* Routes for this prefix (struct babel_route) */
> list sources; /* Source entries for this prefix (struct babel_source). */
> --
> 2.30.2
More information about the Bird-users
mailing list