[PATCH master v2] IPv6 ECMP support fixes for linux
Mikhail Sennikovskii
mikhail.sennikovskii at profitbricks.com
Mon Apr 4 08:59:52 CEST 2016
Hi Ondrej and all,
More than a month has passed since I submitted the updated patch based
on Ondrej's feedback,
but I've got no response yet.
Any feedback/update on this?
Thanks,
Mikhail
2016-03-03 13:51 GMT+01:00 Mikhail Sennikovskii <
mikhail.sennikovskii at profitbricks.com>:
> Hi Ondrej,
>
> Any update/feedback regarding this latest patch?
> I'm open to further discussions and adjustments if you feel they are
> needed.
>
> Thanks,
> Mikhail
>
> 2016-02-22 13:01 GMT+01:00 Mikhail Sennikovskii <
> mikhail.sennikovskii at profitbricks.com>:
>
>> The API for configuring ECMP for IPv6 on Linux is not symmetrical.
>> Routes can be set via the multipath structures, but Linux kernel
>> splits this up into separate routes internally.
>> As a result, ECMP routes are returned as separate independent
>> routes when queried.
>> This patch works around this issue by making bird collect
>> individual routes for the same destination in one multipath route.
>> It also implements deletion of multipath routes as a set of
>> delete operations for each route entry.
>> Asynchronous notifications are still not supported for now.
>>
>> Signed-off-by: Mikhail Sennikovskii <
>> mikhail.sennikovskii at profitbricks.com>
>> ---
>> nest/route.h | 2 +
>> nest/rt-attr.c | 145 +++++++++++++++++++
>> nest/rt-table.c | 41 +++++-
>> sysdep/linux/netlink.c | 371
>> +++++++++++++++++++++++++++++++++++++++++++------
>> 4 files changed, 512 insertions(+), 47 deletions(-)
>>
>> diff --git a/nest/route.h b/nest/route.h
>> index c435b9e..3b87a0e 100644
>> --- a/nest/route.h
>> +++ b/nest/route.h
>> @@ -498,6 +498,8 @@ int mpnh__same(struct mpnh *x, struct mpnh *y); /*
>> Compare multipath nexthops */
>> static inline int mpnh_same(struct mpnh *x, struct mpnh *y)
>> { return (x == y) || mpnh__same(x, y); }
>> struct mpnh *mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry,
>> int max, linpool *lp);
>> +struct mpnh *mpnh_sub(struct mpnh *x, struct mpnh *y, linpool *lp);
>> +struct mpnh *mpnh_sort(struct mpnh *x, linpool *lp);
>>
>> void rta_init(void);
>> rta *rta_lookup(rta *); /* Get rta equivalent to
>> this one, uc++ */
>> diff --git a/nest/rt-attr.c b/nest/rt-attr.c
>> index 7fa05d6..335c96e 100644
>> --- a/nest/rt-attr.c
>> +++ b/nest/rt-attr.c
>> @@ -302,6 +302,151 @@ mpnh_merge(struct mpnh *x, struct mpnh *y, int rx,
>> int ry, int max, linpool *lp)
>> return root;
>> }
>>
>> +/**
>> + * mpnh_sub - subtract one nexthop list from another.
>> + * I.e. returns a list of entries, that existed in list1, but did not
>> + * exist in list 2.
>> + * The input lists must be sorted and the
>> + * result is sorted too.
>> + *
>> + * @x: list 1
>> + * @y: list 2
>> + * @lp: linpool if not NULL list 1 is not reusable,
>> + * new entries are to be allocated using this pool.
>> + * list 2 is never modified.
>> + *
>> + * The argument linpool determines whether the list1
>> + * consumed by the function (i.e. its nodes reused in the resulting
>> list).
>> + * If NULL, the list1 is reused, otherwise the resulting list
>> + * is populated with the new entries, allocated using the linpool.
>> + * To eliminate issues with deallocation of this list,
>> + * the caller should use some form of bulk deallocation
>> + * (e.g. stack or linpool) to free these nodes when the
>> + * resulting list is no longer needed.
>> + */
>> +struct mpnh *
>> +mpnh_sub(struct mpnh *x, struct mpnh *y, linpool *lp)
>> +{
>> + struct mpnh *root = NULL;
>> + struct mpnh **n = &root;
>> +
>> + while (x || y)
>> + {
>> + int cmp = mpnh_compare_node(x, y);
>> + if (cmp < 0)
>> + {
>> + *n = !lp ? x : mpnh_copy_node(x, lp);
>> + x = x->next;
>> + n = &((*n)->next);
>> + }
>> + else if (cmp > 0)
>> + y = y->next;
>> + else
>> + {
>> + x = x->next;
>> + y = y->next;
>> + }
>> + }
>> +
>> + *n = NULL;
>> +
>> + return root;
>> +}
>> +
>> +/**
>> + * mpnh_copy_lp copies nexthop list using given linpool
>> + * (unlike mpnh_copy, which uses sl_alloc)
>> + */
>> +static struct mpnh *
>> +mpnh_copy_lp(struct mpnh *o, linpool *lp)
>> +{
>> + struct mpnh *first = NULL;
>> + struct mpnh **last = &first;
>> +
>> + for (; o; o = o->next)
>> + {
>> + struct mpnh *n = mpnh_copy_node(o, lp);
>> + *last = n;
>> + last = &(n->next);
>> + }
>> +
>> + return first;
>> +}
>> +
>> +/*
>> + * mpnh_sort - sort the nexthop list
>> + * @x: the list to be sorted
>> + * @lp: if not NULL - the list will be copied in case it needs to be
>> reordered,
>> + * in this case the given list always remains unchanged.
>> + * If however the list is ordered, the given list is just returned,
>> + * and no copy of the list is created.
>> + * If lp is NULL, the given list will be reordered directly
>> + */
>> +struct mpnh *
>> +mpnh_sort(struct mpnh *x, linpool *lp)
>> +{
>> + struct mpnh *ret = x;
>> + struct mpnh *cur;
>> + struct mpnh *prev;
>> + int copy_on_change = !!lp;
>> +
>> + for (cur = ret->next, prev = ret; cur; prev = cur, cur = cur->next)
>> + {
>> + int cmp = mpnh_compare_node(prev, cur);
>> + if (cmp <= 0)
>> + continue;
>> +
>> + if (copy_on_change)
>> + {
>> + /* the list needs to be copied, and prev and cur need to be
>> made
>> + * pointing to the new list entries */
>> +
>> + struct mpnh *old_prev, *new_prev;
>> +
>> + ret = mpnh_copy_lp(x, lp);
>> +
>> + for (old_prev = x, new_prev = ret;
>> + old_prev != prev;
>> + old_prev = old_prev->next, new_prev =
>> new_prev->next);
>> +
>> + prev = new_prev;
>> + cur = new_prev->next;
>> +
>> + copy_on_change = 0;
>> + }
>> +
>> + /* promote the entry */
>> + struct mpnh *cur2;
>> + struct mpnh **next2_ptr;
>> +
>> + for (cur2 = ret, next2_ptr = &ret; ; next2_ptr = &cur2->next, cur2
>> = cur2->next)
>> + {
>> + cmp = mpnh_compare_node(cur2, cur);
>> + if (cmp <= 0)
>> + continue;
>> +
>> + /*
>> + * found the place, where to insert the entry
>> + * do the entry move
>> + */
>> +
>> + /* 1. remove entry from the list */
>> + prev->next = cur->next;
>> +
>> + /* 2. now insert entry to the new place */
>> + *next2_ptr = cur;
>> + cur->next = cur2;
>> +
>> + break;
>> + }
>> +
>> + /* now we have everything sorted upto prev,
>> + * set cur to prev and proceed with the cur->next loop */
>> + cur = prev;
>> + }
>> +
>> + return ret;
>> +}
>>
>> static struct mpnh *
>> mpnh_copy(struct mpnh *o)
>> diff --git a/nest/rt-table.c b/nest/rt-table.c
>> index 57c8b8e..0a90633 100644
>> --- a/nest/rt-table.c
>> +++ b/nest/rt-table.c
>> @@ -592,8 +592,27 @@ static struct mpnh *
>> mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
>> {
>> struct mpnh nh = { .gw = a->gw, .iface = a->iface };
>> - struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
>> - return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
>> + struct mpnh *nh2;
>> + int r2 = 0;
>> +
>> + if (a->dest == RTD_MULTIPATH)
>> + {
>> + /*
>> + * mpnh_merge expects the nexthops list to be sorted,
>> + * while the nexthops returned by the protocols,
>> + * e.g. the "static" one, are actually not.
>> + * Ensures the nh2 is sorted.
>> + */
>> + nh2 = mpnh_sort(a->nexthops, rte_update_pool);
>> + /*
>> + * If the sort was actually done, the nh2 is already copies,
>> + * so no need to copy it once again, set r2 to 1 in this case.
>> + */
>> + r2 = (nh2 != a->nexthops);
>> + }
>> + else
>> + nh2 = &nh;
>> + return mpnh_merge(nhs, nh2, 1, r2, max, rte_update_pool);
>> }
>>
>> rte *
>> @@ -642,6 +661,24 @@ rt_export_merged(struct announce_hook *ah, net *net,
>> rte **rt_free, ea_list **tm
>> best->attrs->nexthops = nhs;
>> }
>> }
>> + else if (best->attrs->dest == RTD_MULTIPATH)
>> + {
>> + /*
>> + * mpnh_merge, mpnh_same and mpnh_sub expect the nexthops list
>> + * to be sorted, while the nexthops returned by the protocols,
>> + * e.g. the "static" one, are actually not.
>> + * This ensures the resulting entry has nexthops sorted,
>> + * and makes the behavior consistent and agnostic to
>> + * the number of elements in the best0 entries list
>> + * (i.e. best0->next processing above)
>> + */
>> + nhs = mpnh_sort(best->attrs->nexthops, rte_update_pool);
>> + if (nhs != best->attrs->nexthops)
>> + {
>> + best = rte_cow_rta(best, rte_update_pool);
>> + best->attrs->nexthops = nhs;
>> + }
>> + }
>>
>> if (best != best0)
>> *rt_free = best;
>> diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
>> index 640d187..ca2648b 100644
>> --- a/sysdep/linux/netlink.c
>> +++ b/sysdep/linux/netlink.c
>> @@ -19,7 +19,6 @@
>> #include "nest/route.h"
>> #include "nest/protocol.h"
>> #include "nest/iface.h"
>> -#include "lib/alloca.h"
>> #include "lib/timer.h"
>> #include "lib/unix.h"
>> #include "lib/krt.h"
>> @@ -46,6 +45,32 @@
>> #define RTA_TABLE 15
>> #endif
>>
>> +/*
>> + * nl parse route context
>> + * its duty is
>> + * 1. To maintain the entry collect state -
>> + * for IPv6 ECMP the nl parsing logic needs to collect
>> + * separate individual entries, representing the multipath
>> + * into one multipath entry
>> + * 2. To hold some temporary data used while parsing
>> + * (like non-cached rta) on the stack.
>> + *
>> + * Implementation note: the context actually maintain two rta entries:
>> + * one to be used for the current rte being processed
>> + * (i.e. being created as a result of the nl data parsing),
>> + * another is used for the current rte being collected,
>> + * (i.e. stored in collect_rte, and for which multipath entries are
>> being collected).
>> + * process_attrs holds the index of the attrs, being used for rte
>> being processed.
>> + * Once the rte being processed becomes the one being collected,
>> + * the attrs used with it become "being collected", and another attrs
>> become "being processed".
>> + */
>> +typedef struct nl_parsectx
>> +{
>> + struct krt_proto *collect_p; /* Protocol, for which entries are
>> currently being processed */
>> + rte *collect_rte; /* Entry, for which multipath entries are currently
>> being collected */
>> + int process_attrs; /* index in the attrs array for the entry to be
>> used for the "processed" entry */
>> + rta attrs[2];
>> +} nl_parsectx;
>>
>> /*
>> * Synchronous Netlink interface
>> @@ -62,6 +87,8 @@ struct nl_sock
>>
>> #define NL_RX_SIZE 8192
>>
>> +static linpool *netlink_lp;
>> +
>> static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for
>> synchronous scan */
>> static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for
>> requests */
>>
>> @@ -803,7 +830,7 @@ nh_bufsize(struct mpnh *nh)
>> }
>>
>> static int
>> -nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int
>> new)
>> +nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int
>> new, int mp)
>> {
>> eattr *ea;
>> net *net = e->net;
>> @@ -820,7 +847,8 @@ nl_send_route(struct krt_proto *p, rte *e, struct
>> ea_list *eattrs, int new)
>> bzero(&r.r, sizeof(r.r));
>> r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;
>> r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
>> - r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ?
>> NLM_F_CREATE|NLM_F_EXCL : 0);
>> + r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK
>> + | (new ? NLM_F_CREATE | (!mp ? NLM_F_EXCL : 0) : 0);
>>
>> r.r.rtm_family = BIRD_AF;
>> r.r.rtm_dst_len = net->n.pxlen;
>> @@ -835,8 +863,12 @@ nl_send_route(struct krt_proto *p, rte *e, struct
>> ea_list *eattrs, int new)
>>
>> /* For route delete, we do not specify route attributes */
>> if (!new)
>> - return nl_exchange(&r.h);
>> -
>> + {
>> + if (mp)
>> + goto set_dest;
>> + else
>> + goto submit;
>> + }
>>
>> if (ea = ea_find(eattrs, EA_KRT_METRIC))
>> nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, ea->u.data);
>> @@ -864,7 +896,7 @@ nl_send_route(struct krt_proto *p, rte *e, struct
>> ea_list *eattrs, int new)
>>
>>
>> /* a->iface != NULL checked in krt_capable() for router and device
>> routes */
>> -
>> +set_dest:
>> switch (a->dest)
>> {
>> case RTD_ROUTER:
>> @@ -892,10 +924,104 @@ nl_send_route(struct krt_proto *p, rte *e, struct
>> ea_list *eattrs, int new)
>> default:
>> bug("krt_capable inconsistent with nl_send_route");
>> }
>> -
>> +submit:
>> return nl_exchange(&r.h);
>> }
>>
>> +/*
>> + * this is just to unify the code for bird1.x and bird2
>> + * for bird1.x it is just a define, resolving to 1
>> + * for IPV6 and 0 for IPV4
>> + *
>> + * for bird2 it is a function, making a decision based
>> + * on the p->p.table->addr_type
>> + *
>> + * static int
>> + * trk_is_use_collect_mode(struct krt_proto *p);
>> + */
>> +#ifdef IPV6
>> +#define trk_is_use_collect_mode(_p) 1
>> +#else
>> +#define trk_is_use_collect_mode(_p) 0
>> +#endif
>> +
>> +static struct mpnh *
>> +krt_mp_merge_rta(struct mpnh *nhs, rta *a, int max)
>> +{
>> + struct mpnh nh = { .gw = a->gw, .iface = a->iface };
>> + struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
>> + return mpnh_merge(nhs, nh2, 1, 0, max, netlink_lp);
>> +}
>> +
>> +static struct mpnh *
>> +krt_mp_sub_rte_rta(rta *ax, rta *ay)
>> +{
>> + struct mpnh nhx = { .gw = ax->gw, .iface = ax->iface };
>> + struct mpnh nhy = { .gw = ay->gw, .iface = ay->iface };
>> + struct mpnh *nhpx = (ax->dest == RTD_MULTIPATH) ? ax->nexthops : &nhx;
>> + struct mpnh *nhpy = (ay->dest == RTD_MULTIPATH) ? ay->nexthops : &nhy;
>> + return mpnh_sub(nhpx, nhpy, netlink_lp);
>> +}
>> +
>> +static int
>> +krt_send_nh_multipath(struct krt_proto *p, rte *base, struct mpnh *nh,
>> struct ea_list *eattrs, int new)
>> +{
>> + rte *e;
>> + int err = 0;
>> + rta ra = {
>> + .src= p->p.main_source,
>> + .source = RTS_INHERIT,
>> + .scope = SCOPE_UNIVERSE,
>> + .cast = RTC_UNICAST
>> + };
>> +
>> + e = rte_get_temp(&ra);
>> + e->net = base->net;
>> + e->u.krt = base->u.krt;
>> +
>> + for (; nh; nh = nh->next)
>> + {
>> + ra.gw = nh->gw;
>> + ra.iface = nh->iface;
>> +
>> + err = nl_send_route(p, e, eattrs, new, 1);
>> + if (err < 0)
>> + DBG("deleting route failed %d\n", err);
>> + }
>> +
>> + rte_free(e);
>> +
>> + return err;
>> +}
>> +
>> +static int
>> +krt_adjust_rte_multipath(struct krt_proto *p, rte *new, rte *old, struct
>> ea_list *eattrs)
>> +{
>> + struct mpnh *nhold, *nhnew;
>> + int err = 0;
>> +
>> + nhold = krt_mp_sub_rte_rta(old->attrs, new->attrs);
>> + nhnew = krt_mp_sub_rte_rta(new->attrs, old->attrs);
>> +
>> + if (nhold)
>> + {
>> + if (old->attrs->dest == RTD_MULTIPATH)
>> + err = krt_send_nh_multipath(p, old, nhold, NULL, 0);
>> + else
>> + err = nl_send_route(p, old, NULL, 0, 1);
>> + }
>> +
>> + if (nhnew)
>> + {
>> + if (new->attrs->dest == RTD_MULTIPATH)
>> + err |= krt_send_nh_multipath(p, new, nhnew, eattrs, 1);
>> + else
>> + err |= nl_send_route(p, new, eattrs, 1, 1);
>> + }
>> +
>> + return err;
>> +}
>> +
>> void
>> krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct
>> ea_list *eattrs)
>> {
>> @@ -909,10 +1035,27 @@ krt_replace_rte(struct krt_proto *p, net *n, rte
>> *new, rte *old, struct ea_list
>> */
>>
>> if (old)
>> - nl_send_route(p, old, NULL, 0);
>> + {
>> + if (trk_is_use_collect_mode(p))
>> + {
>> + if (new && ( new->attrs->dest == RTD_MULTIPATH
>> + || old->attrs->dest == RTD_MULTIPATH))
>> + {
>> + err = krt_adjust_rte_multipath(p, new, old, eattrs);
>> + /* zero up "new" to ensure the below "if (new)" branch is
>> not triggered */
>> + new = NULL;
>> + }
>> + else if (old->attrs->dest == RTD_MULTIPATH)
>> + krt_send_nh_multipath(p, old, old->attrs->nexthops, NULL, 0);
>> + else
>> + nl_send_route(p, old, NULL, 0, 0);
>> + }
>> + else
>> + nl_send_route(p, old, NULL, 0, 0);
>> + }
>>
>> if (new)
>> - err = nl_send_route(p, new, eattrs, 1);
>> + err = nl_send_route(p, new, eattrs, 1, 0);
>>
>> if (err < 0)
>> n->n.flags |= KRF_SYNC_ERROR;
>> @@ -920,11 +1063,138 @@ krt_replace_rte(struct krt_proto *p, net *n, rte
>> *new, rte *old, struct ea_list
>> n->n.flags &= ~KRF_SYNC_ERROR;
>> }
>>
>> +static int
>> +krt_mp_is_collectable(struct krt_proto *p, rte *e)
>> +{
>> + if (!trk_is_use_collect_mode(p))
>> + return 0;
>> +
>> + struct rta *a = e->attrs;
>> +
>> + if (a->dest != RTD_ROUTER && a->dest != RTD_DEVICE)
>> + return 0;
>> +
>> + return 1;
>> +}
>> +
>> +static int
>> +krt_mp_is_mergable(struct krt_proto *p, rte *e1, rte *e2)
>> +{
>> + if (e1->net != e2->net)
>> + return 0;
>> +
>> + if (!rte_is_valid(e1) || !rte_is_valid(e2))
>> + return 0;
>> +
>> + if (e1->pref != e2->pref)
>> + return 0;
>> +
>> + if (e1->attrs->src->proto->proto != e2->attrs->src->proto->proto)
>> + return 0;
>> +
>> + return 1;
>> +}
>> +
>> +static rte *
>> +krt_mp_collect_do_add(struct krt_proto *p, rte *mp_collect_rte, rte *e)
>> +{
>> + struct rta *attrs = mp_collect_rte->attrs;
>> +
>> + ASSERT(!rta_is_cached(attrs));
>> +
>> + /* sanity to check our tmp attrs selection logic works correctly */
>> + ASSERT(attrs != e->attrs);
>> +
>> + if (attrs->dest != RTD_MULTIPATH)
>> + {
>> + attrs->nexthops = krt_mp_merge_rta(NULL, attrs, p->p.merge_limit);
>> + attrs->dest = RTD_MULTIPATH;
>> + }
>> +
>> + attrs->nexthops = krt_mp_merge_rta(attrs->nexthops, e->attrs,
>> p->p.merge_limit);
>> +
>> + return mp_collect_rte;
>> +}
>> +
>> +static int
>> +krt_mp_can_collect(struct krt_proto *p, rte *mp_collect_rte, rte *e)
>> +{
>> + if (!krt_mp_is_collectable(p, e))
>> + return 0;
>> +
>> + if (!krt_mp_is_mergable(p, mp_collect_rte, e))
>> + return 0;
>> +
>> + return 1;
>> +}
>> +
>> +static rta* nl_parse_get_tmp_rta(nl_parsectx *ctx)
>> +{
>> + rta *a = &ctx->attrs[ctx->process_attrs];
>> +
>> + memset(a, 0, sizeof(*a));
>> + return a;
>> +}
>> +
>> +static void nl_parse_collect_complete(nl_parsectx *ctx)
>> +{
>> + if (ctx->collect_p)
>> + {
>> + DBG("KRT: collected\n");
>> + krt_got_route(ctx->collect_p, ctx->collect_rte);
>> + ctx->collect_p = NULL;
>> + ctx->collect_rte = NULL;
>> + lp_flush(netlink_lp);
>> + }
>> +}
>> +
>> +static void
>> +nl_parse_collect_rte(nl_parsectx *ctx, struct krt_proto *p, rte *e)
>> +{
>> + if (ctx->collect_p)
>> + {
>> + ASSERT(ctx->collect_rte);
>> + if (ctx->collect_p == p && krt_mp_can_collect(p, ctx->collect_rte,
>> e))
>> + {
>> + ctx->collect_rte = krt_mp_collect_do_add(p, ctx->collect_rte, e);
>> + DBG("KRT: collecting[add]\n");
>> + return;
>> + }
>> +
>> + nl_parse_collect_complete(ctx);
>> + }
>> +
>> + ASSERT(!ctx->collect_p);
>> + ASSERT(!ctx->collect_rte);
>> +
>> + if (krt_mp_is_collectable(p, e))
>> + {
>> + ASSERT(e->attrs == &ctx->attrs[ctx->process_attrs]);
>> + ASSERT(!rta_is_cached(e->attrs));
>> + ctx->collect_p = p;
>> + ctx->collect_rte = e;
>> + ctx->process_attrs = (ctx->process_attrs + 1) % 2;
>> + DBG("KRT: collecting\n");
>> + return;
>> + }
>> +
>> + krt_got_route(p, e);
>> +}
>> +
>> +static void nl_parse_begin(nl_parsectx *ctx)
>> +{
>> + memset(ctx, 0, sizeof (*ctx));
>> +}
>> +
>> +static void nl_parse_end(nl_parsectx *ctx)
>> +{
>> + nl_parse_collect_complete(ctx);
>> +}
>>
>> #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; }
>> while(0)
>>
>> static void
>> -nl_parse_route(struct nlmsghdr *h, int scan)
>> +nl_parse_route(nl_parsectx *ctx, struct nlmsghdr *h, int scan)
>> {
>> struct krt_proto *p;
>> struct rtmsg *i;
>> @@ -1022,12 +1292,12 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>
>> net *net = net_get(p->p.table, dst, i->rtm_dst_len);
>>
>> - rta ra = {
>> - .src= p->p.main_source,
>> - .source = RTS_INHERIT,
>> - .scope = SCOPE_UNIVERSE,
>> - .cast = RTC_UNICAST
>> - };
>> + rta *ra = nl_parse_get_tmp_rta(ctx);
>> +
>> + ra->src= p->p.main_source,
>> + ra->source = RTS_INHERIT,
>> + ra->scope = SCOPE_UNIVERSE,
>> + ra->cast = RTC_UNICAST;
>>
>> switch (i->rtm_type)
>> {
>> @@ -1035,9 +1305,9 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>
>> if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
>> {
>> - ra.dest = RTD_MULTIPATH;
>> - ra.nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
>> - if (!ra.nexthops)
>> + ra->dest = RTD_MULTIPATH;
>> + ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
>> + if (!ra->nexthops)
>> {
>> log(L_ERR "KRT: Received strange multipath route %I/%d",
>> net->n.prefix, net->n.pxlen);
>> @@ -1047,8 +1317,8 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>> break;
>> }
>>
>> - ra.iface = if_find_by_index(oif);
>> - if (!ra.iface)
>> + ra->iface = if_find_by_index(oif);
>> + if (!ra->iface)
>> {
>> log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
>> net->n.prefix, net->n.pxlen, oif);
>> @@ -1058,39 +1328,39 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>> if (a[RTA_GATEWAY])
>> {
>> neighbor *ng;
>> - ra.dest = RTD_ROUTER;
>> - memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw));
>> - ipa_ntoh(ra.gw);
>> + ra->dest = RTD_ROUTER;
>> + memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw));
>> + ipa_ntoh(ra->gw);
>>
>> #ifdef IPV6
>> /* Silently skip strange 6to4 routes */
>> - if (ipa_in_net(ra.gw, IPA_NONE, 96))
>> + if (ipa_in_net(ra->gw, IPA_NONE, 96))
>> return;
>> #endif
>>
>> - ng = neigh_find2(&p->p, &ra.gw, ra.iface,
>> + ng = neigh_find2(&p->p, &ra->gw, ra->iface,
>> (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK :
>> 0);
>> if (!ng || (ng->scope == SCOPE_HOST))
>> {
>> log(L_ERR "KRT: Received route %I/%d with strange next-hop
>> %I",
>> - net->n.prefix, net->n.pxlen, ra.gw);
>> + net->n.prefix, net->n.pxlen, ra->gw);
>> return;
>> }
>> }
>> else
>> {
>> - ra.dest = RTD_DEVICE;
>> + ra->dest = RTD_DEVICE;
>> }
>>
>> break;
>> case RTN_BLACKHOLE:
>> - ra.dest = RTD_BLACKHOLE;
>> + ra->dest = RTD_BLACKHOLE;
>> break;
>> case RTN_UNREACHABLE:
>> - ra.dest = RTD_UNREACHABLE;
>> + ra->dest = RTD_UNREACHABLE;
>> break;
>> case RTN_PROHIBIT:
>> - ra.dest = RTD_PROHIBIT;
>> + ra->dest = RTD_PROHIBIT;
>> break;
>> /* FIXME: What about RTN_THROW? */
>> default:
>> @@ -1098,7 +1368,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>> return;
>> }
>>
>> - rte *e = rte_get_temp(&ra);
>> + rte *e = rte_get_temp(ra);
>> e->net = net;
>> e->u.krt.src = src;
>> e->u.krt.proto = i->rtm_protocol;
>> @@ -1114,24 +1384,24 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>> memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));
>> ipa_ntoh(ps);
>>
>> - ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
>> - ea->next = ra.eattrs;
>> - ra.eattrs = ea;
>> + ea_list *ea = lp_alloc(netlink_lp, sizeof(ea_list) +
>> sizeof(eattr));
>> + ea->next = ra->eattrs;
>> + ra->eattrs = ea;
>> ea->flags = EALF_SORTED;
>> ea->count = 1;
>> ea->attrs[0].id = EA_KRT_PREFSRC;
>> ea->attrs[0].flags = 0;
>> ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
>> - ea->attrs[0].u.ptr = alloca(sizeof(struct adata) + sizeof(ps));
>> + ea->attrs[0].u.ptr = lp_alloc(netlink_lp, sizeof(struct adata) +
>> sizeof(ps));
>> ea->attrs[0].u.ptr->length = sizeof(ps);
>> memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
>> }
>>
>> if (a[RTA_FLOW])
>> {
>> - ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
>> - ea->next = ra.eattrs;
>> - ra.eattrs = ea;
>> + ea_list *ea = lp_alloc(netlink_lp, sizeof(ea_list) +
>> sizeof(eattr));
>> + ea->next = ra->eattrs;
>> + ra->eattrs = ea;
>> ea->flags = EALF_SORTED;
>> ea->count = 1;
>> ea->attrs[0].id = EA_KRT_REALM;
>> @@ -1143,7 +1413,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>> if (a[RTA_METRICS])
>> {
>> u32 metrics[KRT_METRICS_MAX];
>> - ea_list *ea = alloca(sizeof(ea_list) + KRT_METRICS_MAX *
>> sizeof(eattr));
>> + ea_list *ea = lp_alloc(netlink_lp, sizeof(ea_list) +
>> KRT_METRICS_MAX * sizeof(eattr));
>> int t, n = 0;
>>
>> if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics))
>> < 0)
>> @@ -1165,15 +1435,15 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>
>> if (n > 0)
>> {
>> - ea->next = ra.eattrs;
>> + ea->next = ra->eattrs;
>> ea->flags = EALF_SORTED;
>> ea->count = n;
>> - ra.eattrs = ea;
>> + ra->eattrs = ea;
>> }
>> }
>>
>> if (scan)
>> - krt_got_route(p, e);
>> + nl_parse_collect_rte(ctx, p, e);
>> else
>> krt_got_route_async(p, e, new);
>> }
>> @@ -1182,13 +1452,19 @@ void
>> krt_do_scan(struct krt_proto *p UNUSED) /*
>> CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
>> {
>> struct nlmsghdr *h;
>> + nl_parsectx ctx;
>>
>> nl_request_dump(BIRD_AF, RTM_GETROUTE);
>> +
>> + nl_parse_begin(&ctx);
>> +
>> while (h = nl_get_scan())
>> if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
>> - nl_parse_route(h, 1);
>> + nl_parse_route(&ctx, h, 1);
>> else
>> log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)",
>> h->nlmsg_type);
>> +
>> + nl_parse_end(&ctx);
>> }
>>
>> /*
>> @@ -1201,12 +1477,16 @@ static byte *nl_async_rx_buffer; /*
>> Receive buffer */
>> static void
>> nl_async_msg(struct nlmsghdr *h)
>> {
>> + nl_parsectx ctx;
>> +
>> switch (h->nlmsg_type)
>> {
>> case RTM_NEWROUTE:
>> case RTM_DELROUTE:
>> DBG("KRT: Received async route notification (%d)\n",
>> h->nlmsg_type);
>> - nl_parse_route(h, 0);
>> + nl_parse_begin(&ctx);
>> + nl_parse_route(&ctx, h, 0);
>> + nl_parse_end(&ctx);
>> break;
>> case RTM_NEWLINK:
>> case RTM_DELLINK:
>> @@ -1325,6 +1605,7 @@ void
>> krt_sys_io_init(void)
>> {
>> HASH_INIT(nl_table_map, krt_pool, 6);
>> + netlink_lp = lp_new(krt_pool, 4080);
>> }
>>
>> int
>> --
>> 2.5.0
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://trubka.network.cz/pipermail/bird-users/attachments/20160404/3dc03de3/attachment.html>
More information about the Bird-users
mailing list