[PATCH master v2] IPv6 ECMP support fixes for linux

Mikhail Sennikovskii mikhail.sennikovskii at profitbricks.com
Mon Apr 4 08:59:52 CEST 2016


Hi Ondrej and all,

More than a month has passed since I submitted the updated patch based
on Ondrej's feedback,
but I have not received a response yet.
Any feedback/update on this?

Thanks,
Mikhail

2016-03-03 13:51 GMT+01:00 Mikhail Sennikovskii <
mikhail.sennikovskii at profitbricks.com>:

> Hi Ondrej,
>
> Any update/feedback regarding this latest patch?
> I'm open to further discussion and adjustments if you feel they are
> needed.
>
> Thanks,
> Mikhail
>
> 2016-02-22 13:01 GMT+01:00 Mikhail Sennikovskii <
> mikhail.sennikovskii at profitbricks.com>:
>
>> The API for configuring ECMP for IPv6 on Linux is not symmetrical.
>> Routes can be set via the multipath structures, but Linux kernel
>> splits this up into separate routes internally.
>> As a result, ECMP routes are returned as separate independent
>> routes when queried.
>> This patch works around this issue by making bird collect
>> individual routes for the same destination in one multipath route.
>> It also implements deletion of multipath routes as a set of
>> delete operations for each route entry.
>> Asynchronous notifications are still not supported for now.
>>
>> Signed-off-by: Mikhail Sennikovskii <
>> mikhail.sennikovskii at profitbricks.com>
>> ---
>>  nest/route.h           |   2 +
>>  nest/rt-attr.c         | 145 +++++++++++++++++++
>>  nest/rt-table.c        |  41 +++++-
>>  sysdep/linux/netlink.c | 371
>> +++++++++++++++++++++++++++++++++++++++++++------
>>  4 files changed, 512 insertions(+), 47 deletions(-)
>>
>> diff --git a/nest/route.h b/nest/route.h
>> index c435b9e..3b87a0e 100644
>> --- a/nest/route.h
>> +++ b/nest/route.h
>> @@ -498,6 +498,8 @@ int mpnh__same(struct mpnh *x, struct mpnh *y); /*
>> Compare multipath nexthops */
>>  static inline int mpnh_same(struct mpnh *x, struct mpnh *y)
>>  { return (x == y) || mpnh__same(x, y); }
>>  struct mpnh *mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry,
>> int max, linpool *lp);
>> +struct mpnh *mpnh_sub(struct mpnh *x, struct mpnh *y, linpool *lp);
>> +struct mpnh *mpnh_sort(struct mpnh *x, linpool *lp);
>>
>>  void rta_init(void);
>>  rta *rta_lookup(rta *);                        /* Get rta equivalent to
>> this one, uc++ */
>> diff --git a/nest/rt-attr.c b/nest/rt-attr.c
>> index 7fa05d6..335c96e 100644
>> --- a/nest/rt-attr.c
>> +++ b/nest/rt-attr.c
>> @@ -302,6 +302,151 @@ mpnh_merge(struct mpnh *x, struct mpnh *y, int rx,
>> int ry, int max, linpool *lp)
>>    return root;
>>  }
>>
>> +/**
>> + * mpnh_sub - subtract one nexthop list from another.
>> + * I.e. returns a list of entries, that existed in list1, but did not
>> + * exist in list 2.
>> + * The input lists must be sorted and the
>> + * result is sorted too.
>> + *
>> + * @x: list 1
>> + * @y: list 2
>> + * @lp: linpool if not NULL list 1 is not reusable,
>> + *      new entries are to be allocated using this pool.
>> + *      list 2 is never modified.
>> + *
>> + * The argument linpool determines whether the list1
>> + * consumed by the function (i.e. its nodes reused in the resulting
>> list).
>> + * If NULL, the list1 is reused, otherwise the resulting list
>> + * is populated with the new entries, allocated using the linpool.
>> + * To eliminate issues with deallocation of this list,
>> + * the caller should use some form of bulk deallocation
>> + * (e.g. stack or linpool) to free these nodes when the
>> + * resulting list is no longer needed.
>> + */
>> +struct mpnh *
>> +mpnh_sub(struct mpnh *x, struct mpnh *y, linpool *lp)
>> +{
>> +  struct mpnh *root = NULL;
>> +  struct mpnh **n = &root;
>> +
>> +  while (x || y)
>> +  {
>> +    int cmp = mpnh_compare_node(x, y);
>> +    if (cmp < 0)
>> +      {
>> +        *n = !lp ? x : mpnh_copy_node(x, lp);
>> +        x = x->next;
>> +        n = &((*n)->next);
>> +      }
>> +    else if (cmp > 0)
>> +      y = y->next;
>> +    else
>> +      {
>> +        x = x->next;
>> +        y = y->next;
>> +      }
>> +  }
>> +
>> +  *n = NULL;
>> +
>> +  return root;
>> +}
>> +
>> +/**
>> + * mpnh_copy_lp copies nexthop list using given linpool
>> + * (unlike mpnh_copy, which uses sl_alloc)
>> + */
>> +static struct mpnh *
>> +mpnh_copy_lp(struct mpnh *o, linpool *lp)
>> +{
>> +  struct mpnh *first = NULL;
>> +  struct mpnh **last = &first;
>> +
>> +  for (; o; o = o->next)
>> +    {
>> +      struct mpnh *n = mpnh_copy_node(o, lp);
>> +      *last = n;
>> +      last = &(n->next);
>> +    }
>> +
>> +  return first;
>> +}
>> +
>> +/*
>> + * mpnh_sort - sort the nexthop list
>> + * @x: the list to be sorted
>> + * @lp: if not NULL - the list will be copied in case it needs to be
>> reordered,
>> + * in this case the given list always remains unchanged.
>> + * If however the list is ordered, the given list is just returned,
>> + * and no copy of the list is created.
>> + * If lp is NULL, the given list will be reordered directly
>> + */
>> +struct mpnh *
>> +mpnh_sort(struct mpnh *x, linpool *lp)
>> +{
>> +  struct mpnh *ret = x;
>> +  struct mpnh *cur;
>> +  struct mpnh *prev;
>> +  int copy_on_change = !!lp;
>> +
>> +  for (cur = ret->next, prev = ret; cur; prev = cur, cur = cur->next)
>> +    {
>> +      int cmp = mpnh_compare_node(prev, cur);
>> +      if (cmp <= 0)
>> +        continue;
>> +
>> +      if (copy_on_change)
>> +        {
>> +          /* the list needs to be copied, and prev and cur need to be
>> made
>> +           * pointing to the new list entries */
>> +
>> +          struct mpnh *old_prev, *new_prev;
>> +
>> +          ret = mpnh_copy_lp(x, lp);
>> +
>> +          for (old_prev = x, new_prev = ret;
>> +                    old_prev != prev;
>> +                    old_prev = old_prev->next, new_prev =
>> new_prev->next);
>> +
>> +          prev = new_prev;
>> +          cur = new_prev->next;
>> +
>> +          copy_on_change = 0;
>> +        }
>> +
>> +      /* promote the entry */
>> +      struct mpnh *cur2;
>> +      struct mpnh **next2_ptr;
>> +
>> +      for (cur2 = ret, next2_ptr = &ret; ; next2_ptr = &cur2->next, cur2
>> = cur2->next)
>> +        {
>> +          cmp = mpnh_compare_node(cur2, cur);
>> +          if (cmp <= 0)
>> +            continue;
>> +
>> +          /*
>> +           * found the place, where to insert the entry
>> +           * do the entry move
>> +           */
>> +
>> +          /* 1. remove entry from the list */
>> +          prev->next = cur->next;
>> +
>> +          /* 2. now insert entry to the new place */
>> +          *next2_ptr = cur;
>> +          cur->next = cur2;
>> +
>> +          break;
>> +        }
>> +
>> +      /* now we have everything sorted upto prev,
>> +       * set cur to prev and proceed with the cur->next loop */
>> +      cur = prev;
>> +    }
>> +
>> +  return ret;
>> +}
>>
>>  static struct mpnh *
>>  mpnh_copy(struct mpnh *o)
>> diff --git a/nest/rt-table.c b/nest/rt-table.c
>> index 57c8b8e..0a90633 100644
>> --- a/nest/rt-table.c
>> +++ b/nest/rt-table.c
>> @@ -592,8 +592,27 @@ static struct mpnh *
>>  mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
>>  {
>>    struct mpnh nh = { .gw = a->gw, .iface = a->iface };
>> -  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
>> -  return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
>> +  struct mpnh *nh2;
>> +  int r2 = 0;
>> +
>> +  if (a->dest == RTD_MULTIPATH)
>> +    {
>> +      /*
>> +       * mpnh_merge expects the nexthops list to be sorted,
>> +       * while the nexthops returned by the protocols,
>> +       * e.g. the "static" one, are actually not.
>> +       * Ensures the nh2 is sorted.
>> +       */
>> +      nh2 = mpnh_sort(a->nexthops, rte_update_pool);
>> +      /*
>> +       * If the sort was actually done, the nh2 is already copies,
>> +       * so no need to copy it once again, set r2 to 1 in this case.
>> +       */
>> +      r2 = (nh2 != a->nexthops);
>> +    }
>> +  else
>> +    nh2 = &nh;
>> +  return mpnh_merge(nhs, nh2, 1, r2, max, rte_update_pool);
>>  }
>>
>>  rte *
>> @@ -642,6 +661,24 @@ rt_export_merged(struct announce_hook *ah, net *net,
>> rte **rt_free, ea_list **tm
>>        best->attrs->nexthops = nhs;
>>      }
>>    }
>> +  else if (best->attrs->dest == RTD_MULTIPATH)
>> +  {
>> +    /*
>> +     * mpnh_merge, mpnh_same and mpnh_sub expect the nexthops list
>> +     * to be sorted, while the nexthops returned by the protocols,
>> +     * e.g. the "static" one, are actually not.
>> +     * This ensures the resulting entry has nexthops sorted,
>> +     * and makes the behavior consistent and agnostic to
>> +     * the number of elements in the best0 entries list
>> +     * (i.e. best0->next processing above)
>> +     */
>> +    nhs = mpnh_sort(best->attrs->nexthops, rte_update_pool);
>> +    if (nhs != best->attrs->nexthops)
>> +    {
>> +      best = rte_cow_rta(best, rte_update_pool);
>> +      best->attrs->nexthops = nhs;
>> +    }
>> +  }
>>
>>    if (best != best0)
>>      *rt_free = best;
>> diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
>> index 640d187..ca2648b 100644
>> --- a/sysdep/linux/netlink.c
>> +++ b/sysdep/linux/netlink.c
>> @@ -19,7 +19,6 @@
>>  #include "nest/route.h"
>>  #include "nest/protocol.h"
>>  #include "nest/iface.h"
>> -#include "lib/alloca.h"
>>  #include "lib/timer.h"
>>  #include "lib/unix.h"
>>  #include "lib/krt.h"
>> @@ -46,6 +45,32 @@
>>  #define RTA_TABLE  15
>>  #endif
>>
>> +/*
>> + * nl parse route context
>> + * its duty is
>> + * 1. To maintain the entry collect state -
>> + *      for IPv6 ECMP the nl parsing logic needs to collect
>> + *      separate individual entries, representing the multipath
>> + *      into one multipath entry
>> + * 2. To hold some temporary data used while parsing
>> + *    (like non-cached rta) on the stack.
>> + *
>> + *    Implementation note: the context actually maintain two rta entries:
>> + *    one to be used for the current rte being processed
>> + *    (i.e. being created as a result of the nl data parsing),
>> + *    another is used for the current rte being collected,
>> + *    (i.e. stored in collect_rte, and for which multipath entries are
>> being collected).
>> + *    process_attrs holds the index of the attrs, being used for rte
>> being processed.
>> + *    Once the rte being processed becomes the one being collected,
>> + *    the attrs used with it become "being collected", and another attrs
>> become "being processed".
>> + */
>> +typedef struct nl_parsectx
>> +{
>> +  struct krt_proto *collect_p; /* Protocol, for which entries are
>> currently being processed */
>> +  rte *collect_rte; /* Entry, for which multipath entries are currently
>> being collected */
>> +  int process_attrs; /* index in the attrs array for the entry to be
>> used for the "processed" entry */
>> +  rta attrs[2];
>> +} nl_parsectx;
>>
>>  /*
>>   *     Synchronous Netlink interface
>> @@ -62,6 +87,8 @@ struct nl_sock
>>
>>  #define NL_RX_SIZE 8192
>>
>> +static linpool *netlink_lp;
>> +
>>  static struct nl_sock nl_scan = {.fd = -1};    /* Netlink socket for
>> synchronous scan */
>>  static struct nl_sock nl_req  = {.fd = -1};    /* Netlink socket for
>> requests */
>>
>> @@ -803,7 +830,7 @@ nh_bufsize(struct mpnh *nh)
>>  }
>>
>>  static int
>> -nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int
>> new)
>> +nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int
>> new, int mp)
>>  {
>>    eattr *ea;
>>    net *net = e->net;
>> @@ -820,7 +847,8 @@ nl_send_route(struct krt_proto *p, rte *e, struct
>> ea_list *eattrs, int new)
>>    bzero(&r.r, sizeof(r.r));
>>    r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;
>>    r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
>> -  r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ?
>> NLM_F_CREATE|NLM_F_EXCL : 0);
>> +  r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK
>> +                 | (new ? NLM_F_CREATE | (!mp ? NLM_F_EXCL : 0) : 0);
>>
>>    r.r.rtm_family = BIRD_AF;
>>    r.r.rtm_dst_len = net->n.pxlen;
>> @@ -835,8 +863,12 @@ nl_send_route(struct krt_proto *p, rte *e, struct
>> ea_list *eattrs, int new)
>>
>>    /* For route delete, we do not specify route attributes */
>>    if (!new)
>> -    return nl_exchange(&r.h);
>> -
>> +    {
>> +      if (mp)
>> +        goto set_dest;
>> +      else
>> +        goto submit;
>> +    }
>>
>>    if (ea = ea_find(eattrs, EA_KRT_METRIC))
>>      nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, ea->u.data);
>> @@ -864,7 +896,7 @@ nl_send_route(struct krt_proto *p, rte *e, struct
>> ea_list *eattrs, int new)
>>
>>
>>    /* a->iface != NULL checked in krt_capable() for router and device
>> routes */
>> -
>> +set_dest:
>>    switch (a->dest)
>>      {
>>      case RTD_ROUTER:
>> @@ -892,10 +924,104 @@ nl_send_route(struct krt_proto *p, rte *e, struct
>> ea_list *eattrs, int new)
>>      default:
>>        bug("krt_capable inconsistent with nl_send_route");
>>      }
>> -
>> +submit:
>>    return nl_exchange(&r.h);
>>  }
>>
>> +/*
>> + * this is just to unify the code for bird1.x and bird2
>> + * for bird1.x it is just a define, resolving to 1
>> + * for IPV6 and 0 for IPV4
>> + *
>> + * for bird2 it is a function, making a decision based
>> + * on the p->p.table->addr_type
>> + *
>> + * static int
>> + * trk_is_use_collect_mode(struct krt_proto *p);
>> + */
>> +#ifdef IPV6
>> +#define trk_is_use_collect_mode(_p) 1
>> +#else
>> +#define trk_is_use_collect_mode(_p) 0
>> +#endif
>> +
>> +static struct mpnh *
>> +krt_mp_merge_rta(struct mpnh *nhs, rta *a, int max)
>> +{
>> +  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
>> +  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
>> +  return mpnh_merge(nhs, nh2, 1, 0, max, netlink_lp);
>> +}
>> +
>> +static struct mpnh *
>> +krt_mp_sub_rte_rta(rta *ax, rta *ay)
>> +{
>> +  struct mpnh nhx = { .gw = ax->gw, .iface = ax->iface };
>> +  struct mpnh nhy = { .gw = ay->gw, .iface = ay->iface };
>> +  struct mpnh *nhpx = (ax->dest == RTD_MULTIPATH) ? ax->nexthops : &nhx;
>> +  struct mpnh *nhpy = (ay->dest == RTD_MULTIPATH) ? ay->nexthops : &nhy;
>> +  return mpnh_sub(nhpx, nhpy, netlink_lp);
>> +}
>> +
>> +static int
>> +krt_send_nh_multipath(struct krt_proto *p, rte *base, struct mpnh *nh,
>> struct ea_list *eattrs, int new)
>> +{
>> +  rte *e;
>> +  int err = 0;
>> +  rta ra = {
>> +    .src= p->p.main_source,
>> +    .source = RTS_INHERIT,
>> +    .scope = SCOPE_UNIVERSE,
>> +    .cast = RTC_UNICAST
>> +  };
>> +
>> +  e = rte_get_temp(&ra);
>> +  e->net = base->net;
>> +  e->u.krt = base->u.krt;
>> +
>> +  for (; nh; nh = nh->next)
>> +    {
>> +      ra.gw = nh->gw;
>> +      ra.iface = nh->iface;
>> +
>> +      err = nl_send_route(p, e, eattrs, new, 1);
>> +      if (err < 0)
>> +        DBG("deleting route failed %d\n", err);
>> +    }
>> +
>> +  rte_free(e);
>> +
>> +  return err;
>> +}
>> +
>> +static int
>> +krt_adjust_rte_multipath(struct krt_proto *p, rte *new, rte *old, struct
>> ea_list *eattrs)
>> +{
>> +  struct mpnh *nhold, *nhnew;
>> +  int err = 0;
>> +
>> +  nhold = krt_mp_sub_rte_rta(old->attrs, new->attrs);
>> +  nhnew = krt_mp_sub_rte_rta(new->attrs, old->attrs);
>> +
>> +  if (nhold)
>> +  {
>> +    if (old->attrs->dest == RTD_MULTIPATH)
>> +      err = krt_send_nh_multipath(p, old, nhold, NULL, 0);
>> +    else
>> +      err = nl_send_route(p, old, NULL, 0, 1);
>> +  }
>> +
>> +  if (nhnew)
>> +    {
>> +      if (new->attrs->dest == RTD_MULTIPATH)
>> +        err |= krt_send_nh_multipath(p, new, nhnew, eattrs, 1);
>> +      else
>> +        err |= nl_send_route(p, new, eattrs, 1, 1);
>> +    }
>> +
>> +  return err;
>> +}
>> +
>>  void
>>  krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct
>> ea_list *eattrs)
>>  {
>> @@ -909,10 +1035,27 @@ krt_replace_rte(struct krt_proto *p, net *n, rte
>> *new, rte *old, struct ea_list
>>     */
>>
>>    if (old)
>> -    nl_send_route(p, old, NULL, 0);
>> +    {
>> +      if (trk_is_use_collect_mode(p))
>> +        {
>> +          if (new && ( new->attrs->dest == RTD_MULTIPATH
>> +                        || old->attrs->dest == RTD_MULTIPATH))
>> +            {
>> +              err = krt_adjust_rte_multipath(p, new, old, eattrs);
>> +              /* zero up "new" to ensure the below "if (new)" branch is
>> not triggered */
>> +              new = NULL;
>> +            }
>> +          else if (old->attrs->dest == RTD_MULTIPATH)
>> +            krt_send_nh_multipath(p, old, old->attrs->nexthops, NULL, 0);
>> +          else
>> +            nl_send_route(p, old, NULL, 0, 0);
>> +        }
>> +      else
>> +        nl_send_route(p, old, NULL, 0, 0);
>> +    }
>>
>>    if (new)
>> -    err = nl_send_route(p, new, eattrs, 1);
>> +    err = nl_send_route(p, new, eattrs, 1, 0);
>>
>>    if (err < 0)
>>      n->n.flags |= KRF_SYNC_ERROR;
>> @@ -920,11 +1063,138 @@ krt_replace_rte(struct krt_proto *p, net *n, rte
>> *new, rte *old, struct ea_list
>>      n->n.flags &= ~KRF_SYNC_ERROR;
>>  }
>>
>> +static int
>> +krt_mp_is_collectable(struct krt_proto *p, rte *e)
>> +{
>> +  if (!trk_is_use_collect_mode(p))
>> +    return 0;
>> +
>> +  struct rta *a = e->attrs;
>> +
>> +  if (a->dest != RTD_ROUTER && a->dest != RTD_DEVICE)
>> +      return 0;
>> +
>> +  return 1;
>> +}
>> +
>> +static int
>> +krt_mp_is_mergable(struct krt_proto *p, rte *e1, rte *e2)
>> +{
>> +  if (e1->net != e2->net)
>> +    return 0;
>> +
>> +  if (!rte_is_valid(e1) || !rte_is_valid(e2))
>> +    return 0;
>> +
>> +  if (e1->pref != e2->pref)
>> +    return 0;
>> +
>> +  if (e1->attrs->src->proto->proto != e2->attrs->src->proto->proto)
>> +    return 0;
>> +
>> +  return 1;
>> +}
>> +
>> +static rte *
>> +krt_mp_collect_do_add(struct krt_proto *p, rte *mp_collect_rte, rte *e)
>> +{
>> +  struct rta *attrs = mp_collect_rte->attrs;
>> +
>> +  ASSERT(!rta_is_cached(attrs));
>> +
>> +  /* sanity to check our tmp attrs selection logic works correctly */
>> +  ASSERT(attrs != e->attrs);
>> +
>> +  if (attrs->dest != RTD_MULTIPATH)
>> +    {
>> +      attrs->nexthops = krt_mp_merge_rta(NULL, attrs, p->p.merge_limit);
>> +      attrs->dest = RTD_MULTIPATH;
>> +    }
>> +
>> +  attrs->nexthops = krt_mp_merge_rta(attrs->nexthops, e->attrs,
>> p->p.merge_limit);
>> +
>> +  return mp_collect_rte;
>> +}
>> +
>> +static int
>> +krt_mp_can_collect(struct krt_proto *p, rte *mp_collect_rte, rte *e)
>> +{
>> +  if (!krt_mp_is_collectable(p, e))
>> +    return 0;
>> +
>> +  if (!krt_mp_is_mergable(p, mp_collect_rte, e))
>> +    return 0;
>> +
>> +  return 1;
>> +}
>> +
>> +static rta* nl_parse_get_tmp_rta(nl_parsectx *ctx)
>> +{
>> +  rta *a = &ctx->attrs[ctx->process_attrs];
>> +
>> +  memset(a, 0, sizeof(*a));
>> +  return a;
>> +}
>> +
>> +static void nl_parse_collect_complete(nl_parsectx *ctx)
>> +{
>> +  if (ctx->collect_p)
>> +    {
>> +      DBG("KRT: collected\n");
>> +      krt_got_route(ctx->collect_p, ctx->collect_rte);
>> +      ctx->collect_p = NULL;
>> +      ctx->collect_rte = NULL;
>> +      lp_flush(netlink_lp);
>> +    }
>> +}
>> +
>> +static void
>> +nl_parse_collect_rte(nl_parsectx *ctx, struct krt_proto *p, rte *e)
>> +{
>> +  if (ctx->collect_p)
>> +    {
>> +      ASSERT(ctx->collect_rte);
>> +      if (ctx->collect_p == p && krt_mp_can_collect(p, ctx->collect_rte,
>> e))
>> +      {
>> +        ctx->collect_rte = krt_mp_collect_do_add(p, ctx->collect_rte, e);
>> +        DBG("KRT: collecting[add]\n");
>> +        return;
>> +      }
>> +
>> +      nl_parse_collect_complete(ctx);
>> +    }
>> +
>> +  ASSERT(!ctx->collect_p);
>> +  ASSERT(!ctx->collect_rte);
>> +
>> +  if (krt_mp_is_collectable(p, e))
>> +    {
>> +      ASSERT(e->attrs == &ctx->attrs[ctx->process_attrs]);
>> +      ASSERT(!rta_is_cached(e->attrs));
>> +      ctx->collect_p = p;
>> +      ctx->collect_rte = e;
>> +      ctx->process_attrs = (ctx->process_attrs + 1) % 2;
>> +      DBG("KRT: collecting\n");
>> +      return;
>> +    }
>> +
>> +  krt_got_route(p, e);
>> +}
>> +
>> +static void nl_parse_begin(nl_parsectx *ctx)
>> +{
>> +  memset(ctx, 0, sizeof (*ctx));
>> +}
>> +
>> +static void nl_parse_end(nl_parsectx *ctx)
>> +{
>> +  nl_parse_collect_complete(ctx);
>> +}
>>
>>  #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; }
>> while(0)
>>
>>  static void
>> -nl_parse_route(struct nlmsghdr *h, int scan)
>> +nl_parse_route(nl_parsectx *ctx, struct nlmsghdr *h, int scan)
>>  {
>>    struct krt_proto *p;
>>    struct rtmsg *i;
>> @@ -1022,12 +1292,12 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>
>>    net *net = net_get(p->p.table, dst, i->rtm_dst_len);
>>
>> -  rta ra = {
>> -    .src= p->p.main_source,
>> -    .source = RTS_INHERIT,
>> -    .scope = SCOPE_UNIVERSE,
>> -    .cast = RTC_UNICAST
>> -  };
>> +  rta *ra = nl_parse_get_tmp_rta(ctx);
>> +
>> +  ra->src= p->p.main_source,
>> +  ra->source = RTS_INHERIT,
>> +  ra->scope = SCOPE_UNIVERSE,
>> +  ra->cast = RTC_UNICAST;
>>
>>    switch (i->rtm_type)
>>      {
>> @@ -1035,9 +1305,9 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>
>>        if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
>>         {
>> -         ra.dest = RTD_MULTIPATH;
>> -         ra.nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
>> -         if (!ra.nexthops)
>> +         ra->dest = RTD_MULTIPATH;
>> +         ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
>> +         if (!ra->nexthops)
>>             {
>>               log(L_ERR "KRT: Received strange multipath route %I/%d",
>>                   net->n.prefix, net->n.pxlen);
>> @@ -1047,8 +1317,8 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>           break;
>>         }
>>
>> -      ra.iface = if_find_by_index(oif);
>> -      if (!ra.iface)
>> +      ra->iface = if_find_by_index(oif);
>> +      if (!ra->iface)
>>         {
>>           log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
>>               net->n.prefix, net->n.pxlen, oif);
>> @@ -1058,39 +1328,39 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>        if (a[RTA_GATEWAY])
>>         {
>>           neighbor *ng;
>> -         ra.dest = RTD_ROUTER;
>> -         memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw));
>> -         ipa_ntoh(ra.gw);
>> +         ra->dest = RTD_ROUTER;
>> +         memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw));
>> +         ipa_ntoh(ra->gw);
>>
>>  #ifdef IPV6
>>           /* Silently skip strange 6to4 routes */
>> -         if (ipa_in_net(ra.gw, IPA_NONE, 96))
>> +         if (ipa_in_net(ra->gw, IPA_NONE, 96))
>>             return;
>>  #endif
>>
>> -         ng = neigh_find2(&p->p, &ra.gw, ra.iface,
>> +         ng = neigh_find2(&p->p, &ra->gw, ra->iface,
>>                            (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK :
>> 0);
>>           if (!ng || (ng->scope == SCOPE_HOST))
>>             {
>>               log(L_ERR "KRT: Received route %I/%d with strange next-hop
>> %I",
>> -                 net->n.prefix, net->n.pxlen, ra.gw);
>> +                 net->n.prefix, net->n.pxlen, ra->gw);
>>               return;
>>             }
>>         }
>>        else
>>         {
>> -         ra.dest = RTD_DEVICE;
>> +         ra->dest = RTD_DEVICE;
>>         }
>>
>>        break;
>>      case RTN_BLACKHOLE:
>> -      ra.dest = RTD_BLACKHOLE;
>> +      ra->dest = RTD_BLACKHOLE;
>>        break;
>>      case RTN_UNREACHABLE:
>> -      ra.dest = RTD_UNREACHABLE;
>> +      ra->dest = RTD_UNREACHABLE;
>>        break;
>>      case RTN_PROHIBIT:
>> -      ra.dest = RTD_PROHIBIT;
>> +      ra->dest = RTD_PROHIBIT;
>>        break;
>>      /* FIXME: What about RTN_THROW? */
>>      default:
>> @@ -1098,7 +1368,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>        return;
>>      }
>>
>> -  rte *e = rte_get_temp(&ra);
>> +  rte *e = rte_get_temp(ra);
>>    e->net = net;
>>    e->u.krt.src = src;
>>    e->u.krt.proto = i->rtm_protocol;
>> @@ -1114,24 +1384,24 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>        memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));
>>        ipa_ntoh(ps);
>>
>> -      ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
>> -      ea->next = ra.eattrs;
>> -      ra.eattrs = ea;
>> +      ea_list *ea = lp_alloc(netlink_lp, sizeof(ea_list) +
>> sizeof(eattr));
>> +      ea->next = ra->eattrs;
>> +      ra->eattrs = ea;
>>        ea->flags = EALF_SORTED;
>>        ea->count = 1;
>>        ea->attrs[0].id = EA_KRT_PREFSRC;
>>        ea->attrs[0].flags = 0;
>>        ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
>> -      ea->attrs[0].u.ptr = alloca(sizeof(struct adata) + sizeof(ps));
>> +      ea->attrs[0].u.ptr = lp_alloc(netlink_lp, sizeof(struct adata) +
>> sizeof(ps));
>>        ea->attrs[0].u.ptr->length = sizeof(ps);
>>        memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
>>      }
>>
>>    if (a[RTA_FLOW])
>>      {
>> -      ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
>> -      ea->next = ra.eattrs;
>> -      ra.eattrs = ea;
>> +      ea_list *ea = lp_alloc(netlink_lp, sizeof(ea_list) +
>> sizeof(eattr));
>> +      ea->next = ra->eattrs;
>> +      ra->eattrs = ea;
>>        ea->flags = EALF_SORTED;
>>        ea->count = 1;
>>        ea->attrs[0].id = EA_KRT_REALM;
>> @@ -1143,7 +1413,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>    if (a[RTA_METRICS])
>>      {
>>        u32 metrics[KRT_METRICS_MAX];
>> -      ea_list *ea = alloca(sizeof(ea_list) + KRT_METRICS_MAX *
>> sizeof(eattr));
>> +      ea_list *ea = lp_alloc(netlink_lp, sizeof(ea_list) +
>> KRT_METRICS_MAX * sizeof(eattr));
>>        int t, n = 0;
>>
>>        if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics))
>> < 0)
>> @@ -1165,15 +1435,15 @@ nl_parse_route(struct nlmsghdr *h, int scan)
>>
>>        if (n > 0)
>>          {
>> -         ea->next = ra.eattrs;
>> +         ea->next = ra->eattrs;
>>           ea->flags = EALF_SORTED;
>>           ea->count = n;
>> -         ra.eattrs = ea;
>> +         ra->eattrs = ea;
>>         }
>>      }
>>
>>    if (scan)
>> -    krt_got_route(p, e);
>> +    nl_parse_collect_rte(ctx, p, e);
>>    else
>>      krt_got_route_async(p, e, new);
>>  }
>> @@ -1182,13 +1452,19 @@ void
>>  krt_do_scan(struct krt_proto *p UNUSED)        /*
>> CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
>>  {
>>    struct nlmsghdr *h;
>> +  nl_parsectx ctx;
>>
>>    nl_request_dump(BIRD_AF, RTM_GETROUTE);
>> +
>> +  nl_parse_begin(&ctx);
>> +
>>    while (h = nl_get_scan())
>>      if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
>> -      nl_parse_route(h, 1);
>> +      nl_parse_route(&ctx, h, 1);
>>      else
>>        log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)",
>> h->nlmsg_type);
>> +
>> +  nl_parse_end(&ctx);
>>  }
>>
>>  /*
>> @@ -1201,12 +1477,16 @@ static byte *nl_async_rx_buffer;        /*
>> Receive buffer */
>>  static void
>>  nl_async_msg(struct nlmsghdr *h)
>>  {
>> +  nl_parsectx ctx;
>> +
>>    switch (h->nlmsg_type)
>>      {
>>      case RTM_NEWROUTE:
>>      case RTM_DELROUTE:
>>        DBG("KRT: Received async route notification (%d)\n",
>> h->nlmsg_type);
>> -      nl_parse_route(h, 0);
>> +      nl_parse_begin(&ctx);
>> +      nl_parse_route(&ctx, h, 0);
>> +      nl_parse_end(&ctx);
>>        break;
>>      case RTM_NEWLINK:
>>      case RTM_DELLINK:
>> @@ -1325,6 +1605,7 @@ void
>>  krt_sys_io_init(void)
>>  {
>>    HASH_INIT(nl_table_map, krt_pool, 6);
>> +  netlink_lp = lp_new(krt_pool, 4080);
>>  }
>>
>>  int
>> --
>> 2.5.0
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://trubka.network.cz/pipermail/bird-users/attachments/20160404/3dc03de3/attachment.html>


More information about the Bird-users mailing list