[PATCH] RFC: Add netlink KRT dump filters on Linux

Tomas Hlavacek tmshlvck at gmail.com
Mon Jan 10 23:47:57 CET 2022


Add a netlink KRT dump filter on Linux so that PMTU cache records from the
FNHE table are not dumped along with the KRT.

The Linux kernel added the FNHE table dump to the netlink API in this patch:
 https://patchwork.ozlabs.org/project/netdev/patch/8d3b68cd37fb5fddc470904cdd6793fcf480c6c1.1561131177.git.sbrivio@redhat.com/

The filter mitigates the risk of receiving an unknown and potentially large
number of FNHE records that would block BIRD I/O in each sync. There is a
known issue caused by GRE tunnels on Linux, which seem to create one FNHE
record for each destination IP address that is routed through the tunnel,
even when the PMTU equals the GRE interface MTU (tested with kernels
5.5 - 5.16-rc7).
---
 sysdep/linux/netlink.c | 44 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)

diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
index f85bcf35..79414122 100644
--- a/sysdep/linux/netlink.c
+++ b/sysdep/linux/netlink.c
@@ -128,7 +128,7 @@ struct nl_sock
   uint last_size;
 };
 
-#define NL_RX_SIZE 8192
+#define NL_RX_SIZE 32768
 
 #define NL_OP_DELETE	0
 #define NL_OP_ADD	(NLM_F_CREATE|NLM_F_EXCL)
@@ -143,11 +143,18 @@ static struct nl_sock nl_req  = {.fd = -1};	/* Netlink socket for requests */
 static void
 nl_open_sock(struct nl_sock *nl)
 {
+  int sndbuf = 32768;
+  int rcvbuf = 1024*1024;
+
   if (nl->fd < 0)
     {
-      nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+      nl->fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
       if (nl->fd < 0)
 	die("Unable to open rtnetlink socket: %m");
+
+      setsockopt(nl->fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf));
+      setsockopt(nl->fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf));
+
       nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
       nl->rx_buffer = xmalloc(NL_RX_SIZE);
       nl->last_hdr = NULL;
@@ -155,6 +162,12 @@ nl_open_sock(struct nl_sock *nl)
     }
 }
 
+static void	/* Set the NETLINK_GET_STRICT_CHK socket option of nl->fd to @strict */
+nl_set_strict_dump(struct nl_sock *nl, int strict)	/* krt_do_scan() passes 1 before its dump, 0 after */
+{
+  (void) setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof strict);	/* result is ignored */
+}
+
 static void
 nl_open(void)
 {
@@ -192,6 +205,29 @@ nl_request_dump(int af, int cmd)
   nl_send(&nl_scan, &req.nh);
 }
 
+/* Request a dump of @cmd for family @af on nl_scan, using a struct rtmsg header with zero rtm_flags */
+static void
+nl_request_dump_rt(int af, int cmd)
+{
+  struct {
+    struct nlmsghdr nh;
+    struct rtmsg rtm;
+  } req = {
+    .nh.nlmsg_type = cmd,
+    .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
+    .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
+    .nh.nlmsg_seq = ++(nl_scan.seq),
+    .rtm.rtm_protocol = RTPROT_UNSPEC,
+    .rtm.rtm_family = af
+    /* .nh.nlmsg_pid and .rtm.rtm_flags default to zero, hence RTM_F_CLONED is not set */
+  };
+
+  if (send(nl_scan.fd, &req, req.nh.nlmsg_len, 0) < 0)	/* send just the message, no trailing bytes */
+    die("rtnetlink send: %m");	/* do not silently ignore a failed request */
+  nl_scan.last_hdr = NULL;
+}
+
+
 static struct nlmsghdr *
 nl_get_reply(struct nl_sock *nl)
 {
@@ -1864,13 +1900,15 @@ krt_do_scan(struct krt_proto *p UNUSED)	/* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
   struct nl_parse_state s;
 
   nl_parse_begin(&s, 1);
-  nl_request_dump(AF_UNSPEC, RTM_GETROUTE);
+  nl_set_strict_dump(&nl_scan, 1);
+  nl_request_dump_rt(AF_UNSPEC, RTM_GETROUTE);
   while (h = nl_get_scan())
     if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
       nl_parse_route(&s, h);
     else
       log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
   nl_parse_end(&s);
+  nl_set_strict_dump(&nl_scan, 0);
 }
 
 /*
-- 
2.25.1



More information about the Bird-users mailing list