[PATCH] Filter: Add support for setting TCP congestion control algorithm

Trisha Biswas tbiswas at fastly.com
Fri Mar 18 00:32:02 CET 2022


Add support for setting the TCP congestion control algorithm per
destination by modifying route attributes in BIRD. Previously, all kernel
route attributes supported by BIRD were integer values. Consequently, this
patch also adds string handling for kernel metrics.


Usage: set krt_cc_algo in the BIRD filter configuration.

The allowed values are those listed in the
net.ipv4.tcp_allowed_congestion_control sysctl.


---

diff --git a/doc/bird.sgml b/doc/bird.sgml
index 1d5ae056..310aef37 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -3377,7 +3377,7 @@ Supported attributes are:
 <cf/krt_sstresh/, <cf/krt_lock_sstresh/, <cf/krt_cwnd/, <cf/krt_lock_cwnd/,
 <cf/krt_advmss/, <cf/krt_lock_advmss/, <cf/krt_reordering/,
<cf/krt_lock_reordering/,
 <cf/krt_hoplimit/, <cf/krt_lock_hoplimit/, <cf/krt_rto_min/,
<cf/krt_lock_rto_min/,
-<cf/krt_initcwnd/, <cf/krt_initrwnd/, <cf/krt_quickack/,
+<cf/krt_initcwnd/, <cf/krt_initrwnd/, <cf/krt_quickack/, <cf/krt_cc_algo/,
 <cf/krt_feature_ecn/, <cf/krt_feature_allfrag/

 <sect1>Example
diff --git a/filter/config.Y b/filter/config.Y
index 8916ea97..8916d08d 100644
--- a/filter/config.Y
+++ b/filter/config.Y
@@ -178,6 +178,9 @@ f_generate_empty(struct f_dynamic_attr dyn)
     case EAF_TYPE_LC_SET:
       empty = f_const_empty_lclist;
       break;
+    case EAF_TYPE_CC_ALGO:
+      empty = f_const_empty_cc_algo;
+      break;
     default:
       cf_error("Can't empty that attribute");
   }
@@ -816,6 +819,7 @@ term:
  | '-' EMPTY '-' { $$ = f_new_inst(FI_CONSTANT, f_const_empty_clist); }
  | '-' '-' EMPTY '-' '-' { $$ = f_new_inst(FI_CONSTANT,
f_const_empty_eclist); }
  | '-' '-' '-' EMPTY '-' '-' '-' { $$ = f_new_inst(FI_CONSTANT,
f_const_empty_lclist); }
+ | '-' '-' '-' '-' EMPTY '-' '-' '-' '-' { $$ = f_new_inst(FI_CONSTANT,
f_const_empty_cc_algo); }
  | PREPEND '(' term ',' term ')' { $$ = f_new_inst(FI_PATH_PREPEND, $3,
$5); }
  | ADD '(' term ',' term ')' { $$ = f_new_inst(FI_CLIST_ADD, $3, $5); }
  | DELETE '(' term ',' term ')' { $$ = f_new_inst(FI_CLIST_DEL, $3, $5); }
diff --git a/filter/data.c b/filter/data.c
index 56c1fb17..84e91d59 100644
--- a/filter/data.c
+++ b/filter/data.c
@@ -91,6 +91,9 @@ const struct f_val f_const_empty_path = {
 }, f_const_empty_lclist = {
   .type = T_LCLIST,
   .val.ad = &null_adata,
+}, f_const_empty_cc_algo = {
+  .type = T_STRING,
+  .val.ad = &null_adata,
 };

 static struct adata *
diff --git a/filter/data.h b/filter/data.h
index 4cb6b7a8..46216e92 100644
--- a/filter/data.h
+++ b/filter/data.h
@@ -262,6 +262,7 @@ trie_match_next_longest_ip6(net_addr_ip6 *n, ip6_addr
*found)


 #define F_CMP_ERROR 999
+#define TCP_CA_NAME_MAX 16

 const char *f_type_name(enum f_type t);

@@ -297,7 +298,7 @@ undef_value(struct f_val v)
     (v.val.ad == &null_adata);
 }

-extern const struct f_val f_const_empty_path, f_const_empty_clist,
f_const_empty_eclist, f_const_empty_lclist;
+extern const struct f_val f_const_empty_path, f_const_empty_clist,
f_const_empty_eclist, f_const_empty_lclist, f_const_empty_cc_algo;

 enum filter_return f_eval(const struct f_line *expr, struct linpool
*tmp_pool, struct f_val *pres);

diff --git a/filter/f-inst.c b/filter/f-inst.c
index 901d2939..735c643a 100644
--- a/filter/f-inst.c
+++ b/filter/f-inst.c
@@ -709,6 +709,9 @@
       case EAF_TYPE_LC_SET:
  RESULT_(T_LCLIST, ad, e->u.ptr);
  break;
+      case EAF_TYPE_CC_ALGO:
+ RESULT_(T_STRING, s, (const char *) e->u.ptr->data);
+ break;
       case EAF_TYPE_UNDEF:
  RESULT_VOID;
  break;
@@ -758,7 +761,16 @@
       case EAF_TYPE_LC_SET:
  l->attrs[0].u.ptr = v1.val.ad;
  break;
-
+      case EAF_TYPE_CC_ALGO:
+ if (v1.type != T_STRING)
+  runtime( "Setting cc_algo attribute to non-string value" );
+ else if (strlen(v1.val.s) >= TCP_CA_NAME_MAX)
+  runtime( "Setting cc_algo attribute out of bounds (> 15 chars)" );
+ /* Store cc_algo string in byte[], making sure to copy the null
terminator */
+ struct adata *d = lp_alloc_adata(fs->pool, strlen(v1.val.s) + 1);
+ memcpy(d->data, v1.val.s, d->length);
+ l->attrs[0].u.ptr = d;
+ break;
       case EAF_TYPE_BITFIELD:
  {
   /* First, we have to find the old value */
diff --git a/filter/f-util.c b/filter/f-util.c
index 410999a6..3cf68270 100644
--- a/filter/f-util.c
+++ b/filter/f-util.c
@@ -121,6 +121,9 @@ ca_lookup(pool *p, const char *name, int f_type)
       break;
     case T_LCLIST:
       ea_type = EAF_TYPE_LC_SET;
+      break;
+    case T_STRING:
+      ea_type = EAF_TYPE_CC_ALGO;
       break;
     default:
       cf_error("Custom route attribute of unsupported type");
diff --git a/nest/route.h b/nest/route.h
index 7930058a..053d2f1e 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -587,6 +587,7 @@ const char *ea_custom_name(uint ea);
 #define EAF_TYPE_INT_SET 0x0a /* Set of u32's (e.g., a community list) */
 #define EAF_TYPE_EC_SET 0x0e /* Set of pairs of u32's - ext. community
list */
 #define EAF_TYPE_LC_SET 0x12 /* Set of triplets of u32's - large community
list */
+#define EAF_TYPE_CC_ALGO 0x18 /* String to specify congestion control
algorithm */
 #define EAF_TYPE_UNDEF 0x1f /* `force undefined' entry */
 #define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type
spec) */
 #define EAF_VAR_LENGTH 0x02 /* Attribute length is variable (part of type
spec) */
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index c630aa95..b4fdfd85 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -969,6 +969,10 @@ ea_show(struct cli *c, const eattr *e)
  case EAF_TYPE_LC_SET:
   ea_show_lc_set(c, ad, pos, buf, end);
   return;
+ case EAF_TYPE_CC_ALGO:
+  /* Only formats into buf (bounded by the space left); leave the switch like the default case instead of returning */
+  bsnprintf(pos, end - pos, "%s", (const char *) ad->data);
+  break;
  case EAF_TYPE_UNDEF:
  default:
   bsprintf(pos, "<type %02x>", e->type);
diff --git a/sysdep/linux/krt-sys.h b/sysdep/linux/krt-sys.h
index 8897f889..f265ceb7 100644
--- a/sysdep/linux/krt-sys.h
+++ b/sysdep/linux/krt-sys.h
@@ -39,7 +39,7 @@ static inline struct ifa * kif_get_primary_ip(struct
iface *i UNUSED) { return N
 #define EA_KRT_SCOPE EA_CODE(PROTOCOL_KERNEL, 0x12)


-#define KRT_METRICS_MAX 0x10 /* RTAX_QUICKACK+1 */
+#define KRT_METRICS_MAX 0x11 /* RTAX_CC_ALGO+1 */
 #define KRT_METRICS_OFFSET 0x20 /* Offset of EA_KRT_* vs RTAX_* */

 #define KRT_FEATURES_MAX 4
@@ -64,7 +64,7 @@ static inline struct ifa * kif_get_primary_ip(struct
iface *i UNUSED) { return N
 #define EA_KRT_RTO_MIN EA_CODE(PROTOCOL_KERNEL, 0x2d)
 #define EA_KRT_INITRWND EA_CODE(PROTOCOL_KERNEL, 0x2e)
 #define EA_KRT_QUICKACK EA_CODE(PROTOCOL_KERNEL, 0x2f)
-
+#define EA_KRT_CC_ALGO EA_CODE(PROTOCOL_KERNEL, 0x30)

 struct krt_params {
   u32 table_id; /* Kernel table ID we sync with */
diff --git a/sysdep/linux/netlink.Y b/sysdep/linux/netlink.Y
index 487ad1d8..b07fa842 100644
--- a/sysdep/linux/netlink.Y
+++ b/sysdep/linux/netlink.Y
@@ -14,7 +14,7 @@ CF_KEYWORDS(KERNEL, TABLE, METRIC, NETLINK, RX, BUFFER,
     KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW,
     KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING,
     KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK,
-    KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR,
+    KRT_CC_ALGO, KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT,
KRT_LOCK_RTTVAR,
     KRT_LOCK_SSTRESH, KRT_LOCK_CWND, KRT_LOCK_ADVMSS, KRT_LOCK_REORDERING,
     KRT_LOCK_HOPLIMIT, KRT_LOCK_RTO_MIN, KRT_FEATURE_ECN,
KRT_FEATURE_ALLFRAG)

@@ -45,6 +45,7 @@ dynamic_attr: KRT_INITCWND { $$ =
f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT
 dynamic_attr: KRT_RTO_MIN { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT,
EA_KRT_RTO_MIN); } ;
 dynamic_attr: KRT_INITRWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT,
EA_KRT_INITRWND); } ;
 dynamic_attr: KRT_QUICKACK { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT,
EA_KRT_QUICKACK); } ;
+dynamic_attr: KRT_CC_ALGO { $$ = f_new_dynamic_attr(EAF_TYPE_CC_ALGO,
T_STRING, EA_KRT_CC_ALGO); } ;

 /* Bits of EA_KRT_LOCK, based on RTAX_* constants */

diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
index 29b744cb..4d4d704b 100644
--- a/sysdep/linux/netlink.c
+++ b/sysdep/linux/netlink.c
@@ -73,6 +73,10 @@
 #define NETLINK_GET_STRICT_CHK 12
 #endif

+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX 16
+#endif
+
 #define krt_ipv4(p) ((p)->af == AF_INET)
 #define krt_ecmp6(p) ((p)->af == AF_INET6)

@@ -534,6 +538,9 @@ static inline u16 rta_get_u16(struct rtattr *a)
 static inline u32 rta_get_u32(struct rtattr *a)
 { return *(u32 *) RTA_DATA(a); }

+static inline char *rta_get_str(struct rtattr *a)
+{ return (char *) RTA_DATA(a); }
+
 static inline ip4_addr rta_get_ip4(struct rtattr *a)
 { return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }

@@ -624,6 +631,12 @@ nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int
code, u32 data)
   nl_add_attr(h, bufsize, code, &data, 4);
 }

+static inline void
+nl_add_attr_str(struct nlmsghdr *h, unsigned bufsize, int code, char *str)
+{
+  nl_add_attr(h, bufsize, code, str, strlen(str) + 1);
+}
+
 static inline void
 nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
 {
@@ -880,20 +893,25 @@ err:
 }

 static void
-nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
+nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, char
*cc_algo, int max)
 {
   struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
   int t;

-  for (t = 1; t < max; t++)
-    if (metrics[0] & (1 << t))
-      nl_add_attr_u32(h, bufsize, t, metrics[t]);
+  for (t = 1; t < max; t++) {
+    if (metrics[0] & (1 << t)) {
+      if (EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t) ==
EA_KRT_CC_ALGO)
+        nl_add_attr_str(h, bufsize, t, cc_algo);
+      else
+        nl_add_attr_u32(h, bufsize, t, metrics[t]);
+    }
+  }

   nl_close_attr(h, a);
 }

 static int
-nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
+nl_parse_metrics(struct rtattr *hdr, u32 *metrics, char *cc_algo, int max)
 {
   struct rtattr *a = RTA_DATA(hdr);
   int len = RTA_PAYLOAD(hdr);
@@ -911,7 +929,19 @@ nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int
max)
       return -1;

     metrics[0] |= 1 << a->rta_type;
-    metrics[a->rta_type] = rta_get_u32(a);
+
+    if (EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + a->rta_type) ==
EA_KRT_CC_ALGO) {
+      char *str = rta_get_str(a);
+      if (strlen(str) < TCP_CA_NAME_MAX) {
+        memcpy(cc_algo, str, strlen(str) + 1);
+      } else {
+        log(L_ERR "KRT: Received route with cc_algo attribute out of
bounds (> 15 chars)");
+        return -1;
+      }
+      metrics[a->rta_type] = 0;
+    } else {
+      metrics[a->rta_type] = rta_get_u32(a);
+    }
   }

   if (len > 0)
@@ -1427,6 +1457,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op,
int dest, struct nexthop *nh)


   u32 metrics[KRT_METRICS_MAX];
+  char cc_algo[TCP_CA_NAME_MAX];
   metrics[0] = 0;

   struct ea_walk_state ews = { .eattrs = eattrs };
@@ -1434,11 +1465,16 @@ nl_send_route(struct krt_proto *p, rte *e, int op,
int dest, struct nexthop *nh)
   {
     int id = ea->id - EA_KRT_METRICS;
     metrics[0] |= 1 << id;
-    metrics[id] = ea->u.data;
+    if(ea->id == EA_KRT_CC_ALGO) {
+      metrics[id] = 0;
+      memcpy(cc_algo, ea->u.ptr->data, ea->u.ptr->length);
+    } else {
+      metrics[id] = ea->u.data;
+    }
   }

   if (metrics[0])
-    nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
+    nl_add_metrics(&r->h, rsize, metrics, cc_algo, KRT_METRICS_MAX);


 dest:
@@ -1907,10 +1943,12 @@ nl_parse_route(struct nl_parse_state *s, struct
nlmsghdr *h)
   if (a[RTA_METRICS])
     {
       u32 metrics[KRT_METRICS_MAX];
+      char *cc_algo = lp_alloc(s->pool, TCP_CA_NAME_MAX);
       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX *
sizeof(eattr));
+      struct adata *d = lp_alloc(s->pool, sizeof(struct adata) +
TCP_CA_NAME_MAX);
       int t, n = 0;

-      if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) <
0)
+      if (nl_parse_metrics(a[RTA_METRICS], metrics, cc_algo,
ARRAY_SIZE(metrics)) < 0)
         {
   log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute",
net->n.addr);
   return;
@@ -1921,8 +1959,15 @@ nl_parse_route(struct nl_parse_state *s, struct
nlmsghdr *h)
   {
     ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
     ea->attrs[n].flags = 0;
-    ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD
*/
-    ea->attrs[n].u.data = metrics[t];
+    if (ea->attrs[n].id == EA_KRT_CC_ALGO) {
+      ea->attrs[n].type = EAF_TYPE_CC_ALGO;
+      d->length = strlen(cc_algo) + 1;
+      memcpy(d->data, cc_algo, d->length);
+      ea->attrs[n].u.ptr = d;
+    } else {
+      ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are
EAF_TYPE_BITFIELD */
+      ea->attrs[n].u.data = metrics[t];
+    }
     n++;
   }

@@ -2225,7 +2270,8 @@ krt_sys_copy_config(struct krt_config *d, struct
krt_config *s)

 static const char *krt_metrics_names[KRT_METRICS_MAX] = {
   NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd",
"advmss",
-  "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd",
"quickack"
+  "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd",
"quickack",
+  "cc_algo"
 };

 static const char *krt_features_names[KRT_FEATURES_MAX] = {

---


Please also find the patch attached.


Thank you,

Trisha
--

*Trisha Biswas* | Sr. Software Engineer, Network Systems
fastly.com | @fastly <https://twitter.com/fastly> | LinkedIn
<http://www.linkedin.com/company/fastly>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://trubka.network.cz/pipermail/bird-users/attachments/20220317/650382f9/attachment.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: cc-algo.patch
Type: application/octet-stream
Size: 12826 bytes
Desc: not available
URL: <http://trubka.network.cz/pipermail/bird-users/attachments/20220317/650382f9/attachment.obj>


More information about the Bird-users mailing list