[PATCH 2/3] babel: Add support for the RTT extension

Toke Høiland-Jørgensen toke at toke.dk
Sun Feb 26 23:10:05 CET 2023


This adds support to the Babel protocol for the RTT extension specified in
draft-ietf-babel-rtt-extension. While this extension is not yet at the RFC
stage, it is one of the more useful extensions to Babel[0], so it seems
worth having in Bird as well.

The extension adds timestamps to Hello and IHU TLVs and uses these to
compute an RTT to each neighbour. An extra per-neighbour cost is then
computed from the RTT based on a minimum and maximum interval and cost
value specified in the configuration. The primary use case for this is
improving routing in a geographically distributed tunnel-based overlay
network.

The implementation follows the babeld implementation when picking constants
and default configuration values. It also uses the same RTT smoothing
algorithm as babeld, and follows it in adding a new 'tunnel' interface type
which enables RTT by default.

[0] https://alioth-lists.debian.net/pipermail/babel-users/2022-April/003932.html

Signed-off-by: Toke Høiland-Jørgensen <toke at toke.dk>
---
 doc/bird.sgml         |  51 ++++++++++++++---
 proto/babel/babel.c   |  84 ++++++++++++++++++++++++++--
 proto/babel/babel.h   |  24 ++++++++
 proto/babel/config.Y  |  20 ++++++-
 proto/babel/packets.c | 126 ++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 288 insertions(+), 17 deletions(-)

diff --git a/doc/bird.sgml b/doc/bird.sgml
index 85711c31336f..451dff4031d5 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -1916,7 +1916,7 @@ protocol babel [<name>] {
 	ipv6 [sadr] { <channel config> };
         randomize router id <switch>;
 	interface <interface pattern> {
-		type <wired|wireless>;
+		type <wired|wireless|tunnel>;
 		rxcost <number>;
 		limit <number>;
 		hello interval <time>;
@@ -1930,6 +1930,11 @@ protocol babel [<name>] {
 		next hop ipv4 <address>;
 		next hop ipv6 <address>;
 		extended next hop <switch>;
+		rtt cost <number>;
+		rtt min <time>;
+		rtt max <time>;
+		rtt decay <number>;
+		send timestamps <switch>;
 		authentication none|mac [permissive];
 		password "<text>";
 		password "<text>" {
@@ -1960,15 +1965,16 @@ protocol babel [<name>] {
       router ID every time it starts up, which avoids this problem at the cost
       of not having stable router IDs in the network. Default: no.
 
-      <tag><label id="babel-type">type wired|wireless </tag>
-      This option specifies the interface type: Wired or wireless. On wired
-      interfaces a neighbor is considered unreachable after a small number of
-      Hello packets are lost, as described by <cf/limit/ option. On wireless
+      <tag><label id="babel-type">type wired|wireless|tunnel </tag>
+      This option specifies the interface type: Wired, wireless or tunnel. On
+      wired interfaces a neighbor is considered unreachable after a small number
+      of Hello packets are lost, as described by <cf/limit/ option. On wireless
       interfaces the ETX link quality estimation technique is used to compute
       the metrics of routes discovered over this interface. This technique will
       gradually degrade the metric of routes when packets are lost rather than
-      the more binary up/down mechanism of wired type links. Default:
-      <cf/wired/.
+      the more binary up/down mechanism of wired type links. A tunnel is like a
+      wired interface, but turns on RTT-based metrics with a default cost of 96.
+      Default: <cf/wired/.
 
       <tag><label id="babel-rxcost">rxcost <m/num/</tag>
       This option specifies the nominal RX cost of the interface. The effective
@@ -2039,6 +2045,37 @@ protocol babel [<name>] {
       hop when IPv4 addresses are absent from the interface as described in
       <rfc id="9229">. Default: yes.
 
+      <tag><label id="babel-rtt-cost">rtt cost <m/number/</tag>
+      The RTT-based cost that will be applied to all routes from each neighbour
+      based on the measured RTT to that neighbour. If this value is set,
+      timestamps will be included in generated Babel Hello and IHU messages, and
+      (if the neighbours also have timestamps enabled), the RTT to each
+      neighbour will be computed. An additional cost is added to a neighbour if
+      its RTT is above the <ref id="babel-rtt-min" name="rtt min"> value
+      configured on the interface. The added cost scales linearly from 0 up to
+      the RTT cost configured in this option; the full cost is applied if the
+      neighbour RTT reaches the RTT configured in the <ref id="babel-rtt-max"
+      name="rtt max"> option (and for all RTTs above this value). Default: 0
+      (disabled), except for tunnel interfaces, where it is 96.
+
+      <tag><label id="babel-rtt-min">rtt min <m/time/ s|ms</tag>
+      The minimum RTT above which the RTT cost will start to be applied (scaling
+      linearly from zero up to the full cost). Default: 10 ms
+
+      <tag><label id="babel-rtt-max">rtt max <m/time/ s|ms</tag>
+      The RTT at and above which the full RTT cost will be applied.
+      Default: 120 ms
+
+      <tag><label id="babel-rtt-decay">rtt decay <m/number/</tag>
+      The decay factor used for the exponential moving average of the RTT
+      samples from each neighbour, in units of 1/256. Higher values discard old
+      RTT samples faster. Must be between 1 and 256. Default: 42
+
+      <tag><label id="babel-send-timestamps">send timestamps <m/switch/</tag>
+      Whether to send the timestamps used for RTT calculation on this interface.
+      Sending the timestamps enables peers to calculate an RTT to this node,
+      even if no RTT cost is applied to the route metrics. Default: yes.
+
       <tag><label id="babel-authentication">authentication none|mac [permissive]</tag>
       Selects authentication method to be used. <cf/none/ means that packets
       are not authenticated at all, <cf/mac/ means MAC authentication is
diff --git a/proto/babel/babel.c b/proto/babel/babel.c
index 9f33dd3458bd..04613788303c 100644
--- a/proto/babel/babel.c
+++ b/proto/babel/babel.c
@@ -596,6 +596,7 @@ babel_update_cost(struct babel_neighbor *nbr)
   switch (cf->type)
   {
   case BABEL_IFACE_TYPE_WIRED:
+  case BABEL_IFACE_TYPE_TUNNEL:
     /* k-out-of-j selection - Appendix 2.1 in the RFC. */
 
     /* Link is bad if less than cf->limit/16 of expected hellos were received */
@@ -624,6 +625,24 @@ babel_update_cost(struct babel_neighbor *nbr)
     break;
   }
 
+  if (cf->rtt_cost && nbr->srtt > cf->rtt_min)
+  {
+    uint rtt_cost = cf->rtt_cost;
+
+    if (nbr->srtt < cf->rtt_max)
+    {
+      uint rtt_interval = cf->rtt_max TO_US - cf->rtt_min TO_US;
+      uint rtt_diff = (nbr->srtt TO_US - cf->rtt_min TO_US);
+
+      rtt_cost = (rtt_cost * rtt_diff) / rtt_interval;
+    }
+
+    txcost = MIN(txcost + rtt_cost, BABEL_INFINITY);
+
+    TRACE(D_EVENTS, "Added RTT cost %u to nbr %I on %s with srtt %u.%03u ms",
+	  rtt_cost, nbr->addr, nbr->ifa->iface->name, nbr->srtt/1000, nbr->srtt%1000);
+  }
+
 done:
   /* If RX cost changed, send IHU with next Hello */
   if (rxcost != nbr->rxcost)
@@ -854,6 +873,12 @@ babel_build_ihu(union babel_msg *msg, struct babel_iface *ifa, struct babel_neig
   msg->ihu.rxcost = n->rxcost;
   msg->ihu.interval = ifa->cf->ihu_interval;
 
+  if (n->last_tstamp_rcvd && ifa->cf->rtt_send)
+  {
+    msg->ihu.tstamp = n->last_tstamp;
+    msg->ihu.tstamp_rcvd = n->last_tstamp_rcvd TO_US;
+  }
+
   TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %t",
         msg->ihu.addr, msg->ihu.rxcost, (btime) msg->ihu.interval);
 }
@@ -893,6 +918,9 @@ babel_send_hello(struct babel_iface *ifa, uint interval)
   msg.hello.seqno = ifa->hello_seqno++;
   msg.hello.interval = interval ?: ifa->cf->hello_interval;
 
+  if (ifa->cf->rtt_send)
+    msg.hello.tstamp = 1; /* real timestamp will be set on TLV write */
+
   TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t",
 	ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval);
 
@@ -1199,14 +1227,26 @@ babel_handle_hello(union babel_msg *m, struct babel_iface *ifa)
 	msg->seqno, (btime) msg->interval);
 
   struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender);
+  struct babel_iface_config *cf = n->ifa->cf;
   int first_hello = !n->hello_cnt;
 
+  if (msg->tstamp)
+  {
+    n->last_tstamp = msg->tstamp;
+    n->last_tstamp_rcvd = msg->pkt_received;
+  }
   babel_update_hello_history(n, msg->seqno, msg->interval);
   babel_update_cost(n);
 
   /* Speed up session establishment by sending IHU immediately */
   if (first_hello)
-    babel_send_ihu(ifa, n);
+  {
+    /* if using RTT, all IHUs must be paired with hellos */
+    if(cf->rtt_send)
+      babel_send_hello(ifa, 0);
+    else
+      babel_send_ihu(ifa, n);
+  }
 }
 
 void
@@ -1225,6 +1265,39 @@ babel_handle_ihu(union babel_msg *m, struct babel_iface *ifa)
   struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender);
   n->txcost = msg->rxcost;
   n->ihu_expiry = current_time() + BABEL_IHU_EXPIRY_FACTOR(msg->interval);
+
+  if (msg->tstamp)
+  {
+    u32 rtt_sample = 0, pkt_received = msg->pkt_received TO_US;
+    int remote_time, full_time;
+
+    /* processing time reported by peer */
+    remote_time = (n->last_tstamp - msg->tstamp_rcvd);
+    /* time since we sent the last timestamp - RTT including remote time */
+    full_time = (pkt_received - msg->tstamp);
+
+    /* sanity checks */
+    if (remote_time < 0 || full_time < 0 ||
+        remote_time US_ > BABEL_RTT_MAX_VALUE || full_time US_ > BABEL_RTT_MAX_VALUE)
+      goto out;
+
+    if (remote_time < full_time)
+      rtt_sample = full_time - remote_time;
+
+    if (n->srtt)
+    {
+      uint decay = n->ifa->cf->rtt_decay;
+
+      n->srtt = (decay * rtt_sample + (256 - decay) * n->srtt) / 256;
+    }
+    else
+      n->srtt = rtt_sample;
+
+    TRACE(D_EVENTS, "RTT sample for neighbour %I on %s: %u us (srtt %u.%03u ms)",
+          n->addr, ifa->ifname, rtt_sample, n->srtt/1000, n->srtt%1000);
+  }
+
+out:
   babel_update_cost(n);
 }
 
@@ -2199,8 +2272,8 @@ babel_show_neighbors(struct proto *P, const char *iff)
   }
 
   cli_msg(-1024, "%s:", p->p.name);
-  cli_msg(-1024, "%-25s %-10s %6s %6s %6s %7s %4s",
-	  "IP address", "Interface", "Metric", "Routes", "Hellos", "Expires", "Auth");
+  cli_msg(-1024, "%-25s %-10s %6s %6s %6s %7s %4s %11s",
+	  "IP address", "Interface", "Metric", "Routes", "Hellos", "Expires", "Auth", "RTT");
 
   WALK_LIST(ifa, p->interfaces)
   {
@@ -2215,9 +2288,10 @@ babel_show_neighbors(struct proto *P, const char *iff)
 
       uint hellos = u32_popcount(n->hello_map);
       btime timer = (n->hello_expiry ?: n->init_expiry) - current_time();
-      cli_msg(-1024, "%-25I %-10s %6u %6u %6u %7t %-4s",
+      cli_msg(-1024, "%-25I %-10s %6u %6u %6u %7t %-4s %5u.%03ums",
 	      n->addr, ifa->iface->name, n->cost, rts, hellos, MAX(timer, 0),
-              n->auth_passed ? "Yes" : "No");
+              n->auth_passed ? "Yes" : "No",
+              n->srtt/1000, n->srtt%1000);
     }
   }
 }
diff --git a/proto/babel/babel.h b/proto/babel/babel.h
index dcd303e13ecd..edde4cabe6b1 100644
--- a/proto/babel/babel.h
+++ b/proto/babel/babel.h
@@ -53,10 +53,16 @@
 #define BABEL_GARBAGE_INTERVAL		(300 S_)
 #define BABEL_RXCOST_WIRED		96
 #define BABEL_RXCOST_WIRELESS		256
+#define BABEL_RXCOST_RTT		96
 #define BABEL_INITIAL_HOP_COUNT		255
 #define BABEL_MAX_SEND_INTERVAL		5	/* Unused ? */
 #define BABEL_INITIAL_NEIGHBOR_TIMEOUT	(60 S_)
 
+#define BABEL_RTT_MAX_VALUE		(600 S_)
+#define BABEL_RTT_MIN			(10 MS_)
+#define BABEL_RTT_MAX			(120 MS_)
+#define BABEL_RTT_DECAY			42
+
 /* Max interval that will not overflow when carried as 16-bit centiseconds */
 #define BABEL_TIME_UNITS		10000	/* On-wire times are counted in centiseconds */
 #define BABEL_MIN_INTERVAL		(0x0001 * BABEL_TIME_UNITS)
@@ -96,6 +102,8 @@ enum babel_tlv_type {
 enum babel_subtlv_type {
   BABEL_SUBTLV_PAD1		= 0,
   BABEL_SUBTLV_PADN		= 1,
+  BABEL_SUBTLV_DIVERSITY	= 2, /* we don't support this */
+  BABEL_SUBTLV_TIMESTAMP	= 3,
 
   /* Mandatory subtlvs */
   BABEL_SUBTLV_SOURCE_PREFIX    = 128,
@@ -106,6 +114,7 @@ enum babel_iface_type {
   BABEL_IFACE_TYPE_UNDEF	= 0,
   BABEL_IFACE_TYPE_WIRED	= 1,
   BABEL_IFACE_TYPE_WIRELESS	= 2,
+  BABEL_IFACE_TYPE_TUNNEL	= 3,
   BABEL_IFACE_TYPE_MAX
 };
 
@@ -141,6 +150,12 @@ struct babel_iface_config {
   uint ihu_interval;			/* IHU interval, in us */
   uint update_interval;			/* Update interval, in us */
 
+  btime rtt_min;			/* rtt above which to start penalising metric */
+  btime rtt_max;			/* max rtt metric penalty applied above this */
+  u16 rtt_cost;			/* metric penalty to apply at rtt_max */
+  u16 rtt_decay;			/* decay of neighbour RTT (units of 1/256) */
+  u8  rtt_send;			/* whether to send timestamps on this interface */
+
   u16 rx_buffer;			/* RX buffer size, 0 for MTU */
   u16 tx_length;			/* TX packet length limit (including headers), 0 for MTU */
   int tx_tos;
@@ -229,6 +244,10 @@ struct babel_neighbor {
   u16 next_hello_seqno;
   uint last_hello_int;
 
+  u32 last_tstamp;
+  btime last_tstamp_rcvd;
+  btime srtt;
+
   u32 auth_pc_unicast;
   u32 auth_pc_multicast;
   u8 auth_passed;
@@ -326,6 +345,8 @@ struct babel_msg_hello {
   u16 seqno;
   uint interval;
   ip_addr sender;
+  u32 tstamp;
+  btime pkt_received;
 };
 
 struct babel_msg_ihu {
@@ -335,6 +356,9 @@ struct babel_msg_ihu {
   uint interval;
   ip_addr addr;
   ip_addr sender;
+  u32 tstamp;
+  u32 tstamp_rcvd;
+  btime pkt_received;
 };
 
 struct babel_msg_update {
diff --git a/proto/babel/config.Y b/proto/babel/config.Y
index 1b4dc6f5f6c5..b8af02679f0c 100644
--- a/proto/babel/config.Y
+++ b/proto/babel/config.Y
@@ -26,7 +26,7 @@ CF_KEYWORDS(BABEL, INTERFACE, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT,
 	TYPE, WIRED, WIRELESS, RX, TX, BUFFER, PRIORITY, LENGTH, CHECK, LINK,
 	NEXT, HOP, IPV4, IPV6, BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS,
 	ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE,
-	EXTENDED)
+	EXTENDED, TUNNEL, RTT, MIN, MAX, DECAY, SEND, TIMESTAMPS)
 
 CF_GRAMMAR
 
@@ -67,6 +67,10 @@ babel_iface_start:
   BABEL_IFACE->limit = BABEL_HELLO_LIMIT;
   BABEL_IFACE->tx_tos = IP_PREC_INTERNET_CONTROL;
   BABEL_IFACE->tx_priority = sk_priority_control;
+  BABEL_IFACE->rtt_min = BABEL_RTT_MIN;
+  BABEL_IFACE->rtt_max = BABEL_RTT_MAX;
+  BABEL_IFACE->rtt_decay = BABEL_RTT_DECAY;
+  BABEL_IFACE->rtt_send = 1;
   BABEL_IFACE->check_link = 1;
   BABEL_IFACE->ext_next_hop = 1;
 };
@@ -87,8 +91,16 @@ babel_iface_finish:
       BABEL_IFACE->hello_interval = BABEL_HELLO_INTERVAL_WIRED;
     if (!BABEL_IFACE->rxcost)
       BABEL_IFACE->rxcost = BABEL_RXCOST_WIRED;
+    if (BABEL_IFACE->type == BABEL_IFACE_TYPE_TUNNEL && !BABEL_IFACE->rtt_cost)
+      BABEL_IFACE->rtt_cost = BABEL_RXCOST_RTT;
   }
 
+  if (BABEL_IFACE->rtt_cost && !BABEL_IFACE->rtt_send)
+    cf_error("Can't set RTT cost when sending timestamps is disabled");
+
+  if (BABEL_IFACE->rtt_min >= BABEL_IFACE->rtt_max)
+    cf_error("Min RTT must be smaller than max RTT");
+
   /* Make sure we do not overflow the 16-bit centisec fields */
   if (!BABEL_IFACE->update_interval)
     BABEL_IFACE->update_interval = MIN_(BABEL_IFACE->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR, BABEL_MAX_INTERVAL);
@@ -136,6 +148,7 @@ babel_iface_item:
  | LIMIT expr { BABEL_IFACE->limit = $2; if (($2<1) || ($2>16)) cf_error("Limit must be in range 1-16"); }
  | TYPE WIRED { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; }
  | TYPE WIRELESS { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRELESS; }
+ | TYPE TUNNEL { BABEL_IFACE->type = BABEL_IFACE_TYPE_TUNNEL; }
  | HELLO INTERVAL expr_us { BABEL_IFACE->hello_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Hello interval must be in range 10 ms - 655 s"); }
  | UPDATE INTERVAL expr_us { BABEL_IFACE->update_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Update interval must be in range 10 ms - 655 s"); }
  | RX BUFFER expr { BABEL_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX buffer must be in range 256-65535"); }
@@ -149,6 +162,11 @@ babel_iface_item:
  | AUTHENTICATION NONE { BABEL_IFACE->auth_type = BABEL_AUTH_NONE; }
  | AUTHENTICATION MAC { BABEL_IFACE->auth_type = BABEL_AUTH_MAC; BABEL_IFACE->auth_permissive = 0; }
  | AUTHENTICATION MAC PERMISSIVE { BABEL_IFACE->auth_type = BABEL_AUTH_MAC; BABEL_IFACE->auth_permissive = 1; }
+ | RTT MIN expr_us { BABEL_IFACE->rtt_min = $3; }
+ | RTT MAX expr_us { BABEL_IFACE->rtt_max = $3; }
+ | RTT COST expr { BABEL_IFACE->rtt_cost = $3; if ($3 >= BABEL_INFINITY) cf_error("RTT cost must be < 65535"); }
+ | RTT DECAY expr { BABEL_IFACE->rtt_decay = $3; if (($3 < 1) || ($3 > 256)) cf_error("RTT decay must be between 1-256"); }
+ | SEND TIMESTAMPS bool { BABEL_IFACE->rtt_send = $3; }
  | password_list
  ;
 
diff --git a/proto/babel/packets.c b/proto/babel/packets.c
index 61c94cc5133e..f18956558190 100644
--- a/proto/babel/packets.c
+++ b/proto/babel/packets.c
@@ -58,6 +58,13 @@ struct babel_tlv_ihu {
   u8 addr[0];
 } PACKED;
 
+struct babel_subtlv_timestamp { /* Timestamp sub-TLV carried inside Hello and IHU TLVs */
+  u8 type; /* written as BABEL_SUBTLV_TIMESTAMP */
+  u8 length; /* parser requires at least 4 on a Hello and at least 8 on an IHU */
+  u32 tstamp; /* sender's timestamp; Hellos are stamped with the current time in us */
+  u32 tstamp_rcvd; /* only used in IHU */
+} PACKED;
+
 struct babel_tlv_router_id {
   u8 type;
   u8 length;
@@ -161,6 +168,7 @@ struct babel_parse_state {
   const struct babel_tlv_data* (*get_subtlv_data)(u8 type);
   struct babel_proto *proto;
   struct babel_iface *ifa;
+  btime received_time;
   ip_addr saddr;
   ip_addr next_hop_ip4;
   ip_addr next_hop_ip6;
@@ -172,6 +180,7 @@ struct babel_parse_state {
   u8 def_ip6_prefix_seen;	/* def_ip6_prefix is valid */
   u8 def_ip4_prefix_seen;	/* def_ip4_prefix is valid */
   u8 def_ip4_via_ip6_prefix_seen; /* def_ip4_via_ip6_prefix is valid */
+  u8 hello_tstamp_seen;	/* pkt contains a hello timestamp */
   u8 current_tlv_endpos;	/* End of self-terminating TLVs (offset from start) */
   u8 sadr_enabled;
   u8 is_unicast;
@@ -336,6 +345,7 @@ static int babel_read_update(struct babel_tlv *hdr, union babel_msg *msg, struct
 static int babel_read_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
 static int babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
 static int babel_read_source_prefix(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
+static int babel_read_timestamp(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
 
 static uint babel_write_ack(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
 static uint babel_write_hello(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
@@ -344,6 +354,7 @@ static uint babel_write_update(struct babel_tlv *hdr, union babel_msg *msg, stru
 static uint babel_write_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
 static uint babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
 static int babel_write_source_prefix(struct babel_tlv *hdr, net_addr *net, uint max_len);
+static int babel_write_timestamp(struct babel_tlv *hdr, u32 tstamp, u32 tstamp_rcvd, uint max_len);
 
 static const struct babel_tlv_data tlv_data[BABEL_TLV_MAX] = {
   [BABEL_TLV_ACK_REQ] = {
@@ -419,6 +430,13 @@ static const struct babel_tlv_data *get_packet_tlv_data(u8 type)
   return type < sizeof(tlv_data) / sizeof(*tlv_data) ? &tlv_data[type] : NULL;
 }
 
+static const struct babel_tlv_data timestamp_tlv_data = { /* returned for BABEL_SUBTLV_TIMESTAMP */
+  sizeof(struct babel_subtlv_timestamp), /* full IHU-sized layout; the Hello form is 4 bytes shorter */
+  babel_read_timestamp, /* parser invoked for received Timestamp sub-TLVs */
+  NULL, /* NOTE(review): presumably the write slot; writing goes through babel_write_timestamp() */
+  NULL /* NOTE(review): presumably the handler slot, unused here - confirm against babel_tlv_data */
+};
+
 static const struct babel_tlv_data source_prefix_tlv_data = {
   sizeof(struct babel_subtlv_source_prefix),
   babel_read_source_prefix,
@@ -430,6 +448,8 @@ static const struct babel_tlv_data *get_packet_subtlv_data(u8 type)
 {
   switch (type)
   {
+  case BABEL_SUBTLV_TIMESTAMP:
+    return &timestamp_tlv_data;
   case BABEL_SUBTLV_SOURCE_PREFIX:
     return &source_prefix_tlv_data;
 
@@ -491,16 +511,34 @@ babel_read_hello(struct babel_tlv *hdr, union babel_msg *m,
 
 static uint
 babel_write_hello(struct babel_tlv *hdr, union babel_msg *m,
-                  struct babel_write_state *state UNUSED, uint max_len UNUSED)
+                  struct babel_write_state *state UNUSED, uint max_len)
 {
   struct babel_tlv_hello *tlv = (void *) hdr;
   struct babel_msg_hello *msg = &m->hello;
+  uint len = sizeof(struct babel_tlv_hello);
 
   TLV_HDR0(tlv, BABEL_TLV_HELLO);
   put_u16(&tlv->seqno, msg->seqno);
   put_time16(&tlv->interval, msg->interval);
 
-  return sizeof(struct babel_tlv_hello);
+  if (msg->tstamp)
+  {
+    /*
+     * There can be a substantial delay between when the babel_msg was created
+     * and when it is serialised. We don't want this included in the RTT
+     * measurement, so replace the timestamp with the current time to get as
+     * close as possible to on-wire time for the packet.
+     */
+    u32 tstamp = current_time_now() TO_US;
+
+    int l = babel_write_timestamp(hdr, tstamp, 0, max_len);
+    if (l < 0)
+      return 0;
+
+    len += l;
+  }
+
+  return len;
 }
 
 static int
@@ -565,6 +603,7 @@ babel_write_ihu(struct babel_tlv *hdr, union babel_msg *m,
 {
   struct babel_tlv_ihu *tlv = (void *) hdr;
   struct babel_msg_ihu *msg = &m->ihu;
+  uint len = sizeof(*tlv);
 
   if (ipa_is_link_local(msg->addr) && max_len < sizeof(struct babel_tlv_ihu) + 8)
     return 0;
@@ -576,12 +615,24 @@ babel_write_ihu(struct babel_tlv *hdr, union babel_msg *m,
   if (!ipa_is_link_local(msg->addr))
   {
     tlv->ae = BABEL_AE_WILDCARD;
-    return sizeof(struct babel_tlv_ihu);
+    goto out;
   }
   put_ip6_ll(&tlv->addr, msg->addr);
   tlv->ae = BABEL_AE_IP6_LL;
   hdr->length += 8;
-  return sizeof(struct babel_tlv_ihu) + 8;
+  len += 8;
+
+out:
+  if (msg->tstamp)
+  {
+    int l = babel_write_timestamp(hdr, msg->tstamp, msg->tstamp_rcvd, max_len);
+    if (l < 0)
+      return 0;
+
+    len += l;
+  }
+
+  return len;
 }
 
 static int
@@ -1249,6 +1300,66 @@ babel_write_source_prefix(struct babel_tlv *hdr, net_addr *n, uint max_len)
   return len;
 }
 
+static int
+babel_read_timestamp(struct babel_tlv *hdr, union babel_msg *msg,
+                     struct babel_parse_state *state)
+{
+  /* Copy a Timestamp sub-TLV into the Hello or IHU message currently being parsed */
+  struct babel_subtlv_timestamp *ts = (void *) hdr;
+
+  if (msg->type == BABEL_TLV_HELLO)
+  {
+    if (ts->length < 4)
+      return PARSE_ERROR;
+
+    msg->hello.tstamp = get_u32(&ts->tstamp);
+    msg->hello.pkt_received = state->received_time;
+    state->hello_tstamp_seen = 1;
+    return PARSE_SUCCESS;
+  }
+
+  /* Timestamps are only accepted on Hello and IHU TLVs */
+  if (msg->type != BABEL_TLV_IHU)
+    return PARSE_ERROR;
+
+  /* An IHU must carry both timestamp fields */
+  if (ts->length < 8)
+    return PARSE_ERROR;
+
+  /* RTT calculation relies on a Hello always being present with an IHU */
+  if (state->hello_tstamp_seen)
+  {
+    msg->ihu.tstamp = get_u32(&ts->tstamp);
+    msg->ihu.tstamp_rcvd = get_u32(&ts->tstamp_rcvd);
+    msg->ihu.pkt_received = state->received_time;
+  }
+
+  return PARSE_SUCCESS;
+}
+
+static int
+babel_write_timestamp(struct babel_tlv *hdr, u32 tstamp, u32 tstamp_rcvd, uint max_len)
+{
+  /* Append a Timestamp sub-TLV to @hdr; returns bytes added, or -1 if it does not fit */
+  struct babel_subtlv_timestamp *tlv = (void *) NEXT_TLV(hdr);
+  uint used = (char *) tlv - (char *) hdr; /* bytes the parent TLV already occupies */
+  uint len = sizeof(*tlv);
+
+  if (hdr->type == BABEL_TLV_HELLO)
+    len -= 4; /* a Hello carries only tstamp, so tstamp_rcvd is left out */
+
+  /* Callers pass max_len counted from the start of @hdr, so include the parent TLV */
+  if ((used > max_len) || (len > max_len - used))
+    return -1;
+
+  TLV_HDR(tlv, BABEL_SUBTLV_TIMESTAMP, len);
+  hdr->length += len;
+  put_u32(&tlv->tstamp, tstamp);
+  if (hdr->type == BABEL_TLV_IHU)
+    put_u32(&tlv->tstamp_rcvd, tstamp_rcvd);
+  return len;
+}
+
 static inline int
 babel_read_subtlvs(struct babel_tlv *hdr,
 		   union babel_msg *msg,
@@ -1518,6 +1629,13 @@ babel_process_packet(struct babel_iface *ifa,
     .saddr           = saddr,
     .next_hop_ip6    = saddr,
     .sadr_enabled    = babel_sadr_enabled(p),
+
+    /*
+     * The core updates current_time() after returning from poll(), so this is
+     * actually the time the packet was received, even though there may have
+     * been a bit of delay before we got to process it
+     */
+    .received_time   = current_time(),
   };
 
   if ((pkt->magic != BABEL_MAGIC) || (pkt->version != BABEL_VERSION))
-- 
2.39.1



More information about the Bird-users mailing list