Will bird block on syslog() call?
Pavlos Parissis
pavlos.parissis at gmail.com
Fri Feb 24 13:13:55 CET 2017
Hi,
We have observed some instability in the BFD protocol, where the upstream router
and/or the server (Linux RedHat 7.3) declares the BFD session dead and, as a
consequence, the upstream router stops forwarding traffic to the server (we utilize ECMP).
Our current hypothesis is that Bird logs messages (only BGP KEEPALIVE messages
when there isn't any route change) via the glibc syslog() function, which connects to
a UNIX socket (/dev/log), and the sender (Bird daemon) may block when the receiver
(rsyslogd) doesn't respond fast enough or the buffer is full.
On RedHat 7 servers there is a chain of daemons, which receive log messages via
UNIX socket.
systemd-journald.service listens on the /dev/log UNIX socket and forwards messages
to the /run/systemd/journal/syslog UNIX socket, on which rsyslogd listens.
As far as I can see in the code and in the output of ps -eLl, the Bird daemon is a
single-threaded process (please correct me if I am wrong), so it could be that a
call to syslog() blocks for X seconds, where X is higher than the failure detection
time.
Can you confirm this hypothesis?
Here is a sanitized config:
log syslog { debug, trace, info, remote, warning, error, auth, fatal, bug };
/*
Source configuration snippets
Please make sure they are valid snippets for Bird
*/
include "/etc/bird.d/*.conf";
include "/etc/bird.d/4/*.conf";
router id 1.2.3.1;
protocol device {
scan time 10;
}
protocol static {
disabled yes;
}
protocol direct direct1 {
interface "lo";
debug all;
export none;
import where net ~ ANYCAST_NETWORKS;
}
protocol bfd BFD1 {
debug { states, routes, filters, interfaces, events };
interface "north", "south" {
min rx interval 400 ms;
min tx interval 400 ms;
idle tx interval 1000 ms;
multiplier 3;
};
}
protocol bgp BGP1 {
disabled no;
description "Peer-BGP1";
neighbor 1.2.3.254 as 4;
source address 1.2.3.1;
bfd on;
debug all;
import none;
export where match_route_north();
direct;
hold time 10;
startup hold time 240;
connect retry time 120;
keepalive time 3;
start delay time 5;
error wait time 60, 300;
error forget time 300;
disable after error off;
next hop self;
path metric 1;
default bgp_med 0;
default bgp_local_pref 0;
local as 1;
}
protocol bgp BGP2 {
disabled no;
description "Peer-BGP2";
neighbor 1.2.4.254 as 7;
source address 1.2.3.1;
bfd on;
debug all;
import none;
export where match_route_south();
direct;
hold time 10;
startup hold time 240;
connect retry time 120;
keepalive time 3;
start delay time 5;
error wait time 60, 300;
error forget time 300;
disable after error off;
next hop self;
path metric 1;
default bgp_med 0;
default bgp_local_pref 0;
local as 1;
}
Cheers,
Pavlos
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <http://trubka.network.cz/pipermail/bird-users/attachments/20170224/dcc5e80c/attachment.asc>
More information about the Bird-users
mailing list