网桥的工作流程(数据转发和STP协议)
参考了独孤九贱大侠的文章,针对2.6.16代码。
零. STP的几个状态:
. Blocking:阻塞状态,只接收BPDU,不转发数据,也不学习MAC地址。
. Listening:侦听状态,不转发数据,可以收发BPDU,执行选举root bridge、root port、designated port等动作。
. Learning:学习状态,不转发数据,开始学习MAC,为数据转发作准备,所以称之为Learning状态
. Forward:转发状态,转发数据。
. Disabled:禁用状态,既不参与STP计算,也不转发数据。
一. 首先是Bridge注册过程:
/*
 * Bridge module initialisation (quoted from Linux 2.6.16).
 *
 * Calls br_fdb_init(), optionally br_netfilter_init(), installs the ioctl
 * stub, publishes the frame and fdb hooks, and registers the netdevice
 * notifier.
 *
 * Returns 0 on success, or 1 if br_netfilter_init() returns non-zero.
 * NOTE(review): the return value of register_netdevice_notifier() is ignored.
 */
static int __init br_init(void)
{
br_fdb_init();
#ifdef CONFIG_BRIDGE_NETFILTER
if (br_netfilter_init())
return 1;
#endif
brioctl_set(br_ioctl_deviceless_stub);
/* Register br_handle_frame as the packet handling function. */
br_handle_frame_hook = br_handle_frame;
br_fdb_get_hook = br_fdb_get;
br_fdb_put_hook = br_fdb_put;
register_netdevice_notifier(&br_device_notifier);
return 0;
}
二. 网桥处理函数
/*
 * Handler for a frame received on bridge port p (installed as
 * br_handle_frame_hook by br_init()).
 *
 * p    - the bridge port the frame arrived on.
 * pskb - pointer to the skb pointer; br_should_route_hook() may replace *pskb,
 *        which is why skb and dest are re-read after calling it.
 *
 * Returns 0 only when br_should_route_hook() returns non-zero; every other
 * path (frame handed to a netfilter hook, or freed at err) returns 1.
 */
int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
{
struct sk_buff *skb = *pskb;
const unsigned char *dest = eth_hdr(skb)->h_dest;
/* Frames arriving on a disabled port are dropped. */
if (p->state == BR_STATE_DISABLED)
goto err;
/* Frames whose source MAC fails is_valid_ether_addr() are dropped. */
if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
goto err;
/*
When STP is enabled on the bridge, check whether the destination is a
bridge group address. Per the article author, bridge_ula is defined as
{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; only its first 5 bytes are compared.
The last byte is then required to have its high nibble clear, so the test
matches 01-80-C2-00-00-00 through 01-80-C2-00-00-0F.
NOTE(review): this 16-address range looks like the IEEE 802.1D reserved
(never-forwarded) group addresses - confirm against the standard.
*/
if (p->br->stp_enabled &&
!memcmp(dest, bridge_ula, 5) &&
!(dest[5] & 0xF0)) {
if (!dest[5]) {
/* Only the exact ...:00 address is treated as STP traffic and handed to br_stp_handle_bpdu. */
NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
NULL, br_stp_handle_bpdu);
return 1;
}
/* Any other address in the matched range is dropped. */
goto err;
}
/* In the Learning or Forwarding state the frame continues to br_handle_frame_finish, which records its source MAC in the CAM table. */
if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
if (br_should_route_hook) {
if (br_should_route_hook(pskb))
return 0;
skb = *pskb;
dest = eth_hdr(skb)->h_dest;
}
/* Mark the frame PACKET_HOST when dest matches the bridge device's own address (compare_ether_addr() presumably returns 0 on equality - confirm). */
if (!compare_ether_addr(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
/* The CAM table update itself happens in br_handle_frame_finish. */
NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish);
return 1;
}
err:
kfree_skb(skb);
return 1;
}
三. 接下来我们首先分析更新CAM表的函数br_handle_frame_finish,在第五小节再分析STP的处理函数br_stp_handle_bpdu
/*
 * Continuation of br_handle_frame(), passed to NF_HOOK as the NF_BR_PRE_ROUTING
 * callback: learns the frame's source address, then passes the frame up
 * locally, forwards it to a known port, or floods it.
 *
 * skb - the received frame; skb->dev->br_port identifies the ingress port.
 *
 * Always returns 0. The frame is freed here when the port is missing,
 * disabled or only learning.
 */
int br_handle_frame_finish(struct sk_buff *skb)
{
const unsigned char *dest = eth_hdr(skb)->h_dest;
struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
struct net_bridge *br;
struct net_bridge_fdb_entry *dst;
/* Set to 1 once a copy of the frame has been passed up via br_pass_frame_up(). */
int passedup = 0;
if (!p || p->state == BR_STATE_DISABLED)
goto drop;
/*
insert into forwarding database after filtering to avoid spoofing.
The frame's source MAC address is used to update the CAM table.
*/
br = p->br;
br_fdb_update(br, p, eth_hdr(skb)->h_source);
/* A port in the Learning state only learns; the frame itself is dropped. */
if (p->state == BR_STATE_LEARNING)
goto drop;
/*
If the bridge's virtual device is in promiscuous mode, every received frame
is cloned and the clone is passed up (per the article author, so that
AF_PACKET listeners see it).
*/
if (br->dev->flags & IFF_PROMISC) {
struct sk_buff *skb2;
skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2 != NULL) {
passedup = 1;
br_pass_frame_up(br, skb2);
}
}
/*
Broadcast/multicast destination: flood the frame and also deliver it to the
local stack, unless a copy was already passed up above (passedup).
NOTE(review): the third argument of br_flood_forward() presumably asks it to
work on a clone so skb stays usable for br_pass_frame_up() - confirm.
*/
if (is_multicast_ether_addr(dest)) {
br_flood_forward(br, skb, !passedup);
if (!passedup)
br_pass_frame_up(br, skb);
goto out;
}
dst = __br_fdb_get(br, dest);
/* Destination is a local address: pass up once, or free the frame if a clone was already passed up. */
if (dst != NULL && dst->is_local) {
if (!passedup)
br_pass_frame_up(br, skb);
else
kfree_skb(skb);
goto out;
}
/* Destination is known in the CAM table: forward to that port only. */
if (dst != NULL) {
br_forward(dst->dst, skb);
goto out;
}
/* Not in the CAM table: flooding is the only option. */
br_flood_forward(br, skb, 0);
out:
return 0;
drop:
kfree_skb(skb);
goto out;
}
四. 在介绍br_stp_handle_bpdu这个函数之前,我们首先来看一下内核中用来表示一个BPDU的数据结构是什么 -- br_config_bpdu
/*
 * In-kernel representation of a configuration BPDU.
 * Field descriptions follow the article author's reading of the STP
 * protocol; verify against IEEE 802.1D where exact semantics matter.
 */
struct br_config_bpdu
{
unsigned topology_change:1; /* topology change flag */
unsigned topology_change_ack:1; /* topology change acknowledgement flag */
bridge_id root; /* root ID: once the bridged network has converged, every configuration BPDU (within the same VLAN) should carry the same value; made up of two sub-fields, bridge priority and bridge MAC address */
int root_path_cost; /* path cost: cumulative cost of all links on the way to the root bridge */
bridge_id bridge_id; /* BID of the bridge that created this BPDU */
port_id port_id; /* port ID; per the article author each port has a unique value */
int message_age; /* time elapsed since the root bridge originated the information carried in this BPDU */
int max_age; /* maximum time a BPDU is kept; also reflects the bridge table lifetime during Topology Change Notification */
int hello_time; /* interval between periodic configuration BPDUs */
int forward_delay; /* time spent in the Listening and Learning states; also reflects timing during Topology Change Notification */
};
五. BPDU处理函数br_stp_handle_bpdu
int br_stp_handle_bpdu(struct sk_buff *skb)
{
struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
struct net_bridge *br;
unsigned char *buf;
if (!p)
goto err;
br = p->br;
spin_lock(&br->lock);
if (p->state == BR_STATE_DISABLED || !(br->dev->flags & IFF_UP))
goto out;
/*
insert into forwarding database after filtering to avoid spoofing
根据BPDU中的MAC地址来更新CAM表
*/
br_fdb_update(br, p, eth_hdr(skb)->h_source);
if (!br->stp_enabled)
goto out;
/* need at least the 802 and STP headers */
if (!pskb_may_pull(skb, sizeof(header)+1) ||
memcmp(skb->data, header, sizeof(header)))
goto out;
buf = skb_pull(skb, sizeof(header));
if (buf[0] == BPDU_TYPE_CONFIG) {
struct br_config_bpdu bpdu;
if (!pskb_may_pull(skb, 32))
goto out;
/*
802.3数据包格式:
目的地址 | 源地址 | 长度 | DSAP | SSAP | cntl | org_code | 类型 | 数据 | CRC
6 6 2 1 1 1 3 2 38~1492 4
*/
buf = skb->data; // 跳过了数据字段的6个字节,buf指向了类型字段
/*
STP的数据包不是以太网格式的,而是802.3格式的,
*/
bpdu.topology_change = (buf[1] & 0x01) ? 1 : 0;
bpdu.topology_change_ack = (buf[1] & 0x80) ? 1 : 0;
bpdu.root.prio[0] = buf[2];
bpdu.root.prio[1] = buf[3];
bpdu.root.addr[0] = buf[4];
bpdu.root.addr[1] = buf[5];
bpdu.root.addr[2] = buf[6];
bpdu.root.addr[3] = buf[7];
bpdu.root.addr[4] = buf[8];
bpdu.root.addr[5] = buf[9];
bpdu.root_path_cost =
(buf[10]
(buf[11]
(buf[12]
buf[13];
bpdu.bridge_id.prio[0] = buf[14];
bpdu.bridge_id.prio[1] = buf[15];
bpdu.bridge_id.addr[0] = buf[16];
bpdu.bridge_id.addr[1] = buf[17];
bpdu.bridge_id.addr[2] = buf[18];
bpdu.bridge_id.addr[3] = buf[19];
bpdu.bridge_id.addr[4] = buf[20];
bpdu.bridge_id.addr[5] = buf[21];
bpdu.port_id = (buf[22]
bpdu.message_age = br_get_ticks(buf+24); // 0
bpdu.max_age = br_get_ticks(buf+26); // 20
bpdu.hello_time = br_get_ticks(buf+28); // 2
bpdu.forward_delay = br_get_ticks(buf+30); // 15
/* 若是收到了config bpdu,则调用下面的函数 */
br_received_config_bpdu(p, &bpdu);
}
/* 若是拓扑变化通知TCN,则调用下面的函数 */
else if (buf[0] == BPDU_TYPE_TCN) {
br_received_tcn_bpdu(p);
}
out:
spin_unlock(&br->lock);
err:
kfree_skb(skb);
return 0;
六. 在处理BPDU之前,我们先来看一下net_bridge和net_bridge_port这两个数据结构,这两个是很重要的数据结构,如果不理解这两个结构,后面的分析就很难进行了。
/*
 * Per-bridge state (quoted from Linux 2.6.16): port list, forwarding
 * database hash, STP parameters and timers.
 */
struct net_bridge
{
spinlock_t lock;
struct list_head port_list;
struct net_device *dev;
struct net_device_stats statistics;
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
struct list_head age_list;
unsigned long feature_mask;
/* STP */
bridge_id designated_root; /* ID of the root bridge in the network */
bridge_id bridge_id; /* this bridge's own ID */
u32 root_path_cost; /* NOTE(review): presumably this bridge's path cost to the root; the article author was unsure - confirm */
unsigned long max_age;
unsigned long hello_time;
unsigned long forward_delay;
unsigned long bridge_max_age;
unsigned long ageing_time;
unsigned long bridge_hello_time;
unsigned long bridge_forward_delay;
u16 root_port;
unsigned char stp_enabled;
unsigned char topology_change;
unsigned char topology_change_detected;
struct timer_list hello_timer;
struct timer_list tcn_timer;
struct timer_list topology_change_timer;
struct timer_list gc_timer;
struct kobject ifobj;
};
-------------------
/*
 * Per-port state of a bridge (quoted from Linux 2.6.16): back-pointer to the
 * owning bridge, the underlying net_device, and the port's STP fields and
 * timers.
 */
struct net_bridge_port
{
struct net_bridge *br;
struct net_device *dev;
struct list_head list;
/* STP */
u8 priority;
u8 state;
u16 port_no;
unsigned char topology_change_ack;
unsigned char config_pending;
port_id port_id; /* ID of this port; per the article author it is stored in BPDUs sent out of this port */
port_id designated_port;
bridge_id designated_root;
bridge_id designated_bridge;
u32 path_cost;
u32 designated_cost;
struct timer_list forward_delay_timer;
struct timer_list hold_timer;
struct timer_list message_age_timer;
struct kobject kobj;
struct work_struct carrier_check;
struct rcu_head rcu;
};
七. config_bpdu的处理函数--br_received_config_bpdu
/*
 * Handle a configuration BPDU received on port p.
 *
 * p    - port the BPDU arrived on.
 * bpdu - the decoded configuration BPDU.
 *
 * If br_supersedes_port_info() accepts the BPDU, its information is recorded
 * on the port and the bridge configuration and port states are recomputed.
 * Otherwise, if p is a designated port, br_reply(p) is called.
 */
void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
{
struct net_bridge *br;
int was_root;
br = p->br;
/* Remember whether this bridge was root before processing the BPDU. */
was_root = br_is_root_bridge(br);
if (br_supersedes_port_info(p, bpdu)) { /* covered in section 8 of the article */
br_record_config_information(p, bpdu);
br_configuration_update(br);
br_port_state_selection(br);
/* This bridge has just stopped being the root bridge. */
if (!br_is_root_bridge(br) && was_root) {
del_timer(&br->hello_timer);
/* A topology change was pending: send a TCN and arm tcn_timer. */
if (br->topology_change_detected) {
del_timer(&br->topology_change_timer);
br_transmit_tcn(br);
mod_timer(&br->tcn_timer,
jiffies + br->bridge_hello_time);
}
}
/* The BPDU arrived on the root port. */
if (p->port_no == br->root_port) {
br_record_config_timeout_values(br, bpdu);
br_config_bpdu_generation(br);
if (bpdu->topology_change_ack)
br_topology_change_acknowledged(br);
}
}
else if (br_is_designated_port(p)) {
br_reply(p);
}
}
八. br_supersedes_port_info(p, bpdu)
这个函数把收到的BPDU中的值同端口上先前记录的对应值逐项比较,以确定是否需要更新端口信息。
/* called under bridge lock */
/*
 * Decide whether a received configuration BPDU supersedes the information
 * currently recorded on port p.
 *
 * The comparison is a strict priority chain; each step is only reached when
 * all previous ones compared equal:
 *   1. root ID           (lower wins)
 *   2. root path cost    (lower wins)
 *   3. sender bridge ID  (lower wins)
 *   4. sender differs from this bridge itself -> supersedes
 *   5. port ID           (lower or equal wins)
 *
 * Returns 1 if the port information should be updated, 0 otherwise.
 */
static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
{
	int t;

	/* Compare the BPDU's root ID with the root ID recorded on the port. */
	t = memcmp(&bpdu->root, &p->designated_root, 8);
	if (t < 0)
		return 1;
	else if (t > 0)
		return 0;

	/* Root IDs are equal: compare the path cost to the root. */
	if (bpdu->root_path_cost < p->designated_cost)
		return 1;
	else if (bpdu->root_path_cost > p->designated_cost)
		return 0;

	/* Costs are equal: compare the sending bridge with the recorded designated bridge. */
	t = memcmp(&bpdu->bridge_id, &p->designated_bridge, 8);
	if (t < 0)
		return 1;
	else if (t > 0)
		return 0;

	/* Same designated bridge: a BPDU not sent by this bridge itself supersedes. */
	if (memcmp(&bpdu->bridge_id, &p->br->bridge_id, 8))
		return 1;

	/* Sent by this bridge: fall back to the port ID. */
	if (bpdu->port_id <= p->designated_port)
		return 1;

	return 0;
}