Linux内核网络源码中经常看到的几个与TCP相关的关键结构体如下图所示,本文围绕下面这个图展开分析,分别介绍各个结构体以及关系
上图关系可以这样描述:
四个结构的关系具有面向对象的特征,通过层层继承,实现了类的复用;内核中网络相关的很多函数,参数往往都是struct sock, 函数内部依照不同的业务逻辑,将struct sock转换为不同的业务结构 ;这样做的好处:
分别看一下这几个结构体如下:
struct tcp_sock的结构体成员大都是与TCP协议本身相关的关键字段,可以看到该结构体的第一个成员即为struct inet_connection_sock结构体,即struct tcp_sock是在struct inet_connection_sock结构体的基础上继承而来,增加了一些TCP协议相关的字段,如滑动窗口协议、拥塞算法等TCP专有的属性。
更多linux内核视频教程文档资料免费领取后台私信【内核】自行获取.
Linux内核源码/内存调优/文件系统/进程管理/设备驱动/网络协议栈-学习视频教程-腾讯课堂
struct tcp_sock { struct inet_connection_sock inet_conn;//inet_connection结构体,而非指针 u16 tcp_header_len; /* Bytes of tcp header to send */ u16 gso_segs; /* Max number of segs per GSO packet */ u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived * sum(delta(rcv_nxt)), or how many bytes * were acked. */ u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn * total number of segments in. */ u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn * total number of data segments in. */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ u32 snd_nxt; /* Next sequence we send */ u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut * The total number of segments sent. */ u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut * total number of data segments sent. */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. */ u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 tsoffset; /* timestamp offset */ struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ u32 snd_wl1; /* Sequence for window update */ u32 snd_wnd; /* The window we expect to receive */ u32 max_window; /* Maximal window ever seen from peer */ u32 mss_cache; /* Cached effective mss, not including SACKS */....../* RTT measurement */ u64 tcp_mstamp; /* most recent packet received/sent */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ u32 mdev_us; /* medium deviation */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* 
sequence number to update rttvar */ struct minmax rtt_min; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ u32 max_packets_out; /* max packets_out in last window */ u32 max_packets_seq; /* right edge of max_packets_out flight */ u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits. */ u8 keepalive_probes; /* num of allowed keep alive probes */ u32 reordering; /* Packet reordering metric. */ u32 snd_up; /* Urgent pointer *//* * Options received (usually on last packet, some only on SYN packets). */ struct tcp_options_received rx_opt;/* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ u32 snd_ssthresh; /* Slow start size threshold */ u32 snd_cwnd; /* Sending congestion window */ u32 snd_cwnd_cnt; /* Linear increase counter */ u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; u32 prior_cwnd; /* cwnd right before starting loss recovery */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. rexmits */ u32 app_limited; /* limited until "delivered" reaches this val */ u64 first_tx_mstamp; /* start of window send phase */ u64 delivered_mstamp; /* time we reached "delivered" */ u32 rate_delivered; /* saved rate sample: packets delivered */ u32 rate_interval_us; /* saved rate sample: time elapsed */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ u32 pushed_seq; /* Last pushed seq, required to talk to windows */ u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ ...... struct request_sock *fastopen_rsk; u32 *saved_syn;};
继续看struct inet_connection_sock结构体,如下所示,它的第一个域是struct inet_sock,即struct inet_connection_sock结构体是在struct inet_sock的基础上继承而来,增加了一些面向连接需要的字段。
struct inet_connection_sock { struct inet_sock icsk_inet;//INET协议族的sock结构 struct request_sock_queue icsk_accept_queue; //确定接收的队列 struct inet_bind_bucket *icsk_bind_hash;//绑定的桶结构 unsigned long icsk_timeout;//超时 struct timer_list icsk_retransmit_timer;//没有ACK时的重发定时器 struct timer_list icsk_delack_timer;//确定删掉的定时器 __u32 icsk_rto;//重发超时 __u32 icsk_pmtu_cookie;//最近的pmtu const struct tcp_congestion_ops *icsk_ca_ops;//拥挤情况下的处理函数表 const struct inet_connection_sock_af_ops *icsk_af_ops;//AF_INET指定的函数表 const struct tcp_ulp_ops *icsk_ulp_ops; void *icsk_ulp_data; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, //拥挤情况的处理状态 icsk_ca_setsockopt:1, //重发数量 icsk_ca_dst_locked:1; __u8 icsk_retransmits; //重发数量 __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; __u8 icsk_probes_out; __u16 icsk_ext_hdr_len; struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ __u8 pingpong; /* The session is interactive */ __u8 blocked; /* Delayed ACK was blocked by socket lock */ __u32 ato; /* Predicted tick of soft clock */ unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; struct { int enabled; /* Range of MTUs to search */ int search_high; int search_low; /* Information on the current probe. */ int probe_size; u32 probe_timestamp; } icsk_mtup; u32 icsk_user_timeout; u64 icsk_ca_priv[88 / sizeof(u64)];#define ICSK_CA_PRIV_SIZE (11 * sizeof(u64))};
struct inet_sock结构体如下所示,可以看到该结构的第一个成员是struct sock结构体,即struct inet_sock是在struct sock的基础上继承而来,增加了一些INET域专有的属性,比如TTL、组播列表、IP地址、端口等。
struct inet_sock { struct sock sk; //注意:是sock结构体而不是指针#if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *pinet6;#endif /* Socket demultiplex comparisons on incoming packets. */#define inet_daddr sk.__sk_common.skc_daddr#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr#define inet_dport sk.__sk_common.skc_dport#define inet_num sk.__sk_common.skc_num// 通过系统调用connect,bind或setsocktopt可以设置下面的部分值。 __be32 inet_saddr; //外部IPV$地址 __s16 uc_ttl; //单播TTL __u16 cmsg_flags; __be16 inet_sport;//源端口,即发送方的端口 __u16 inet_id; struct ip_options_rcu __rcu *inet_opt; int rx_dst_ifindex; __u8 tos;//服务类型 __u8 min_ttl; __u8 mc_ttl; __u8 pmtudisc; //下面这些基本上都是socket的option __u8 recverr:1, is_icsk:1, freebind:1, hdrincl:1, mc_loop:1, transparent:1, mc_all:1, nodefrag:1; __u8 bind_address_no_port:1, defer_connect:1; __u8 rcv_tos; __u8 convert_csum; int uc_index; int mc_index; __be32 mc_addr; //组播地址 struct ip_mc_socklist __rcu *mc_list; struct inet_cork_full cork;};
struct sock结构体如下所示,是最基础的sock结构体,也是网络中最核心的结构体。
struct sock { struct sock_common __sk_common;#define sk_node __sk_common.skc_node#define sk_nulls_node __sk_common.skc_nulls_node#define sk_refcnt __sk_common.skc_refcnt#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping...... /* ===== cache line for TX ===== */ int sk_wmem_queued; refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; union { struct sk_buff *sk_send_head; struct rb_root tcp_rtx_queue; }; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; __u32 sk_dst_pending_confirm; u32 sk_pacing_status; /* see enum sk_pacing */ long sk_sndtimeo; struct timer_list sk_timer; __u32 sk_priority; __u32 sk_mark; u32 sk_pacing_rate; /* bytes per second */ u32 sk_max_pacing_rate; struct page_frag sk_frag; netdev_features_t sk_route_caps; netdev_features_t sk_route_nocaps; int sk_gso_type; unsigned int sk_gso_max_size; gfp_t sk_allocation; __u32 sk_txhash; ...... struct mem_cgroup *sk_memcg; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; struct rcu_head sk_rcu;};
看到这个结构体很自然想到struct socket结构体,如下所示,经常会有人问struct socket与struct sock有什么关联?
struct socket是通用BSD的socket定义,面向上层;struct sock面向下层。struct sock结构体定义非常大,根据使用的不同协议而挂入到struct socket。之所以从socket中分离出sock这样一个重要的结构,是因为socket是通用的套接字结构体,而sock与具体使用的协议相关。总而言之,把重要项放在与应用系统关系密切的结构struct socket里,其他(如struct sock)因为要占用大量的内存空间,而将这些结构变量分离出来放在另外一些结构中,再让两个结构体彼此关联。
/*
 * Generic socket object. It links to its protocol-level state through
 * the sk pointer.
 */
struct socket {
	socket_state		state;

	short			type;

	unsigned long		flags;

	struct socket_wq __rcu	*wq;

	struct file		*file;
	struct sock		*sk;	/* associated struct sock */
	const struct proto_ops	*ops;
};
既然提到了这四个结构体的继承关系,文章开头也提到了这种继承关系带来的便利:内核中网络相关的很多函数,参数往往都是struct sock,函数内部依照不同的业务逻辑,将struct sock转换为不同的业务结构。下面分析几个不同结构体之间互相转换的函数:
struct sock与struct inet_sock之间:
/*
 * inet_sk - view a struct sock pointer as a struct inet_sock pointer.
 * @sk: sock pointer to convert
 *
 * Returns the same address, retyped as struct inet_sock *. No checking
 * is done; the caller must know @sk really is the start of an inet_sock.
 */
static inline struct inet_sock *inet_sk(const struct sock *sk)
{
	const void *base = sk;

	return (struct inet_sock *)base;
}
struct sock与struct inet_connection_sock之间
/*
 * inet_csk - view a struct sock pointer as a struct inet_connection_sock
 *            pointer.
 * @sk: sock pointer to convert
 *
 * Returns the same address, retyped as struct inet_connection_sock *.
 * No checking is done; the caller must know @sk really is the start of
 * an inet_connection_sock.
 */
static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	const void *base = sk;

	return (struct inet_connection_sock *)base;
}
struct sock与struct tcp_sock之间
/*
 * tcp_sk - view a struct sock pointer as a struct tcp_sock pointer.
 * @sk: sock pointer to convert
 *
 * Returns the same address, retyped as struct tcp_sock *. No checking
 * is done; the caller must know @sk really is the start of a tcp_sock.
 */
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	const void *base = sk;

	return (struct tcp_sock *)base;
}
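总之:tcp_sock、inet_connection_sock、inet_sock这几个结构体都把上一层的结构体作为第一个成员嵌入(tcp_sock的第一个成员是inet_connection_sock,inet_connection_sock的第一个成员是inet_sock,inet_sock的第一个成员是struct sock),因此它们的起始地址最终都对应同一个struct sock;一层层继承下来,每一层都有自己的扩展。而且对于TCP套接字,内存是按照最外层结构体的大小sizeof(struct tcp_sock)申请的,所以强转也不会越界。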
原文地址:Linux内核网络基础-TCP相关的几个关键结构体-小记 - 网络协议栈 - 我爱内核网 - 构建全国最权威的内核技术交流分享论坛
留言与评论(共有 0 条评论) “” |