Linux内核网络基础-TCP相关的几个关键结构体-小记

Linux内核网络源码中经常看到的几个与TCP相关的关键结构体如下图所示,本文围绕下面这个图展开分析,分别介绍各个结构体以及关系


上图关系可以这样描述:

  • struct inet_sock继承自struct sock
  • struct inet_connection_sock继承自struct inet_sock
  • struct tcp_sock继承自struct inet_connection_sock

四个结构的关系具有面向对象的特征,通过层层继承,实现了类的复用;内核中网络相关的很多函数,参数往往都是struct sock, 函数内部依照不同的业务逻辑,将struct sock转换为不同的业务结构 ;这样做的好处:

  1. 简化接口的设计复杂度;
  2. 使用基类作为参数,类似于面向对象中的多态特性,能够有效的增强接口的稳定性、提升扩展性。

分别看一下这几个结构体如下:

struct tcp_sock的结构体成员大都是与tcp协议本身相关的关键字段,可以看到该结构体的第一个成员即为struct inet_connection_sock结构体,即struct tcp_sock从struct inet_connection_sock结构体的基础上继承而来,增加了一些tcp协议相关的字段,如滑动窗口协议,拥塞算法等一些TCP专有的属性。


更多linux内核视频教程文档资料免费领取后台私信【内核】自行获取.

Linux内核源码/内存调优/文件系统/进程管理/设备驱动/网络协议栈-学习视频教程-腾讯课堂


struct tcp_sock { struct inet_connection_sock inet_conn;//inet_connection结构体,而非指针 u16 tcp_header_len; /* Bytes of tcp header to send  */ u16 gso_segs; /* Max number of segs per GSO packet */ u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived     * sum(delta(rcv_nxt)), or how many bytes     * were acked.     */ u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn     * total number of segments in.     */ u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn     * total number of data segments in.     */  u32 rcv_nxt; /* What we want to receive next  */ u32 copied_seq; /* Head of yet unread data  */ u32 rcv_wup; /* rcv_nxt on last window update sent */  u32 snd_nxt; /* Next sequence we send  */ u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut     * The total number of segments sent.     */ u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut     * total number of data segments sent.     */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked     * sum(delta(snd_una)), or how many bytes     * were acked.     
*/  u32 snd_una; /* First byte we want an ack for */  u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ u32 last_oow_ack_time;  /* timestamp of last out-of-window ACK */ u32 tsoffset; /* timestamp offset */ struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ u32 snd_wl1; /* Sequence for window update  */ u32 snd_wnd; /* The window we expect to receive */ u32 max_window; /* Maximal window ever seen from peer */ u32 mss_cache; /* Cached effective mss, not including SACKS */....../* RTT measurement */ u64 tcp_mstamp; /* most recent packet received/sent */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ u32 mdev_us; /* medium deviation   */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max   */ u32 rtt_seq; /* sequence number to update rttvar */ struct  minmax rtt_min; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out  */ u32 max_packets_out;  /* max packets_out in last window */ u32 max_packets_seq;  /* right edge of max_packets_out flight */ u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits.   */ u8 keepalive_probes; /* num of allowed keep alive probes */ u32 reordering; /* Packet reordering metric.  */ u32 snd_up;  /* Urgent pointer  *//* *      Options received (usually on last packet, some only on SYN packets). 
*/ struct tcp_options_received rx_opt;/* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */  u32 snd_ssthresh; /* Slow start size threshold  */  u32 snd_cwnd; /* Sending congestion window  */ u32 snd_cwnd_cnt; /* Linear increase counter  */ u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; u32 prior_cwnd; /* cwnd right before starting loss recovery */ u32 prr_delivered; /* Number of newly delivered packets to     * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost;  /* Total data packets lost incl. rexmits */ u32 app_limited; /* limited until "delivered" reaches this val */ u64 first_tx_mstamp;  /* start of window send phase */ u64 delivered_mstamp; /* time we reached "delivered" */ u32 rate_delivered;    /* saved rate sample: packets delivered */ u32 rate_interval_us;  /* saved rate sample: time elapsed */  u32 rcv_wnd; /* Current receiver window  */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ u32 pushed_seq; /* Last pushed seq, required to talk to windows */ u32 lost_out; /* Lost packets   */ u32 sacked_out; /* SACK'd packets   */  ...... struct request_sock *fastopen_rsk; u32 *saved_syn;};

继续看struct inet_connection_sock结构体,如下所示,它的第一个域是struct inet_sock,即struct inet_connection_sock结构体从struct inet_sock的基础上继承而来,增加了一些面向连接需要的字段。

struct inet_connection_sock { struct inet_sock   icsk_inet;//INET协议族的sock结构 struct request_sock_queue icsk_accept_queue; //确定接收的队列 struct inet_bind_bucket   *icsk_bind_hash;//绑定的桶结构 unsigned long    icsk_timeout;//超时  struct timer_list   icsk_retransmit_timer;//没有ACK时的重发定时器  struct timer_list   icsk_delack_timer;//确定删掉的定时器 __u32     icsk_rto;//重发超时 __u32     icsk_pmtu_cookie;//最近的pmtu const struct tcp_congestion_ops *icsk_ca_ops;//拥挤情况下的处理函数表 const struct inet_connection_sock_af_ops *icsk_af_ops;//AF_INET指定的函数表 const struct tcp_ulp_ops  *icsk_ulp_ops; void     *icsk_ulp_data; unsigned int    (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8     icsk_ca_state:6,   //拥挤情况的处理状态      icsk_ca_setsockopt:1,  //重发数量      icsk_ca_dst_locked:1;   __u8     icsk_retransmits;  //重发数量 __u8     icsk_pending; __u8     icsk_backoff; __u8     icsk_syn_retries; __u8     icsk_probes_out; __u16     icsk_ext_hdr_len; struct {  __u8    pending;  /* ACK is pending      */  __u8    quick;  /* Scheduled number of quick acks    */  __u8    pingpong;  /* The session is interactive     */  __u8    blocked;  /* Delayed ACK was blocked by socket lock */  __u32    ato;   /* Predicted tick of soft clock    */  unsigned long   timeout;  /* Currently scheduled timeout     */  __u32    lrcvtime;  /* timestamp of last received data packet */  __u16    last_seg_size; /* Size of last incoming segment    */  __u16    rcv_mss;  /* MSS used for delayed ACK decisions    */  } icsk_ack; struct {  int    enabled;  /* Range of MTUs to search */  int    search_high;  int    search_low;  /* Information on the current probe. */  int    probe_size;  u32    probe_timestamp; } icsk_mtup; u32     icsk_user_timeout; u64     icsk_ca_priv[88 / sizeof(u64)];#define ICSK_CA_PRIV_SIZE      (11 * sizeof(u64))};

struct inet_sock结构体如下所示,可以看到该结构的第一个成员是struct sock结构体,即struct inet_sock是从struct sock的基础上继承而来,增加了一些INET域专有的属性,比如TTL,组播列表,IP地址,端口等。

struct inet_sock { struct sock  sk;  //注意:是sock结构体而不是指针#if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *pinet6;#endif /* Socket demultiplex comparisons on incoming packets. */#define inet_daddr  sk.__sk_common.skc_daddr#define inet_rcv_saddr  sk.__sk_common.skc_rcv_saddr#define inet_dport  sk.__sk_common.skc_dport#define inet_num  sk.__sk_common.skc_num//    通过系统调用connect,bind或setsocktopt可以设置下面的部分值。 __be32   inet_saddr; //外部IPV$地址 __s16   uc_ttl;   //单播TTL __u16   cmsg_flags; __be16   inet_sport;//源端口,即发送方的端口 __u16   inet_id; struct ip_options_rcu __rcu *inet_opt; int   rx_dst_ifindex; __u8   tos;//服务类型 __u8   min_ttl; __u8   mc_ttl; __u8   pmtudisc;  //下面这些基本上都是socket的option __u8   recverr:1,    is_icsk:1,    freebind:1,    hdrincl:1,    mc_loop:1,    transparent:1,    mc_all:1,    nodefrag:1; __u8   bind_address_no_port:1,    defer_connect:1; __u8   rcv_tos; __u8   convert_csum; int   uc_index; int   mc_index; __be32   mc_addr; //组播地址 struct ip_mc_socklist __rcu *mc_list; struct inet_cork_full cork;};

struct sock结构体如下所示,是最基础的sock结构体,也是网络中最核心的结构体。

struct sock { struct sock_common __sk_common;#define sk_node   __sk_common.skc_node#define sk_nulls_node  __sk_common.skc_nulls_node#define sk_refcnt  __sk_common.skc_refcnt#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping...... /* ===== cache line for TX ===== */ int   sk_wmem_queued; refcount_t  sk_wmem_alloc; unsigned long  sk_tsq_flags; union {  struct sk_buff *sk_send_head;  struct rb_root tcp_rtx_queue; }; struct sk_buff_head sk_write_queue; __s32   sk_peek_off; int   sk_write_pending; __u32   sk_dst_pending_confirm; u32   sk_pacing_status; /* see enum sk_pacing */ long   sk_sndtimeo; struct timer_list sk_timer; __u32   sk_priority; __u32   sk_mark; u32   sk_pacing_rate; /* bytes per second */ u32   sk_max_pacing_rate; struct page_frag sk_frag; netdev_features_t sk_route_caps; netdev_features_t sk_route_nocaps; int   sk_gso_type; unsigned int  sk_gso_max_size; gfp_t   sk_allocation; __u32   sk_txhash;  ...... struct mem_cgroup *sk_memcg; void   (*sk_state_change)(struct sock *sk); void   (*sk_data_ready)(struct sock *sk); void   (*sk_write_space)(struct sock *sk); void   (*sk_error_report)(struct sock *sk); int   (*sk_backlog_rcv)(struct sock *sk,        struct sk_buff *skb); void                    (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; struct rcu_head  sk_rcu;};

看到这个结构体很自然想到struct socket结构体,如下所示,经常会有人问struct socket与struct sock有什么关联?

struct socket是通用BSD的socket定义,面向上层,struct sock面向下层,struct sock结构体定义非常大,根据使用的不同协议而挂入到struct socket,之所以从socket中分离出sock一个这样重要的结构是因为socket是通用的套接字结构体,而sock与具体使用的协议相关。总而言之把重要项放在与应用系统关系密切的结构 struct socket里,其他(如struct sock)因为要占用大量的内存空间,而将这些结构变量分离出来放在另外一些结构中,再让两个结构体彼此关联。

/*
 * General BSD socket, the structure facing the upper layers.
 *
 * The protocol-dependent state lives in a separate struct sock, which this
 * structure reaches through the sk pointer.
 */
struct socket {
	socket_state		state;

	short			type;

	unsigned long		flags;

	struct socket_wq __rcu	*wq;

	struct file		*file;
	struct sock		*sk;	/* a pointer, not an embedded struct */
	const struct proto_ops	*ops;
};

既然提到了这四个结构体的继承关系,文章开头也提到了这种继承关系的便利,如内核中网络相关的很多函数,参数往往都是struct sock, 函数内部依照不同的业务逻辑,将struct sock转换为不同的业务结构, 下面分析几个不同结构体之间互相转换的函数:

struct sock与struct inet_sock之间:

/*
 * inet_sk - view a struct sock pointer as a struct inet_sock pointer.
 * @sk: the socket
 *
 * This is a plain pointer cast with no checking. It relies on struct sock
 * being the first member of struct inet_sock, so both start at the same
 * address. The const qualifier of @sk is dropped by the cast.
 */
static inline struct inet_sock *inet_sk(const struct sock *sk)
{
	struct inet_sock *inet = (struct inet_sock *)sk;

	return inet;
}

struct sock与struct inet_connection_sock之间

/*
 * inet_csk - view a struct sock pointer as a struct inet_connection_sock
 * pointer.
 * @sk: the socket
 *
 * This is a plain pointer cast with no checking. It relies on the chain of
 * first members (inet_connection_sock -> inet_sock -> sock), which makes all
 * three start at the same address. The const qualifier of @sk is dropped by
 * the cast.
 */
static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	struct inet_connection_sock *icsk = (struct inet_connection_sock *)sk;

	return icsk;
}

struct sock与struct tcp_sock之间

/*
 * tcp_sk - view a struct sock pointer as a struct tcp_sock pointer.
 * @sk: the socket
 *
 * This is a plain pointer cast with no checking. It relies on struct
 * inet_connection_sock being the first member of struct tcp_sock, and on
 * that structure in turn starting with the struct sock. The const qualifier
 * of @sk is dropped by the cast.
 */
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	struct tcp_sock *tp = (struct tcp_sock *)sk;

	return tp;
}

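总之:tcp_sock,inet_connection_sock,inet_sock这几个结构体的第一个成员都是各自的“父”结构体(tcp_sock的第一个成员是inet_connection_sock,inet_connection_sock的第一个成员是inet_sock,inet_sock的第一个成员是sock),因此它们的起始地址都与最内层的struct sock相同;一层层继承下来,每一层都有自己的扩展,而且对于TCP套接字,这些结构体在申请大小的时候都是按照最大值sizeof(struct tcp_sock)申请的,所以强转也不会越界。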


原文地址:Linux内核网络基础-TCP相关的几个关键结构体-小记 - 网络协议栈 - 我爱内核网 - 构建全国最权威的内核技术交流分享论坛

发表评论
留言与评论(共有 0 条评论) “”
   
验证码:

相关文章

推荐文章