socket信号处理

socket I/O事件处理

以TCP socket为例
kernel: 3.10.0-514.16.1.el7.x86_64

socket IO处理函数

1
2
3
4
5
6
7
8
9
10
11
/*
 * Excerpt of struct sock (kernel 3.10): only the wait queue and the
 * event callbacks discussed in this article are shown.
 */
struct sock {
...
struct socket_wq __rcu *sk_wq; /* wait queue and async-notification queue */
...
void (*sk_state_change)(struct sock *sk);          /* TCP state changed */
void (*sk_data_ready)(struct sock *sk, int bytes); /* data is readable */
void (*sk_write_space)(struct sock *sk);           /* socket is writable */
void (*sk_error_report)(struct sock *sk);          /* error pending on the socket */
int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb);
...
};
  • sk_wq

    含有等待队列用来睡眠唤醒程序使用,异步队列异步socket使用

  • sk_state_change

    从SYN_SENT或者SYN_RECV到ESTABLISHED(下文简称ES)状态,从ES到CLOSE_WAIT状态,当协议栈遇到这些状态变更事件时会调用

  • sk_data_ready

  • sk_write_space

    sock有数据可读和可写时候调用

  • sk_error_report

    sock上存在错误时调用,比如收到RST包

处理函数初始化

1
2
3
4
5
6
7
----------------START-------------------------
0xffffffff81557ed0 : sock_init_data+0x0/0x220 [kernel]
0xffffffff815ec9f4 : inet_create+0x154/0x360 [kernel]
0xffffffff81555200 : __sock_create+0x110/0x260 [kernel]
0xffffffff81556521 : SyS_socket+0x61/0xf0 [kernel]
0xffffffff81697189 : system_call_fastpath+0x16/0x1b [kernel]
----------------END-------------------------

步骤1:通用初始化
socket->SyS_socket->__sock_create->inet_create->sock_init_data

1
2
3
4
5
6
7
sock_init_data:

sk->sk_state_change = sock_def_wakeup;
sk->sk_data_ready = sock_def_readable;
sk->sk_write_space = sock_def_write_space;
sk->sk_error_report = sock_def_error_report;
sk->sk_destruct = sock_def_destruct;

步骤2:对于TCP socket,特有更新

1
2
3
4
5
6
0xffffffff815be170 : tcp_init_sock+0x0/0x200 [kernel]
0xffffffff815d4212 : tcp_v4_init_sock+0x12/0x30 [kernel]
0xffffffff815eca71 : inet_create+0x1d1/0x360 [kernel]
0xffffffff81555200 : __sock_create+0x110/0x260 [kernel]
0xffffffff81556521 : SyS_socket+0x61/0xf0 [kernel]
0xffffffff81697189 : system_call_fastpath+0x16/0x1b [kernel]

inet_create->tcp_v4_init_sock->tcp_init_sock

1
2
tcp_init_sock:
sk->sk_write_space = sk_stream_write_space;

sock_def_wakeup

信号触发时机

无论是作为客户端还是服务端
socket TCP协议栈进入到ES或者CLOSE_WAIT状态时,会触发sock_def_wakeup通知用户态进程TCP状态变更
具体来讲:sock_def_wakeup可以唤醒阻塞在connect或者accept上的进程,或者在收到对端的结束信号FIN后唤醒进程,使其正常结束
(此时recv返回值为0)

作为客户端主动连接对方获取资源

访问方式:curl -v http://180.97.33.107

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
[root@localhost socketdemo]# curl -v http://180.97.33.107
* About to connect() to 180.97.33.107 port 80 (#0)
* Trying 180.97.33.107...
* Connected to 180.97.33.107 (180.97.33.107) port 80 (#0)
> GET / HTTP/1.1
> User-Agent: curl/7.29.0
> Host: 180.97.33.107
> Accept: */*
>
< HTTP/1.1 200 OK
< Server: bfe/1.0.8.18
< Date: Fri, 23 Jun 2017 10:02:40 GMT
< Content-Type: text/html
< Content-Length: 2381
< Last-Modified: Mon, 23 Jan 2017 13:28:20 GMT
< Connection: Keep-Alive
< ETag: "588604f4-94d"
< Cache-Control: private, no-cache, no-store, proxy-revalidate, no-transform
< Pragma: no-cache
< Set-Cookie: BDORZ=27315; max-age=86400; domain=.baidu.com; path=/
< Accept-Ranges: bytes

systemtap探测sock_def_wakeup被调用情况,并打出调用栈,如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
情况1:收到对端回复的SYN+ACK后,客户端发出ACK,connect结束,从SYN_SENT跳转到ES状态并唤醒用户态进程,此时连接已经建立成功,可以发送数据了
sock_def_wakeup:[2017/6/23,18:00:59]local=10.0.2.15:60162,remote=180.97.33.107:80 state:ESTABLISHED
0xffffffff81558150 : sock_def_wakeup+0x0/0x40 [kernel]
0xffffffff815cbc09 : tcp_finish_connect+0xc9/0x120 [kernel]
0xffffffff815cc297 : tcp_rcv_state_process+0x637/0xf20 [kernel]
0xffffffff815d5ffb : tcp_v4_do_rcv+0x17b/0x340 [kernel]
0xffffffff815d76d9 : tcp_v4_rcv+0x799/0x9a0 [kernel]
0xffffffff815b1094 : ip_local_deliver_finish+0xb4/0x1f0 [kernel]
0xffffffff815b1379 : ip_local_deliver+0x59/0xd0 [kernel]
0xffffffff815b0d1a : ip_rcv_finish+0x8a/0x350 [kernel]
0xffffffff815b16a6 : ip_rcv+0x2b6/0x410 [kernel]
0xffffffff815700d2 : __netif_receive_skb_core+0x582/0x800 [kernel]
0xffffffff81570368 : __netif_receive_skb+0x18/0x60 [kernel]
0xffffffff815703f0 : netif_receive_skb_internal+0x40/0xc0 [kernel]
0xffffffff81571578 : napi_gro_receive+0xd8/0x130 [kernel]
0xffffffffa00472fc [e1000]
WARNING: Missing unwind data for a module, rerun with 'stap -d e1000'

情况2:访问的服务端主动关闭连接,则客户端从ES进入CLOSE_WAIT,通知用户态进程
sock_def_wakeup:[2017/6/23,18:00:59]local=10.0.2.15:60162,remote=180.97.33.107:80 state:CLOSE_WAIT
0xffffffff81558150 : sock_def_wakeup+0x0/0x40 [kernel]
0xffffffff815c5ca9 : tcp_fin+0x169/0x1e0 [kernel]
0xffffffff815c84f8 : tcp_data_queue+0x7f8/0xdd0 [kernel]
0xffffffff815cb4a7 : tcp_rcv_established+0x217/0x760 [kernel]
0xffffffff815d5f8a : tcp_v4_do_rcv+0x10a/0x340 [kernel]
0xffffffff815d76d9 : tcp_v4_rcv+0x799/0x9a0 [kernel]
0xffffffff815b1094 : ip_local_deliver_finish+0xb4/0x1f0 [kernel]
0xffffffff815b1379 : ip_local_deliver+0x59/0xd0 [kernel]
0xffffffff815b0d1a : ip_rcv_finish+0x8a/0x350 [kernel]
0xffffffff815b16a6 : ip_rcv+0x2b6/0x410 [kernel]
0xffffffff815700d2 : __netif_receive_skb_core+0x582/0x800 [kernel]
0xffffffff81570368 : __netif_receive_skb+0x18/0x60 [kernel]
0xffffffff815703f0 : netif_receive_skb_internal+0x40/0xc0 [kernel]
0xffffffff81571578 : napi_gro_receive+0xd8/0x130 [kernel]
0xffffffffa00472fc [e1000]

作为服务端

结论

作为服务端堵塞在accept时,收到客户端请求,三次握手建立完成后,服务端状态进入ES状态,会调用sock_def_wakeup通知用户态进程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
sock_def_wakeup:[2017/6/26,10:47:00]local=192.168.55.178:8080,remote=192.168.55.165:50536 state:ESTABLISHED
0xffffffff81558150 : sock_def_wakeup+0x0/0x40 [kernel]
0xffffffff815cc3bf : tcp_rcv_state_process+0x75f/0xf20 [kernel]
0xffffffff815d7dde : tcp_child_process+0x3e/0x130 [kernel]
0xffffffff815d60d5 : tcp_v4_do_rcv+0x255/0x340 [kernel]
0xffffffff815d76d9 : tcp_v4_rcv+0x799/0x9a0 [kernel]
0xffffffff815b1094 : ip_local_deliver_finish+0xb4/0x1f0 [kernel]
0xffffffff815b1379 : ip_local_deliver+0x59/0xd0 [kernel]
0xffffffff815b0d1a : ip_rcv_finish+0x8a/0x350 [kernel]
0xffffffff815b16a6 : ip_rcv+0x2b6/0x410 [kernel]
0xffffffff815700d2 : __netif_receive_skb_core+0x582/0x800 [kernel]
0xffffffff81570368 : __netif_receive_skb+0x18/0x60 [kernel]
0xffffffff815703f0 : netif_receive_skb_internal+0x40/0xc0 [kernel]
0xffffffff81571578 : napi_gro_receive+0xd8/0x130 [kernel]
0xffffffffa00a72fc [e1000]

服务端测试程序如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
 //#include <sys/types.h>
#include <sys/socket.h>
//#include <sys/wait.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <netinet/in.h>
#include <fcntl.h>
#include <unistd.h>

#define SERVPORT 8080
#define BACKLOG 10
#define MAX_CONNECTED_NO 10
#define MAXDATASIZE 100

/*
 * Minimal blocking TCP server used to observe socket wake-up events.
 *
 * Binds to SERVPORT on all interfaces, sets O_ASYNC on the listening
 * socket, accepts a single client, prints the peer port and the first
 * chunk of data received, then exits.
 *
 * Returns 0 on success and 1 when a socket call fails.
 */
int main(void)
{
    struct sockaddr_in server_sockaddr, client_sockaddr;
    socklen_t sin_size;
    ssize_t recvbytes;
    int flags;
    int sockfd, client_fd;
    char buf[MAXDATASIZE];

    if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
        perror("socket");
        return 1;
    }
    printf("socket success!,sockfd=%d\n", sockfd);

    /* Zero the whole address first so sin_zero and any padding are clean. */
    memset(&server_sockaddr, 0, sizeof server_sockaddr);
    server_sockaddr.sin_family = AF_INET;
    server_sockaddr.sin_port = htons(SERVPORT);
    server_sockaddr.sin_addr.s_addr = htonl(INADDR_ANY);

    if (bind(sockfd, (struct sockaddr *)&server_sockaddr,
             sizeof server_sockaddr) == -1) {
        perror("bind");
        close(sockfd);
        return 1;
    }
    printf("bind success!\n");

    if (listen(sockfd, BACKLOG) == -1) {
        perror("listen");
        close(sockfd);
        return 1;
    }
    printf("listening....\n");

    /*
     * Read the current status flags with F_GETFL before adding O_ASYNC.
     * The original issued F_SETFL with 0 here, which reset the flags
     * instead of reading them. Failure is reported but not fatal.
     */
    flags = fcntl(sockfd, F_GETFL, 0);
    if (flags < 0)
        perror("fcntl F_GETFL");
    else if (fcntl(sockfd, F_SETFL, flags | O_ASYNC) < 0)
        perror("fcntl F_SETFL");

    /* Serve exactly one connection, as the original program did. */
    sin_size = sizeof client_sockaddr;
    client_fd = accept(sockfd, (struct sockaddr *)&client_sockaddr, &sin_size);
    if (client_fd == -1) {
        perror("accept");
        close(sockfd);
        return 1;
    }

    /* sin_port is in network byte order; convert before printing. */
    printf("%d\n", ntohs(client_sockaddr.sin_port));

    /* Leave room for the terminator so buf can be printed with %s. */
    recvbytes = recv(client_fd, buf, sizeof buf - 1, 0);
    if (recvbytes == -1) {
        perror("recv");
        close(client_fd);
        close(sockfd);
        return 1;
    }
    buf[recvbytes] = '\0';
    printf("recvbytes: %zd %s \n ", recvbytes, buf);

    close(client_fd);
    close(sockfd);
    return 0;
}

运行过程:

1
2
3
4
5
6
[root@localhost socketdemo]# gcc server.c -o server
[root@localhost socketdemo]#
[root@localhost socketdemo]# ./server
socket success!,sockfd=3
bind success!
listening....

服务端stap探测脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
[root@localhost stp]# cat socketsingtal.stp
%{
#include <linux/tcp.h>
#include<linux/rtc.h>
#include <net/tcp.h>

/*
 * Printable TCP state names, indexed by the numeric socket state
 * (sk->sk_state). Index 0 holds the placeholder string "NULL"
 * (presumably because no TCP state has value 0 - TODO confirm).
 */
static const char tcp_state_array[][16] = {
"NULL",
"ESTABLISHED",
"SYN_SENT",
"SYN_RECV",
"FIN_WAIT1",
"FIN_WAIT2",
"TIME_WAIT",
"CLOSE",
"CLOSE_WAIT",
"LAST_ACK",
"LISTEN",
"CLOSING"
};
%}

/*
 * Return the current time of day formatted as "HH:MM:SS".
 * A fixed offset of +8 hours is added to the seconds value before it is
 * broken down into fields.
 */
function get_short_time:string()
%{
    struct timeval tv;
    struct rtc_time tm;
    unsigned long time;

    do_gettimeofday(&tv);
    time = tv.tv_sec + 8 * 3600;    /* fixed +8h offset */
    rtc_time_to_tm(time, &tm);

    /* Bound the write to the size of the string return buffer. */
    snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%02d:%02d:%02d",
             tm.tm_hour, tm.tm_min, tm.tm_sec);
%}

/*
 * Return the current date and time formatted as "YYYY/M/D,HH:MM:SS".
 * A fixed offset of +8 hours is added to the seconds value before it is
 * broken down into fields.
 */
function get_full_time:string()
%{
    struct timeval tv;
    struct rtc_time tm;
    unsigned long time;

    do_gettimeofday(&tv);
    time = tv.tv_sec + 8 * 3600;    /* fixed +8h offset */
    rtc_time_to_tm(time, &tm);

    /* Bound the write to the size of the string return buffer. */
    snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%d/%d/%d,%02d:%02d:%02d",
             tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
             tm.tm_hour, tm.tm_min, tm.tm_sec);
%}

/*
 * Return, in milliseconds, the difference between tcp_time_stamp and the
 * estab_t value stored in the per-socket stap_info record.
 *
 * NOTE(review): struct stap_info is not declared in the visible part of
 * this script, and nothing visible here stores one in sk->sk_protinfo.
 * This function is not called by the probe below - confirm it is still
 * needed before using it.
 */
function get_conn_lifetime:long (sk:long)
%{
struct sock *sk = (struct sock *)STAP_ARG_sk;
struct stap_info *info = sk->sk_protinfo;
STAP_RETVALUE = jiffies_to_msecs(tcp_time_stamp - info->estab_t);
%}

/*
 * Return the distance between the socket's snd_nxt and the isn value
 * stored in the per-socket stap_info record, minus one when non-zero
 * (presumably to discount the SYN sequence number - TODO confirm).
 *
 * NOTE(review): struct stap_info is not declared in the visible part of
 * this script, and nothing visible here stores one in sk->sk_protinfo.
 * This function is not called by the probe below.
 */
function get_conn_data:long (sk:long)
%{
struct sock *sk = (struct sock *)STAP_ARG_sk;
struct tcp_sock *tp = tcp_sk(sk);
struct stap_info *info = sk->sk_protinfo;
u32 len = tp->snd_nxt - info->isn;

STAP_RETVALUE = len ? len - 1 : len;
%}

/*
 * Return the http_filter field of the per-socket stap_info record.
 *
 * NOTE(review): struct stap_info is not declared in the visible part of
 * this script, and nothing visible here stores one in sk->sk_protinfo.
 * This function is not called by the probe below.
 */
function filter_http_transtime:long (sk:long)
%{
struct sock *sk = (struct sock *)STAP_ARG_sk;
struct stap_info *info = sk->sk_protinfo;

STAP_RETVALUE = info->http_filter;
%}

// Format both endpoints of a socket as "local=IP:PORT,remote=IP:PORT".
function get_socket_addr:string (sk:long)
{
    local_ep = sprintf("%s:%d", ip_ntop(htonl(tcpmib_local_addr(sk))), tcpmib_local_port(sk))
    remote_ep = sprintf("%s:%d", ip_ntop(htonl(tcpmib_remote_addr(sk))), tcpmib_remote_port(sk))

    return "local=" . local_ep . ",remote=" . remote_ep
}


/*
 * Return the printable name of the socket's current state
 * (sk->sk_state), or "UNKNOWN" when the value has no entry in
 * tcp_state_array.
 */
function get_socket_state:string (sk:long)
%{
    struct sock *sk = (struct sock *)STAP_ARG_sk;
    unsigned int state = sk->sk_state;
    const unsigned int nstates =
        sizeof(tcp_state_array) / sizeof(tcp_state_array[0]);

    /* Never index past the table: the probe may see unexpected states. */
    snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%s",
             state < nstates ? tcp_state_array[state] : "UNKNOWN");
%}

/*
 * Return "<sk_wmem_queued>:<sk_sndbuf>" for the given socket, i.e. the
 * two send-side accounting fields read directly from struct sock.
 */
function get_socket_sk_sndbuf:string(sk:long)
%{
    struct sock *sk = (struct sock *)STAP_ARG_sk;

    /* Bound the write to the size of the string return buffer. */
    snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%d:%d",
             sk->sk_wmem_queued, sk->sk_sndbuf);
%}



/*
 * Translate a numeric TCP state into its printable name, or "UNKNOWN"
 * when the value is negative or has no entry in tcp_state_array.
 */
function socket_state_num2str:string (state:long)
%{
    const int64_t nstates =
        sizeof(tcp_state_array) / sizeof(tcp_state_array[0]);
    int64_t state = STAP_ARG_state;

    /* The argument comes from script code, so validate it before indexing. */
    snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%s",
             (state >= 0 && state < nstates) ? tcp_state_array[state] : "UNKNOWN");
%}

// Return 1 when the socket's local port is 22 (SSH), otherwise 0.
function sshfilter:long(sk:long)
{
    return (tcpmib_local_port(sk) == 22) ? 1 : 0
}

// On every call to sock_def_wakeup, print a timestamp, the socket's
// endpoints and state, then the kernel backtrace. Sockets whose local
// port is 22 are skipped so the SSH session does not flood the output.
probe kernel.function("sock_def_wakeup").call {
    if (!sshfilter($sk)) {
        printf("sock_def_wakeup:[%s]%s state:%s\n",get_full_time(),get_socket_addr($sk),get_socket_state($sk))
        print_backtrace()
    }
}

运行过程:

1
[root@localhost stp]# stap -g socketsingtal.stp

sock_def_wakeup状态改变事件实现分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
 * Report whether at least one entry is queued on the socket's wait queue.
 * Returns false when wq is NULL.
 */
static inline bool wq_has_sleeper(struct socket_wq *wq)
{
/*
 * Memory barrier issued before the queue is inspected; used for
 * synchronization with the waiter side (its counterpart is not analyzed
 * in this article).
 */
smp_mb();
return wq && waitqueue_active(&wq->wait);
}

/*
 * State-change callback: wake every task sleeping on the socket's wait
 * queue. The wait queue pointer is read under rcu_read_lock().
 */
static void sock_def_wakeup(struct sock *sk)
{
struct socket_wq *wq;

rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
/* If any task is sleeping on the wait queue, wake all of them. */
if (wq_has_sleeper(wq))
wake_up_interruptible_all(&wq->wait);
rcu_read_unlock();
}

唤醒进程的实现如下:wake_up_interruptible_all -> __wake_up -> __wake_up_common
比较特殊的一点是,__wake_up的nr_exclusive为0时会唤醒所有进程。
另外说明:nr_exclusive为1是为了避免惊群而设置的,此时最多只唤醒一个独占(WQ_FLAG_EXCLUSIVE)等待进程

1
2
3
4
5
6
7
8
9
10
11
12
/* Wake waiters in TASK_INTERRUPTIBLE state; nr_exclusive is passed as 0. */
#define wake_up_interruptible_all(x)	__wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)

/*
 * Wake tasks queued on q.
 *
 * q:            wait queue head
 * mode:         task state mask handed to each entry's wake callback
 * nr_exclusive: limit on exclusive waiters to wake (0 means no limit)
 * key:          opaque value handed to each entry's wake callback
 *
 * The queue is walked by __wake_up_common() while q->lock is held
 * (taken with spin_lock_irqsave).
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags;

spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(__wake_up);

__wake_up_common 参数nr_exclusive为0时候,break不可能被执行

1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * Walk q->task_list and invoke each entry's wake callback (curr->func).
 *
 * The walk stops early only when a callback returned non-zero for an
 * entry flagged WQ_FLAG_EXCLUSIVE and the decrement brought nr_exclusive
 * to zero. When nr_exclusive starts at 0 the first decrement yields -1,
 * so the break is not taken and every entry is visited.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next;

/* The _safe iterator is used because a callback may unlink curr. */
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
/* Read the flags before the callback runs. */
unsigned flags = curr->flags;

if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}

__wake_up_common 中curr->func是什么呢?

是autoremove_wake_function。socket进入睡眠等待时,会通过DEFINE_WAIT把等待项的唤醒回调func设置为autoremove_wake_function

1
2
3
4
5
6
7
8
/*
 * Declare a wait-queue entry called `name` that belongs to the current
 * task and uses `function` as its wake callback.
 */
#define DEFINE_WAIT_FUNC(name, function)                    \
    wait_queue_t name = {                                   \
        .private   = current,                               \
        .func      = function,                              \
        .task_list = LIST_HEAD_INIT((name).task_list),      \
    }

/* Wait-queue entry whose wake callback is autoremove_wake_function. */
#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)

autoremove_wake_function 干了什么?
1:default_wake_function ->try_to_wake_up
把进程状态设置为TASK_RUNNING,并把其插入CPU运行队列,从而唤醒睡眠进程
2:待进程状态唤醒后,把等待事件从等待队列中删除

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 * Wake callback installed by DEFINE_WAIT: wake the task through
 * default_wake_function() and, if that reports success, unlink the entry
 * from its wait queue. Returns the result of default_wake_function().
 */
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int ret = default_wake_function(wait, mode, sync, key);

if (ret)
list_del_init(&wait->task_list);/* remove the entry from the wait queue */
return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);

/*
 * Wake the task stored in curr->private by handing it to
 * try_to_wake_up(); the key argument is not used. Returns whatever
 * try_to_wake_up() returns.
 */
int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
void *key)
{
/*
 * Author's note: try_to_wake_up() sets the task state to TASK_RUNNING
 * and puts the task on a CPU run queue, which wakes the sleeping task.
 */
return try_to_wake_up(curr->private, mode, wake_flags);
}
EXPORT_SYMBOL(default_wake_function);

sock_def_readable

sock_def_readable调用时机,sock数据可读会调用此函数唤醒进程

作为服务端

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
//收到syn包后
sock_def_wakeup:[2017/6/26,11:52:07]local=0.0.0.0:8080,remote=0.0.0.0:0 state:LISTEN
0xffffffff81558220 : sock_def_readable+0x0/0x70 [kernel]
0xffffffff815d7eb8 : tcp_child_process+0x118/0x130 [kernel]
0xffffffff815d60d5 : tcp_v4_do_rcv+0x255/0x340 [kernel]
0xffffffff815d76d9 : tcp_v4_rcv+0x799/0x9a0 [kernel]
0xffffffff815b1094 : ip_local_deliver_finish+0xb4/0x1f0 [kernel]
0xffffffff815b1379 : ip_local_deliver+0x59/0xd0 [kernel]
0xffffffff815b0d1a : ip_rcv_finish+0x8a/0x350 [kernel]
0xffffffff815b16a6 : ip_rcv+0x2b6/0x410 [kernel]
0xffffffff815700d2 : __netif_receive_skb_core+0x582/0x800 [kernel]
0xffffffff81570368 : __netif_receive_skb+0x18/0x60 [kernel]
0xffffffff815703f0 : netif_receive_skb_internal+0x40/0xc0 [kernel]
0xffffffff81571578 : napi_gro_receive+0xd8/0x130 [kernel]
0xffffffffa00a72fc [e1000]
//收到数据包后
sock_def_wakeup:
[2017/6/26,11:52:07]local=192.168.55.178:8080,remote=192.168.55.165:50843 state:ESTABLISHED
0xffffffff81558220 : sock_def_readable+0x0/0x70 [kernel]
0xffffffff815c8197 : tcp_data_queue+0x497/0xdd0 [kernel]
0xffffffff815cb4a7 : tcp_rcv_established+0x217/0x760 [kernel]
0xffffffff815d5f8a : tcp_v4_do_rcv+0x10a/0x340 [kernel]
0xffffffff815d76d9 : tcp_v4_rcv+0x799/0x9a0 [kernel]
0xffffffff815b1094 : ip_local_deliver_finish+0xb4/0x1f0 [kernel]
0xffffffff815b1379 : ip_local_deliver+0x59/0xd0 [kernel]
0xffffffff815b0d1a : ip_rcv_finish+0x8a/0x350 [kernel]
0xffffffff815b16a6 : ip_rcv+0x2b6/0x410 [kernel]
0xffffffff815700d2 : __netif_receive_skb_core+0x582/0x800 [kernel]
0xffffffff81570368 : __netif_receive_skb+0x18/0x60 [kernel]
0xffffffff815703f0 : netif_receive_skb_internal+0x40/0xc0 [kernel]
0xffffffff81571578 : napi_gro_receive+0xd8/0x130 [kernel]
0xffffffffa00a72fc [e1000]
[root@localhost stp]#

作为客户端
收到ES状态服务端回复的数据在tcp_data_queue/tcp_rcv_established中调用
具体实现后续分析。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
sock_def_wakeup:[2017/6/26,13:44:35]local=10.0.2.15:43188,remote=180.97.33.107:80 state:ESTABLISHED
0xffffffff81558220 : sock_def_readable+0x0/0x70 [kernel]
0xffffffff815cb6c3 : tcp_rcv_established+0x433/0x760 [kernel]
0xffffffff815d5f8a : tcp_v4_do_rcv+0x10a/0x340 [kernel]
0xffffffff815d76d9 : tcp_v4_rcv+0x799/0x9a0 [kernel]
0xffffffff815b1094 : ip_local_deliver_finish+0xb4/0x1f0 [kernel]
0xffffffff815b1379 : ip_local_deliver+0x59/0xd0 [kernel]
0xffffffff815b0d1a : ip_rcv_finish+0x8a/0x350 [kernel]
0xffffffff815b16a6 : ip_rcv+0x2b6/0x410 [kernel]
0xffffffff815700d2 : __netif_receive_skb_core+0x582/0x800 [kernel]
0xffffffff81570368 : __netif_receive_skb+0x18/0x60 [kernel]
0xffffffff815703f0 : netif_receive_skb_internal+0x40/0xc0 [kernel]
0xffffffff81571578 : napi_gro_receive+0xd8/0x130 [kernel]
0xffffffffa00a72fc [e1000]
sock_def_wakeup:[2017/6/26,13:44:35]local=10.0.2.15:43188,remote=180.97.33.107:80 state:ESTABLISHED
0xffffffff81558220 : sock_def_readable+0x0/0x70 [kernel]
0xffffffff815c8197 : tcp_data_queue+0x497/0xdd0 [kernel]
0xffffffff815cb4a7 : tcp_rcv_established+0x217/0x760 [kernel]
0xffffffff815d5f8a : tcp_v4_do_rcv+0x10a/0x340 [kernel]
0xffffffff815d76d9 : tcp_v4_rcv+0x799/0x9a0 [kernel]
0xffffffff815b1094 : ip_local_deliver_finish+0xb4/0x1f0 [kernel]
0xffffffff815b1379 : ip_local_deliver+0x59/0xd0 [kernel]
0xffffffff815b0d1a : ip_rcv_finish+0x8a/0x350 [kernel]
0xffffffff815b16a6 : ip_rcv+0x2b6/0x410 [kernel]
0xffffffff815700d2 : __netif_receive_skb_core+0x582/0x800 [kernel]
0xffffffff81570368 : __netif_receive_skb+0x18/0x60 [kernel]
0xffffffff815703f0 : netif_receive_skb_internal+0x40/0xc0 [kernel]
0xffffffff81571578 : napi_gro_receive+0xd8/0x130 [kernel]
0xffffffffa00a72fc [e1000]
sock_def_wakeup:[2017/6/26,13:44:35]local=10.0.2.15:43188,remote=180.97.33.107:80 state:ESTABLISHED
0xffffffff81558220 : sock_def_readable+0x0/0x70 [kernel]
0xffffffff815cb6c3 : tcp_rcv_established+0x433/0x760 [kernel]
0xffffffff815d5f8a : tcp_v4_do_rcv+0x10a/0x340 [kernel]
0xffffffff815d76d9 : tcp_v4_rcv+0x799/0x9a0 [kernel]
0xffffffff815b1094 : ip_local_deliver_finish+0xb4/0x1f0 [kernel]
0xffffffff815b1379 : ip_local_deliver+0x59/0xd0 [kernel]
0xffffffff815b0d1a : ip_rcv_finish+0x8a/0x350 [kernel]
0xffffffff815b16a6 : ip_rcv+0x2b6/0x410 [kernel]
0xffffffff815700d2 : __netif_receive_skb_core+0x582/0x800 [kernel]
0xffffffff81570368 : __netif_receive_skb+0x18/0x60 [kernel]
0xffffffff815703f0 : netif_receive_skb_internal+0x40/0xc0 [kernel]
0xffffffff81571578 : napi_gro_receive+0xd8/0x130 [kernel]
0xffffffffa00a72fc [e1000]

服务端主动关闭连接,作为客户端TCP状态机处于CLOSE_WAIT
sock_def_wakeup:[2017/6/26,13:44:35]local=10.0.2.15:43188,remote=180.97.33.107:80 state:CLOSE_WAIT
0xffffffff81558220 : sock_def_readable+0x0/0x70 [kernel]
0xffffffff815c8197 : tcp_data_queue+0x497/0xdd0 [kernel]
0xffffffff815cb4a7 : tcp_rcv_established+0x217/0x760 [kernel]
0xffffffff815d5f8a : tcp_v4_do_rcv+0x10a/0x340 [kernel]
0xffffffff815d76d9 : tcp_v4_rcv+0x799/0x9a0 [kernel]
0xffffffff815b1094 : ip_local_deliver_finish+0xb4/0x1f0 [kernel]
0xffffffff815b1379 : ip_local_deliver+0x59/0xd0 [kernel]
0xffffffff815b0d1a : ip_rcv_finish+0x8a/0x350 [kernel]
0xffffffff815b16a6 : ip_rcv+0x2b6/0x410 [kernel]
0xffffffff815700d2 : __netif_receive_skb_core+0x582/0x800 [kernel]
0xffffffff81570368 : __netif_receive_skb+0x18/0x60 [kernel]
0xffffffff815703f0 : netif_receive_skb_internal+0x40/0xc0 [kernel]
0xffffffff81571578 : napi_gro_receive+0xd8/0x130 [kernel]
0xffffffffa00a72fc [e1000]

sock_def_readable

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/*
 * Data-ready callback: notify readers that the socket has data.
 * The len argument is not used in this body.
 */
static void sock_def_readable(struct sock *sk, int len)
{
struct socket_wq *wq;

rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (wq_has_sleeper(wq))
/* Notify tasks blocked on the wait queue (read-side poll events). */
wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
POLLRDNORM | POLLRDBAND);
/*
 * Notify the asynchronous-notification queue. This call is outside the
 * if above, so it runs whether or not a sleeper was found.
 */
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}

/*
 * Wake TASK_INTERRUPTIBLE waiters on x with nr_exclusive = 1, passing the
 * poll event mask m to the wake callbacks as the key.
 */
#define wake_up_interruptible_sync_poll(x, m)\
__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))


/*
 * Wake tasks queued on q, passing WF_SYNC to the wake callbacks.
 *
 * q:            wait queue head; a NULL q makes the call a no-op
 * mode:         task state mask handed to each entry's wake callback
 * nr_exclusive: limit on exclusive waiters to wake
 * key:          opaque value handed to each entry's wake callback
 */
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags;
int wake_flags = WF_SYNC;

if (unlikely(!q))
return;

/* WF_SYNC is dropped when nr_exclusive is 0. */
if (unlikely(!nr_exclusive))
wake_flags = 0;

spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);

wake_up_interruptible_sync_poll传递的nr_exclusive为1,表示最多只唤醒一个设置了WQ_FLAG_EXCLUSIVE的独占等待进程(非独占的等待进程仍会全部被唤醒)。

sk_stream_write_space

sk->sk_write_space的实例为sock_def_write_space()。
如果socket是SOCK_STREAM类型的,那么函数指针的值会更新为sk_stream_write_space()。
sk_stream_write_space()在TCP中的调用路径为:
tcp_rcv_established / tcp_rcv_state_process
tcp_data_snd_check
tcp_check_space
tcp_new_space

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Write-space callback for stream sockets: when the socket is writeable
 * again and is still attached to a struct socket, clear SOCK_NOSPACE and
 * notify both blocked writers and asynchronous listeners.
 */
void sk_stream_write_space(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
struct socket_wq *wq;

/*
 * Author's notes on sk_stream_is_writeable() (its body is not shown
 * here - TODO confirm against the kernel source):
 *  - the data queued for sending must still be below the 2/3 mark of the
 *    send buffer;
 *  - the amount of not-yet-sent data must not exceed the user-set
 *    sysctl_tcp_notsent_lowat.
 * Below the limit there is little queued, so writing may resume; above
 * it a lot is already queued, so no more should be written, otherwise
 * too much memory would be used.
 */
if (sk_stream_is_writeable(sk) && sock) {
/* sk_stream_is_writeable() passed, so buffer space is sufficient: clear SOCK_NOSPACE. */
clear_bit(SOCK_NOSPACE, &sock->flags);

rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (wq_has_sleeper(wq))
/* Wake a task waiting on the wait queue (write-side poll events). */
wake_up_interruptible_poll(&wq->wait, POLLOUT |
POLLWRNORM | POLLWRBAND);
/*
 * Sending is possible again: notify the async queue, but only if it has
 * listeners and the send side has not been shut down.
 */
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
}