net.ipv4.tcp_wmem does not work
经常发现 net.ipv4.tcp_wmem 不起作用: 修改这个参数之后, 一般都看不出效果.
感觉很奇怪, 文档上写得很明白, 但就是不起效果.
今天看代码的时候突然想到: accept 得到的 socket 的 sndbuf 是不是继承自 listen socket?
之前就发现过这个特点: 新的 socket 有一些属性是继承自 listen socket 的.
实验
systemtap
# Fires on every entry to the kernel function tcp_sendmsg().
# Calls made by processes whose executable name is not "test" are ignored.
# For matching calls, prints the process name and the sk_sndbuf field of the
# socket ($sk) that was passed to tcp_sendmsg().
probe kernel.function("tcp_sendmsg") { if (execname() == "test") printf("%s sk_sndbuf: %d \n", execname(), $sk->sk_sndbuf); }
server code
/** * author : 丁雪峰 * time : 2016-08-31 09:37:57 * email : fengidri@yeah.net * version : 1.0.1 * description : */ #include <stdio.h> #include <stddef.h> #include <unistd.h> #include <string.h> #include <errno.h> #include <sys/types.h> #include <malloc.h> #include "sws.h" int main() { int s = sws_net_server("127.0.0.1", 9999, 0, 1); if (s < 0) { printf("sws_net_server fail\n"); return -1; } while (1) { int fd = accept(s, NULL, 0); if (fd < 0) { printf("accept err\n"); continue; } sws_net_noblock(fd, 1); int size = 1024 * 1024 * 20; char *buf = malloc(size); int n = write(fd, buf, size); printf("write: %d\n", n); close(fd); } }
compile:
gcc test.c -L ../sws -I ../sws -lsws -g -o test
sws 是我的一个简单的代码库.
操作
nc 127.0.0.1 9999 > /dev/null -v
反复执行上面的命令, stap 会打印出 tcp_sendmsg 中看到的 sk_sndbuf, server 的代码会打印出成功写入到 socket 的
数据的大小. 可以看到两者的大小是相对应的.
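但是这个时候, 如果调整了 net.ipv4.tcp_wmem, stap 的输出值并不会变化.
但是重启 test 程序之后 (listen socket 被重新创建), stap 的输出值就是新调整的值了.
这就可以证明前面的猜测了: accept 得到的 socket 的 sk_sndbuf 继承自 listen socket, 而 listen socket 的值是在它创建时确定的.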
kernel code
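下面的代码是 SCTP 协议中 accept api 对应的内核代码: 创建新的 socket 的时候, 会调用 sctp_copy_sock.
这个函数里会把 sk_sndbuf, sk_rcvbuf
之类的值都从原 socket 复制到新的 socket.
(注意: 这里贴的是 SCTP 的实现, 仅用来说明这种继承关系. TCP 的 accept socket 是在 tcp_create_openreq_child 中经由 inet_csk_clone_lock / sk_clone_lock 从 listen socket 整体克隆出来的, sk_sndbuf 同样会被继承.)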
/* Create and initialize a new sk for the socket returned by accept(). */ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, struct sctp_association *asoc) { struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, sk->sk_prot); struct inet_sock *newinet; if (!newsk) goto out; sock_init_data(NULL, newsk); sctp_copy_sock(newsk, sk, asoc); sock_reset_flag(newsk, SOCK_ZAPPED); newinet = inet_sk(newsk); newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; sk_refcnt_debug_inc(newsk); if (newsk->sk_prot->init(newsk)) { sk_common_release(newsk); newsk = NULL; } out: return newsk; }
void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc) { struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; newsk->sk_flags = sk->sk_flags; newsk->sk_no_check_tx = sk->sk_no_check_tx; newsk->sk_no_check_rx = sk->sk_no_check_rx; newsk->sk_reuse = sk->sk_reuse; newsk->sk_shutdown = sk->sk_shutdown; newsk->sk_destruct = sctp_destruct_sock; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; newsk->sk_sndbuf = sk->sk_sndbuf; newsk->sk_rcvbuf = sk->sk_rcvbuf; newsk->sk_lingertime = sk->sk_lingertime; newsk->sk_rcvtimeo = sk->sk_rcvtimeo; newsk->sk_sndtimeo = sk->sk_sndtimeo; newinet = inet_sk(newsk); /* Initialize sk's sport, dport, rcv_saddr and daddr for * getsockname() and getpeername() */ newinet->inet_sport = inet->inet_sport; newinet->inet_saddr = inet->inet_saddr; newinet->inet_rcv_saddr = inet->inet_rcv_saddr; newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; newinet->inet_id = asoc->next_tsn ^ jiffies; newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; newinet->mc_ttl = 1; newinet->mc_index = 0; newinet->mc_list = NULL; }