dedecms织梦内容管理系统    
首页 | java | C/C++ | PHP | 操作系统 | ajax | 脚本编程 | 安全技术 | 本站下载页 | flex | CRM | 专题 | QQ群 | 测试中心 | 会员中心 | 积分规则
  当前位置:主页>操作系统>linux>文章内容
linux kernel 2.4.5 ipv4 socket层的一点解释
来源:未知     作者:    
1.新建socket
原形:
static int inet_create(struct socket *sock, int protocol)
在net/ipv4/af_inet.c中
详细解释
/*
 * inet_create - set up the PF_INET side of a newly created socket.
 *
 * Allocates a struct sock, selects the protocol descriptor and the socket
 * operations table according to sock->type, fills in the per-socket
 * defaults and finally runs the protocol's own init hook when it has one.
 *
 * @sock:     the socket being created; sock->type selects the protocol.
 * @protocol: requested protocol number.  For stream and datagram sockets
 *            0 is accepted and replaced by TCP/UDP; raw sockets reject 0.
 *
 * Returns 0 on success or a negative errno:
 *   -ENOBUFS          sk_alloc() returned NULL
 *   -ESOCKTNOSUPPORT  socket type is SOCK_SEQPACKET or unknown
 *   -EPERM            raw socket requested without CAP_NET_RAW
 *   -EPROTONOSUPPORT  protocol number does not fit the socket type
 *   other             non-zero value handed back by prot->init()
 */
static int inet_create(struct socket *sock, int protocol)
{
struct sock *sk;
struct proto *prot;
sock->state = SS_UNCONNECTED; /* the socket starts out unconnected */
sk = sk_alloc(PF_INET, GFP_KERNEL, 1); /* allocate the struct sock */
    /* sk_alloc() is in net/core/sock.c */
if (sk == NULL)
  goto do_oom;
/* Choose the protocol descriptor and ops table from the socket type. */
switch (sock->type) {
case SOCK_STREAM:  /* TCP */
  /* 0 means "use the default"; any other value must be TCP */
  if (protocol && protocol != IPPROTO_TCP)
  goto free_and_noproto;
  protocol = IPPROTO_TCP;
  prot = &tcp_prot; /* tcp_prot is defined in net/ipv4/tcp_ipv4.c */
  sock->ops = &inet_stream_ops; /* socket operations for stream sockets */
  break;
case SOCK_SEQPACKET:  /* not supported */
  goto free_and_badtype;
case SOCK_DGRAM:  /* UDP */
  /* 0 means "use the default"; any other value must be UDP */
  if (protocol && protocol != IPPROTO_UDP)
  goto free_and_noproto;
  protocol = IPPROTO_UDP;
  sk->no_check = UDP_CSUM_DEFAULT;
  prot=&udp_prot;  /* udp_prot is defined in net/ipv4/udp.c */
  sock->ops = &inet_dgram_ops; /* socket operations for datagram sockets */
  break;
case SOCK_RAW:  /* raw IP */
  if (!capable(CAP_NET_RAW)) /* raw sockets require CAP_NET_RAW */
  goto free_and_badperm;
  if (!protocol)  /* a raw socket must name its protocol; 0 is rejected */
  goto free_and_noproto;
  prot = &raw_prot; /* raw_prot is defined in net/ipv4/raw.c */
  sk->reuse = 1;  /* allow address reuse */
  sk->num = protocol; /* non-zero sk->num triggers the hash step below */
  sock->ops = &inet_dgram_ops; /* raw sockets share the datagram ops table */
  if (protocol == IPPROTO_RAW)
  sk->protinfo.af_inet.hdrincl = 1;
    /* hdrincl: the caller supplies its own IP header */
  break;
default:
  goto free_and_badtype;
}
/* Path-MTU discovery policy follows the global ipv4_config setting. */
if (ipv4_config.no_pmtu_disc)
  sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
else
  sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_WANT;
sk->protinfo.af_inet.id = 0;
sock_init_data(sock,sk); /* generic struct sock defaults */
    /* sock_init_data() is in net/core/sock.c */
/* The assignments below override defaults that sock_init_data() set. */
sk->destruct = inet_sock_destruct; /* replaces the sock_def_destruct default */
sk->zapped = 0;
sk->family = PF_INET;
sk->protocol = protocol;
sk->prot = prot;
sk->backlog_rcv = prot->backlog_rcv; /* taken from the chosen protocol descriptor */
sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl; /* default TTL */
    /* tunable through /proc/sys/net/ipv4/ip_default_ttl */
sk->protinfo.af_inet.mc_loop = 1;
sk->protinfo.af_inet.mc_ttl = 1;
sk->protinfo.af_inet.mc_index = 0;
sk->protinfo.af_inet.mc_list = NULL;
#ifdef INET_REFCNT_DEBUG
atomic_inc(&inet_sock_nr);
#endif
/* Only the SOCK_RAW branch above assigns sk->num in this function. */
if (sk->num) {
  /* It assumes that any protocol which allows
  * the user to assign a number at socket
  * creation time automatically
  * shares.
  */
  sk->sport = htons(sk->num); /* local port, converted with htons() */
  /* Add to protocol hash chains. */
  sk->prot->hash(sk);
}
if (sk->prot->init) {
  int err = sk->prot->init(sk); /* protocol-specific socket initialisation */
  if (err != 0) {
  /* init failed: release the sock and pass the error up */
  inet_sock_release(sk);
  return(err);
  }
}
return(0);
/* Error exits: every label except do_oom frees the sock first. */
free_and_badtype:
sk_free(sk);  /* release the struct sock */
return -ESOCKTNOSUPPORT;
free_and_badperm:
sk_free(sk);
return -EPERM;
free_and_noproto:
sk_free(sk);
return -EPROTONOSUPPORT;
do_oom:
return -ENOBUFS;
}
在net/core/sock.c
/*
 * sock_init_data - give a struct sock its protocol-independent defaults.
 *
 * @sock: owning socket, or NULL when the sock has no socket attached.
 * @sk:   the sock to initialise.
 *
 * Initialises the three skb queues and the timer, sets the default buffer
 * sizes, links @sk and @sock to each other (when @sock is non-NULL),
 * installs the default callbacks and leaves the reference count at 1.
 * Callers may override individual fields afterwards.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
	/* three queues: receive, write and error */
	skb_queue_head_init(&sk->receive_queue);
	skb_queue_head_init(&sk->write_queue);
	skb_queue_head_init(&sk->error_queue);

	init_timer(&sk->timer);

	sk->allocation = GFP_KERNEL;
	/* receive buffer size; the field is rcvbuf, the counterpart of sndbuf */
	sk->rcvbuf = sysctl_rmem_default;
	sk->sndbuf = sysctl_wmem_default;
	sk->state = TCP_CLOSE;
	sk->zapped = 1;
	sk->socket = sock;

	if (sock) {
		/* cross-link the sock and its socket; sleepers use the socket's wait queue */
		sk->type = sock->type;
		sk->sleep = &sock->wait;
		sock->sk = sk;
	} else {
		sk->sleep = NULL;
	}

	sk->dst_lock = RW_LOCK_UNLOCKED;
	sk->callback_lock = RW_LOCK_UNLOCKED;

	/*
	 * Default callbacks: sock_def_wakeup(), sock_def_readable(),
	 * sock_def_write_space(), sock_def_error_report() and
	 * sock_def_destruct() are all in net/core/sock.c.
	 */
	sk->state_change = sock_def_wakeup;
	sk->data_ready = sock_def_readable;
	sk->write_space = sock_def_write_space;
	sk->error_report = sock_def_error_report;
	sk->destruct = sock_def_destruct;

	sk->peercred.pid = 0;
	sk->peercred.uid = -1;
	sk->peercred.gid = -1;

	sk->rcvlowat = 1;
	/* receive and send timeouts both start at MAX_SCHEDULE_TIMEOUT */
	sk->rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sndtimeo = MAX_SCHEDULE_TIMEOUT;

	atomic_set(&sk->refcnt, 1);
}
1.1 SOCK_STREAM的初始化
在net/ipv4/tcp_ipv4.c
/*
 * tcp_v4_init_sock - TCP-specific initialisation of a new IPv4 sock.
 *
 * @sk: the sock to initialise.
 *
 * Sets up the out-of-order queue, the transmit timers and the prequeue,
 * seeds the RTO and congestion-control state, installs the TCP
 * write_space callback and replaces the generic buffer sizes with the TCP
 * defaults.  Always returns 0.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	tp->rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them. -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff; /* Infinity */
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->state = TCP_CLOSE;

	sk->write_space = tcp_write_space; /* tcp_write_space() is in net/ipv4/tcp.c */
	sk->use_write_queue = 1;

	/* ipv4_specific is defined in net/ipv4/tcp_ipv4.c */
	sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;

	/*
	 * Send and receive buffer sizes come from element [1] of the
	 * sysctl_tcp_wmem / sysctl_tcp_rmem arrays (net/ipv4/tcp.c).
	 * The receive-side field is rcvbuf, the counterpart of sndbuf.
	 */
	sk->sndbuf = sysctl_tcp_wmem[1];
	sk->rcvbuf = sysctl_tcp_rmem[1];

	/* tcp_sockets_allocated counts the TCP sockets currently allocated */
	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
SOCK_DGRAM无初始化
1.2 SOCK_RAW初始化
在net/ipv4/raw.c
/*
 * raw_init - protocol init hook for raw sockets.
 *
 * For a socket whose protocol number (sk->num) is IPPROTO_ICMP the filter
 * stored in the raw-socket options is zeroed; every other protocol needs
 * no extra setup.  Always returns 0.
 */
static int raw_init(struct sock *sk)
{
	if (sk->num == IPPROTO_ICMP) {
		struct raw_opt *raw = &(sk->tp_pinfo.tp_raw4);

		memset(&raw->filter, 0, sizeof(raw->filter));
	}

	return 0;
}
2.Server
2.1 bind
/*
 * inet_bind - bind() entry point for PF_INET sockets.
 *
 * @sock:     socket to bind.
 * @uaddr:    requested local address, interpreted as struct sockaddr_in.
 * @addr_len: length of @uaddr in bytes.
 *
 * A protocol that provides its own bind hook handles the request itself.
 * Otherwise the address and port are validated, the port is claimed
 * through prot->get_port() and the local address/port fields of the sock
 * are filled in; any previous destination is cleared.
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL         @addr_len too short, socket not in TCP_CLOSE, or a
 *                   port is already assigned (double bind)
 *   -EADDRNOTAVAIL  address is not local/multicast/broadcast and non-local
 *                   binds are not enabled
 *   -EACCES         privileged port requested without CAP_NET_BIND_SERVICE
 *   -EADDRINUSE     prot->get_port() refused the port
 *   other           whatever the protocol's own bind hook returns
 */
static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *addr=(struct sockaddr_in *)uaddr;
struct sock *sk=sock->sk;
unsigned short snum;
int chk_addr_ret;
int err;
/* If the socket has its own bind function then use it. (RAW) */
if(sk->prot->bind)
  return sk->prot->bind(sk, uaddr, addr_len);
    /* per the article, only SOCK_RAW defines its own bind */
if (addr_len < sizeof(struct sockaddr_in))
  return -EINVAL;
chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
    /* inet_addr_type() returns the type of the address; */
    /* it is in net/ipv4/fib_frontend.c */
/* Not specified by any standard per-se, however it breaks too
  * many applications when removed. It is unfortunate since
  * allowing applications to make a non-local bind solves
  * several problems with systems using dynamic addressing.
  * (ie. your servers still start up even if your ISDN link
  * is temporarily down)
  */
if (sysctl_ip_nonlocal_bind == 0 &&
   sk->protinfo.af_inet.freebind == 0 &&
   addr->sin_addr.s_addr != INADDR_ANY &&
   chk_addr_ret != RTN_LOCAL &&
   chk_addr_ret != RTN_MULTICAST &&
   chk_addr_ret != RTN_BROADCAST)
  return -EADDRNOTAVAIL;
snum = ntohs(addr->sin_port);
if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
    /* a non-zero port below PROT_SOCK requires CAP_NET_BIND_SERVICE */
  return -EACCES;
/*   We keep a pair of addresses. rcv_saddr is the one
  *   used by hash lookups, and saddr is used for transmit.
  *
  *   In the BSD API these are the same except where it
  *   would be illegal to use them (multicast/broadcast) in
  *   which case the sending device address is used.
  */
lock_sock(sk);
/* Check these errors (active socket, double bind). */
err = -EINVAL;
if ((sk->state != TCP_CLOSE)  ||
   (sk->num != 0))
  goto out;
sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
  sk->saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
if (sk->prot->get_port(sk, snum) != 0) { /* non-zero: the port could not be claimed */
  /* undo the address assignment made above */
  sk->saddr = sk->rcv_saddr = 0;
  err = -EADDRINUSE;
  goto out;
}
/* Record which parts of the binding were requested explicitly. */
if (sk->rcv_saddr)
  sk->userlocks |= SOCK_BINDADDR_LOCK;
if (snum)
  sk->userlocks |= SOCK_BINDPORT_LOCK;
/* sport is derived from sk->num rather than snum (snum may have been 0) */
sk->sport = htons(sk->num);
sk->daddr = 0;
sk->dport = 0;
sk_dst_reset(sk);
err = 0;
out:
release_sock(sk);
return err;
}
SOCK_STREAM和SOCK_DGRAM用默认的bind
2.1.1 SOCK_RAW的bind
在net/ipv4/raw.c
/*
 * raw_bind - bind hook for raw sockets.
 *
 * @sk:       raw sock to bind; must be in TCP_CLOSE state.
 * @uaddr:    requested local address, interpreted as struct sockaddr_in.
 * @addr_len: length of @uaddr in bytes.
 *
 * Only the address is bound.  rcv_saddr receives the requested address;
 * saddr receives it as well unless the address is multicast or broadcast,
 * in which case saddr is 0.
 *
 * Returns 0 on success, -EINVAL for a socket that is not closed or an
 * address that is too short, -EADDRNOTAVAIL for a non-zero address that is
 * neither local, multicast nor broadcast.
 */
static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
	int addr_type;
	int group_addr;

	if (sk->state != TCP_CLOSE)
		return -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	/* inet_addr_type() (net/ipv4/fib_frontend.c) classifies the address */
	addr_type = inet_addr_type(sin->sin_addr.s_addr);
	group_addr = (addr_type == RTN_MULTICAST || addr_type == RTN_BROADCAST);

	/* a zero address is always accepted; otherwise the type must qualify */
	if (sin->sin_addr.s_addr && addr_type != RTN_LOCAL && !group_addr)
		return -EADDRNOTAVAIL;

	/* rcv_saddr: bound local address; saddr: source address */
	sk->saddr = sin->sin_addr.s_addr;
	sk->rcv_saddr = sk->saddr;
	if (group_addr)
		sk->saddr = 0; /* Use device */

	sk_dst_reset(sk);
	return 0;
}
2.2 listen
2.2.1 SOCK_STREAM的listen
在net/ipv4/af_inet.c
/*
 * inet_listen - listen() entry point; only stream sockets are accepted.
 *
 * @sock:    socket to move into the listening state.
 * @backlog: value stored as the sock's max_ack_backlog.
 *
 * The socket must be SS_UNCONNECTED and of type SOCK_STREAM, and its sock
 * must be in TCP_CLOSE or TCP_LISTEN.  A sock that is already listening
 * only has its backlog adjusted; otherwise tcp_listen_start() does the
 * real work first.
 *
 * Returns 0 on success, -EINVAL when the preconditions fail, or the error
 * from tcp_listen_start().
 */
int inet_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char prev_state;
	int err = -EINVAL;

	lock_sock(sk);

	if (sock->state == SS_UNCONNECTED && sock->type == SOCK_STREAM) {
		prev_state = sk->state;

		if ((1<<prev_state)&(TCPF_CLOSE|TCPF_LISTEN)) {
			/* Really, if the socket is already in listen state
			 * we can only allow the backlog to be adjusted.
			 */
			if (prev_state == TCP_LISTEN)
				err = 0;
			else
				err = tcp_listen_start(sk);

			if (!err)
				sk->max_ack_backlog = backlog;
		}
	}

	release_sock(sk);
	return err;
}
tcp_listen_start在net/ipv4/tcp.c
/*
 * tcp_listen_start - move a TCP sock into the LISTEN state.
 *
 * @sk: the sock to start listening on.
 *
 * Resets the backlog counters and the accept queue, allocates and installs
 * the tcp_listen_opt structure, then marks the sock TCP_LISTEN and
 * validates the local port through prot->get_port().  On success the sock
 * is added to the protocol hash table.
 *
 * Returns 0 on success, -ENOMEM when the allocation fails, or -EADDRINUSE
 * when get_port() rejects the port (the sock is put back into TCP_CLOSE
 * and the listen options are freed).
 */
int tcp_listen_start(struct sock *sk)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
struct tcp_listen_opt *lopt;
sk->max_ack_backlog = 0;
sk->ack_backlog = 0;
tp->accept_queue = tp->accept_queue_tail = NULL;
tp->syn_wait_lock = RW_LOCK_UNLOCKED;
tcp_delack_init(tp);  /* presumably resets the delayed-ACK state kept in tp */
    /* helper is in include/net/tcp.h; its body is not shown here */
lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
if (!lopt)
  return -ENOMEM;
memset(lopt, 0, sizeof(struct tcp_listen_opt));
/* smallest exponent, starting at 6, with 2^exp >= sysctl_max_syn_backlog */
for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
  if ((1<<lopt->max_qlen_log) >= sysctl_max_syn_backlog)
  break;
/* publish the listen options under the SYN-wait lock */
write_lock_bh(&tp->syn_wait_lock);
tp->listen_opt = lopt;
write_unlock_bh(&tp->syn_wait_lock);
/* There is race window here: we announce ourselves listening,
  * but this transition is still not validated by get_port().
  * It is OK, because this socket enters to hash table only
  * after validation is complete.
  */
sk->state = TCP_LISTEN;
if (sk->prot->get_port(sk, sk->num) == 0) { /* 0: the port was validated */
  sk->sport = htons(sk->num); /* source port, converted with htons() */
  sk_dst_reset(sk);
  sk->prot->hash(sk);  /* enter the protocol hash table */
  return 0;
}
/* Validation failed: undo the state change and drop the listen options. */
sk->state = TCP_CLOSE;
write_lock_bh(&tp->syn_wait_lock);
tp->listen_opt = NULL;
write_unlock_bh(&tp->syn_wait_lock);
kfree(lopt);
return -EADDRINUSE;
}
SOCK_DGRAM 和 SOCK_RAW 不支持listen
2.3 accept
2.3.1 SOCK_STREAM的accept
在net/ipv4/af_inet.c
/*
 * inet_accept - accept() entry point for PF_INET sockets.
 *
 * @sock:    the listening socket.
 * @newsock: socket that receives the accepted connection.
 * @flags:   passed through to the protocol's accept hook.
 *
 * Asks the protocol for an accepted sock, attaches it to @newsock with
 * sock_graft() (include/net/sock.h) and marks @newsock SS_CONNECTED.
 *
 * Returns 0 on success.  When the protocol hook returns NULL the error it
 * stored is returned; -EINVAL is the value if the hook stored nothing.
 */
int inet_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *listener = sock->sk;
	struct sock *sk2;
	int err = -EINVAL;

	sk2 = listener->prot->accept(listener, flags, &err);
	if (sk2 == NULL)
		return err;

	lock_sock(sk2);

	/* NOTE(review): expression kept verbatim in case BUG_TRAP stringifies its argument */
	BUG_TRAP((1<<sk2->state)&(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_CLOSE));

	sock_graft(sk2, newsock);
	newsock->state = SS_CONNECTED;

	release_sock(sk2);
	return 0;
}
SOCK_DGRAM 和 SOCK_RAW 不支持 accept
2.3.1.1 TCP协议的accept
在net/ipv4/tcp.c
struct sock *tcp_accept(struct sock *sk, int flags, int *err)
{
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
struct open_request *req;
struct sock *newsk;
int error;
lock_sock(sk);
/* We need to make sure that this socket is listening,
  * and that it has something pending.
  */
error = -EINVAL;
if (sk->state != TCP_LISTEN) /* 检查socket是否处于listen状态 */
  goto out;
/* Find already established connection */
if (!tp->accept_queue) { /* 判断accept队列是否准备好 */
  long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
    /* 判断是否为堵塞模式 */
    /* 在include/net/sock.h */
  /* If this is a non blocking socket don't sleep */
  error = -EAGAIN;
  if (!timeo)  /* 不堵塞模式,直接返回 */
  goto out;
  error = wait_for_connect(sk, timeo); /* 进入空闲等待连接 */
  if (error)
  goto out;
}
req = tp->accept_queue;
if ((tp->accept_queue = req->dl_next) == NULL)
  tp->accept_queue_tail = NULL;
  newsk = req->sk;
tcp_acceptq_removed(sk);  /* sk当前连接数减1 */
    /*在include/net/tcp.h */
tcp_openreq_fastfree(req);  /* 释放内存 */
    /*在include/net/tcp.h */
BUG_TRAP(newsk->state != TCP_SYN_RECV); 
release_sock(sk);
return newsk;
out:
release_sock(sk);
*err = error;
return NULL;
}
/* This function is only called when the socket is in blocking mode. */
/* 在net/ipv4/tcp.c */
/*
 * wait_for_connect - sleep until the accept queue of @sk is non-empty.
 *
 * @sk:    listening sock; entered and left with the sock lock held, but
 *         the lock is released around each sleep.
 * @timeo: maximum time to wait.
 *
 * Returns 0 when a connection is queued, -EINVAL when the sock left
 * TCP_LISTEN, the sock_intr_errno() value when a signal is pending, or
 * -EAGAIN when the timeout reached zero.
 */
static int wait_for_connect(struct sock * sk, long timeo)
{
DECLARE_WAITQUEUE(wait, current);
int err;
/*
  * True wake-one mechanism for incoming connections: only
  * one process gets woken up, not the 'whole herd'.
  * Since we do not 'race & poll' for established sockets
  * anymore, the common case will execute the loop only once.
  *
  * Subtle issue: "add_wait_queue_exclusive()" will be added
  * after any current non-exclusive waiters, and we know that
  * it will always _stay_ after any new non-exclusive waiters
  * because all non-exclusive waiters are added at the
  * beginning of the wait-queue. As such, it's ok to "drop"
  * our exclusiveness temporarily when we get woken up without
  * having to remove and re-insert us on the wait queue.
  */
add_wait_queue_exclusive(sk->sleep, &wait);
for (;;) {
  /* task state is set before the queue is re-checked */
  current->state = TASK_INTERRUPTIBLE;
  release_sock(sk);
  if (sk->tp_pinfo.af_tcp.accept_queue == NULL)
  timeo = schedule_timeout(timeo); /* sleep; timeo becomes the value handed back */
  lock_sock(sk);
  /* Each err value is set just before the test that would return it. */
  err = 0;
  if (sk->tp_pinfo.af_tcp.accept_queue) /* accept queue is non-empty, */
     /* i.e. a connection has come in */
  break;
  err = -EINVAL;
  if (sk->state != TCP_LISTEN)
  break;
  err = sock_intr_errno(timeo);
  if (signal_pending(current))
  break;
  err = -EAGAIN;
  if (!timeo)
  break;
}
current->state = TASK_RUNNING;
remove_wait_queue(sk->sleep, &wait);
return err;
}
3.Client
3.1 connect
3.1.1 SOCK_STREAM的connect
在net/ipv4/af_inet.c

/*
 * inet_stream_connect - connect() entry point for stream sockets.
 *
 * @sock:     socket to connect.
 * @uaddr:    destination address; sa_family == AF_UNSPEC requests a
 *            disconnect instead.
 * @addr_len: length of @uaddr, passed on to prot->connect().
 * @flags:    O_NONBLOCK in @flags feeds the send-timeout lookup.
 *
 * What happens depends on sock->state: an unconnected socket starts a new
 * connection through prot->connect(), a connecting socket goes straight
 * to the wait below, a connected socket gets -EISCONN and any other state
 * -EINVAL.
 *
 * While sk->state is SYN_SENT or SYN_RECV, a zero timeout makes the call
 * return at once with the error chosen in the switch (-EINPROGRESS for a
 * connect just started, -EALREADY for one already under way); otherwise
 * it sleeps in inet_wait_for_connect().
 *
 * Returns 0 once connected, or a negative errno.
 */
int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
  int addr_len, int flags)
{
struct sock *sk=sock->sk;
int err;
long timeo;
lock_sock(sk);
if (uaddr->sa_family == AF_UNSPEC) {
  err = sk->prot->disconnect(sk, flags); /* tear the connection down */
  sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
  goto out;
}
switch (sock->state) {
default:
  err = -EINVAL;
  goto out;
case SS_CONNECTED:
  err = -EISCONN;
  goto out;
case SS_CONNECTING:
  err = -EALREADY;
  /* Fall out of switch with err, set for this state */
  break;
case SS_UNCONNECTED:
  err = -EISCONN;
  if (sk->state != TCP_CLOSE)
  goto out;
  err = -EAGAIN;
  if (sk->num == 0) {
  /* no local port yet: get_port(sk, 0) must succeed, else -EAGAIN */
  if (sk->prot->get_port(sk, 0) != 0)
   goto out;
  sk->sport = htons(sk->num);
  }
  err = sk->prot->connect(sk, uaddr, addr_len); /* the protocol's connect hook */
  if (err < 0)
  goto out;
   sock->state = SS_CONNECTING;  /* socket is now connecting */
  /* Just entered SS_CONNECTING state; the only
  * difference is that return value in non-blocking
  * case is EINPROGRESS, rather than EALREADY.
  */
  err = -EINPROGRESS;
  break;
}
timeo = sock_sndtimeo(sk, flags&O_NONBLOCK); /* send timeout; 0 means do not wait */
     /* sock_sndtimeo() is in include/net/sock.h */
if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) { /* connection not complete yet */
  /* Error code is set above */
  if (!timeo || !inet_wait_for_connect(sk, timeo))
    /* zero timeout: return immediately */
    /* otherwise sleep in inet_wait_for_connect(); a zero result also returns here */
  goto out;
  err = sock_intr_errno(timeo);
  if (signal_pending(current))
  goto out;
}
/* Connection was closed by RST, timeout, ICMP error
  * or another process disconnected us.
  */
if (sk->state == TCP_CLOSE)
  goto sock_error;
/* sk->err may be not zero now, if RECVERR was ordered by user
  * and error was received after socket entered established state.
  * Hence, it is handled normally after connect() return successfully.
  */
sock->state = SS_CONNECTED;  /* socket is now connected */
err = 0;
out:
release_sock(sk);
return err;
sock_error:
/* "a ? : b" is the GNU form of "a ? a : b": fall back to -ECONNABORTED when sock_error() is 0 */
err = sock_error(sk) ? : -ECONNABORTED;
sock->state = SS_UNCONNECTED;
if (sk->prot->disconnect(sk, flags))
  sock->state = SS_DISCONNECTING;
goto out;
}
/* This function is only called when the socket is in blocking mode. */
/* It is in net/ipv4/af_inet.c */
/*
 * inet_wait_for_connect - sleep while a connect is still in progress.
 *
 * @sk:    sock being connected; entered and left with the sock lock held,
 *         but the lock is released around each sleep.
 * @timeo: maximum time to wait.
 *
 * Loops while sk->state is SYN_SENT or SYN_RECV, stopping early when a
 * signal is pending or the timeout reaches zero.
 *
 * Returns the last value handed back by schedule_timeout(), or @timeo
 * unchanged when the loop body never ran; the caller treats 0 as "stop
 * waiting".
 */
static long inet_wait_for_connect(struct sock *sk, long timeo)
{
DECLARE_WAITQUEUE(wait, current);
/* task state is set before joining the wait queue */
__set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(sk->sleep, &wait);
/* Basic assumption: if someone sets sk->err, he _must_
  * change state of the socket from TCP_SYN_*.
  * Connect() does not allow to get error notifications
  * without closing the socket.
  */
while ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
  release_sock(sk);
  timeo = schedule_timeout(timeo); /* go to sleep */
  lock_sock(sk);
  if (signal_pending(current) || !timeo)
  break;
  /* re-arm the task state before the loop condition is tested again */
  set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
remove_wait_queue(sk->sleep, &wait);
return timeo;
}

 

 

上一篇:Windows XP和2000操作系统自动关机的实现   下一篇:Linux网络代码导读v0.2
[收藏] [推荐] [评论(0条)] [返回顶部] [打印本页] [关闭窗口]  
用户名: 新注册) 密码: 匿名评论
评论内容:(不能超过250字,需审核后才会公布,请自觉遵守互联网相关政策法规。
 §最新评论
  热点文章
·Linux常用基本命令及应用技巧
·写得蛮好的linux学习笔记
·学会在Linux下对硬盘分区
·找回Redhat的超级用户密码
·Linux下C语言编程
·GDB教程
·Cron服务配置详解
·Linux与Unix二大操作系统编程的
·Linux 2.6 内核的嵌入式系统应用
·Linux和Windows系统调用的比较
·vim命令(一)
·vim 命令(二)
  相关文章
·Linux网络代码导读v0.2
·Linux内核模块编程之字符设备文
·linux内核模块和驱动程序的编写
·Unix(Linux)C编程问题精粹
·Linux 进程调度原理
·使用 GDB 调试 Linux 软件
·C常用的LinuxC语言函数库
·Linux核心代码分析
·关于Linux下编写和编译程序的几
·Linux库函数升级权威经验
·Linux下C语言编程--信号处理函数
·Linux ioctl() Primer
  相关信息
copy right @ 百家拳软件项目研究室 2007 辽ICP备07011763