1. 程式人生 > >記一次傳遞檔案控制代碼引發的血案 (續)

記一次傳遞檔案控制代碼引發的血案 (續)

繼 記一次傳遞檔案控制代碼引發的血案 之後,這個 demo 又引發了一次血案,現錄如下。

這次我是在 linux 上測試檔案控制代碼的傳遞,linux 上並沒有 STREAMS 系統,

因此是採用 unix domain socket 的 sendmsg/recvmsg 中控制訊息部分來傳遞控制代碼的。

程式碼的主要修改部分集中於傳送 fd 與接收 fd 處,一開始程式碼是這樣的,執行良好。

spipe_fd.c

  1 #define MAXLINE 128 // size of the data buffer that recv_fd reads into
  2 #define RIGHTSLEN CMSG_LEN(sizeof(int)) // cmsg_len of a control message whose payload is one int (the descriptor)
  3 #define CREDSLEN CMSG_LEN(sizeof(struct CREDSTRUCT)) // cmsg_len of a control message whose payload is one struct CREDSTRUCT; NOTE(review): CREDSTRUCT is not defined anywhere in this listing
  4 #define CONTROLLEN (RIGHTSLEN+CREDSLEN) // size used for the malloc'ed control buffer and for msg_controllen; NOTE(review): a sum of CMSG_LEN values presumably omits inter-message padding - confirm whether CMSG_SPACE was intended
  5 
  6 int send_fd (int fd, int fd_to_send)
  7 { // Sends a 2-byte {0, status} message on fd. A non-negative fd_to_send is attached as SCM_RIGHTS control data with status 0; a negative one is reported through the status byte only. Returns 0 on success, -1 on failure.
  8     struct iovec iov[1]; 
  9     struct msghdr msg; 
 10     struct cmsghdr *cmptr = NULL; // stays NULL on the error-code path, so the free() calls below have nothing to release there
 11     char buf[2]; // buf[0] is the 0 marker, buf[1] the status byte
 12 
 13     iov[0].iov_base = buf; 
 14     iov[0].iov_len = 2; 
 15 
 16     msg.msg_iov = iov; 
 17     msg.msg_iovlen = 1; 
 18     msg.msg_name = NULL; 
 19     msg.msg_namelen = 0; 
 20     msg.msg_flags = 0; 
 21 
 22     if (fd_to_send < 0) { // negative value: send an error code, no control data
 23         msg.msg_control = NULL; 
 24         msg.msg_controllen = 0; 
 25         buf[1] = -fd_to_send; // negated error code, narrowed to a char
 26         if (buf[1] == 0) // status 0 is what the descriptor path sends, so never emit it here
 27             buf[1] = 1; 
 28     } else {
 29         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
 30             fprintf (stderr, "malloc memory failed\n"); 
 31             return -1; 
 32         }
 33 
 34         msg.msg_control = cmptr; 
 35         msg.msg_controllen = CONTROLLEN; 
 36 
 37         cmptr->cmsg_level = SOL_SOCKET; 
 38         cmptr->cmsg_type = SCM_RIGHTS; 
 39         cmptr->cmsg_len = CONTROLLEN; // NOTE(review): CONTROLLEN is RIGHTSLEN+CREDSLEN although only one int is stored below - confirm RIGHTSLEN was not intended
 40 
 41         *(int *) CMSG_DATA(cmptr) = fd_to_send; // payload: the descriptor being passed
 42         buf[1] = 0; // status 0 tells the receiver that a descriptor is attached
 43     }
 44 
 45     buf[0] = 0; 
 46     if (sendmsg(fd, &msg, 0) != 2) { // anything other than both data bytes sent counts as failure
 47         free (cmptr); 
 48         return -1; 
 49     }
 50 
 51     free (cmptr); 
 52     return 0; 
 53 }

 

以上是傳送控制代碼部分,重點位於 37-41 行,設定了控制訊息的型別與控制代碼的值。

sendmsg 中的資料訊息部分,用來相容出錯的場景(出錯時可以提供一個-1~-255的錯誤碼,及一段描述資訊),關鍵資訊位於控制部分。

下面來看訊息的接收:

 1 int recv_fd (int fd, uid_t *uidptr, ssize_t (*userfunc) (int, const void*, size_t))
 2 { // Reads messages from fd until one ends in a {0, status} trailer. Returns the received descriptor when status is 0, the negated status otherwise, or -1 on a local failure. uidptr is not used in this version.
 3     struct cmsghdr *cmptr = NULL; 
 4     int newfd, nr, status; 
 5     char *ptr; 
 6     char buf[MAXLINE]; 
 7     struct iovec iov[1]; 
 8     struct msghdr msg; 
 9 
10     status = -1; // stays negative until a trailer has been parsed; that is what ends the outer loop
11     newfd = -1; 
12 
13     for (;;) {
14         iov[0].iov_base = buf; 
15         iov[0].iov_len = sizeof (buf); 
16 
17         msg.msg_iov = iov; 
18         msg.msg_iovlen = 1; 
19         msg.msg_name = NULL; 
20         msg.msg_namelen = 0; 
21 
22         if ((cmptr = malloc (CONTROLLEN)) == NULL) { // a fresh control buffer per iteration, freed on every way out of the iteration
23             fprintf (stderr, "malloc error\n"); 
24             return -1; 
25         }
26 
27         msg.msg_control = cmptr; 
28         msg.msg_controllen = CONTROLLEN; 
29 
30         if ((nr = recvmsg (fd, &msg, 0)) < 0) { 
31             fprintf (stderr, "recvmsg error\n"); 
32             free (cmptr); 
33             return -1; 
34         } else if (nr == 0) {
35             fprintf (stderr, "connection closed by server\n"); 
36             free (cmptr); 
37             return -1; 
38         }
39 
40         for (ptr = buf; ptr < &buf[nr]; ) { // scan the received bytes for the 0 marker
41             if (*ptr ++ == 0) {
42                 if (ptr != &buf[nr-1]) { // after the increment ptr must address the last byte, i.e. the marker must be second to last
43                     fprintf (stderr, "message format error"); 
44                     free (cmptr); 
45                     return -1; 
46                 }
47 
48                 status = *ptr & 0xff; // status byte read as 0..255
49                 if (status == 0) {
50                     if (msg.msg_controllen != CONTROLLEN) { // status 0 is only accepted together with a full-size control buffer
51                         fprintf (stderr, "status = 0 but no fd\n"); 
52                         free (cmptr); 
53                         return -1; 
54                     }
55 
56                     newfd = *(int *) CMSG_DATA(cmptr); // descriptor is taken from the data of the first control message
57                 } else { 
58                     newfd = -status; // non-zero status is handed back as a negative number
59                 }
60 
61                 nr -= 2; // stop counting the 2-byte trailer; ptr is now past &buf[nr], so the scan loop ends too
62             }
63         }
64 
65         free(cmptr); 
66         if (nr > 0 && (*userfunc)(STDERR_FILENO, buf, nr) != nr) // bytes that preceded the trailer go to userfunc, with STDERR_FILENO as its first argument
67             return -1; 
68 
69         if (status >= 0) // trailer seen: done; otherwise loop and read again
70             return newfd; 
71     }
72 
73     return -1; 
74 }

 

接收部分的重點位於 56 行,這裡取得了對方傳遞過來的檔案控制代碼(注意不是簡單的值傳遞!參考上篇文章)

其它一些程式碼則用來處理出錯資訊,當出現錯誤時,呼叫 userfunc 列印錯誤資訊 (使用者一般傳遞 write) 。

另外介面中 uidptr 引數並沒有用,這個是為將來擴充套件預留的。

 

使用之前的 demo (spipe_server.c / spipe_client.c)編譯、執行,輸出結果如下:

./spipe_server ./spipe_client
create pipe 3.4
3 7
create temp file /tmp/outliqA3i with fd 4
seek to head
send fd 4 to peer
recv fd 3, position 0
create temp file /tmp/inaLr30i with fd 4
source: 3 7

seek to head
send fd 4
recv fd 5 from peer, position 0
10

 

可以看到通過新的方式傳遞的檔案控制代碼值也發生了變化(從 4 變為 3),且也需要對檔案偏移進行重置,否則還會掉到之前文章說的那個坑裡。

 

問題出現在增加一些程式碼來傳遞傳送程序憑證(如uid)時,此時傳送方需要傳遞兩個控制子訊息(分別表示控制代碼與憑證),接收方也需要處理兩個子訊息。

新的傳送程式碼如下:

 1 #define MAXLINE 128 // size of the data buffer that recv_fd reads into
 2 #if defined(SCM_CREDS) // on BSD
 3 #define CREDSTRUCT cmsgcred // struct tag of the credential payload
 4 #define CR_UID cmcred_uid // name of the uid member inside that struct
 5 #define CREDOPT LOCAL_PEERCRED // option name recv_fd passes to setsockopt
 6 #define SCM_CREDTYPE SCM_CREDS // cmsg_type used for the credential message
 7 #elif defined(SCM_CREDENTIALS)  // on linux
 8 #define CREDSTRUCT ucred // struct tag of the credential payload
 9 #define CR_UID uid // name of the uid member inside that struct
10 #define CREDOPT SO_PASSCRED // option name recv_fd passes to setsockopt
11 #define SCM_CREDTYPE SCM_CREDENTIALS // cmsg_type used for the credential message
12 #else
13 #error passing credentials is unsupported!
14 #endif
15 
16 #define RIGHTSLEN CMSG_LEN(sizeof(int)) // cmsg_len of the descriptor message (payload: one int)
17 #define CREDSLEN CMSG_LEN(sizeof(struct CREDSTRUCT)) // cmsg_len of the credential message (payload: one struct CREDSTRUCT)
18 #define CONTROLLEN (RIGHTSLEN+CREDSLEN) // size used for the malloc'ed control buffer and for msg_controllen; NOTE(review): a sum of CMSG_LEN values presumably omits inter-message padding - confirm whether CMSG_SPACE was intended
19 
20 
21 int send_fd (int fd, int fd_to_send)
22 { // Sends a 2-byte {0, status} message on fd. A non-negative fd_to_send is attached as an SCM_RIGHTS control message followed by a credential control message, with status 0; a negative one is reported through the status byte only. Returns 0 on success, -1 on failure.
23     struct iovec iov[1]; 
24     struct msghdr msg; 
25     struct cmsghdr *cmptr = NULL; // start of the control buffer; stays NULL on the error-code path
26     char buf[2]; // buf[0] is the 0 marker, buf[1] the status byte
27     struct CREDSTRUCT *credp; 
28     struct cmsghdr *cmp; // cursor over the control messages inside the buffer
29 
30     iov[0].iov_base = buf; 
31     iov[0].iov_len = 2; 
32 
33     msg.msg_iov = iov; 
34     msg.msg_iovlen = 1; 
35     msg.msg_name = NULL; 
36     msg.msg_namelen = 0; 
37     msg.msg_flags = 0; 
38 
39     if (fd_to_send < 0) { // negative value: send an error code, no control data
40         msg.msg_control = NULL; 
41         msg.msg_controllen = 0; 
42         buf[1] = -fd_to_send; // negated error code, narrowed to a char
43         if (buf[1] == 0) // status 0 is what the descriptor path sends, so never emit it here
44             buf[1] = 1; 
45     } else {
46         if ((cmptr = malloc(CONTROLLEN)) == NULL) { // NOTE(review): nothing in this function clears the malloc'ed buffer before CMSG_NXTHDR walks it below
47             fprintf (stderr, "malloc memory failed\n"); 
48             return -1; 
49         }
50 
51         msg.msg_control = cmptr; 
52         msg.msg_controllen = CONTROLLEN; 
53 
54         cmp = cmptr; // first control message: the descriptor
55         cmp->cmsg_level = SOL_SOCKET; 
56         cmp->cmsg_type = SCM_RIGHTS; 
57         cmp->cmsg_len = RIGHTSLEN; 
58         *(int *) CMSG_DATA(cmp) = fd_to_send; 
59 
60         cmp = CMSG_NXTHDR(&msg, cmp); // second control message: the credentials; NOTE(review): the result is dereferenced without a NULL check
61         cmp->cmsg_level = SOL_SOCKET; 
62         cmp->cmsg_type = SCM_CREDTYPE; 
63         cmp->cmsg_len = CREDSLEN; 
64         credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
65 
66 #  if defined(SCM_CREDENTIALS)
67         // only linux need to set members of this struct !
68         credp->uid = getuid (); 
69         credp->gid = getegid (); // NOTE(review): uid comes from getuid() but gid from getegid() - confirm the mix is intended
70         credp->pid = getpid (); 
71 #  endif
72         buf[1] = 0; // status 0 tells the receiver that control data is attached
73     }
74 
75     buf[0] = 0; 
76     if (sendmsg(fd, &msg, 0) != 2) { // anything other than both data bytes sent counts as failure
77         free (cmptr); 
78         return -1; 
79     }
80 
81     free (cmptr); 
82     return 0; 
83 }

 

最開始的一些巨集定義,是用來區分 linux 與 bsd 上一些細節,重點在 54-70 行,這兩段程式碼(54-58 行與 60-70 行)分別設定了控制代碼與憑證。

然後控制訊息的大小 CONTROLLEN 由兩部分訊息的長度(RIGHTSLEN 與 CREDSLEN)累加得到,分配的記憶體也是這麼大。(嚴格來說,緩衝區大小應以 CMSG_SPACE 而非 CMSG_LEN 累加,才會把子訊息之間的對齊填充算進去;在本文的 32 位環境下兩者恰好相等,都是 16 + 24 = 40。)

再來看接收部分:

  1 int recv_fd (int fd, uid_t *uidptr, ssize_t (*userfunc) (int, const void*, size_t))
  2 { // Enables CREDOPT on fd, then reads messages until one ends in a {0, status} trailer. With status 0 it returns the received descriptor and stores the sender's uid through uidptr; otherwise it returns the negated status. Returns -1 on a local failure.
  3     struct cmsghdr *cmptr = NULL; 
  4 
  5     int newfd, nr, status; 
  6     char *ptr; 
  7     char buf[MAXLINE]; 
  8     struct iovec iov[1]; 
  9     struct msghdr msg; 
 10 
 11     status = -1; // stays negative until a trailer has been parsed; that is what ends the outer loop
 12     newfd = -1; 
 13 
 14     const int on = -1; // non-zero option value handed to setsockopt below
 15     struct cmsghdr *cmp; 
 16     struct CREDSTRUCT *credp; 
 17     if (setsockopt (fd, SOL_SOCKET, CREDOPT, &on, sizeof(int)) < 0) { // done on every call, before any message is read
 18         fprintf (stderr, "setsockopt for %d failed\n", CREDOPT); 
 19         return -1; 
 20     }
 21 
 22     for (;;) {
 23         iov[0].iov_base = buf; 
 24         iov[0].iov_len = sizeof (buf); 
 25 
 26         msg.msg_iov = iov; 
 27         msg.msg_iovlen = 1; 
 28         msg.msg_name = NULL; 
 29         msg.msg_namelen = 0; 
 30 
 31         if ((cmptr = malloc (CONTROLLEN)) == NULL) { // a fresh control buffer per iteration, freed on every way out of the iteration
 32             fprintf (stderr, "malloc error\n"); 
 33             return -1; 
 34         }
 35 
 36         msg.msg_control = cmptr; 
 37         msg.msg_controllen = CONTROLLEN; 
 38 
 39         if ((nr = recvmsg (fd, &msg, 0)) < 0) { 
 40             fprintf (stderr, "recvmsg error\n"); 
 41             free (cmptr); 
 42             return -1; 
 43         } else if (nr == 0) {
 44             fprintf (stderr, "connection closed by server\n"); 
 45             free (cmptr); 
 46             return -1; 
 47         }
 48 
 49         for (ptr = buf; ptr < &buf[nr]; ) { // scan the received bytes for the 0 marker
 50             if (*ptr ++ == 0) {
 51                 if (ptr != &buf[nr-1]) { // after the increment ptr must address the last byte, i.e. the marker must be second to last
 52                     fprintf (stderr, "message format error"); 
 53                     free (cmptr); 
 54                     return -1; 
 55                 }
 56 
 57                 status = *ptr & 0xff; // status byte read as 0..255
 58                 if (status == 0) {
 59                     if (msg.msg_controllen != CONTROLLEN) { // status 0 is only accepted together with a full-size control buffer
 60                         fprintf (stderr, "status = 0 but no fd\n"); 
 61                         free (cmptr); 
 62                         return -1; 
 63                     }
 64 
 65                     for (cmp = CMSG_FIRSTHDR(&msg); cmp != NULL; cmp = CMSG_NXTHDR(&msg, cmp)) { // visit every control message that was received
 66                         if (cmp->cmsg_level != SOL_SOCKET) { // messages at any other level are logged and skipped
 67                             fprintf (stderr, "ignore unknown socket level %d\n", cmp->cmsg_level); 
 68                             continue; 
 69                         }
 70 
 71                         switch (cmp->cmsg_type) {
 72                             case SCM_RIGHTS:
 73                                 newfd = *(int *) CMSG_DATA(cmp); // the passed descriptor
 74                                 break; 
 75                             case SCM_CREDTYPE:
 76                                 credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
 77                                 *uidptr = credp->CR_UID; // sender's uid goes out through uidptr, which is dereferenced without a NULL check
 78                                 break; 
 79                             default:
 80                                 fprintf (stderr, "ignore unknown msg type %d\n", cmp->cmsg_type); 
 81                                 break; 
 82                         }
 83                     }
 84                 } else { 
 85                     newfd = -status; // non-zero status is handed back as a negative number
 86                 }
 87 
 88                 nr -= 2; // stop counting the 2-byte trailer; ptr is now past &buf[nr], so the scan loop ends too
 89             }
 90         }
 91 
 92         free(cmptr); 
 93         if (nr > 0 && (*userfunc)(STDERR_FILENO, buf, nr) != nr) // bytes that preceded the trailer go to userfunc, with STDERR_FILENO as its first argument
 94             return -1; 
 95 
 96         if (status >= 0) // trailer seen: done; otherwise loop and read again
 97             return newfd; 
 98     }
 99 
100     return -1; 
101 }

 

重點分為兩個部分:

14-20 行,設定 unix domain socket 可以接收憑證資訊;

65-83 行,分別讀取控制訊息中的控制代碼與憑證資訊,這裡我們取了傳送程序的 uid 資訊作為憑證返回給上層呼叫者;

與傳送訊息類似,這裡使用系統提供的 CMSG_FIRSTHDR、CMSG_NXTHDR 在控制訊息中遍歷各個子部分。

 

重新編譯、執行 demo,卻發現出錯了:

./spipe_server ./spipe_client
create pipe 3.4
3 7
create temp file /tmp/outgQY1Y4 with fd 4
seek to head
send fd 4 to peer
recv fd 3, uid 500, position 0
create temp file /tmp/invVgKW4 with fd 4
source: 3 7

seek to head
connection closed by server
recv fd from peer failed, error -1

 

從輸出日誌看,第一次從 server 發往 client 的控制代碼及憑證是可以的(line 7),再之後 client 處理完訊息回傳時,就出錯了。

首先定位出錯程式碼位置,在 client 回傳這裡 (send_fd),加入一些日誌:

 

 1         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
 2             fprintf (stderr, "malloc memory failed\n"); 
 3             return -1; 
 4         }
 5 
 6         msg.msg_control = cmptr; 
 7         msg.msg_controllen = CONTROLLEN; 
 8 
 9         cmp = cmptr; 
10         cmp->cmsg_level = SOL_SOCKET; 
11         cmp->cmsg_type = SCM_RIGHTS; 
12         cmp->cmsg_len = RIGHTSLEN; 
13         *(int *) CMSG_DATA(cmp) = fd_to_send; 
14         fprintf (stderr, "add fd with len %d\n", RIGHTSLEN); 
15 
16         cmp = CMSG_NXTHDR(&msg, cmp); 
17         cmp->cmsg_level = SOL_SOCKET; 
18         cmp->cmsg_type = SCM_CREDTYPE; 
19         cmp->cmsg_len = CREDSLEN; 
20         credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
21         fprintf (stderr, "add credential with len %d\n", CREDSLEN); 
22 
23 #  if defined(SCM_CREDENTIALS)
24         // only linux need to set members of this struct !
25         credp->uid = getuid (); 
26         credp->gid = getegid (); 
27         credp->pid = getpid (); 
28         fprintf (stderr, "set uid %d, gid %d, pid %d\n", credp->uid, credp->gid, credp->pid);
29 #  endif
30         buf[1] = 0; 

 

新加入的輸出日誌(原文以黃色標示)位於第 14、21、28 行,再次編譯執行:

./spipe_server ./spipe_client
create pipe 3.4
3 7
create temp file /tmp/outivt2Og with fd 4
seek to head
add fd with len 16
add credential with len 24
set uid 500, gid 500, pid 12071
send fd 4 to peer
recv fd 3, uid 500, position 0
create temp file /tmp/inHqRwMg with fd 4
source: 3 7

seek to head
add fd with len 16
connection closed by server
recv fd from peer failed, error -1

 

可以看到,第一次傳遞時,這三條日誌全都正確輸出了,而回傳時,只輸出了第一條日誌。

所以明顯是在第一條日誌與第二條日誌之間的程式碼出了問題。左看右看,看不出這塊有什麼問題,難道系統提供的 CMSG_NXTHDR 會出錯?

這邊再加兩條日誌(下面的第 15 與第 18 行):

 1         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
 2             fprintf (stderr, "malloc memory failed\n"); 
 3             return -1; 
 4         }
 5 
 6         msg.msg_control = cmptr; 
 7         msg.msg_controllen = CONTROLLEN; 
 8 
 9         cmp = cmptr; 
10         cmp->cmsg_level = SOL_SOCKET; 
11         cmp->cmsg_type = SCM_RIGHTS; 
12         cmp->cmsg_len = RIGHTSLEN; 
13         *(int *) CMSG_DATA(cmp) = fd_to_send; 
14         fprintf (stderr, "add fd with len %d\n", RIGHTSLEN); 
15         fprintf (stderr, "cmsghdr = %d, cmsglen = %d, after align = %d, control len = %d\n", sizeof(struct cmsghdr), CREDSLEN, CMSG_ALIGN(CREDSLEN), CONTROLLEN); 
16 
17         cmp = CMSG_NXTHDR(&msg, cmp); 
18         fprintf (stderr, "cmp = %p\n", cmp); 
19         cmp->cmsg_level = SOL_SOCKET; 
20         cmp->cmsg_type = SCM_CREDTYPE; 
21         cmp->cmsg_len = CREDSLEN; 
22         credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
23         fprintf (stderr, "add credential with len %d\n", CREDSLEN); 
24 
25 #  if defined(SCM_CREDENTIALS)
26         // only linux need to set members of this struct !
27         credp->uid = getuid (); 
28         credp->gid = getegid (); 
29         credp->pid = getpid (); 
30         fprintf (stderr, "set uid %d, gid %d, pid %d\n", credp->uid, credp->gid, credp->pid);
31 #  endif
32         buf[1] = 0; 

 

第二條日誌是主要懷疑的地方,看指標是否為空;第一條日誌則是懷疑塊大小計算有誤,導致分配的記憶體不夠大,指標遞增時出現了範圍錯誤,所以這裡列印各種長度做驗證。

再次執行後,又多了一些輸出:

./spipe_server ./spipe_client
create pipe 3.4
3 7
create temp file /tmp/out7UgSYZ with fd 4
seek to head
add fd with len 16
cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
cmp = 0x9ded018
add credential with len 24
set uid 500, gid 500, pid 12100
send fd 4 to peer
recv fd 3, uid 500, position 0
create temp file /tmp/inC3nyWZ with fd 4
source: 3 7

seek to head
add fd with len 16
cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
cmp = (nil)
connection closed by server
recv fd from peer failed, error -1

 

神奇的地方出現了,同樣的程式碼,相同的尺寸,第一次指標正常;第二次就為空了!

崩潰點找到了,但是還是一頭霧水,看起來資料塊都對齊了,計算也沒毛病,難道是這個系統提供的巨集 (CMSG_NXTHDR) 出問題了嗎?

翻看標頭檔案,找到這一段的定義 (我所在的系統,位於 /usr/include/bits/socket.h (L311)):

 1 __EXTERN_INLINE struct cmsghdr *
 2 __NTH (__cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg))
 3 { // Returns the control message header that follows __cmsg inside __mhdr's control buffer, or 0 when there is none.
 4   if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr))
 5     /* The kernel header does this so there may be a reason.  */
 6     return 0;
 7 
 8   __cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg // step forward by the current message's aligned length
 9                    + CMSG_ALIGN (__cmsg->cmsg_len));
10   if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control // the next header itself must end inside the control buffer
11                     + __mhdr->msg_controllen)
12       || ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len) // and so must the next message as sized by its own cmsg_len, which is read from the buffer here
13       > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)))
14     /* No more entries.  */
15     return 0;
16   return __cmsg;
17 }

 

這段 INLINE 函式主要包含三個判斷,

1)子訊息長度小於訊息頭長度,返回 null;

2)下一個子訊息的訊息頭超出訊息尾部,返回null;

3)下一個子訊息的訊息體超出訊息尾部,返回null;

直接修改系統程式碼不方便,將這個函式拷貝到本地並重新命名為 my_cmsg_nxthdr,在各個判斷下面新增日誌輸出:

 1 struct cmsghdr *my_cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg)
 2 { // Returns the control message header that follows __cmsg inside __mhdr's control buffer, or 0 when there is none; each check is a separate if and writes a trace line to stderr.
 3   if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr)) { // check 1: the current message is shorter than a header
 4     /* The kernel header does this so there may be a reason.  */
 5     fprintf (stderr, "in step1\n"); 
 6     return 0;
 7   }
 8 
 9   fprintf (stderr, "%p: cmsg_len %u, cmsg_level %d, cmsg_type %d\n", __cmsg, __cmsg->cmsg_len, __cmsg->cmsg_level, __cmsg->cmsg_type); // trace the current header before stepping
10   __cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len)); // step forward by the current message's aligned length
11   if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)) { // check 2: the next header would end past the control buffer
12       fprintf (stderr, "in step2\n"); 
13       return 0; 
14   }
15 
16   fprintf (stderr, "%p: cmsg_len %u, cmsg_level %d, cmsg_type %d\n", __cmsg, __cmsg->cmsg_len, __cmsg->cmsg_level, __cmsg->cmsg_type); // trace whatever the next header slot currently holds
17   if (((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len) > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen))) { // check 3: the next message, sized by its own cmsg_len, would end past the control buffer
18     /* No more entries.  */
19       fprintf (stderr, "in step3\n"); 
20       fprintf (stderr, "msg len %d, after align %d, msg control %d\n", __cmsg->cmsg_len, CMSG_ALIGN(__cmsg->cmsg_len), __mhdr->msg_controllen); // NOTE(review): %u/%d are used for length fields here and above - confirm they match the fields' actual types on the target platform
21     return 0;
22   }
23 
24   fprintf (stderr, "in final step\n"); 
25   return __cmsg;
26 }

 

為了便於根據不同的判斷條件輸出日誌,這裡對判斷條件進行了拆分。

再次執行 demo,輸出如下:

./spipe_server ./spipe_client
create pipe 3.4
3 7
create temp file /tmp/outh7NhIs with fd 4
seek to head
add fd with len 16
cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
0x9336008: cmsg_len 16, cmsg_level 1, cmsg_type 1
0x9336018: cmsg_len 0, cmsg_level 0, cmsg_type 0
in final step
cmp = 0x9336018
add credential with len 24
set uid 500, gid 500, pid 12171
send fd 4 to peer
recv fd 3, uid 500, position 0
create temp file /tmp/inoJMmKs with fd 4
source: 3 7

seek to head
add fd with len 16
cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
0x904d008: cmsg_len 16, cmsg_level 1, cmsg_type 1
0x904d018: cmsg_len 500, cmsg_level 500, cmsg_type 16
in step3
msg len 500, after align 500, msg control 40
cmp = (nil)
connection closed by server
recv fd from peer failed, error -1

 

原來是第三個判斷出現了問題(見輸出的第 24 行「in step3」)!

訊息總長度是 16 + 24 = 40,而這裡的第二個子訊息單個的長度達到 500,明顯越界了。

但是第二個子訊息的長度明明是 24 呀,哪裡跑出來的 500 呢?

而且它的其它欄位也明顯不對,例如訊息 level 也是 500,訊息型別是 16 !

 

初步可以確定是這塊記憶體被弄亂了,而從前面列印的訊息指標(0x904d008 與 0x904d018)看,分配的大小是沒問題的,因此記憶體越界問題先排除掉;

其次是我們設定好的內容……等等……我們好像還沒有設定第二個子訊息的內容!!

……

垃圾資料!!

……

malloc 之後沒有清空的垃圾資料!!

……

這也是第一次呼叫沒問題而第二次掉坑裡的原因,隨著系統記憶體的分配回收而存在一定的隨機性!

 

找到原因之後,修改就簡單了,可以將 malloc 替換為 calloc,或者簡單的加一句 memset 來清空記憶體:

 1         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
 2             fprintf (stderr, "malloc memory failed\n"); 
 3             return -1; 
 4         }
 5 
 6         // important on linux, garbage data may mess cmsg_len fields, 
 7         // and cause CMSG_NXTHDR return null on protection.
 8         memset (cmptr, 0, CONTROLLEN); 
 9         msg.msg_control = cmptr; 
10         msg.msg_controllen = CONTROLLEN; 

 

再次執行 demo,一切正常:

./spipe_server ./spipe_client
create pipe 3.4
3 7
create temp file /tmp/outqsTYkp with fd 4
seek to head
add fd with len 16
cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
0x814c008: cmsg_len 16, cmsg_level 1, cmsg_type 1
0x814c018: cmsg_len 0, cmsg_level 0, cmsg_type 0
in final step
cmp = 0x814c018
add credential with len 24
set uid 500, gid 500, pid 12207
send fd 4 to peer
recv fd 3, uid 500, position 0
create temp file /tmp/in3ntkip with fd 4
source: 3 7

seek to head
add fd with len 16
cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
0x8389008: cmsg_len 16, cmsg_level 1, cmsg_type 1
0x8389018: cmsg_len 0, cmsg_level 0, cmsg_type 0
in final step
cmp = 0x8389018
add credential with len 24
set uid 500, gid 500, pid 12208
send fd 4
recv fd 5, uid 500 from peer, position 0
10

 

通過這次 debug,找到了經典的 APUE 例子中的一個瑕疵 (隨機性比較大,大師剛好沒有遇到而已,可能你的機器也不復現)。

不過回過頭來看這個場景,也不能全算在 coder 身上,我感覺系統提供的這個 CMSG_NXTHDR 巨集也頗成問題:

如果我呼叫這個之前還沒有設定下一個子訊息,難道還不准我使用了麼? 過多的檢查反而弄巧成拙,總之一句話:差評! 哈 (補充:這其實是文件中有載明的行為——cmsg(3) 手冊指出,在用 CMSG_NXTHDR 逐一填寫準備交給 sendmsg 的控制訊息緩衝區之前,應先將緩衝區清零。)