Skip to content

Commit 29bcd9b

Browse files
z-wenlinzhangyue1818
authored and committed
Add hint message for MTU settings when IC reports ERROR "Failed to send packet" (#17164)
* Add hint message for MTU settings when IC reports ERROR "Failed to send packet".
1 parent e349ec5 commit 29bcd9b

File tree

4 files changed

+50
-4
lines changed

4 files changed

+50
-4
lines changed

contrib/interconnect/udp/ic_faultinjection.h

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ typedef enum {
8383
FINC_OS_NET_INTERFACE = 19,
8484
FINC_OS_MEM_INTERFACE = 20,
8585
FINC_OS_CREATE_THREAD = 21,
86+
FINC_PKT_TOO_LONG = 22,
8687

8788
/* These are used to inject network faults. */
8889
FINC_NET_RECV_ERROR = 23,
@@ -301,6 +302,13 @@ testmode_sendto(const char *caller_name, int socket, const void *buffer,
301302
errno = EFAULT;
302303
return -1;
303304

305+
case FINC_PKT_TOO_LONG:
306+
if (!FINC_HAS_FAULT(fault_type) || !is_pkt)
307+
break;
308+
write_log("inject fault to sendto: FINC_PKT_TOO_LONG");
309+
errno = EMSGSIZE;
310+
return -1;
311+
304312
default:
305313
break;
306314
}

contrib/interconnect/udp/ic_udpifc.c

Lines changed: 30 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1836,10 +1836,23 @@ sendControlMessage(icpkthdr *pkt, int fd, struct sockaddr *addr, socklen_t peerL
18361836
if (gp_interconnect_full_crc)
18371837
addCRC(pkt);
18381838

1839-
char errDetail[100];
1840-
snprintf(errDetail, sizeof(errDetail), "Send control message: got error with seq %u", pkt->seq);
1841-
/* Retry for infinite times since we have no retransmit mechanism for control message */
1842-
n = sendtoWithRetry(fd, (const char *) pkt, pkt->len, 0, addr, peerLen, -1, errDetail);
1839+
/* retry 10 times for sending control message */
1840+
int counter = 0;
1841+
while (counter < 10)
1842+
{
1843+
counter++;
1844+
n = sendto(fd, (const char *) pkt, pkt->len, 0, addr, peerLen);
1845+
if (n < 0)
1846+
{
1847+
if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
1848+
continue;
1849+
else {
1850+
write_log("sendcontrolmessage: got errno %d", errno);
1851+
return;
1852+
}
1853+
}
1854+
break;
1855+
}
18431856
if (n < pkt->len)
18441857
write_log("sendcontrolmessage: got error %d errno %d seq %d", n, errno, pkt->seq);
18451858
}
@@ -4877,6 +4890,19 @@ sendtoWithRetry(int socket, const void *message, size_t length,
48774890
return n;
48784891
}
48794892

4893+
/*
4894+
* If the OS can detect an MTU issue on the host network interfaces, we
4895+
* would get EMSGSIZE here. So, bail with a HINT about checking MTU.
4896+
*/
4897+
if (errno == EMSGSIZE)
4898+
{
4899+
ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
4900+
errmsg("Interconnect error writing an outgoing packet: %m"),
4901+
errdetail("error during sendto() call (error:%d).\n"
4902+
"%s", save_errno, errDetail),
4903+
errhint("check if interface MTU is equal across the cluster and lower than gp_max_packet_size")));
4904+
}
4905+
48804906
ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
48814907
errmsg("Interconnect error writing an outgoing packet: %m"),
48824908
errdetail("error during sendto() call (error:%d).\n"

src/test/regress/expected/icudp/icudp_full.out

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -544,6 +544,14 @@ SELECT system_call_fault_injection_test();
544544

545545
(1 row)
546546

547+
-- inject faults for errMsgSize when packet is too long.
548+
SET gp_udpic_fault_inject_bitmap = 4194304;
549+
SELECT system_call_fault_injection_test();
550+
system_call_fault_injection_test
551+
----------------------------------
552+
553+
(1 row)
554+
547555
-- disable ipv6 may increase the code coverage.
548556
SET gp_udpic_network_disable_ipv6 = 1;
549557
SELECT system_call_fault_injection_test();

src/test/regress/sql/icudp/icudp_full.sql

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -276,6 +276,10 @@ $$;
276276
SET gp_udpic_fault_inject_bitmap = 524288;
277277
SELECT system_call_fault_injection_test();
278278

279+
-- inject faults for errMsgSize when packet is too long.
280+
SET gp_udpic_fault_inject_bitmap = 4194304;
281+
SELECT system_call_fault_injection_test();
282+
279283
-- disable ipv6 may increase the code coverage.
280284
SET gp_udpic_network_disable_ipv6 = 1;
281285
SELECT system_call_fault_injection_test();

0 commit comments

Comments
 (0)