最近在一个项目中,关闭UDT(也就是调用 UDT::close)之后,总会出现UDT内部的一个链表在删除元素时出错的问题。
直接原因是指针非法。这看起来是内存错误,可究竟是哪里出了问题呢?一个简单的猜想是:某个资源在某处申请(注册)了,但是关闭时却没有释放(注销)。
访问尚未释放的内存是没有问题的,访问已经释放的资源才会出问题,所以这里要查的是:为什么对象已经释放了,却还在被调用?
问题主要出在 CUDT::connect:连接的时候,会把当前socket注册到接收队列里,供队列线程检查连接状态,然后 m_bConnecting 被置为 true。可是在某一次更新状态(连接超时)时,这个变量被设置成了 false。遗憾的是,close 的时候需要检查这个变量,只有它为真才会把socket从接收队列中删除,所以 close 时并没有把它从队列里面删除。close 完了之后,队列线程再检查时就会访问到一个已经被释放的对象,调用其成员函数时就会发生内存错误。没错,这是UDT的BUG。我去,害得人好苦。
void CUDT::connect(const sockaddr* serv_addr)
{
CGuard cg(m_ConnectionLock);
if (!m_bOpened)
throw CUDTException(5, 0, 0);
if (m_bListening)
throw CUDTException(5, 2, 0);
if (m_bConnecting || m_bConnected)
throw CUDTException(5, 2, 0);
// record peer/server address
delete m_pPeerAddr;
m_pPeerAddr = (AF_INET == m_iIPversion) ? (sockaddr*)new sockaddr_in : (sockaddr*)new sockaddr_in6;
memcpy(m_pPeerAddr, serv_addr, (AF_INET == m_iIPversion) ? sizeof(sockaddr_in) : sizeof(sockaddr_in6));
// register this socket in the rendezvous queue
// RendezevousQueue is used to temporarily store incoming handshake, non-rendezvous connections also require this function
uint64_t ttl = 3000000;
if (m_bRendezvous)
ttl *= 10;
ttl += CTimer::getTime();
//这里注册了队列,注意了,需要关闭从队列中删除。
m_pRcvQueue->registerConnector(m_SocketID, this, m_iIPversion, serv_addr, ttl);
// This is my current configurations
m_ConnReq.m_iVersion = m_iVersion;
m_ConnReq.m_iType = m_iSockType;
m_ConnReq.m_iMSS = m_iMSS;
m_ConnReq.m_iFlightFlagSize = (m_iRcvBufSize < m_iFlightFlagSize)? m_iRcvBufSize : m_iFlightFlagSize;
m_ConnReq.m_iReqType = (!m_bRendezvous) ? 1 : 0;
m_ConnReq.m_iID = m_SocketID;
CIPAddress::ntop(serv_addr, m_ConnReq.m_piPeerIP, m_iIPversion);
// Random Initial Sequence Number
srand((unsigned int)CTimer::getTime());
m_iISN = m_ConnReq.m_iISN = (int32_t)(CSeqNo::m_iMaxSeqNo * (double(rand()) / RAND_MAX));
m_iLastDecSeq = m_iISN – 1;
m_iSndLastAck = m_iISN;
m_iSndLastDataAck = m_iISN;
m_iSndCurrSeqNo = m_iISN – 1;
m_iSndLastAck2 = m_iISN;
m_ullSndLastAck2Time = CTimer::getTime();
// Inform the server my configurations.
CPacket request;
char* reqdata = new char [m_iPayloadSize];
request.pack(0, NULL, reqdata, m_iPayloadSize);
// ID = 0, connection request
request.m_iID = 0;
int hs_size = m_iPayloadSize;
m_ConnReq.serialize(reqdata, hs_size);
request.setLength(hs_size);
m_pSndQueue->sendto(serv_addr, request);
m_llLastReqTime = CTimer::getTime();
m_bConnecting = true; //这里置为true表示 在连接
// asynchronous connect, return immediately
if (!m_bSynRecving)
{
delete [] reqdata;
return;
}
..
..
void CUDT::close()
{
if (!m_bOpened)
return;
if (0 != m_Linger.l_onoff)
{
uint64_t entertime = CTimer::getTime();
while (!m_bBroken && m_bConnected && (m_pSndBuffer->getCurrBufSize() > 0) && (CTimer::getTime() – entertime < m_Linger.l_linger * 1000000ULL))
{
// linger has been checked by previous close() call and has expired
if (m_ullLingerExpiration >= entertime)
break;
if (!m_bSynSending)
{
// if this socket enables asynchronous sending, return immediately and let GC to close it later
if (0 == m_ullLingerExpiration)
m_ullLingerExpiration = entertime + m_Linger.l_linger * 1000000ULL;
return;
}
#ifndef WIN32
timespec ts;
ts.tv_sec = 0;
ts.tv_nsec = 1000000;
nanosleep(&ts, NULL);
#else
Sleep(1);
#endif
}
}
// remove this socket from the snd queue
if (m_bConnected)
m_pSndQueue->m_pSndUList->remove(this);
// trigger any pending IO events.
s_UDTUnited.m_EPoll.update_events(m_SocketID, m_sPollID, UDT_EPOLL_ERR, true);
// then remove itself from all epoll monitoring
try
{
for (set<int>::iterator i = m_sPollID.begin(); i != m_sPollID.end(); ++ i)
s_UDTUnited.m_EPoll.remove_usock(*i, m_SocketID);
}
catch (…)
{
}
if (!m_bOpened)
return;
// Inform the threads handler to stop.
m_bClosing = true;
CGuard cg(m_ConnectionLock);
// Signal the sender and recver if they are waiting for data.
releaseSynch();
if (m_bListening)
{
m_bListening = false;
m_pRcvQueue->removeListener(this);
}
//else if (m_bConnecting) //注意了这句是我主动注释掉的,因为在关闭的时候这个值为false,不会执行下面这句,
//杯具就会发生了。
{
m_pRcvQueue->removeConnector(m_SocketID);
}
if (m_bConnected)
{
if (!m_bShutdown)
sendCtrl(5);
m_pCC->close();
// Store current connection information.
CInfoBlock ib;
ib.m_iIPversion = m_iIPversion;
CInfoBlock::convert(m_pPeerAddr, m_iIPversion, ib.m_piIP);
ib.m_iRTT = m_iRTT;
ib.m_iBandwidth = m_iBandwidth;
m_pCache->update(&ib);
m_bConnected = false;
}
// waiting all send and recv calls to stop
CGuard sendguard(m_SendLock);
CGuard recvguard(m_RecvLock);
// CLOSED.
m_bOpened = false;
}
..
大家可能会问:为什么到了 close 函数里,m_bConnecting 会变成 false 呢?肯定是在某个地方被修改了,但又没有同时把socket从队列中删除。
有兴趣的读者可以自己找找看,其实一找就知道了:在与 CRcvQueue 相关的 CRendezvousQueue 当中,有一个更新连接状态的函数 updateConnStatus,这里面有一处坑爹的变量赋值:
void CRendezvousQueue::updateConnStatus()
{
if (m_lRendezvousID.empty())
return;
CGuard vg(m_RIDVectorLock);
for (list<CRL>::iterator i = m_lRendezvousID.begin(); i != m_lRendezvousID.end(); ++ i)
{
// avoid sending too many requests, at most 1 request per 250ms
if (CTimer::getTime() – i->m_pUDT->m_llLastReqTime > 250000)
{
if (CTimer::getTime() >= i->m_ullTTL)
{
// connection timer expired, acknowledge app via epoll
i->m_pUDT->m_bConnecting = false;//你这里不是坑爹吗?超时就设置了,可是关闭时候又需要这个值啊
CUDT::s_UDTUnited.m_EPoll.update_events(i->m_iID, i->m_pUDT->m_sPollID, UDT_EPOLL_ERR, true);
continue;
}
本文转载自:http://www.paobuke.com/develop/pbk1393.html