complete redesign of watchdog code

This commit is contained in:
John Winans
1992-04-06 14:40:03 +00:00
parent d567298c82
commit 3ee58625f1

View File

@@ -57,9 +57,11 @@
#include <iosLib.h>
#include <taskLib.h>
#include <memLib.h>
#include <semLib.h>
#include <rngLib.h>
#include <wdLib.h>
#include <wdLib.h>
#include <tickLib.h>
#include <vme.h>
#include <task_params.h>
@@ -409,7 +411,6 @@ int link;
return(0);
}
/******************************************************************************
*
* This interrupt handler is invoked when the BitBus controller has completed
@@ -436,19 +437,14 @@ checkLink(link)
int link;
{
if (link<0 || link>BB_NUM_LINKS)
{
/* link number out of range */
return(ERROR);
}
return(ERROR); /* link number out of range */
if (pXvmeLink[link] == NULL)
{
/* link number has no card installed */
return(ERROR);
}
return(ERROR); /* link number has no card installed */
return(OK);
}
/******************************************************************************
*
* Watchdogs are running whenever the busy list has any elements in it.
@@ -492,6 +488,7 @@ int link;
int rxTCount; /* byte counter for data in rxHead */
unsigned char ch;
struct dpvtBitBusHead *rxDpvtHead; /* for message currently receiving */
int lockKey; /* used for intLock calls */
rxMsg = (unsigned char *) NULL;
rxStatus = BBRX_HEAD;
@@ -504,17 +501,23 @@ int link;
if ((pXvmeLink[link]->bbRegs->fifo_stat & XVME_RFNE) == 0)
{
/* Enable interrupts and check again because xycom blew it */
lockKey = intLock();
pXvmeLink[link]->bbRegs->stat_ctl = XVME_ENABLE_INT | XVME_RX_INT;
intUnlock(lockKey);
while (((pXvmeLink[link]->bbRegs->fifo_stat & XVME_RFNE) == 0) && (pXvmeLink[link]->abortFlag == 0))
{
/* Re-enable ints here each time in case board got reset */
lockKey = intLock();
pXvmeLink[link]->bbRegs->stat_ctl = XVME_ENABLE_INT | XVME_RX_INT;
intUnlock(lockKey);
semTake(pXvmeLink[link]->rxInt, WAIT_FOREVER); /* wait for groceries */
}
/* Disable RX Interrupts (prevents unnecessary context switching) */
lockKey = intLock();
pXvmeLink[link]->bbRegs->stat_ctl = 0;
intUnlock(lockKey);
}
if (pXvmeLink[link]->abortFlag == 0)
{
@@ -551,6 +554,9 @@ int link;
/* Delete the node from the list */
listDel(&(pXvmeLink[link]->pbbLink->busyList), rxDpvtHead);
/* decrement the number of outstanding messages to the node */
(pXvmeLink[link]->pbbLink->deviceStatus[rxDpvtHead->txMsg.node])--;
rxDpvtHead->rxMsg.length = rxHead[0];
rxDpvtHead->rxMsg.route = rxHead[1];
@@ -616,9 +622,6 @@ int link;
if (bbDebug>24)
printf("xvmeRxTask(%d):RX command byte = %02.2X\n", link, rxDpvtHead->rxCmd);
/* decrement the number of outstanding messages to the node */
(pXvmeLink[link]->pbbLink->deviceStatus[rxDpvtHead->rxMsg.node])--;
if (rxDpvtHead->finishProc != NULL)
{
if (bbDebug>8)
@@ -626,8 +629,8 @@ int link;
callbackRequest(rxDpvtHead); /* schedule completion processing */
}
/* If there is a semaphore for synchronous I/O, unlock it */
if (rxDpvtHead->syncLock != NULL)
FASTUNLOCK(rxDpvtHead->syncLock);
if (rxDpvtHead->psyncSem != NULL)
semGive(*(rxDpvtHead->psyncSem));
/* If busy list is empty, stop the dog */
if (pXvmeLink[link]->pbbLink->busyList.head == NULL)
@@ -647,6 +650,14 @@ int link;
}
else
{ /* Link abort state is active reset receiver link status now */
if (rxDpvtHead != NULL)
{ /* This xact is not on the busy list, put it back on */
FASTLOCK(&(pXvmeLink[link]->pbbLink->busyList.sem));
(pXvmeLink[link]->pbbLink->deviceStatus[rxDpvtHead->txMsg.node])++;
listAddTail(&(pXvmeLink[link]->pbbLink->busyList), rxDpvtHead);
FASTUNLOCK(&(pXvmeLink[link]->pbbLink->busyList.sem));
}
rxMsg = (unsigned char *) NULL;
rxStatus = BBRX_HEAD;
rxTCount = 0;
@@ -668,6 +679,27 @@ int link;
}
}
/******************************************************************************
*
* A user callable link resetter. This sets a flag and releases the dog
* task to reset the link.
*
******************************************************************************/
int
bbReset(link)
int link;
{
if (checkLink(link) != ERROR)
{
pXvmeLink[link]->pbbLink->nukeEm = 1;
semGive(pXvmeLink[link]->watchDogSem);
}
else
printf("Link %d not installed.\n", link);
return(0);
}
/******************************************************************************
*
******************************************************************************/
@@ -679,22 +711,27 @@ int link;
struct dpvtBitBusHead *pnode;
struct dpvtBitBusHead *npnode;
unsigned long now;
int tixPerSecond;
SEM_ID syncSem;
struct dpvtBitBusHead resetNode;
unsigned char resetNodeData; /* 1-byte data field for RAC_OFFLINE */
/*
* Hand-craft a RAC_OFFLINE message to use when a message times out.
*
* NOTE that having only one copy is OK provided that this message is
* sent immediately following the RAC_RESET_SLAVE message.
*/
tixPerSecond = sysClkRateGet(); /* What is the timer clock rate? */
resetNode.finishProc = NULL;
resetNode.syncLock = NULL;
resetNode.link = link;
/* init the SEM used when sending the reset message */
syncSem = semBCreate(SEM_EMPTY, SEM_Q_PRIORITY);
/*
* Hand-craft a RAC_OFFLINE message to use when a message times out.
* NOTE that having only one copy is OK provided that the dog waits for
* a response before sending it again!
*/
resetNode.finishProc = NULL; /* no callback routine used */
resetNode.psyncSem = &syncSem;/* do a semGive on this SEM when done sending */
resetNode.link = link; /* which bitbus link to send message out on */
resetNode.rxMaxLen = 7; /* Chop off the response... we don't care */
resetNode.ageLimit = 20; /* Time to wait on response */
resetNode.ageLimit = tixPerSecond*100; /* make sure this never times out */
resetNode.txMsg.length = 8;
resetNode.txMsg.route = BB_STANDARD_TX_ROUTE;
@@ -705,102 +742,120 @@ int link;
plink = pXvmeLink[link]->pbbLink;
plink->nukeEm = 0; /* Make sure the nuke status is clear */
while(1)
{
semTake(pXvmeLink[link]->watchDogSem, WAIT_FOREVER);
now = tickGet(); /* what time is it? */
if (plink->nukeEm != 0)
printf("Bitbus manual reset being issued on link %d\n", link);
if (bbDebug>4)
printf("xvmeWdTask(%d): (Watchdog) checking busy list\n", link);
FASTLOCK(&(plink->busyList.sem));
pXvmeLink[link]->rxAbortAck = 0; /* In case we need to use them */
pXvmeLink[link]->txAbortAck = 0;
now = tickGet(); /* what time is it? */
if (plink->nukeEm != 0)
{ /* set abort status and wait for the abort acks */
pXvmeLink[link]->abortFlag = 1;
/* wake up the Tx task so it can observe the abort status */
semGive(plink->linkEventSem);
/* wake up the Rx task so it can observe the abort status */
semGive(pXvmeLink[link]->rxInt);
/* sleep until abort ack from Tx & Rx tasks */
while ((pXvmeLink[link]->rxAbortAck == 0) && (pXvmeLink[link]->txAbortAck == 0))
taskDelay(RESET_POLL_TIME);
}
/*
* Run thru entire busy list to see if there are any transactions
* that have been waiting on a response for too long a period.
*/
FASTLOCK(&(plink->busyList.sem));
pnode = plink->busyList.head;
while (pnode != NULL)
{
if (pnode->retire <= now)
{ /* This node has been on the list too long. */
npnode = pnode->next; /* remember where we were in the list */
npnode = pnode->next; /* remember where we were in the list */
if ((plink->nukeEm != 0) || (pnode->retire <= now))
{
/* Get rid of the request and set error status etc... */
listDel(&(plink->busyList), pnode);
/* Get rid of the request and set error status etc... */
listDel(&(plink->busyList), pnode);
/*if (bbDebug)*/
printf("xvmeWdTask(%d): TIMEOUT on xact 0x%08.8X\n", link, pnode);
/*if (bbDebug)*/
printf("xvmeWdTask(%d): TIMEOUT on xact 0x%08.8X\n", link, pnode);
(plink->deviceStatus[pnode->txMsg.node])--; /* fix device status */
pnode->status = BB_TIMEOUT;
if (pnode == &resetNode)
{ /* YIKES! it is my OWN message! Link is totaly dead! */
printf("BitBus link %d dead... Rebooting interface now\n", link);
/* Release a completion lock if one was spec'd */
if (pnode->psyncSem != NULL)
semGive(*(pnode->psyncSem));
/* Make the callbackRequest if one was spec'd */
if(pnode->finishProc != NULL)
callbackRequest(pnode); /* schedule completion processing */
/* set abort status for the link to be reset */
pXvmeLink[link]->rxAbortAck = 0; /* clear the ACK flags */
pXvmeLink[link]->txAbortAck = 0;
/* If we are not going to reboot the link... */
if (plink->nukeEm == 0)
{ /* Send out a RAC_NODE_OFFLINE to the controller */
FASTUNLOCK(&(plink->busyList.sem)); /* so Tx and Rx can work */
pXvmeLink[link]->abortFlag = 1;
resetNodeData = pnode->txMsg.node; /* mark the node number */
printf("issuing a node offline for link %d node %d\n", link, resetNodeData);
/* wake up the Tx task so it can observe the abort status */
semGive(plink->linkEventSem);
FASTLOCK(&(plink->queue[BB_Q_HIGH].sem)); /* queue the message */
listAddHead(&(plink->queue[BB_Q_HIGH]), &resetNode);
FASTUNLOCK(&(plink->queue[BB_Q_HIGH].sem));
/* wake up the Rx task so it can observe the abort status */
semGive(pXvmeLink[link]->rxInt);
semGive(plink->linkEventSem); /* Tell TxTask to send the message */
/* sleep until abort ack from Tx & Rx tasks */
while ((pXvmeLink[link]->rxAbortAck == 0) && (pXvmeLink[link]->txAbortAck == 0))
{
taskDelay (RESET_POLL_TIME);
if (semTake(syncSem, tixPerSecond/4) == ERROR)
{
printf("xvmeWdTask(%d): link dead, trying manual reboot\n", link);
plink->nukeEm = 1;
pXvmeLink[link]->abortFlag = 1; /* Start the abort sequence */
semGive(plink->linkEventSem); /* Let Tx task observe abort status */
semGive(pXvmeLink[link]->rxInt); /* Let Rx task observe abort ststus */
/* sleep until abort ack from Tx & Rx tasks */
while ((pXvmeLink[link]->rxAbortAck == 0) && (pXvmeLink[link]->txAbortAck == 0))
taskDelay(RESET_POLL_TIME);
}
xvmeReset(pXvmeLink[link]->bbRegs, link);
/* clear the abort_flag */
pXvmeLink[link]->abortFlag = 0;
/* signal the RxTask to re-enable its interrupts */
semGive(pXvmeLink[link]->rxInt);
/* signal the TxTask to wake up & check its queues */
semGive(plink->linkEventSem);
/* Start over since released the busy list */
FASTLOCK(&(plink->busyList.sem));
npnode = plink->busyList.head;
}
else
{
pnode->status = BB_TIMEOUT;
/* Release a completion lock if one was spec'd */
if (pnode->syncLock != NULL)
FASTUNLOCK(pnode->syncLock);
/* Make the callbackRequest if one was spec'd */
if(pnode->finishProc != NULL)
callbackRequest(pnode); /* schedule completion processing */
/* Have to reset the master so it won't wait on a response */
resetNodeData = pnode->txMsg.node; /* mark the node number */
/* No other code takes both the busy & queue sems at same time, */
/* so it is ok to do so here. */
FASTLOCK(&(plink->queue[BB_Q_HIGH].sem));
listAddHead(&(plink->queue[BB_Q_HIGH]), &resetNode);
FASTUNLOCK(&(plink->queue[BB_Q_HIGH].sem));
}
pnode = npnode; /* get @ of next node in list */
}
else
pnode = pnode->next; /* get @ of next node in list */
pnode = npnode;
}
/* Restart the timer if the list is not empty */
if (plink->busyList.head != NULL)
{
if (bbDebug>5)
printf("xvmeWdTask(%d): restarting watch dog timer\n", link);
{ /* Restart the dog timer */
wdStart(pXvmeLink[link]->watchDogId, plink->busyList.head->retire - now,
xvmeTmoHandler, link);
if (bbDebug>5)
printf("xvmeWdTask(%d): restarting watch dog timer\n", link);
wdStart(pXvmeLink[link]->watchDogId, plink->busyList.head->retire - now, xvmeTmoHandler, link);
}
FASTUNLOCK(&(plink->busyList.sem)); /* don't need any more */
/* Finish the link reboot if necessary */
if (plink->nukeEm != 0)
{
xvmeReset(pXvmeLink[link]->bbRegs, link);
/* clear the abort_flag */
pXvmeLink[link]->abortFlag = 0;
plink->nukeEm = 0;
}
FASTUNLOCK(&(plink->busyList.sem));
}
}
@@ -841,7 +896,7 @@ int link;
/* sent immediately following the RAC_RESET_SLAVE message. */
resetNode.finishProc = NULL;
resetNode.syncLock = NULL;
resetNode.psyncSem = NULL;
resetNode.link = link;
resetNode.rxMaxLen = 7; /* chop it off */
resetNode.ageLimit = 10;
@@ -870,8 +925,8 @@ int link;
/* if (bbDebug) */
printf("xvmeTxTask(%d): restarting after abort\n", link);
}
semTake(plink->linkEventSem, WAIT_FOREVER);
else
semTake(plink->linkEventSem, WAIT_FOREVER);
if (bbDebug>5)
printf("xvmeTxTask(%d): got an event\n", link);
@@ -907,9 +962,6 @@ int link;
if (bbDebug>3)
printf("xvmeTxTask(%d): got xact, pnode=0x%08.8X\n", link, pnode);
/* Count the outstanding messages */
(plink->deviceStatus[pnode->txMsg.node])++;
/* Send the message in polled mode */
txTCount = pnode->txMsg.length - 2;
@@ -945,7 +997,7 @@ int link;
{
/* Lock the busy list */
FASTLOCK(&(plink->busyList.sem));
if (plink->busyList.head == NULL)
dogStart = 1;
else
@@ -953,6 +1005,9 @@ int link;
/* Add pnode to the busy list */
listAddTail(&(plink->busyList), pnode);
/* Count the outstanding messages */
(plink->deviceStatus[pnode->txMsg.node])++;
FASTUNLOCK(&(plink->busyList.sem));
@@ -973,9 +1028,6 @@ int link;
/* if (bbDebug) */
printf("xvmeTxTask(%d): RAC_RESET_SLAVE sent, resetting node %d\n", link, pnode->txMsg.node);
/* decrement the number of outstanding messages to the node */
(plink->deviceStatus[pnode->txMsg.node])--;
pnode->status = BB_OK;
if (pnode->finishProc != NULL)
@@ -987,8 +1039,8 @@ int link;
}
/* If there is a semaphore for synchronous I/O, unlock it */
if (pnode->syncLock != NULL)
FASTUNLOCK(pnode->syncLock);
if (pnode->psyncSem != NULL)
semGive(*(pnode->psyncSem));
/* have to reset the master so it won't wait on a response */
resetNodeData = pnode->txMsg.node; /* mark the node number */
@@ -1005,8 +1057,6 @@ int link;
FASTLOCK(&(plink->queue[BB_Q_HIGH].sem));
listAddHead(&(plink->queue[BB_Q_HIGH]), pnode);
FASTUNLOCK(&(plink->queue[BB_Q_HIGH].sem));
(plink->deviceStatus[pnode->txMsg.node])--; /* fix count/status */
}
break; /* stop checking the fifo queues */
}