blob: 631fbb0704b707f6668d4e823e8ee452bc764e86 [file] [log] [blame]
/*
*
* Copyright 2016-2018 The nlfaultinjection Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/**
* @file
* Implementation of the fault-injection utilities.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <nlassert.h>
#include <nlfaultinjection.hpp>
namespace nl {
namespace FaultInjection {
// Forward declaration of the crash helper defined at the bottom of this file;
// the noreturn attribute tells the compiler that Die() never returns.
static void Die(void) __attribute__((noreturn));
// Application-supplied global context; stays NULL until SetGlobalContext()
// installs one.
static GlobalContext *sGlobalContext = NULL;
/**
 * Default callback implementing deterministic injection
 * (configured through FailAtFault).
 *
 * While the Record still has calls to skip, one skip is consumed and the
 * fault is not injected. Once the skip count is exhausted, each call
 * consumes one of the remaining failures and injects the fault.
 *
 * @param[in]     aId       The fault ID (unused).
 * @param[in,out] aRecord   The Record whose skip/fail counters are updated.
 * @param[in]     aContext  The callback context (unused).
 *
 * @return true if the fault should be injected on this call; false otherwise.
 */
static bool DeterministicCbFn(Identifier aId,
                              Record *aRecord,
                              void *aContext)
{
    (void)aId;
    (void)aContext;

    // Still inside the skip window: burn one skip, do not inject.
    if (aRecord->mNumCallsToSkip)
    {
        aRecord->mNumCallsToSkip--;
        return false;
    }

    // Skip window is over but there are no failures left to hand out.
    if (!aRecord->mNumCallsToFail)
    {
        return false;
    }

    aRecord->mNumCallsToFail--;
    return true;
}
/**
 * Callback list node for DeterministicCbFn.
 * This node terminates all callback lists: sRandomCb links to it, and
 * Manager::Init points every Record's callback list at sRandomCb.
 */
static Callback sDeterministicCb = { DeterministicCbFn, NULL, NULL };
/**
 * Default callback implementing random injection
 * (configured through FailRandomlyAtFault).
 *
 * When the Record's percentage is greater than zero, a value in [1, 100]
 * is drawn with rand() and the fault is injected if the draw does not
 * exceed the configured percentage. rand() is not called at all when the
 * percentage is zero.
 *
 * @param[in] aId       The fault ID (unused).
 * @param[in] aRecord   The Record holding the configured percentage.
 * @param[in] aContext  The callback context (unused).
 *
 * @return true if the fault should be injected on this call; false otherwise.
 */
static bool RandomCbFn(Identifier aId,
                       Record *aRecord,
                       void *aContext)
{
    (void)aId;
    (void)aContext;

    // Random injection disabled for this fault.
    if (!(aRecord->mPercentage > 0))
    {
        return false;
    }

    const int roll = 1 + (rand() % 100);

    return roll <= aRecord->mPercentage;
}
/**
 * Callback list node for RandomCbFn.
 * It is initialized to point to sDeterministicCb, so the two default
 * callbacks form the common tail of every callback list.
 * Manager::Init makes every Record's callback list start at this node.
 */
static Callback sRandomCb = { RandomCbFn, NULL, &sDeterministicCb };
/**
 * Alias for the address of the first default callback (sRandomCb).
 * Manager::ResetFaultConfigurations(Identifier) uses it as the stop marker
 * when unlinking custom callbacks, which are inserted ahead of the defaults.
 */
static const Callback *sEndOfCustomCallbacks = &sRandomCb;
/**
* Initialize the Manager instance.
*
* @param[in] inNumFaults The size of inFaultArray, equal to the number of fault IDs.
* @param[in] inFaultArray A pointer to an array of Record, in which this object
* will store the configuration of each fault.
* @param[in] inManagerName A pointer to a C string containing the name of the Manager.
* @param[in] inFaultNames A pointer to an array of inNumFaults C strings that describe
* each fault ID.
*
* @return -EINVAL if the inputs are not valid.
* 0 otherwise.
*/
int32_t Manager::Init(size_t inNumFaults,
Record *inFaultArray,
Name inManagerName,
const Name *inFaultNames)
{
int32_t err = 0;
Identifier i;
nlEXPECT_ACTION((inNumFaults > 0 && inFaultArray && inManagerName && inFaultNames), exit, err = -EINVAL);
mName = inManagerName;
mNumFaults = inNumFaults;
mFaultRecords = inFaultArray;
mFaultNames = inFaultNames;
mLock = NULL;
mUnlock = NULL;
mLockContext = NULL;
// Link all callback lists to the two default callbacks.
for (i = 0; i < mNumFaults; i++)
{
mFaultRecords[i].mCallbackList = &sRandomCb;
}
exit:
return err;
}
/**
 * Configure a fault to be triggered randomly, with a probability expressed
 * as a percentage. Any deterministic configuration (calls to skip / calls to
 * fail) previously stored for the fault is cleared.
 *
 * This is meant to be used on live systems to generate a build that will
 * encounter random failures.
 *
 * @param[in] inId          The fault ID.
 * @param[in] inPercentage  An integer between 0 and 100. 100 means "always". 0 means "never".
 *
 * @return -EINVAL if inId is out of range or inPercentage is above 100;
 *         0 otherwise.
 */
int32_t Manager::FailRandomlyAtFault(Identifier inId,
                                     uint8_t inPercentage)
{
    int32_t status = 0;

    nlEXPECT_ACTION((inId < mNumFaults && inPercentage <= 100),
                    exit,
                    status = -EINVAL);

    Lock();
    {
        Record &rec = mFaultRecords[inId];

        rec.mPercentage     = inPercentage;
        rec.mNumCallsToFail = 0;
        rec.mNumCallsToSkip = 0;
    }
    Unlock();

exit:
    return status;
}
/**
 * Configure a fault to be triggered deterministically. Any random
 * configuration previously stored for the fault is cleared (the percentage
 * is set to zero).
 *
 * @param[in] inId              The fault ID.
 * @param[in] inNumCallsToSkip  The number of times this fault is to be skipped before it
 *                              starts to fail; must not exceed UINT16_MAX.
 * @param[in] inNumCallsToFail  The number of times the fault should be triggered;
 *                              must not exceed UINT16_MAX.
 * @param[in] inTakeMutex       By default this method takes the Manager's mutex.
 *                              If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken.
 *
 * @return -EINVAL if the inputs are not valid.
 *         0 otherwise.
 */
int32_t Manager::FailAtFault(Identifier inId,
                             uint32_t inNumCallsToSkip,
                             uint32_t inNumCallsToFail,
                             bool inTakeMutex)
{
    int32_t status = 0;

    nlEXPECT_ACTION(inId < mNumFaults && inNumCallsToSkip <= UINT16_MAX && inNumCallsToFail <= UINT16_MAX, exit, status = -EINVAL);

    if (inTakeMutex)
    {
        Lock();
    }

    {
        Record &rec = mFaultRecords[inId];

        // The counters are stored as 16-bit values; the range was checked above.
        rec.mNumCallsToSkip = static_cast<uint16_t>(inNumCallsToSkip);
        rec.mNumCallsToFail = static_cast<uint16_t>(inNumCallsToFail);
        rec.mPercentage     = 0;
    }

    if (inTakeMutex)
    {
        Unlock();
    }

exit:
    return status;
}
/**
 * Convenience form of FailAtFault that always takes the Manager's mutex.
 *
 * @overload int32_t FailAtFault(Identifier inId, uint32_t inNumCallsToSkip, uint32_t inNumCallsToFail, bool inTakeMutex)
 */
int32_t Manager::FailAtFault(Identifier inId,
                             uint32_t inNumCallsToSkip,
                             uint32_t inNumCallsToFail)
{
    const int32_t status = FailAtFault(inId, inNumCallsToSkip, inNumCallsToFail, kMutexTake);

    return status;
}
/**
 * Mark a fault so that the system reboots when the fault is triggered.
 *
 * When fault inId is injected, CheckFault invokes the reboot callback
 * installed by the application in the GlobalContext; if no such callback
 * is installed, the process is deliberately crashed.
 *
 * @param[in] inId  The fault ID.
 *
 * @return -EINVAL if inId is out of range.
 *         0 otherwise.
 */
int32_t Manager::RebootAtFault(Identifier inId)
{
    int32_t status = 0;

    nlEXPECT_ACTION(inId < mNumFaults, exit, status = -EINVAL);

    Lock();
    {
        Record &rec = mFaultRecords[inId];

        rec.mReboot = true;
    }
    Unlock();

exit:
    return status;
}
/**
 * Store a set of arguments for a given fault ID.
 * The array of arguments is made available to the code injected with
 * the nlFAULT_INJECT macro.
 * For this to work for a given fault ID, the Manager must allocate memory to
 * store the arguments and configure the Record's mLengthOfArguments and
 * mArguments members accordingly.
 *
 * @param[in] inId       The fault ID
 * @param[in] inNumArgs  The number of arguments in the array pointed to by inArgs.
 *                       Must not exceed the Record's mLengthOfArguments nor UINT8_MAX.
 * @param[in] inArgs     The pointer to the array of integers to be stored in the fault.
 *                       May be NULL only when inNumArgs is zero.
 *
 * @return -EINVAL if the inputs are not valid (including a NULL inArgs with a
 *         non-zero inNumArgs, or a Record with no argument storage).
 *         0 otherwise.
 */
int32_t Manager::StoreArgsAtFault(Identifier inId, uint16_t inNumArgs, int32_t *inArgs)
{
    int32_t err = 0;
    size_t i;

    // Reject a NULL source array up front: the copy loop below would
    // otherwise dereference it as soon as inNumArgs is non-zero.
    nlEXPECT_ACTION(inId < mNumFaults &&
                    (inArgs != NULL || inNumArgs == 0) &&
                    mFaultRecords[inId].mArguments != NULL &&
                    mFaultRecords[inId].mLengthOfArguments >= inNumArgs &&
                    inNumArgs <= UINT8_MAX,
                    exit,
                    err = -EINVAL);

    Lock();

    for (i = 0; i < inNumArgs; i++)
    {
        mFaultRecords[inId].mArguments[i] = inArgs[i];
    }

    // mNumArguments is an 8-bit count; the range was validated above.
    mFaultRecords[inId].mNumArguments = static_cast<uint8_t>(inNumArgs);

    Unlock();

exit:
    return err;
}
/**
 * Attach a callback node to a fault ID.
 *
 * The node is first removed from the fault's list (so attaching the same
 * node twice does not link it twice) and then pushed at the head of the
 * list, ahead of the default callbacks. Input validation is performed by
 * RemoveCallbackAtFault.
 *
 * @param[in] inId        The fault ID
 * @param[in] inCallBack  The callback node to be attached to the fault
 *
 * @return -EINVAL if the inputs are not valid.
 *         0 otherwise.
 */
int32_t Manager::InsertCallbackAtFault(Identifier inId,
                                       Callback *inCallBack)
{
    // Make sure the node is not already linked; this also validates the inputs.
    int32_t err = RemoveCallbackAtFault(inId, inCallBack);

    nlEXPECT_SUCCESS(err, exit);

    Lock();
    {
        // Push at the head. Remember that every list ends with the two
        // default (random and deterministic) callbacks.
        Callback *&head = mFaultRecords[inId].mCallbackList;

        inCallBack->mNext = head;
        head              = inCallBack;
    }
    Unlock();

exit:
    return err;
}
/**
 * Detach a callback node from a fault.
 *
 * The fault's callback list is searched for the node; the first match is
 * unlinked. Not finding the node is not an error.
 *
 * @param[in] inId         The fault
 * @param[in] inCallBack   The callback node to be removed.
 * @param[in] inTakeMutex  By default this method takes the Manager's mutex.
 *                         If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken.
 *
 * @return -EINVAL if inId is out of range or inCallBack is NULL.
 *         0 otherwise.
 */
int32_t Manager::RemoveCallbackAtFault(Identifier inId,
                                       Callback *inCallBack,
                                       bool inTakeMutex)
{
    int32_t status = 0;

    nlEXPECT_ACTION((inId < mNumFaults) && (inCallBack != NULL), exit, status = -EINVAL);

    if (inTakeMutex)
    {
        Lock();
    }

    // Walk the links (not the nodes) so the head and the interior case
    // are unlinked by the same assignment.
    for (Callback **link = &mFaultRecords[inId].mCallbackList;
         *link != NULL;
         link = &((*link)->mNext))
    {
        if (*link == inCallBack)
        {
            *link = inCallBack->mNext;
            break;
        }
    }

    if (inTakeMutex)
    {
        Unlock();
    }

exit:
    return status;
}
/**
 * Convenience form of RemoveCallbackAtFault that always takes the Manager's mutex.
 *
 * @overload int32_t Manager::RemoveCallbackAtFault(Identifier inId, Callback *inCallBack, bool inTakeMutex)
 */
int32_t Manager::RemoveCallbackAtFault(Identifier inId,
                                       Callback *inCallBack)
{
    const int32_t status = RemoveCallbackAtFault(inId, inCallBack, kMutexTake);

    return status;
}
/**
 * Query whether a fault should be injected at the current call site.
 *
 * Every callback linked to the fault (custom callbacks first, then the
 * default random and deterministic ones) is invoked, and the fault is
 * injected if at least one of them asks for it. When the fault is injected:
 *  - the post-injection callback of the GlobalContext, if installed, is invoked;
 *  - if the fault is configured to reboot, the reboot callback of the
 *    GlobalContext is invoked, or the process is crashed if none is installed.
 * The Record's check counter is incremented on every call with a valid ID.
 *
 * @param[in] inId         The fault ID
 * @param[in] inTakeMutex  By default this method takes the Manager's mutex.
 *                         If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken.
 *
 * @return true if the fault should be injected; false otherwise
 *         (including when inId is out of range).
 */
bool Manager::CheckFault(Identifier inId, bool inTakeMutex)
{
    bool    inject = false;
    Record *rec    = NULL;

    nlEXPECT(inId < mNumFaults, exit);

    if (inTakeMutex)
    {
        Lock();
    }

    rec = &mFaultRecords[inId];

    // Give every callback a chance to trigger the fault. The successor is
    // captured before the call, in case the callback removes itself
    // through RemoveCallbackAtFault.
    for (Callback *node = rec->mCallbackList, *following = NULL;
         node != NULL;
         node = following)
    {
        following = node->mNext;

        if (node->mCallBackFn(inId, rec, node->mContext))
        {
            inject = true;
        }
    }

    if (inject)
    {
        // The reboot flag is sampled before the post-injection hook runs.
        const bool rebootRequested = rec->mReboot;

        if (sGlobalContext && sGlobalContext->mCbTable.mPostInjectionCb)
        {
            sGlobalContext->mCbTable.mPostInjectionCb(this, inId, rec);
        }

        if (rebootRequested)
        {
            // If the application has not setup a context and/or reboot callback, the system will crash
            if (sGlobalContext && sGlobalContext->mCbTable.mRebootCb)
            {
                sGlobalContext->mCbTable.mRebootCb();
            }
            else
            {
                Die();
            }
        }
    }

    rec->mNumTimesChecked++;

    if (inTakeMutex)
    {
        Unlock();
    }

exit:
    return inject;
}
/**
 * Convenience form of CheckFault that always takes the Manager's mutex.
 *
 * @overload bool CheckFault(Identifier inId, bool inTakeMutex)
 */
bool Manager::CheckFault(Identifier inId)
{
    const bool inject = CheckFault(inId, kMutexTake);

    return inject;
}
/**
 * Query whether a fault should be injected at the current call site and,
 * if so, retrieve the arguments stored in the fault's Record.
 *
 * The decision itself is delegated to CheckFault(Identifier, bool), which is
 * called without taking the mutex again; this method holds the mutex (when
 * requested) across both the check and the retrieval of the arguments.
 *
 * @param[in]  inId         The fault ID
 * @param[out] outNumArgs   The length of the array pointed to by outArgs;
 *                          written only when the method returns true.
 * @param[out] outArgs      The array of arguments configured for the fault ID;
 *                          written only when the method returns true.
 * @param[in]  inTakeMutex  By default this method takes the Manager's mutex.
 *                          If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken.
 *
 * @return true if the fault should be injected; false otherwise.
 */
bool Manager::CheckFault(Identifier inId, uint16_t &outNumArgs, int32_t *&outArgs, bool inTakeMutex)
{
    if (inTakeMutex)
    {
        Lock();
    }

    const bool inject = CheckFault(inId, kMutexDoNotTake);

    if (inject)
    {
        Record &rec = mFaultRecords[inId];

        outNumArgs = rec.mNumArguments;
        outArgs    = rec.mArguments;
    }

    if (inTakeMutex)
    {
        Unlock();
    }

    return inject;
}
/**
 * Convenience form of the argument-retrieving CheckFault that always takes
 * the Manager's mutex.
 *
 * @overload bool CheckFault(Identifier inId, uint16_t &outNumArgs, int32_t *&outArgs, bool inTakeMutex)
 */
bool Manager::CheckFault(Identifier inId, uint16_t &outNumArgs, int32_t *&outArgs)
{
    const bool inject = CheckFault(inId, outNumArgs, outArgs, kMutexTake);

    return inject;
}
/**
 * Zero the "number of times checked" counter of every fault Record.
 *
 * Only the counters are touched: the current configuration (including the
 * number of times a fault is to be skipped before it should fail) is left
 * as it is.
 */
void Manager::ResetFaultCounters(void)
{
    Lock();

    Identifier id = 0;

    while (id < mNumFaults)
    {
        mFaultRecords[id].mNumTimesChecked = 0;
        ++id;
    }

    Unlock();
}
/**
 * Reset the configuration of a single fault Record.
 *
 * The skip/fail counters, the percentage, the reboot flag and the number of
 * stored arguments are cleared, and every custom callback is unlinked so
 * that only the default callbacks remain attached.
 *
 * @param[in] inId  The fault ID
 *
 * @return -EINVAL if inId is out of range.
 *         0 otherwise.
 */
int32_t Manager::ResetFaultConfigurations(Identifier inId)
{
    int32_t status = 0;

    nlEXPECT_ACTION((inId < mNumFaults),
                    exit,
                    status = -EINVAL);

    Lock();
    {
        Record &rec = mFaultRecords[inId];

        rec.mNumArguments   = 0;
        rec.mReboot         = false;
        rec.mPercentage     = 0;
        rec.mNumCallsToFail = 0;
        rec.mNumCallsToSkip = 0;

        // All callback handling code in this module is based on the assumption
        // that custom callbacks are inserted at the beginning of the list:
        // keep dropping the head until the first default callback is reached.
        for (Callback *head = rec.mCallbackList;
             head != sEndOfCustomCallbacks && head != NULL;
             head = rec.mCallbackList)
        {
            (void)RemoveCallbackAtFault(inId, head, kMutexDoNotTake);
        }
    }
    Unlock();

exit:
    return status;
}
/**
 * Reset the configuration of all fault Records, one fault ID at a time.
 * The iteration stops at the first fault whose reset fails.
 *
 * @return the first non-zero error returned by
 *         ResetFaultConfigurations(Identifier), if any;
 *         0 otherwise.
 */
int32_t Manager::ResetFaultConfigurations(void)
{
    int32_t err = 0;

    for (Identifier id = 0; id < mNumFaults; ++id)
    {
        err = ResetFaultConfigurations(id);
        nlEXPECT(err == 0, exit);
    }

exit:
    return err;
}
/**
 * Take the Manager's mutex by invoking the lock hook with the lock context.
 * Does nothing when no lock hook is installed.
 */
void Manager::Lock(void)
{
    if (!mLock)
    {
        return;
    }

    mLock(mLockContext);
}
/**
 * Release the Manager's mutex by invoking the unlock hook with the lock context.
 * Does nothing when no unlock hook is installed.
 */
void Manager::Unlock(void)
{
    if (!mUnlock)
    {
        return;
    }

    mUnlock(mLockContext);
}
/**
 * Install the GlobalContext instance used by this module.
 *
 * The pointer is stored as-is and consulted by CheckFault for the
 * post-injection and reboot callbacks. On systems in which faults are
 * configured and injected from different threads, call this before the
 * threads are started.
 *
 * @param[in] inGlobalContext  Pointer to the GlobalContext provided by the application
 */
void SetGlobalContext(GlobalContext *inGlobalContext)
{
    sGlobalContext = inGlobalContext;
}
/**
 * Parse a base-10 integer.
 *
 * The whole string must be consumed by the conversion: a string with no
 * digits (such as "" or "-") or with trailing characters is rejected, and
 * so is a value that does not fit in an int32_t.
 *
 * This implementation does not check errno for ERANGE, as it assumes a very
 * simple underlying implementation of strtol; values are instead range-checked
 * against the int32_t limits after the conversion.
 *
 * @param[in]  str  Pointer to a string representing an integer
 *
 * @param[out] num  Pointer to the integer result; left untouched on failure
 *
 * @return true in case of success; false if the string does not
 *         contain an integer that fits in an int32_t.
 */
static bool ParseInt(const char *str, int32_t *num)
{
    char *endptr = NULL;
    long  parsed = strtol(str, &endptr, 10);

    // endptr == str means strtol consumed nothing (no digits at all);
    // a non-NUL *endptr means there is trailing garbage.
    if (!endptr || endptr == str || *endptr != '\0')
    {
        return false;
    }

    // long is wider than int32_t on LP64 targets: refuse to truncate silently.
    if (parsed < INT32_MIN || parsed > INT32_MAX)
    {
        return false;
    }

    *num = static_cast<int32_t>(parsed);

    return true;
}
/**
 * Parse an unsigned integer.
 *
 * The string is parsed with ParseInt, so only values representable as a
 * non-negative int32_t are accepted.
 *
 * @param[in]  str  Pointer to a string representing an unsigned int
 *
 * @param[out] num  Pointer to the unsigned integer result; left untouched on failure
 *
 * @return true in case of success; false if the string does not
 *         contain an unsigned integer.
 */
static bool ParseUInt(const char *str, uint32_t *num)
{
    int32_t parsed = 0;

    if (!ParseInt(str, &parsed))
    {
        return false;
    }

    // A well-formed but negative number is not an unsigned integer.
    if (parsed < 0)
    {
        return false;
    }

    *num = static_cast<uint32_t>(parsed);

    return true;
}
/**
* Parse a fault-injection configuration string and apply the configuration.
*
* @param[in] aFaultInjectionStr The configuration string. An example of a valid string that
* enables two faults is "system_buffer_f5_s1:inet_send_p33"
* An example of a configuration string that
* also passes three integer arguments to the fault point is
* "system_buffer_f5_s1_a10_a7_a-4"
* The format is
* "<module>_<fault>_{f<numTimesToFail>[_s<numTimesToSkip>],p<randomFailurePercentage>}[_a<integer>]..."
*
* @param[in] inArray An array of GetManagerFn callbacks
* to be used to parse the string.
*
* @param[in] inArraySize Num of elements in inArray
*
* @return true if the string can be parsed completely; false otherwise
*/
bool ParseFaultInjectionStr(char *aFaultInjectionStr, const GetManagerFn *inArray, size_t inArraySize)
{
ManagerTable table = { inArray, inArraySize };
size_t numTables = 1;
return ParseFaultInjectionStr(aFaultInjectionStr, &table, numTables);
}
/**
 * Parse a fault-injection configuration string and apply the configuration.
 *
 * The string is a ':'-separated list of fault specifications; each
 * specification is a '_'-separated list of tokens: the module (Manager)
 * name, the fault name, and then any number of option tokens:
 *  - f<n>  number of times to fail
 *  - s<n>  number of times to skip before failing
 *  - p<n>  random failure percentage (0..100); takes precedence over f/s
 *  - a<n>  an integer argument to store at the fault (may be repeated)
 *  - r     reboot when the fault is triggered
 *
 * The string is tokenized in place with strtok_r, so it is modified.
 * Specifications are applied as they are parsed: if a later specification
 * is invalid, the ones before it have already been applied.
 * Each specification is parsed independently: the Manager lookup and the
 * list of arguments start from scratch for every specification.
 *
 * @param[in] aFaultInjectionStr  The configuration string. An example of a valid string that
 *                                enables two faults is "system_buffer_f5_s1:inet_send_p33"
 *                                An example of a configuration string that
 *                                also passes three integer arguments to the fault point is
 *                                "system_buffer_f5_s1_a10_a7_a-4"
 *                                The format is
 *                                "<module>_<fault>_{f<numTimesToFail>[_s<numTimesToSkip>],p<randomFailurePercentage>}[_a<integer>]..."
 *
 * @param[in] inTables            An array of ManagerTable structures
 *                                to be used to parse the string.
 *
 * @param[in] inNumTables         Size of inTables
 *
 * @return true if the string can be parsed completely; false otherwise
 *         (including when aFaultInjectionStr is NULL, or inTables is NULL
 *         while inNumTables is not zero).
 */
bool ParseFaultInjectionStr(char *aFaultInjectionStr, const ManagerTable *inTables, size_t inNumTables)
{
    char *tok1 = NULL;
    char *savePtr1 = NULL;
    char *tok2 = NULL;
    char *savePtr2 = NULL;
    char *outerString = aFaultInjectionStr;
    size_t i = 0;
    nl::FaultInjection::Identifier j = 0;
    int err = 0;
    bool retval = false;
    int32_t args[kMaxFaultArgs];
    uint16_t numArgs = 0;
    nl::FaultInjection::Manager *mgr = NULL;
    nl::FaultInjection::Identifier faultId = 0;

    // strtok_r must not be started on a NULL string, and the table array
    // is dereferenced as soon as there is something to look up.
    nlEXPECT(aFaultInjectionStr != NULL, exit);
    nlEXPECT(inTables != NULL || inNumTables == 0, exit);

    memset(args, 0, sizeof(args));

    while ((tok1 = strtok_r(outerString, ":", &savePtr1)))
    {
        uint32_t numTimesToFail = 0;
        uint32_t numTimesToSkip = 0;
        uint32_t percentage = 0;
        bool gotPercentage = false;
        bool gotReboot = false;
        bool gotArguments = false;
        const Name *faultNames = NULL;

        // Subsequent strtok_r calls continue from savePtr1.
        outerString = NULL;

        // Start every specification from a clean slate: without this, an
        // unknown module name would silently reuse the Manager found for the
        // previous specification, and arguments would accumulate across
        // specifications.
        mgr = NULL;
        numArgs = 0;

        tok2 = strtok_r(tok1, "_", &savePtr2);
        nlEXPECT(tok2 != NULL, exit);

        // this is the module
        for (i = 0; i < inNumTables; i++)
        {
            for (j = 0; j < inTables[i].mNumItems; j++)
            {
                nl::FaultInjection::Manager &tmpMgr = inTables[i].mArray[j]();

                if (!strcmp(tok2, tmpMgr.GetName()))
                {
                    mgr = &tmpMgr;
                    break;
                }
            }
        }
        nlEXPECT(mgr != NULL, exit);

        tok2 = strtok_r(NULL, "_", &savePtr2);
        nlEXPECT(tok2 != NULL, exit);

        // this is the fault name
        faultNames = mgr->GetFaultNames();
        for (j = 0; j < mgr->GetNumFaults(); j++)
        {
            if (!strcmp(tok2, faultNames[j]))
            {
                faultId = j;
                break;
            }
        }
        // The loop ran to completion: no fault with that name.
        nlEXPECT(j != mgr->GetNumFaults(), exit);

        // The remaining tokens are options, identified by their first character.
        while ((tok2 = strtok_r(NULL, "_", &savePtr2)))
        {
            switch (tok2[0])
            {
            case 'a':
                {
                    int32_t tmp = 0;

                    nlEXPECT(numArgs < kMaxFaultArgs, exit);
                    gotArguments = true;
                    nlEXPECT(ParseInt(&(tok2[1]), &tmp), exit);
                    args[numArgs++] = tmp;
                }
                break;

            case 'f':
                nlEXPECT(ParseUInt(&(tok2[1]), &numTimesToFail), exit);
                break;

            case 's':
                nlEXPECT(ParseUInt(&(tok2[1]), &numTimesToSkip), exit);
                break;

            case 'p':
                gotPercentage = true;
                nlEXPECT(ParseUInt(&(tok2[1]), &percentage), exit);
                nlEXPECT(percentage <= 100, exit);
                break;

            case 'r':
                gotReboot = true;
                break;

            default:
                goto exit;
                break;
            }
        }

        if (gotArguments)
        {
            err = mgr->StoreArgsAtFault(faultId, numArgs, args);
            nlEXPECT_SUCCESS(err, exit);
        }

        // A percentage selects random injection; otherwise the fault is
        // configured deterministically from the f/s values (zero by default).
        if (gotPercentage)
        {
            err = mgr->FailRandomlyAtFault(faultId, static_cast<uint8_t>(percentage));
            nlEXPECT_SUCCESS(err, exit);
        }
        else
        {
            err = mgr->FailAtFault(faultId, numTimesToSkip, numTimesToFail);
            nlEXPECT_SUCCESS(err, exit);
        }

        if (gotReboot)
        {
            err = mgr->RebootAtFault(faultId);
            nlEXPECT_SUCCESS(err, exit);
        }
    }

    retval = true;

exit:
    return retval;
}
/**
 * Internal function that kills the process when a fault is configured to
 * reboot but the application has not installed a reboot callback.
 *
 * It never returns: it keeps storing through an invalid address (1) to
 * force a crash.
 */
static void Die(void)
{
    for (;;)
    {
        *reinterpret_cast<volatile long *>(1) = 0;
    }
}
} // namespace FaultInjection
} // namespace nl