Yajnik and Jha, ICPP 1993: design of algorithm-based fault tolerant (ABFT) systems with in-system checks.
NOTE(review): booktitle holds only "Architecture" (presumably the proceedings volume title) and address holds a country rather than a publisher city; both are kept as exported -- confirm against the publisher record.
@inproceedings{89572eba35e547e2ae68e8ed6f46b124,
  author    = {Yajnik, Shalini and Jha, Niraj K.},
  title     = {Design of Algorithm-Based Fault Tolerant Systems with In-System Checks},
  booktitle = {Architecture},
  series    = {Proceedings of the International Conference on Parallel Processing},
  pages     = {246--253},
  year      = {1993},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/ICPP.1993.70},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 1993 IEEE.; 1993 International Conference on Parallel Processing, ICPP 1993 ; Conference date: 16-08-1993 Through 20-08-1993},
  abstract  = {To improve the reliability of compute-intensive applications run on multiprocessor architectures, fault tolerance is introduced into the system with on-line detection and location of faults. This can be achieved by a low-cost scheme, called Algorithm-based fault tolerance (ABFT), which encodes data at the system level and modifies the algorithm to operate on the encoded data. The resultant encoded output data is checked for correctness by some checks. In this paper we present an extended model for representing and designing ABFT systems. The model takes into consideration the processors evaluating the checks. We propose a design method which considers the processors computing the checks to be a part of the ABFT system and guarantees concurrent error detection even in the presence of faults in these processors, unlike most methods presented earlier.},
}