% Plank and Li, "Faster Checkpointing with N+1 Parity", 24th International
% Symposium on Fault-Tolerant Computing, 1994.
% The citation key is kept unchanged so existing \cite commands keep resolving.
@inproceedings{195bb55c69c14953b695be463dd547f3,
  author    = {Plank, James S. and Li, Kai},
  title     = {Faster Checkpointing with {N+1} Parity},
  booktitle = {Digest of Papers - International Symposium on Fault-Tolerant Computing},
  publisher = {IEEE},
  year      = {1994},
  month     = jun,
  pages     = {288--297},
  isbn      = {0818655224},
  language  = {English (US)},
  note      = {Proceedings of the 24th International Symposium on Fault-Tolerant Computing; conference dates: 15--17 June 1994},
  abstract  = {This paper presents a way to perform fast, incremental checkpointing of multicomputers and distributed systems by using N + 1 parity. A basic algorithm is described that uses two extra processors for checkpointing and enables the system to tolerate any single processor failure. The algorithm's speed comes from a combination of N + 1 parity, extra physical memory, and virtual memory hardware so that checkpoints need not be written to disk. This eliminates the most time-consuming portion of checkpointing. The algorithm requires each application processor to allocate a fixed amount of extra memory for checkpointing. This amount may be set statically by the programmer, and need not be equal to the size of the processor's writable address space. This alleviates a major restriction of previous checkpointing algorithms using N + 1 parity [28]. Finally, we outline how to extend our algorithm to tolerate any m processor failures with the addition of 2m extra checkpointing processors.},
}