% Egami, Fong, Grimmer, Roberts and Stewart (2022), Science Advances 8(42).
% - Author names use the unambiguous "Last, First" form with unbraced given
%   names so that bibliography styles can abbreviate each initial.
% - "pages" holds the article number: this entry has no page range.
%   NOTE(review): value derived from the DOI suffix; confirm against the
%   publisher record.
% - The exporter's acknowledgements / funding / copyright paragraph is kept in
%   the non-standard "funding" field, which bibliography styles ignore, so it
%   is preserved in the database without being printed in the reference list.
@article{ba0817bb65344364aac8b3291c276b9e,
  author    = {Egami, Naoki and Fong, Christian J. and Grimmer, Justin and Roberts, Margaret E. and Stewart, Brandon M.},
  title     = {How to make causal inferences using texts},
  journal   = {Science Advances},
  year      = {2022},
  month     = oct,
  volume    = {8},
  number    = {42},
  pages     = {eabg2652},
  doi       = {10.1126/sciadv.abg2652},
  issn      = {2375-2548},
  publisher = {American Association for the Advancement of Science},
  language  = {English (US)},
  abstract  = {Text as data techniques offer a great promise: the ability to inductively discover measures that are useful for testing social science theories with large collections of text. Nearly all text-based causal inferences depend on a latent representation of the text, but we show that estimating this latent representation from the data creates underacknowledged risks: we may introduce an identification problem or overfit. To address these risks, we introduce a split-sample workflow for making rigorous causal inferences with discovered measures as treatments or outcomes. We then apply it to estimate causal effects from an experiment on immigration attitudes and a study on bureaucratic responsiveness.},
  funding   = {Funding Information: We thank E. Airoldi, P. Aronow, M. Blackwell, S. Bouchat, C. Felton, M. Handcock, E. Hartman, R. Johnson, G. King, I. Lundberg, R. Nielsen, S. O{\textquoteright}Brien, T. Richardson, M. Salganik, M. Sands, F. S{\"a}vje, A. Spirling, A. Tahk, E. Tvinnereim, H. Waight, H. Wallach, S. Zhang, and numerous seminar participants for useful discussions about making causal inference with texts. We appreciate the insightful comments of the editor and reviewers. We also thank D. Tingley for early conversations about potential SUTVA concerns with respect to STM and sequential experiments as a possible way to combat it. This work was supported by the Eunice Kennedy Shriver National Institute of Child Health and Human Development of the National Institutes of Health under award number P2CHD047879 (to B.M.S.) and the National Science Foundation under the Resource Implementations for Data Intensive Research program award numbers 1738411 (to M.E.R.) and 1738288 (to B.M.S.). Publisher Copyright: Copyright {\textcopyright} 2022 The Authors, some rights reserved.},
}