% Journal article: Roberts, Stewart and Nielsen (2020), "Adjusting for
% Confounding with Text Matching", American Journal of Political Science 64(4).
% The note field carries the funding/acknowledgement statement and the
% publisher copyright line as exported with the record.
@article{7d12224248b44d44a99d802b33ba9018,
  author    = {Roberts, Margaret E. and Stewart, Brandon M. and Nielsen, Richard A.},
  title     = {Adjusting for Confounding with Text Matching},
  journal   = {American Journal of Political Science},
  year      = {2020},
  month     = oct,
  day       = {1},
  volume    = {64},
  number    = {4},
  pages     = {887--903},
  doi       = {10.1111/ajps.12526},
  issn      = {0092-5853},
  publisher = {Wiley-Blackwell},
  language  = {English (US)},
  abstract  = {We identify situations in which conditioning on text can address confounding in observational studies. We argue that a matching approach is particularly well-suited to this task, but existing matching methods are ill-equipped to handle high-dimensional text data. Our proposed solution is to estimate a low-dimensional summary of the text and condition on this summary via matching. We propose a method of text matching, topical inverse regression matching, that allows the analyst to match both on the topical content of confounding documents and the probability that each of these documents is treated. We validate our approach and illustrate the importance of conditioning on text to address confounding with two applications: the effect of perceptions of author gender on citation counts in the international relations literature and the effects of censorship on Chinese social media users.},
  note      = {Funding Information: We thank the following for helpful comments and suggestions on this work: David Blei, Naoki Egami, Chris Felton, James Fowler, Justin Grimmer, Erin Hartman, Chad Hazlett, Seth Hill, Kosuke Imai, Rebecca Johnson, Gary King, Adeline Lo, Will Lowe, Chris Lucas, Walter Mebane, David Mimno, Jennifer Pan, Marc Ratkovic, Matt Salganik, Caroline Tolbert, and Simone Zhang; audiences at the Princeton Text Analysis Workshop, Princeton Politics Methods Workshop, the University of Rochester, Microsoft Research, the Text as Data Conference, and the Political Methodology Society and the Visions in Methodology conference; and some tremendously helpful anonymous reviewers. We especially thank Dustin Tingley for numerous insightful conversations on the connections between STM and causal inference and Ian Lundberg for extended discussions on some technical details. Dan Maliniak, Ryan Powers, and Barbara Walter graciously supplied data and replication code for the gender and citations study. The JSTOR Data for Research program provided academic journal data for the international relations application. This research was supported, in part, by the Eunice Kennedy Shriver National Institute of Child Health and Human Development under grant P2-CHD047879 to the Office of Population Research at Princeton University. The research was also supported by grants from the National Science Foundation RIDIR program, award numbers 1738411 and 1738288. This publication was made possible, in part, by a grant from the Carnegie Corporation of New York, supporting Richard Nielsen as an Andrew Carnegie Fellow. The statements made and views expressed are solely the responsibility of the authors. Publisher Copyright: {\textcopyright} 2020, Midwest Political Science Association},
}