@inproceedings{1b12039c0dbe44bba705ccd3e2c4e7ef,
title = "Robust moment estimation and improved clustering via sum of squares",
abstract = "We develop efficient algorithms for estimating low-degree moments of unknown distributions in the presence of adversarial outliers and design a new family of convex relaxations for k-means clustering based on sum-of-squares method. As an immediate corollary, for any > 0, we obtain an efficient algorithm for learning the means of a mixture of k arbitrary Poincar{\'e} distributions in d in time dO(1/γ) so long as the means have separation Ω(kγ). This in particular yields an algorithm for learning Gaussian mixtures with separation Ω(kγ), thus partially resolving an open problem of Regev and Vijayaraghavan (2017). The guarantees of our robust estimation algorithms improve in many cases significantly over the best previous ones, obtained in the recent works. We also show that the guarantees of our algorithms match information-theoretic lower-bounds for the class of distributions we consider. These improved guarantees allow us to give improved algorithms for independent component analysis and learning mixtures of Gaussians in the presence of outliers. We also show a sharp upper bound on the sum-of-squares norms for moment tensors of any distribution that satisfies the Poincar{\'e} inequality. The Poincar{\'e} inequality is a central inequality in probability theory, and a large class of distributions satisfy it including Gaussians, product distributions, strongly log-concave distributions, and any sum or uniformly continuous transformation of such distributions. As a consequence, this yields that all of the above algorithmic improvements hold for distributions satisfying the Poincar{\'e} inequality.",
keywords = "Clustering, Moments, Robust learning, Sum-of-squares",
author = "Kothari, {Pravesh K.} and Jacob Steinhardt and David Steurer",
note = "Publisher Copyright: {\textcopyright} 2018 Association for Computing Machinery.; 50th Annual ACM Symposium on Theory of Computing, STOC 2018 ; Conference date: 25-06-2018 Through 29-06-2018",
year = "2018",
month = jun,
day = "20",
doi = "10.1145/3188745.3188970",
language = "English (US)",
series = "Proceedings of the Annual ACM Symposium on Theory of Computing",
publisher = "Association for Computing Machinery",
pages = "182--189",
editor = "Monika Henzinger and David Kempe and Ilias Diakonikolas",
booktitle = "STOC 2018 - Proceedings of the 50th Annual ACM SIGACT Symposium on Theory of Computing",
}