@inproceedings{055631de3e3d4f00a8fdcf2972a7dda9,
title = "Smoothing the Landscape Boosts the Signal for SGD: Optimal Sample Complexity for Learning Single Index Models",
abstract = "We focus on the task of learning a single index model σ(w* · x) with respect to the isotropic Gaussian distribution in d dimensions. Prior work has shown that the sample complexity of learning w* is governed by the information exponent k* of the link function σ, which is defined as the index of the first nonzero Hermite coefficient of σ. Ben Arous et al. [1] showed that n ≳ d^{k*−1} samples suffice for learning w* and that this is tight for online SGD. However, the CSQ lower bound for gradient based methods only shows that n ≳ d^{k*/2} samples are necessary. In this work, we close the gap between the upper and lower bounds by showing that online SGD on a smoothed loss learns w* with n ≳ d^{k*/2} samples. We also draw connections to statistical analyses of tensor PCA and to the implicit regularization effects of minibatch SGD on empirical losses.",
author = "Alex Damian and Eshaan Nichani and Rong Ge and Lee, {Jason D.}",
note = "Publisher Copyright: {\textcopyright} 2023 Neural information processing systems foundation. All rights reserved.; 37th Conference on Neural Information Processing Systems, NeurIPS 2023 ; Conference date: 10-12-2023 Through 16-12-2023",
year = "2023",
language = "English (US)",
series = "Advances in Neural Information Processing Systems",
publisher = "Neural information processing systems foundation",
editor = "A. Oh and T. Naumann and A. Globerson and K. Saenko and M. Hardt and S. Levine",
booktitle = "Advances in Neural Information Processing Systems 36 - 37th Conference on Neural Information Processing Systems, NeurIPS 2023",
}