@inproceedings{6e037b0f55d9416ebfaa88ae1a6fe026,
title = "Gradient descent finds global minima of deep neural networks",
abstract = "Gradient descent finds a global minimum in training deep neural networks despite the objective function being non-convex. The current paper proves gradient descent achieves zero training loss in polynomial time for a deep overparameterized neural network with residual connections (ResNet). Our analysis relies on the particular structure of the Gram matrix induced by the neural network architecture. This structure allows us to show the Gram matrix is stable throughout the training process and this stability implies the global optimality of the gradient descent algorithm. We further extend our analysis to deep residual convolutional neural networks and obtain a similar convergence result.",
author = "Du, {Simon S.} and Lee, {Jason D.} and Haochuan Li and Liwei Wang and Xiyu Zhai",
note = "Funding Information: We thank Lijie Chen and Ruosong Wang for useful discussions. SSD acknowledges support from AFRL grant FA8750-17-2-0212 and DARPA D17AP00001. JDL acknowledges support of the ARO under MURI Award W911NF-11-1-0303. This is part of the collaboration between US DOD, UK MOD and UK Engineering and Physical Sciences Research Council (EPSRC) under the Multidisciplinary University Research Initiative. HL and LW acknowledge support from National Basic Research Program of China (973 Program) (grant no. 2015CB352502), NSFC (61573026) and BJNSF (LI72037). Part of the work is done while SSD was visiting Simons Institute. Publisher Copyright: Copyright 2019 by the author(s).; 36th International Conference on Machine Learning, ICML 2019 ; Conference date: 09-06-2019 Through 15-06-2019",
year = "2019",
language = "English (US)",
series = "36th International Conference on Machine Learning, ICML 2019",
publisher = "International Machine Learning Society (IMLS)",
pages = "3003--3048",
booktitle = "36th International Conference on Machine Learning, ICML 2019",
}