@inproceedings{6e037b0f55d9416ebfaa88ae1a6fe026,
title = "Gradient descent finds global minima of deep neural networks",
abstract = "Gradient descent finds a global minimum in training deep neural networks despite the objective function being non-convex. The current paper proves gradient descent achieves zero training loss in polynomial time for a deep over-parameterized neural network with residual connections (ResNet). Our analysis relies on the particular structure of the Gram matrix induced by the neural network architecture. This structure allows us to show the Gram matrix is stable throughout the training process, and this stability implies the global optimality of the gradient descent algorithm. We further extend our analysis to deep residual convolutional neural networks and obtain a similar convergence result.",
author = "Du, {Simon S.} and Lee, {Jason D.} and Haochuan Li and Liwei Wang and Xiyu Zhai",
note = "Publisher Copyright: Copyright 2019 by the author(s).; 36th International Conference on Machine Learning, ICML 2019 ; Conference date: 09-06-2019 Through 15-06-2019",
year = "2019",
language = "English (US)",
series = "36th International Conference on Machine Learning, ICML 2019",
publisher = "International Machine Learning Society (IMLS)",
pages = "3003--3048",
booktitle = "36th International Conference on Machine Learning, ICML 2019",
}