%PDF-1.3 1 0 obj << /Kids [ 3 0 R 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R 12 0 R ] /Type /Pages /Count 10 >> endobj 2 0 obj << /Title (Towards a Mathematical Understanding of the Difficulty in Learning With Feedforward Neural Networks) /Producer (PyPDF2) /Author (Hao Shen) /Subject (2018 IEEE Conference on Computer Vision and Pattern Recognition) >> endobj 3 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 14 0 R /Resources << /XObject << /x8 15 0 R /x6 18 0 R /x12 21 0 R /x10 24 0 R >> /ExtGState << /s9 27 0 R /s11 30 0 R /a0 << /CA 1 /ca 1 >> /R37 33 0 R /s5 34 0 R /s7 37 0 R >> /Font << /R44 40 0 R /R42 44 0 R /R40 47 0 R /F2 51 0 R /F1 52 0 R /R38 53 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 57 0 R 58 0 R 59 0 R 60 0 R 61 0 R 62 0 R 63 0 R 64 0 R 65 0 R 66 0 R 67 0 R 68 0 R 69 0 R 70 0 R 71 0 R 72 0 R 73 0 R 74 0 R 75 0 R 76 0 R 77 0 R 78 0 R 79 0 R 80 0 R 81 0 R 82 0 R 83 0 R 84 0 R ] >> endobj 4 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 85 0 R /Resources << /ExtGState << /R37 33 0 R >> /Font << /R79 86 0 R /R73 91 0 R /R71 96 0 R /R77 99 0 R /R40 47 0 R /R44 40 0 R /R89 102 0 R /F1 105 0 R /R83 106 0 R /R81 109 0 R /R75 112 0 R /R87 116 0 R /R85 121 0 R /F2 125 0 R /R65 126 0 R /R67 131 0 R /R38 53 0 R /R69 136 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 140 0 R 141 0 R 142 0 R 143 0 R 144 0 R 145 0 R 146 0 R 147 0 R 148 0 R 149 0 R 150 0 R 151 0 R 152 0 R 153 0 R ] >> endobj 5 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 154 0 R /Resources << /ExtGState << /R37 33 0 R >> /Font << /R79 86 0 R /R73 91 0 R /R71 96 0 R /R77 99 0 R /R40 47 0 R /R44 40 0 R /R89 102 0 R /F1 155 0 R /R83 106 0 R /R75 112 0 R /R87 116 0 R /R85 121 0 R /F2 156 0 R /R105 157 0 R /R65 126 0 R /R67 131 0 R /R38 53 0 R /R69 136 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 161 0 R 162 0 R 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R ] >> endobj 6 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 170 0 R /Resources << /ExtGState << /R37 33 0 R >> /Font << /R79 86 0 R /R73 91 0 R /R71 96 0 R /R77 99 0 R /R40 47 0 R /R127 171 0 R /R131 174 0 R /R89 102 0 R /F1 178 0 R /R83 106 0 R /R75 112 0 R /R87 116 0 R /R129 179 0 R /F2 183 0 R /R105 157 0 R /R125 184 0 R /R65 126 0 R /R67 131 0 R /R85 121 0 R /R38 53 0 R /R44 40 0 R /R69 136 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 187 0 R 188 0 R 189 0 R 190 0 R 191 0 R 192 0 R 193 0 R 194 0 R 195 0 R 196 0 R 197 0 R 198 0 R 199 0 R ] >> endobj 7 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 200 0 R /Resources << /ExtGState << /R37 33 0 R >> /Font << /R79 86 0 R /R73 91 0 R /R71 96 0 R /R77 99 0 R /R40 47 0 R /R44 40 0 R /R89 102 0 R /F1 201 0 R /R83 106 0 R /R75 112 0 R /R87 116 0 R /R85 121 0 R /F2 202 0 R /R105 157 0 R /R65 126 0 R /R67 131 0 R /R38 53 0 R /R69 136 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 203 0 R 204 0 R 205 0 R 206 0 R 207 0 R 208 0 R 209 0 R 210 0 R 211 0 R 212 0 R 213 0 R 214 0 R 215 0 R 216 0 R 217 0 R 218 0 R 219 0 R 220 0 R 221 0 R 222 0 R ] >> endobj 8 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 223 0 R /Resources << /ExtGState << /R37 33 0 R >> /Font << /R191 224 0 R /R73 91 0 R /R71 96 0 R /R77 99 0 R /R40 47 0 R /R44 40 0 R /R89 102 0 R /R79 86 0 R /R83 106 0 R /R75 112 0 R /R85 121 0 R /F2 227 0 R /R105 157 0 R /R65 126 0 R /R67 131 0 R /F1 228 0 R /R38 53 0 R /R69 136 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 229 0 R 230 0 R 231 0 R 232 0 R 233 0 R 234 0 R 235 0 R 236 0 R 237 0 R 238 0 R 239 0 R 240 0 R 241 0 R 242 0 R 243 0 R 244 0 R 245 0 R 246 0 R 247 0 R 248 0 R 249 0 R 250 0 R 251 0 R 252 0 R 253 0 R 254 0 R 255 0 R 256 0 R ] >> endobj 9 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 257 0 R /Resources << /ExtGState << /R37 33 0 R >> /Font << /R191 224 0 R /R73 91 0 R /R71 96 0 R /R77 99 0 R /R40 47 0 R /R44 40 0 R /R131 174 0 R /R89 102 0 R /R79 86 0 R /R83 106 0 R /R75 112 0 R /R85 121 0 R /F2 258 0 R /R105 157 0 R /R65 126 0 R /R67 131 0 R /F1 259 0 R /R38 53 0 R /R69 136 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 260 0 R 261 0 R 262 0 R 263 0 R 264 0 R 265 0 R 266 0 R 267 0 R 268 0 R 269 0 R 270 0 R 271 0 R 272 0 R 273 0 R 274 0 R 275 0 R 276 0 R 277 0 R 278 0 R 279 0 R 280 0 R 281 0 R 282 0 R 283 0 R 284 0 R 285 0 R ] >> endobj 10 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 286 0 R /Resources << /ColorSpace << /R236 287 0 R /R244 289 0 R /R242 291 0 R >> /ExtGState << /R37 33 0 R >> /Font << /R79 86 0 R /R239 293 0 R /R237 296 0 R /R73 91 0 R /R71 96 0 R /R233 299 0 R /R77 99 0 R /R40 47 0 R /R44 40 0 R /F1 302 0 R /R83 106 0 R /R75 112 0 R /F2 303 0 R /R105 157 0 R /R65 126 0 R /R67 131 0 R /R38 53 0 R /R69 136 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 304 0 R 305 0 R 306 0 R 307 0 R ] >> endobj 11 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 308 0 R /Resources << /ExtGState << /R37 33 0 R >> /Font << /R44 40 0 R /R40 47 0 R /F2 309 0 R /F1 310 0 R /R38 53 0 R /R303 311 0 R /R305 314 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 317 0 R 318 0 R 319 0 R 320 0 R 321 0 R 322 0 R 323 0 R 324 0 R 325 0 R 326 0 R 327 0 R 328 0 R 329 0 R 330 0 R 331 0 R 332 0 R 333 0 R 334 0 R 335 0 R 336 0 R 337 0 R 338 0 R 339 0 R 340 0 R 341 0 R 342 0 R 343 0 R 344 0 R 345 0 R 346 0 R 347 0 R 348 0 R 349 0 R 350 0 R 351 0 R 352 0 R 353 0 R 354 0 R 355 0 R 356 0 R 357 0 R 358 0 R 359 0 R 360 0 R 361 0 R 362 0 R 363 0 R 364 0 R 365 0 R 366 0 R 367 0 R 368 0 R 369 0 R ] >> endobj 12 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 370 0 R /Resources << /ExtGState << /R37 33 0 R >> /Font << /R44 40 0 R /R40 47 0 R /F2 371 0 R /F1 372 0 R /R303 311 0 R /R305 314 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 373 0 R 374 0 R 375 0 R 376 0 R 377 0 R 378 0 R 379 0 R 380 0 R 381 0 R 382 0 R 383 0 R 384 0 R ] >> endobj 13 0 obj << /Type /Catalog /Pages 1 0 R >> endobj 14 0 obj << /Length 20280 >> stream q q q 0.1 0 0 0.1 0 0 cm /R37 gs 0 g q 10 0 0 10 0 0 cm BT /R38 14.3462 Tf 1 0 0 1 71.102 675.067 Tm [ (T) 91.9892 (o) 9.99667 (wards) -250.003 (a) -250.008 (Mathematical) -250.005 (Understanding) -249.988 (of) -250.012 (the) -250.008 <4469660263756c7479> -250.008 (in) -249.991 (Lear) 14.9893 (ning) -249.995 (with) ] TJ 132.916 -17.9332 Td [ (F) 24.9889 (eedf) 25.011 (orward) -250.002 (Neural) -250.008 (Netw) 9.99455 (orks) ] TJ /R40 11.9552 Tf 70.1859 -37.8578 Td [ (Hao) -250.008 (Shen) ] TJ -142.57 -13.9473 Td [ (fortiss) -250.013 (\055) -250.004 (The) -250.014 (Research) -250.006 (Institute) -250.016 (of) -250.014 (the) -250 (Free) -250.002 (State) -250.002 (of) -250.014 (Ba) 20.0129 (v) 24.9834 (aria\054) -249.997 (German) 14.9892 (y) ] TJ 65.991 -13.948 Td [ (Guerick) 9.98805 (estr) 54.9925 (\056) -310.001 (25\054) -250.002 (80805) -250.014 (Munich\054) -250.008 (German) 14.9872 (y) ] TJ /R42 8.9664 Tf 46.1891 -13.9477 Td (hao\056shen\100fortiss\056org) Tj /R38 11.9552 Tf -97.8191 -41.0461 Td (Abstract) Tj /R44 9.9626 Tf -83.9277 -24.3473 Td [ (T) 54.9853 (r) 14.984 (aining) -593.004 (deep) -592.998 (neur) 14.9901 (al) -592.987 (networks) -593.004 (for) -594.001 (solving) -593.006 (mac) 14.9803 (hine) ] TJ -11.9551 -11.9547 Td [ (learning) -210.987 (pr) 44.9839 (oblems) -211.002 (is) -211.01 (one) -209.998 (gr) 36.9852 (eat) -211.016 (c) 15.0122 (halleng) 9.98853 (e) -210.989 (in) -211.011 (the) -211.016 <02656c642c> -219.008 (mainly) ] TJ 11.9551 TL T* [ (due) -473.014 (to) -473.987 (its) -472.999 (associated) -474 (optimisati) 1.00106 (on) -473.99 (pr) 44.9851 (oblem) -473.004 (being) -473.982 (highly) ] TJ T* [ (non\055con) 40.0105 (ve) 19.9881 (x\056) -1205.02 (Recent) -547.983 (de) 15.0183 (velopments) -548 (have) -547.985 (sug) 10.0155 (g) 10.0032 (ested) -548.997 (that) ] TJ 11.9559 TL T* [ (many) -326.015 (tr) 14.9914 (aining) -325.009 (algorithms) -326.007 (do) -326.014 (not) -326.007 (suf) 18.0154 (fer) -324.995 (fr) 44.9851 (om) -326.017 (undesir) 37.0036 (ed) -325.985 (lo\055) ] TJ 11.9551 TL T* [ (cal) -378.988 (minima) -379.985 (under) -379.018 (certain) -379.993 (scenario\054) -411.982 (and) -378.981 (consequently) -379.988 (led) ] TJ T* [ (to) -328.011 (gr) 36.9852 (eat) -328.016 (ef) 18 (forts) -329.011 (in) -328.011 (pur) 10.0143 (suing) -327.996 (mathematical) -328.016 (e) 19.9918 (xplanations) -328.989 (for) ] TJ T* [ (suc) 14.9846 (h) -241.994 (observations\056) -306.995 (This) -240.986 (work) -242.018 (pr) 44.9839 (o) 10.0032 (vides) -241.989 (an) -241.009 (alternative) -242.001 (math\055) ] TJ T* [ (ematical) -258.99 (under) 10.0155 (standing) -259.009 (of) -259.003 (the) -259.008 (c) 15.0122 (halleng) 9.98853 (e) -258.981 (fr) 44.9851 (om) -259.008 (a) -259.991 (smooth) -258.984 (op\055) ] TJ 11.9563 TL T* [ (timisation) -254.015 (per) 10.0057 (spective) 14.9865 (\056) -321.003 (By) -253.997 (assuming) -253.014 (e) 19.9918 (xact) -253.994 (learning) -254 (of) -253.985 <026e697465> ] TJ 11.9547 TL T* [ (samples\054) -440.014 (suf) 18.0154 <026369656e74> -401.995 (conditions) -402.003 (ar) 36.9852 (e) -402.016 <6964656e7469026564> -402.019 (via) -402.984 (a) -402.006 (critical) ] TJ T* [ (point) -311.997 (analysis) -311.99 (to) -312.014 (ensur) 36.9926 (e) -311.012 (any) -311.981 (local) -312.007 (minimum) -311.995 (to) -312.014 (be) -311.987 (globally) ] TJ T* [ (minimal) -267.996 (as) -267.003 (well\056) -363.992 (Furthermor) 37.0171 (e) 9.99343 (\054) -271.991 (a) -267.99 (state) -266.993 (of) -267.982 (the) -267.987 (art) -268.017 (algorithm\054) ] TJ T* [ (known) -212.019 (as) -211.993 (the) -211.996 (Gener) 14.9828 (alised) -212.998 (Gaus) 0.99861 (s\055Ne) 14.9901 (wton) -212.994 (\050GGN\051) -211.992 (algorithm\054) ] TJ T* [ (is) -399.998 (rigor) 45.0023 (ously) -399.993 (r) 37.0196 (e) 15.0122 (visited) -399.987 (as) -400 (an) -400.002 (appr) 44.9937 (oximate) -399.992 (Ne) 14.9877 (wton\047) 39.9811 (s) -400.007 (algo\055) ] TJ 11.9563 TL T* [ (rithm\054) -329.013 (whic) 14.9987 (h) -313.002 (shar) 36.9963 (es) -313.005 (the) -313 (pr) 44.9839 (operty) -312.992 (of) -312.994 (being) -314.009 (locally) -312.985 (quadr) 15.0048 (at\055) ] TJ 11.9551 TL T* [ (ically) -280.994 (con) 39.9988 (ver) 37.011 (g) 10.0032 (ent) -281.005 (to) -280.019 (a) -281.007 (global) -281.017 (minimum) -281.019 (under) -280.995 (the) -281.005 (condition) ] TJ T* [ (of) -249.985 (e) 19.9924 (xact) -249.994 (learning) 15.0134 (\056) ] TJ /R38 11.9552 Tf 28.6723 TL T* [ (1\056) -249.99 (Intr) 18.0146 (oduction) ] TJ /R40 9.9626 Tf 11.9551 -19.366 Td [ (Deep) -325.985 (Neural) -325.981 (Netw) 10.0069 (orks) -324.992 (\050DNNs\051) -326.015 (ha) 19.9979 (v) 14.9828 (e) -325.99 (been) -325.982 (successfully) ] TJ -11.9551 -11.9547 Td [ (applied) -373.995 (to) -374.984 (solv) 15.0012 (e) -373.982 (challenging) -375.002 (problems) -373.984 (in) -374.004 (pattern) -374.984 (recogni\055) ] TJ 11.9551 TL T* [ (tion\054) -367.99 (computer) -343.999 (vision\054) -366.999 (and) -344.015 (speech) -344.017 (recognition) -343.987 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 243.805 236.854 Tm (3) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 248.786 236.854 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 254.714 236.854 Tm (21) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 264.677 236.854 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 270.595 236.854 Tm (43) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 280.557 236.854 Tm (\135\056) Tj -230.445 -11.9551 Td [ (Despite) -229.981 (this) -228.995 (success\054) -233.988 (training) -228.991 (DNNs) -229.986 (is) -229.007 (still) -230.006 (one) -229.996 (of) -229.018 (the) -229.993 (great\055) ] TJ 11.9559 TL T* [ (est) -360.989 (challenges) -360.98 (in) -360.986 (the) -362.011 <02656c64> -361.016 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 162.181 212.943 Tm (9) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 167.163 212.943 Tm [ (\135\056) -642.992 (In) -360.997 (this) -361.013 (w) 10 (ork\054) -388.984 (we) -362.02 (focus) -360.991 (on) ] TJ -117.05 -11.9551 Td [ (training) -446.014 (the) -445.996 (classic) -445.991 (feedforw) 9.9922 (ard) -446.006 (Multi\055Layer) -446.011 (Perceptrons) ] TJ 11.9551 TL T* [ (\050MLPs\051\056) -585.989 (It) -342.998 (is) -342.008 (kno) 24.9909 (wn) -342.014 (that) -342.007 (performance) -341.982 (of) -342.019 (MLPs) -343.007 (is) -342.009 (highly) ] TJ T* [ (dependent) -199.007 (on) -199.016 (v) 24.9811 (arious) -199.99 (f) 9.99466 (actors) -199.019 (in) -199.013 (a) -198.991 (v) 14.9828 (ery) -198.989 (complicated) -200.013 (w) 10 (ay) 65.0088 (\056) -293.007 (F) 14.9926 (or) ] TJ T* [ (e) 15.0128 (xample\054) -276.989 (studies) -272.001 (in) -271.982 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 133.13 165.123 Tm (15) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 143.093 165.123 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 148.293 165.123 Tm (37) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 158.255 165.123 Tm [ (\135) -271.017 (identify) -272.004 (the) -271.986 (topology) -271.989 (of) -271.991 (MLPs) ] TJ -108.143 -11.9547 Td [ (as) -273.992 (a) -273.018 (determinati) 25.0105 (v) 14.9828 (e) -273.998 (f) 9.99466 (actor) 55.0104 (\056) -380.993 (W) 79.9879 (orks) -273.981 (in) -273.981 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 196.562 153.168 Tm (25) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 206.525 153.168 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 211.735 153.168 Tm (9) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 216.716 153.168 Tm [ (\135) -273.996 (demonstrate) -274 (the) ] TJ -166.604 -11.9563 Td [ (impact) -351.997 (of) -350.998 (dif) 24.9848 (ferent) -351.983 (acti) 24.9824 (v) 24.9811 (ation) -351.02 (functions) -351.988 (to) -350.988 (performance) -351.98 (of) ] TJ 11.9551 TL T* [ (MLPs\056) -652.99 (Moreo) 14.9914 (v) 14.9828 (er) 39.986 (\054) -392.991 (a) -363.983 (choice) -364.014 (of) -364.997 (error\057loss) -363.996 (functions) -363.986 (is) -364.986 (also) ] TJ T* [ (sho) 24.9922 (wn) -249.99 (to) -249.985 (be) -249.997 <696e0375656e7469616c> -249.988 (as) -249.996 (in) -249.985 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 167.75 117.302 Tm (8) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 172.731 117.302 Tm (\135\056) Tj -110.664 -12.3922 Td [ (Ev) 14.9871 (en) -400.011 (with) -399.99 (a) -400.017 (well) -400 (designed) -399.987 (MLP) -399.982 (architecture\054) -438 (training) ] TJ -11.9551 -11.9551 Td [ (a) -392.017 <73706563690263> -393.013 (MLP) -391.984 (both) -391.991 (ef) 25.0081 (fecti) 25.0179 (v) 14.9828 (ely) -392.006 (and) -392.987 (ef) 25.0081 <026369656e746c79> -392.015 (can) -392.015 (be) -392.993 (as) ] TJ 11.9551 TL T* [ (challenging) -268 (as) -269.013 (constructing) -269.013 (the) -267.987 (netw) 10.0094 (ork\056) -366.008 (The) -267.99 (most) -268.985 (popular) ] TJ 258.75 455.387 Td [ (method) -295.019 (used) -294.997 (in) -294.997 (training) -294.98 (MLPs) -295.014 (is) -296.014 (t) 0.98513 (he) -295.99 (well\055kno) 25 (wn) ] TJ /R44 9.9626 Tf 200.933 0 Td [ (bac) 20.0016 (kpr) 45.017 (o\055) ] TJ -200.933 -11.9551 Td [ (pa) 10.0081 (gation) ] TJ /R40 9.9626 Tf 38.2859 0 Td [ (\050BP\051) -296.987 (algorithm) -297.007 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 413.947 524.432 Tm (42) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 423.91 524.432 Tm [ (\135\056) -450.984 (Although) -297.004 (the) -297 (classic) -296.995 (BP) -296.98 (al\055) ] TJ -115.048 -11.9547 Td [ (gorithm) -366.017 (shares) -366.012 (a) -365.983 (great) -366.017 (con) 39.9982 (v) 14.9828 (enience) -366.017 (of) -366.017 (being) -365.998 (v) 14.9828 (ery) -365.983 (simple\054) ] TJ 11.9563 TL T* [ (the) 14.9852 (y) -299.984 (can) -299.992 (suf) 24.9836 (fer) -298.98 (from) -300.019 (tw) 10.0081 (o) -299.984 (major) -300.014 (problems\054) -311.992 (namely) 64.9892 (\054) -311.982 (\050i\051) -299.989 (con\055) ] TJ 11.9547 TL T* [ (v) 14.9828 (er) 17.9896 (gence) -289.993 (to) -290.018 (undesired) -290.988 (local) -290.02 (minima\054) -299.984 (if) -289.986 (global) -290.986 (optimal) 0.98023 (ity) -290.991 (is) ] TJ T* [ (assumed\073) -494.003 (and) -412.005 (\050ii\051) -412.983 (slo) 24.9934 (w) -412.987 (con) 39.9982 (v) 14.9828 (er) 17.9872 (gence) -412.015 (speed\056) -798.014 (Early) -412.98 (w) 10.0032 (orks) ] TJ T* [ (as) -380.993 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 324.274 464.656 Tm (38) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 334.237 464.656 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 340.523 464.656 Tm (30) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 350.486 464.656 Tm [ (\135) -380.998 (ar) 17.9896 (gue) -380.99 (that) -380 (such) -380.985 (problems) -381 (with) -381.01 (BP) -381.005 (algorithms) ] TJ -41.6234 -11.9551 Td [ (are) -264.988 (essentially) -265.017 (due) -265.01 (to) -265.002 (their) -265.005 (nature) -265.01 (of) -265.995 (being) -264.995 (gradient) -264.995 (descent) ] TJ 11.9551 TL T* [ (algorithms\054) -431.997 (while) -395.015 (an) -396.012 (associated) -395.993 (optimisation) -395.003 (problem) -395.988 (for) ] TJ 11.9559 TL T* [ (MLP) -234.01 (training) -234.99 (is) -235.007 (often) -233.99 (highly) -234.985 (non\055con) 40.0129 (v) 14.9828 (e) 15.0122 (x) -233.995 (and) -235.015 (of) -234 (lar) 17.997 (ge) -235.02 (scale\056) ] TJ 11.9551 -12.5 Td [ (One) -225.019 (major) -223.985 (approach) -225.016 (to) -223.992 (address) -225.011 (the) -225.011 (problem) -224.017 (of) -225.021 (undesired) ] TJ -11.9551 -11.9551 Td [ (local) -364.988 (minima) -364.018 (in) -364.986 (MLP) -363.988 (training) -365.008 (is) -364.983 (via) -364.008 (an) -364.998 (error\057loss) -365.013 (surf) 10.0032 (ace) ] TJ T* [ (analysis) -413.982 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 348.404 392.379 Tm (14) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 358.366 392.379 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 364.971 392.379 Tm (35) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 374.934 392.379 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 381.549 392.379 Tm (6) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 386.53 392.379 Tm [ (\135\056) -801.004 (Ev) 14.9877 (en) -414.009 (for) -412.99 (simple) -414.014 (tasks\054) -454.017 (such) -414 (as) -414.009 (the) ] TJ -77.668 -11.9551 Td [ (classic) -294.995 (XOR) -294.01 (problem\054) -305.998 (the) -293.98 (error) -295.019 (surf) 10.0032 (ace) -294.985 (analysis) -294 (is) -294.995 (surpris\055) ] TJ 11.9551 TL T* [ (ingly) -363.988 (complicated) -363.006 (and) -364.013 (its) -363.998 (res) 1.01944 (ults) -363.993 (can) -363.983 (be) -362.998 (hard) -364.018 (to) -364.008 (conclude) ] TJ (\133) ' ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 312.18 356.514 Tm (22) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 322.142 356.514 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 326.914 356.514 Tm (35) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 336.877 356.514 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 341.659 356.514 Tm (36) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 351.621 356.514 Tm [ (\135\056) -303.008 (Early) -229.013 (ef) 25.0081 (forts) -229.011 (in) -229.991 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 425.704 356.514 Tm (11) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 435.666 356.514 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 440.438 356.514 Tm (44) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 450.401 356.514 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 455.173 356.514 Tm (45) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 465.136 356.514 Tm [ (\135) -230.006 (try) -229.011 (to) -229.011 (identify) -230.011 (gen\055) ] TJ -156.273 -11.9551 Td [ (eral) -246.003 (conditions) -245.988 (on) -245.988 (the) -245.991 (topology) -245.993 (of) -245.998 (MLPs) -246.983 (to) -245.988 (eliminate) -246.003 (unde\055) ] TJ 11.9551 TL T* [ (sired) -338.992 (local) -338.012 (minima\054) -360.996 (i\056e\056\054) -360.989 (suboptimal) -338.007 (local) -338.992 (minima\056) -576.017 (Unfor) 20.0016 (\055) ] TJ 11.9559 TL T* [ (tunately) 64.9965 (\054) -370.014 (these) -344.991 (attempts) -345.984 (f) 9.99343 (ail) -346.011 (to) -344.991 (pro) 14.9828 (vide) -346.006 (complete) -345.996 (solutions) ] TJ 11.9551 TL T* [ (to) -360.006 (general) -360.018 (problems\056) -640.996 (On) -360.009 (the) -360.009 (other) -360.989 (hand\054) -386.98 (although) -361.003 (BP) -359.989 (al\055) ] TJ T* [ (gorithms) -309.002 (are) -310 (often) -308.997 (thought) -309.987 (to) -308.997 (be) -309.987 (sensiti) 25.0105 (v) 14.9828 (e) -309.012 (to) -310.017 (initialisations) ] TJ (\133) ' ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 312.18 284.784 Tm (19) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 322.142 284.784 Tm [ (\135\054) -464.001 (recent) -420.993 (results) -421.003 (reported) -422.003 (in) -421.018 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 443.975 284.784 Tm (10) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 453.938 284.784 Tm [ (\135) -420.993 (suggest) -420.998 (that) -421.993 (modern) ] TJ -145.075 -11.9551 Td [ (MLP) -232.01 (training) -232.01 (algorithms) -233.013 (can) -232.005 (o) 14.9828 (v) 14.9828 (ercome) -232 (the) -231.991 (problem) -232.015 (of) -233.02 (sub\055) ] TJ 11.9551 TL T* [ (optimal) -235.02 (local) -233.99 (minima) -235.02 (con) 39.9982 (v) 14.9828 (eniently) 64.999 (\056) -305.003 (Such) -235 (observ) 24.9909 (ations) -234.995 (ha) 19.9967 (v) 14.9828 (e) ] TJ 11.9559 TL T* [ (triggered) -352.985 (se) 25.0179 (v) 14.9828 (eral) -351.985 (v) 14.9828 (ery) -353.002 (recent) -353.005 (ef) 25.0056 (forts) -352.01 (to) -352.99 (characterise) -353.005 (global) ] TJ 11.9551 TL T* [ (optimality) -250.017 (of) -249.995 (DNN) -249.99 (training) -250.007 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 423.98 236.963 Tm (17) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 433.943 236.963 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 438.924 236.963 Tm (28) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 448.886 236.963 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 453.868 236.963 Tm (13) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 463.83 236.963 Tm (\135\056) Tj -143.013 -12.5 Td [ (The) -313.982 (w) 10 (ork) -314.999 (in) -314.014 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 377.146 224.463 Tm (17) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 387.108 224.463 Tm [ (\135) -313.989 (sho) 24.9934 (ws) -314.996 (that) -314.011 (both) ] TJ /R44 9.9626 Tf 73.1566 0 Td [ (deep) -313.987 (linear) -315.021 (networks) ] TJ /R40 9.9626 Tf -151.403 -11.9563 Td (and) Tj /R44 9.9626 Tf 16.7309 0 Td [ (deep) -235.017 (nonlinear) -235.997 (networks) ] TJ /R40 9.9626 Tf 100.562 0 Td [ (with) -234.995 (only) -236.015 (the) -235.01 <5265637469026564> -234.985 (Linear) ] TJ -117.293 -11.9547 Td [ (Unit) -304.986 (\050ReLU\051) -304.998 (function) -306 (in) -304.996 (the) -304.998 (hidden) -306.003 (layers) -305.003 (are) -304.979 (free) -304.983 (of) -305.988 (sub\055) ] TJ 11.9551 TL T* [ (optimal) -323.985 (local) -324.015 (minima\056) -531.007 (The) -323.98 (attempted) -323.995 (technique) -324.005 (is) -324.01 (not) -324.005 (ap\055) ] TJ T* [ (plicable) -332.984 (for) -331.986 (analysing) ] TJ /R44 9.9626 Tf 91.2828 0 Td [ (deep) -333.003 (nonlinear) -331.984 (networks) ] TJ /R40 9.9626 Tf 103.468 0 Td [ (with) -331.999 (other) ] TJ -194.751 -11.9551 Td [ (acti) 24.9811 (v) 24.9811 (ation) -254.997 (functions\054) -256.001 (e\056g\056) -326.019 (the) ] TJ /R44 9.9626 Tf 115.805 0 Td (Sigmoid) Tj /R40 9.9626 Tf 35.4684 0 Td [ (and) -255.011 (the) ] TJ /R44 9.9626 Tf 31.6438 0 Td (SoftSign) Tj /R40 9.9626 Tf 33.2152 0 Td [ (\056) -325 (Re\055) ] TJ -216.132 -11.9551 Td [ (cent) -291.988 (w) 10 (ork) -293.003 (in) -292.017 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 365.639 152.731 Tm (28) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R40 9.9626 Tf 1 0 0 1 375.602 152.731 Tm [ (\135) -293.012 (pro) 14.9852 (v) 14.9828 (es) -291.988 (that) -292.015 (all) -293 (local) -292.02 (minima) -291.99 (are) -292.981 (globally) ] TJ -66.7395 -11.9559 Td [ (minimal) -382.988 (for) -382.997 (e) 15.0122 (xact) -382.992 (learning) -383.015 (with) -383.01 (wide) -382.985 (MLPs\054) -416.989 (if) -382.99 (the) -382.985 (num\055) ] TJ 11.9551 TL T* [ (ber) -307.99 (of) -307.005 (units) -307.998 (in) -306.995 (a) -307.993 (hidden) -306.986 (layer) -307.988 (of) -307.008 (the) -308.017 (netw) 10.0081 (ork) -307.983 (is) -306.993 (lar) 17.997 (ger) -307.993 (than) ] TJ T* [ (the) -354.992 (number) -355.019 (of) -354.997 (training) -353.99 (samples) -355.009 (and) -354.995 (the) -354.99 (netw) 10.0081 (ork) -354.995 (structure) ] TJ T* [ (from) -191.017 (that) -189.993 (layer) -190.988 (on) -189.998 (is) -191.015 (p) 10.0032 (yramidal\056) -290 (Unfortunately) 65.0039 (\054) -202 (the) -191.017 (deplo) 10.0179 (yed) ] TJ T* [ (techniques) -216.015 (can) -214.986 (neither) -215.991 (e) 15.0122 (xclude) -215.008 (the) -215.993 (possibility) -214.994 (of) -216.003 (suboptimal) ] TJ T* [ (local) -287.001 (minima) -287.011 (of) -287.008 (lo) 24.9885 (w) -285.989 (rank\054) -295.985 (nor) -287.006 (be) -287.011 (applied) -286.991 (to) -287.001 (narro) 25.0154 (w) -287.006 (MLPs\056) ] TJ ET Q Q Q q q 1 1 1 rg /a0 gs 48.406 786.422 515.188 -52.699 re f q /s5 gs /x6 Do Q q /s7 gs /x8 Do Q q /s9 gs /x10 Do Q q /s11 gs /x12 Do Q Q Q Q q 1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET 1 1 1 rg n 270 32 72 14 re f* 0.5 0.5 0.5 rg BT /F2 9 Tf 10.8 TL ET BT 1 0 0 1 299.25 35 Tm (811) Tj T* ET Q endstream endobj 15 0 obj << /Filter /FlateDecode /Resources << /ExtGState << /a0 << /CA 1 /ca 1 >> >> /XObject << /x18 16 0 R >> >> /Length 28 /Group << /Type /Group /S /Transparency /CS /DeviceRGB /I true >> /BBox [ 78 746 96 765 ] /Type /XObject /Subtype /Form >> stream x+O4PH/VЯ0Pp 0 endstream endobj 16 0 obj << /Filter /FlateDecode /Resources 17 0 R /Length 107 /Type /XObject /BBox [ 78 746 96 765 ] /Subtype /Form >> stream xe AC̬wʠ =p,?]%+H-
Jc "82w8VSnGW;"
endstream
endobj
17 0 obj
<<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
>>
endobj
18 0 obj
<<
/Filter /FlateDecode
/Resources <<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
/XObject <<
/x15 19 0 R
>>
>>
/Length 28
/Group <<
/Type /Group
/S /Transparency
/CS /DeviceRGB
/I true
>>
/BBox [ 67 752 84 775 ]
/Type /XObject
/Subtype /Form
>>
stream
x+O4PH/VЯ04Up
0
endstream
endobj
19 0 obj
<<
/Filter /FlateDecode
/Resources 20 0 R
/Length 228
/Type /XObject
/BBox [ 67 752 84 775 ]
/Subtype /Form
>>
stream
xeQKn!s ?FPav6R٪TS.
b];15YyR
{7QL.\:Rv/x9l+L7h%1!}i/AI(kz"U&,YO![R hg{3}4/GyYF:!w}Gn+'xJcO9i뽼_-:`
endstream
endobj
20 0 obj
<<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
>>
endobj
21 0 obj
<<
/Filter /FlateDecode
/Resources <<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
/XObject <<
/x24 22 0 R
>>
>>
/Length 28
/Group <<
/Type /Group
/S /Transparency
/CS /DeviceRGB
/I true
>>
/BBox [ 132 751 480 772 ]
/Type /XObject
/Subtype /Form
>>
stream
x+O4PH/VЯ02Qp
0
endstream
endobj
22 0 obj
<<
/Filter /FlateDecode
/Resources 23 0 R
/Length 53223
/Type /XObject
/BBox [ 132 751 480 772 ]
/Subtype /Form
>>
stream
xtI:6%Q㨈?7rA= u%6 ?Y(WbWo{B>9
x`Znϳ|8{3?0x*z ǃ|,@:w>`c|*ϻⳅKO3`g
:_|}}><.6`Z{{3]#<_o"~:ͺgk7/Ұ@|K yp ]03ʷCmş8˽Y?>(3!Bwqs.Z8,~~=rMT̩y+/*w: uBZ_`ߵp`%M?ɝ1ɳw=vDۉy&xb4Q>d@ sg~lA