%PDF-1.3 1 0 obj << /Kids [ 3 0 R 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R ] /Type /Pages /Count 9 >> endobj 2 0 obj << /Title (BPGrad\072 Towards Global Optimality in Deep Learning via Branch and Pruning) /Producer (PyPDF2) /Author (Ziming Zhang\054 Yuanwei Wu\054 Guanghui Wang) /Subject (2018 IEEE Conference on Computer Vision and Pattern Recognition) >> endobj 3 0 obj << /Parent 1 0 R /Rotate 0 /Contents 13 0 R /Resources << /XObject << /x8 14 0 R /R48 17 0 R /R47 18 0 R /x6 19 0 R /x12 22 0 R /x10 25 0 R >> /ExtGState << /s9 28 0 R /s11 31 0 R /a0 << /CA 1 /ca 1 >> /s5 34 0 R /s7 37 0 R /R31 40 0 R >> /Font << /F2 41 0 R /F1 42 0 R /R38 43 0 R /R36 46 0 R /R34 51 0 R /R44 55 0 R /R32 59 0 R /R42 63 0 R /R40 67 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] >> /Group 71 0 R /MediaBox [ 0 0 612 792 ] /Annots [ 72 0 R 73 0 R 74 0 R 75 0 R 76 0 R 77 0 R 78 0 R 79 0 R 80 0 R 81 0 R 82 0 R 83 0 R 84 0 R 85 0 R 86 0 R 87 0 R 88 0 R 89 0 R 90 0 R 91 0 R 92 0 R ] /Type /Page >> endobj 4 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 93 0 R /Resources << /XObject << /R74 94 0 R >> /ExtGState << /R31 40 0 R >> /Font << /R42 63 0 R /R75 96 0 R /F2 99 0 R /F1 100 0 R /R34 51 0 R /R32 59 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] /ColorSpace << /R73 95 0 R >> >> /MediaBox [ 0 0 612 792 ] /Annots [ 101 0 R 102 0 R 103 0 R 104 0 R 105 0 R 106 0 R 107 0 R 108 0 R 109 0 R 110 0 R 111 0 R 112 0 R 113 0 R 114 0 R 115 0 R 116 0 R 117 0 R 118 0 R ] >> endobj 5 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 119 0 R /Resources << /ExtGState << /R31 40 0 R >> /Font << /R42 63 0 R /R98 120 0 R /R124 123 0 R /R122 126 0 R /R120 129 0 R /R108 133 0 R /R118 138 0 R /R112 142 0 R /R100 145 0 R /R110 150 0 R /R102 155 0 R /R116 158 0 R /R104 161 0 R /R114 166 0 R /R106 170 0 R /F2 174 0 R /F1 175 0 R /R36 46 0 R /R34 51 0 R /R32 59 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 176 0 R 177 0 R 178 0 R 179 0 R 180 0 R 181 0 R 182 0 R 183 0 R 184 0 R 185 0 R 186 0 R 187 0 R 188 0 R 189 0 R ] >> endobj 6 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 190 0 R /Resources << /ExtGState << /R31 40 0 R >> /Font << /R42 63 0 R /R98 120 0 R /F1 191 0 R /R145 192 0 R /R108 133 0 R /R118 138 0 R /R141 196 0 R /R143 200 0 R /R112 142 0 R /R100 145 0 R /R110 150 0 R /R116 158 0 R /R104 161 0 R /R114 166 0 R /R106 170 0 R /F2 204 0 R /R34 51 0 R /R32 59 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 205 0 R 206 0 R 207 0 R 208 0 R 209 0 R 210 0 R 211 0 R 212 0 R 213 0 R 214 0 R 215 0 R ] >> endobj 7 0 obj << /Parent 1 0 R /Rotate 0 /Contents 216 0 R /Resources << /XObject << /R176 217 0 R /R177 218 0 R /R178 219 0 R /R179 220 0 R >> /ExtGState << /R31 40 0 R >> /Font << /R180 221 0 R /R182 224 0 R /R44 55 0 R /R42 63 0 R /R98 120 0 R /R124 123 0 R /R120 129 0 R /R108 133 0 R /R118 138 0 R /R141 196 0 R /R143 200 0 R /R112 142 0 R /R100 145 0 R /R110 150 0 R /R116 158 0 R /R104 161 0 R /R114 166 0 R /R106 170 0 R /F2 227 0 R /F1 228 0 R /R36 46 0 R /R34 51 0 R /R32 59 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] >> /Group 71 0 R /MediaBox [ 0 0 612 792 ] /Annots [ 229 0 R 230 0 R 231 0 R 232 0 R 233 0 R 234 0 R 235 0 R 236 0 R 237 0 R 238 0 R 239 0 R 240 0 R 241 0 R 242 0 R 243 0 R 244 0 R 245 0 R 246 0 R 247 0 R 248 0 R 249 0 R 250 0 R 251 0 R 252 0 R ] /Type /Page >> endobj 8 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 253 0 R /Resources << /ExtGState << /R31 40 0 R >> /Font << /R210 254 0 R /R212 257 0 R /R214 260 0 R /R216 263 0 R /R180 221 0 R /R218 266 0 R /R222 269 0 R /R224 272 0 R /R42 63 0 R /R226 275 0 R /R75 96 0 R /R98 120 0 R /R124 123 0 R /R120 129 0 R /R108 133 0 R /R100 145 0 R /R104 161 0 R /R106 170 0 R /F2 278 0 R /R220 279 0 R /F1 282 0 R /R34 51 0 R /R32 59 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 283 0 R 284 0 R 285 0 R 286 0 R 287 0 R 288 0 R 289 0 R 290 0 R 291 0 R 292 0 R 293 0 R 294 0 R 295 0 R 296 0 R 297 0 R 298 0 R 299 0 R 300 0 R 301 0 R 302 0 R ] >> endobj 9 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 303 0 R /Resources << /ExtGState << /R31 40 0 R >> /Font << /R108 133 0 R /R244 304 0 R /R106 170 0 R /F2 307 0 R /F1 308 0 R /R122 126 0 R /R258 309 0 R /R248 312 0 R /R120 129 0 R /R246 315 0 R /R254 318 0 R /R34 51 0 R /R256 321 0 R /R32 59 0 R /R250 324 0 R /R252 327 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 330 0 R 331 0 R 332 0 R 333 0 R 334 0 R 335 0 R 336 0 R 337 0 R 338 0 R 339 0 R 340 0 R ] >> endobj 10 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 341 0 R /Resources << /ExtGState << /R31 40 0 R >> /Font << /R108 133 0 R /R277 342 0 R /R110 150 0 R /R42 63 0 R /R114 166 0 R /R106 170 0 R /F2 345 0 R /F1 346 0 R /R122 126 0 R /R273 347 0 R /R34 51 0 R /R32 59 0 R /R271 350 0 R /R275 353 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 356 0 R 357 0 R 358 0 R 359 0 R 360 0 R 361 0 R ] >> endobj 11 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 362 0 R /Resources << /ExtGState << /R31 40 0 R >> /Font << /R42 63 0 R /F2 363 0 R /F1 364 0 R /R38 43 0 R /R34 51 0 R /R32 59 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 365 0 R 366 0 R 367 0 R 368 0 R 369 0 R 370 0 R 371 0 R 372 0 R 373 0 R 374 0 R 375 0 R 376 0 R 377 0 R 378 0 R 379 0 R 380 0 R 381 0 R 382 0 R 383 0 R 384 0 R 385 0 R 386 0 R 387 0 R 388 0 R 389 0 R 390 0 R 391 0 R 392 0 R 393 0 R 394 0 R 395 0 R 396 0 R 397 0 R 398 0 R 399 0 R 400 0 R 401 0 R 402 0 R 403 0 R 404 0 R 405 0 R 406 0 R 407 0 R 408 0 R 409 0 R 410 0 R 411 0 R 412 0 R 413 0 R 414 0 R 415 0 R 416 0 R 417 0 R 418 0 R 419 0 R 420 0 R ] >> endobj 12 0 obj << /Type /Catalog /Pages 1 0 R >> endobj 13 0 obj << /Length 20950 >> stream q q q 0.1 0 0 0.1 0 0 cm /R31 gs 0 g q 10 0 0 10 0 0 cm BT /R32 14.3462 Tf 1 0 0 1 52.5102 675.067 Tm [ (BPGrad\072) -310.011 (T) 91.9897 (o) 9.99625 (wards) -250.003 (Global) -250.013 (Optimality) -249.998 (in) -249.993 (Deep) -250.008 (Lear) 14.9893 (ning) -249.993 (via) -250.008 (Branch) -250.008 (and) -249.991 (Pruning) ] TJ /R34 11.9552 Tf 90.8027 -37.8582 Td [ (Ziming) -250 (Zhang) ] TJ /R36 7.9701 Tf 68.7422 4.33906 Td [ <03> -469.32 (y) -0.19911 ] TJ /R34 11.9552 Tf -131.154 -18.2859 Td [ (Mitsubishi) -249.985 (Electric) -250.005 (Research) -250.006 (Laboratories) ] TJ -6.36719 -13.948 Td [ (201) -249.99 (Broadw) 10.0115 (ay) 65.002 (\054) -250.012 (Cambridge\054) -250.012 (MA) -249.983 (02139\0551955) ] TJ /R38 8.9664 Tf 66.7871 -13.948 Td (zzhang\100merl\056com) Tj /R34 11.9552 Tf 204.352 41.843 Td [ (Y) 110.996 (uanwei) -249.985 (W) 50.0056 (u) ] TJ /R36 7.9701 Tf 61.4848 4.33906 Td [ <03> -0.30019 ] TJ /R34 11.9552 Tf 4.33906 TL T* [ (\054) -250.01 (Guanghui) -250.01 (W) 79.9984 (ang) ] TJ -67.5098 -13.948 Td [ (EECS\054) -250.012 (The) -250.014 (Uni) 24.9957 (v) 14.9851 (ersity) -249.989 (of) -250.014 (Kansas) ] TJ -23.7973 -13.9469 Td [ (1450) -250.002 (Jayha) 15.0076 (wk) -250 (Blvd\056\054) -249.989 (La) 14.9851 (wrence\054) -249.993 (KS) -250.002 (66045) ] TJ /R40 8.9664 Tf 35.9441 -13.948 Td [ (f) -0.90126 ] TJ /R38 8.9664 Tf 4.6082 0 Td [ (y262w558\054) -600.005 (ghwang) ] TJ /R40 8.9664 Tf 86.077 0 Td [ (g) -0.89854 ] TJ /R38 8.9664 Tf 4.60703 0 Td (\100ku\056edu) Tj /R32 11.9552 Tf -301.092 -41.0461 Td (Abstract) Tj /R42 9.9626 Tf 1.02 0 0 1 62.0672 529.569 Tm [ (Under) 10.0191 (standing) -366.013 (the) -365.009 (global) -366.004 (optimality) -364.984 (in) -365.983 (deep) -364.989 (learning) ] TJ 1.015 0 0 1 49.4441 517.614 Tm [ (\050DL\051) -247 (has) -247.006 (been) -246.986 (attr) 14 (acting) -246.997 (mor) 36.994 (e) -247.008 (and) -247.994 (mor) 36.9952 (e) -247.008 (attention) -247.018 (r) 36.0029 (ecently) 53.985 (\056) ] TJ 0.986 0 0 1 49.7828 505.658 Tm [ (Con) 41.0135 (ventional) -254.984 (DL) -254.019 (solver) 10.0111 (s\054) -255.016 (howe) 14.9805 (ver) 113.013 (\054) -254.986 (have) -253.99 (not) -255.014 (been) -253.99 (de) 14.9892 (veloped) ] TJ 1.02 0 0 1 50.1121 493.703 Tm [ (intentionally) -261.99 (to) -262.003 (seek) -261.983 (for) -261.98 (suc) 13.9971 (h) -262.003 (global) -261.984 (optimality) 54.0159 (\056) -355.013 (In) -262.012 (this) -261.98 (pa\055) ] TJ 1.016 0 0 1 50.1121 481.748 Tm [ (per) -246.017 (we) -246 (pr) 43.9969 (opose) -246.005 (a) -246.016 (no) 10.0073 (vel) -246.995 (appr) 45.0124 (oximation) -246.01 (algorithm\054) ] TJ /R34 9.9626 Tf 199.656 0 Td (BPGrad) Tj /R42 9.9626 Tf 32.0993 0 Td (\054) Tj 1.02 0 0 1 50.1121 469.793 Tm [ (towar) 35.9982 (ds) -329.981 (optimizing) -330.988 (deep) -331.007 (models) -329.986 (globally) -331.01 (via) -329.991 (br) 13.9827 (anc) 15.0182 (h) -331.003 (and) ] TJ 0.982 0 0 1 50.1121 457.838 Tm [ (pruning) 15.016 (\056) -315.019 (Our) -253.99 (BPGr) 14.9892 (ad) -254.013 (algorithm) -255.01 (is) -254.016 (based) -254.006 (on) -254.013 (the) -254.013 (assumption) ] TJ 1.02 0 0 1 50.1121 445.883 Tm [ (of) -296.984 (Lipsc) 14.9947 (hitz) -297 (continuity) -297.014 (in) -296.984 (DL\054) -296.994 (and) -297.009 (as) -297 (a) -296.983 (r) 36.0018 (esult) -296.988 (i) 1 (t) -296.99 (can) -297.002 (adap\055) ] TJ 1.006 0 0 1 50.1121 433.927 Tm [ (tively) -248.987 (determine) -247.982 (the) -249.006 (step) -249.018 (size) -248.002 (for) -249.011 (curr) 36.9815 (ent) -249.006 (gr) 15.0177 (adient) -249.006 (given) -248.012 (the) ] TJ 0.98 0 0 1 50.1121 421.972 Tm [ (history) -240.984 (of) -240.997 (pr) 38.0014 (e) 15.98 (vious) -240.989 (updates\054) -244 (wher) 37.9826 (ein) -240.981 (theor) 37.9964 (etically) -240.984 (no) -241.009 (smaller) ] TJ 11.9547 TL T* [ (steps) -253.005 (can) -251.996 (ac) 14.9835 (hie) 14.9985 (ve) -251.986 (the) -252.984 (global) -252.015 (optimality) 55.9868 (\056) -316.012 (W) 93.9869 (e) -253.001 (pr) 46.0032 (o) 11.0051 (ve) -252.986 (that\054) -252.999 (by) -252.999 (r) 37.9989 (e\055) ] TJ 1 0 0 1 50.1121 398.062 Tm [ (peating) -248.987 (suc) 14.9846 (h) -247.993 (br) 14.9889 (anc) 14.984 (h\055and\055pruning) -249.005 (pr) 44.9839 (ocedur) 36.9865 (e) 9.99343 (\054) -249.015 (we) -249.007 (can) -249.012 (locate) ] TJ 0.99 0 0 1 50.1121 386.107 Tm [ (the) -251.987 (global) -252.999 (optimality) -252.015 (within) -252.987 <026e697465> -251.991 (iter) 15.0166 (ations\056) -313.015 (Empirically) -252.018 (an) ] TJ 0.98 0 0 1 50.1121 374.152 Tm [ (ef) 17.9961 <026369656e74> -253.989 (solver) -253.989 (based) -254.003 (on) -254.013 (BPGr) 16.02 (ad) -254.013 (for) -254.012 (DL) -253.997 (is) -254.014 (pr) 46.0032 (oposed) -254 (as) -253.987 (well\054) ] TJ 1.02 0 0 1 50.1121 362.196 Tm [ (and) -322.994 (it) -324.015 (outperforms) -323.011 (con) 39.0061 (ventional) -323 (DL) -324.002 (solver) 9.99689 (s) -322.993 (suc) 13.9971 (h) -323.007 (as) -322.985 (Ada\055) ] TJ 1.013 0 0 1 50.1121 350.241 Tm [ (gr) 15.0191 (ad\054) -246.992 (Adadelta\054) -246.013 (RMSPr) 43.9942 (op\054) -246.992 (and) -245.987 (Adam) -246.985 (in) -247.012 (the) -246 (tasks) -246.997 (of) -246.005 (object) ] TJ 1 0 0 1 50.1121 338.286 Tm [ (r) 37.0196 (eco) 9.99466 (gnition\054) -249.993 (detection\054) -250.011 (and) -249.983 (se) 39.9946 (gmentation\056) ] TJ /R32 11.9552 Tf 38.3879 TL T* [ (1\056) -249.99 (Intr) 18.0146 (oduction) ] TJ /R34 9.9626 Tf 1.008 0 0 1 62.0672 280.128 Tm [ (Deep) -247.995 (learning) -248.011 (\050DL\051) -247.007 (has) -248.007 (been) -248.004 (demonstrated) -248.004 (successfully) ] TJ 1.02 0 0 1 50.1121 268.173 Tm [ (in) -314.013 (man) 14.0187 (y) -314.013 (dif) 24.9838 (ferent) -314.002 (research) -313.989 (areas) -315 (such) -314.008 (as) -313.983 (image) -314.017 <636c617373690263612d> ] TJ 0.998 0 0 1 50.1121 256.218 Tm [ (tion) -251.982 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 71.3949 256.218 Tm (20) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.998 0 0 1 81.357 256.218 Tm [ (\135\054) -251.982 (speech) -250.998 (recognition) -252.01 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 170.313 256.218 Tm (16) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.998 0 0 1 180.275 256.218 Tm [ (\135) -251.994 (and) -250.99 (natural) -252.004 (language) -251 (pro\055) ] TJ 1.001 0 0 1 50.1121 244.263 Tm [ (cessing) -249.998 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 85.2871 244.263 Tm (32) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1.001 0 0 1 95.25 244.263 Tm [ (\135\056) -311.016 (In) -250.011 (general\054) -250.98 (its) -249.997 (empirical) -249.986 (success) -251.011 (ste) 1.00128 (ms) -250.991 (mainly) ] TJ 0.98 0 0 1 50.1121 232.308 Tm [ (from) -244.99 (better) -245.988 (netw) 9.98858 (ork) -245.008 (architectures) -245.008 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 185.934 232.308 Tm (15) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.98 0 0 1 195.896 232.308 Tm [ (\135\054) -246.994 (lar) 18 (ger) -244.995 (mount) -246.012 (of) -245.012 (train\055) ] TJ 1 0 0 1 50.1121 220.352 Tm [ (ing) -250.02 (data) -249.994 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 87.741 220.352 Tm (6) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 92.7223 220.352 Tm [ (\135\054) -249.998 (and) -249.991 (better) -249.99 (learning) -250.02 (algorithms) -249.99 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 226.639 220.352 Tm (12) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 236.602 220.352 Tm (\135\056) Tj 0.99 0 0 1 62.0672 207.556 Tm [ (Ho) 25.0119 (we) 25.0138 (v) 15.985 (er) 39.987 (\054) -252.986 (theoretical) -252.01 (understanding) -252.997 (of) -253.015 (DL) -252.991 (for) -251.986 (its) -253.018 (success) ] TJ 0.98 0 0 1 50.1121 195.601 Tm [ (still) -236.005 (remains) -236.993 (elusi) 26.0061 (v) 15.0072 (e\056) -309.994 (V) 113.018 (ery) -235.993 (recently) -236.006 (researchers) -236.983 (start) -236.018 (to) -235.998 (under) 19.9947 (\055) ] TJ 1.02 0 0 1 50.1121 183.646 Tm [ (stand) -261 (DL) -261.006 (from) -260.015 (the) -260.989 (perspecti) 24.0168 (v) 14.9977 (e) -260.995 (of) -260.013 (optimization) -260.991 (such) -260.998 (as) -261.012 (the) ] TJ 0.98 0 0 1 50.1121 171.691 Tm [ (optimality) -229.012 (of) -228.008 (learned) -229.02 (models) -228.007 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 167.51 171.691 Tm (13) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.98 0 0 1 177.473 171.691 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 182.146 171.691 Tm (14) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.98 0 0 1 192.109 171.691 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 196.782 171.691 Tm (36) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.98 0 0 1 206.744 171.691 Tm [ (\135\056) -308.007 (It) -227.996 (has) -229.003 (been) -228.013 (pro) 15.0135 (v) 15.006 (ed) ] TJ 1.02 0 0 1 50.1121 159.736 Tm [ (that) -314 (under) -313.994 (certain) -313.018 (\050v) 13.9899 (ery) -314.007 (restrict) 0.99224 (i) 23.9868 (v) 14.9977 (e\051) -314.013 (conditions) -313.99 (the) -314 (critical) ] TJ 11.9551 TL T* [ (points) -252.012 (learned) -252.008 (for) -251.987 (the) -250.995 (deep) -252.012 (models) -251.998 (actually) -252 (achie) 24.9958 (v) 14 (e) -252 (global) ] TJ 0.996 0 0 1 50.1121 135.825 Tm [ (optimality) 64.9943 (\054) -249.995 (e) 24.9818 (v) 15.0036 (en) -249.005 (though) -250.001 (the) -249.996 (optimization) -249.99 (in) -248.996 (deep) -250 (learning) -250.012 (is) ] TJ 1.02 0 0 1 50.1121 123.87 Tm [ (highly) -289.008 (noncon) 38.9869 (v) 14.9977 (e) 14.0067 (x\056) -436 (These) -288.985 (theoretical) -290.012 (results) -289.016 (may) -289.006 (partially) ] TJ 1 0 0 1 50.1121 111.915 Tm [ (e) 15.0128 (xplain) -250.012 (wh) 4.98753 (y) -249.993 (such) -249.985 (deep) -249.996 (models) -250.006 (w) 10 (ork) -249.99 (well) -249.985 (in) -249.985 (practice\056) ] TJ ET Q 3.98 w 0 G 501.121 1002.18 m 1446.11 1002.18 l S q 10 0 0 10 0 0 cm BT /R44 5.9776 Tf 1 0 0 1 60.141 93.616 Tm [ <03> -0.90058 ] TJ /R34 7.9701 Tf 4.3168 -2.81289 Td [ (Joint) -250.02 <02727374> -250.017 (authors) -250.017 (for) -249.985 (the) -250.005 (paper) 54.9938 (\056) ] TJ /R44 5.9776 Tf -3.92969 -6.99023 Td [ (y) -0.10006 ] TJ /R34 7.9701 Tf 3.92969 -2.81289 Td [ (Corresponding) -250 (author) 54.9815 (\056) ] TJ ET Q q 1441.14 0 0 599.618 3549.31 5043.22 cm /R48 Do Q q 10 0 0 10 0 0 cm BT /R34 8.9664 Tf 0.98 0 0 1 308.862 494.857 Tm [ (Figure) -254.983 (1\056) ] TJ /R34 7.9701 Tf 34.707 0 Td [ (Illustration) -255.021 (of) -255 (ho) 25.9747 (w) -254.974 (BPGrad) -255.003 (w) 9.98355 (orks\054) -255.015 (where) -255.021 (each) -254.984 (black) -254.009 (dot) -255.003 (denotes) ] TJ 0.993 0 0 1 308.862 485.393 Tm [ (the) -251.991 (solution) -252.016 (at) -252.007 (each) -251.991 (iterations) -252.013 (\050) ] TJ /R42 7.9701 Tf 98.1468 0 Td (i\056e) Tj /R34 7.9701 Tf 7.74759 0 Td [ (\056) -252.01 (branch\051\054) -252.01 (directed) -251.991 (dotted) -251.985 (lines) -251.997 (denote) -253.009 (the) ] TJ 1.02 0 0 1 308.862 475.928 Tm [ (current) -243.987 (gradients\054) -245.011 (and) -244.008 (red) -244.993 (dotted) -244.005 (circles) -245.002 (denote) -244.023 (the) -245.026 (re) 15.0157 (gions) -244.017 (wherein) -245.014 (there) ] TJ 1.006 0 0 1 308.862 466.463 Tm [ (should) -248.004 (be) -248.981 (no) -249.009 (sol) 1.00787 (utions) -249.015 (achie) 25.0233 (ving) -249.003 (global) -247.992 (optimality) -248.984 (\050) ] TJ /R42 7.9701 Tf 165.543 0 Td (i\056e) Tj /R34 7.9701 Tf 7.74688 0 Td [ (\056) -248.978 (pruning\051\056) -308.02 (BPGrad) ] TJ 0.981 0 0 1 308.862 456.999 Tm [ (can) -254.003 (automatically) -254 (estimate) -253.019 (the) -253.984 (scales) -254.012 (of) -253.993 (these) -253.974 (re) 15 (gions) -253.987 (ba) 0.99297 (sed) -254 (on) -254.012 (the) -253.987 (function) ] TJ 1 0 0 1 308.862 447.534 Tm [ (e) 25.0019 (v) 24.9835 (aluation) -250.01 (of) -250.004 (solutions) -249.994 (and) -249.979 (the) -250.007 (Lipschitz) -250.025 (continuity) -249.988 (assumption\056) ] TJ /R34 9.9626 Tf 0.981 0 0 1 320.817 428.168 Tm [ (Global) -254.018 (optimality) -254.005 (is) -253.995 (al) 10 (w) 10.0021 (ays) -253.99 (desirable) -254 (and) -253.985 (preferred) -253.995 (in) -253.99 (op\055) ] TJ 0.98 0 0 1 308.862 416.213 Tm [ (timization\056) -306.997 (Locating) -226.003 (global) -226.998 (optimality) -226.013 (in) -226.993 (deep) -226.013 (learning\054) -232.994 (ho) 26.0111 (w\055) ] TJ 0.991 0 0 1 308.862 404.257 Tm [ (e) 25.0052 (v) 15.0101 (er) 41.0069 (\054) -252.003 (is) -250.992 (e) 14.9952 (xtremely) -252.006 (challenging) -251.007 (due) -252 (to) -251.012 (its) -252.006 (high) -250.982 (non\055con) 40.0103 (v) 15.0101 (e) 14.9952 (xity) 66.0146 (\054) ] TJ 1.02 0 0 1 308.862 392.302 Tm [ (and) -347.013 (thus) -348.005 (no) -346.989 (con) 39.0049 (v) 14.9989 (entional) -346.991 (DL) -346.996 (solv) 13.9875 (ers\054) ] TJ /R42 9.9626 Tf 156.748 0 Td [ (e) 15.0061 (\056g) ] TJ /R34 9.9626 Tf 11.7482 0 Td [ (\056) -347.015 (stochastic) -348.02 (gra\055) ] TJ -168.496 -11.9551 Td [ (dient) -259.984 (descent) -261.01 (\050SGD\051) -260 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 398.078 380.347 Tm (2) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1.02 0 0 1 403.059 380.347 Tm [ (\135\054) -263.994 (Adagrad) -260.995 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 452.689 380.347 Tm (7) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1.02 0 0 1 457.67 380.347 Tm [ (\135\054) -263.994 (Adadelta) -261.02 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 508.997 380.347 Tm (37) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1.02 0 0 1 518.959 380.347 Tm [ (\135\054) -263.994 (RM\055) ] TJ -205.977 -11.9547 Td [ (SProp) -301.985 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 340.16 368.392 Tm (33) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1.02 0 0 1 350.123 368.392 Tm [ (\135) -301.997 (and) -302 (Adam) -301.985 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 405.605 368.392 Tm (18) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1.02 0 0 1 415.568 368.392 Tm [ (\135\054) -316.004 (is) -302.002 (intentionally) -302.007 (de) 25.0054 (v) 13.9971 (eloped) -302.016 (for) ] TJ 1.002 0 0 1 308.862 356.437 Tm [ (this) -249.993 (purpose\054) -250.995 (to) -250.985 (our) -250 (best) -251.005 (kno) 25 (wledge\056) -312.015 (Alternati) 25.0046 (v) 15.0116 (ely) -249.988 (dif) 25 (ferent) ] TJ 0.98 0 0 1 308.862 344.482 Tm [ (re) 14.9835 (gularization) -235.01 (techniques) -235.012 (are) -235.02 (applied) -235.98 (to) -234.995 (smooth) -234.98 (the) -235.02 (objecti) 25.0108 (v) 15.0085 (e) ] TJ 0.987 0 0 1 308.862 332.526 Tm [ (functions) -252.98 (in) -253.99 (DL) -253.005 (so) -253.985 (that) -253.01 (the) -253.012 (solv) 15.0113 (ers) -253.99 (can) -252.982 (con) 41.0117 (v) 14.9914 (er) 18.0056 (ge) -253.012 (to) -253.99 (some) -252.987 (ge\055) ] TJ 0.998 0 0 1 308.862 320.571 Tm [ (ometrically) -250.005 (wider) -251.007 (and) -250.99 <036174746572> -250.012 (re) 15.0128 (gions) -251 (in) -250.002 (the) -251.014 (parameter) -249.983 (space) ] TJ 0.98 0 0 1 308.503 308.616 Tm [ (where) -254.989 (g) -0.99773 (ood) -255.009 (model) -255.002 (solutions) -255.98 (may) -254.979 (e) 15.0185 (xist) -255.995 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 464.878 308.616 Tm (39) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.98 0 0 1 474.84 308.616 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 479.776 308.616 Tm (4) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.98 0 0 1 484.757 308.616 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 489.692 308.616 Tm (40) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.98 0 0 1 499.655 308.616 Tm [ (\135\056) -317.009 (But) -255.995 (these) ] TJ 1 0 0 1 308.862 296.661 Tm [ (solutions) -249.983 (may) -249.985 (not) -250.02 (necessarily) -249.983 (be) -249.997 (the) -249.988 (global) -250.012 (optimum\056) ] TJ 0.995 0 0 1 320.817 284.472 Tm [ (Inspired) -251.98 (by) -252.012 (the) -253.012 (techniques) -251.993 (in) -251.998 (global) -252.007 (optimization) -252.012 (of) -252.993 (non\055) ] TJ 1.02 0 0 1 308.862 272.517 Tm [ (con) 39.0049 (v) 14.9989 (e) 15.0061 (x) -252.008 (functions\054) -253.017 (we) -251.986 (propose) -252 (a) -251 (no) 14.0067 (v) 14.9965 (el) -252 (approximation) -250.991 (algo\055) ] TJ 11.9563 TL (rithm\054) ' /R42 9.9626 Tf 28.2686 0 Td [ (BPGr) 15.0085 (ad) ] TJ /R34 9.9626 Tf 33.0587 0 Td [ (\054) -421.015 (whi) 1.01867 (ch) -385.996 (has) -385.014 (the) -385.999 (ability) -384.994 (of) -385.979 (locati) 0.99464 (ng) -386.003 (global) ] TJ -61.3274 -11.9551 Td [ (optimality) -285 (in) -284.989 (DL) -285.991 (via) -285.016 (branch) -284.997 (and) -285.006 (pruning) -286.011 (\050BP\051\056) -285.011 (BP) -285.016 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 522.089 248.606 Tm (29) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1.02 0 0 1 532.052 248.606 Tm [ (\135) -285.006 (is) ] TJ 1.007 0 0 1 308.862 236.651 Tm [ (a) -247.006 (well\055kno) 25.0095 (wn) -247 (algorithm) -247 (de) 25.0168 (v) 13.9928 (eloped) -247.004 (for) -247.009 (searching) -247 (for) -247.009 (global) ] TJ 1.02 0 0 1 308.862 224.696 Tm [ (solutions) -311.989 (for) -312.991 (noncon) 38.9857 (v) 14.9989 (e) 15.0061 (x) -313.012 (optimization) -312 (problems\056) -505.99 (Its) -313.005 (basic) ] TJ 11.9551 TL T* [ (idea) -245.985 (is) -245.997 (t) 1 (o) -246.011 (ef) 23.9916 (f) 0.98984 (ecti) 24.0132 (v) 14.9989 (ely) -245.999 (and) -245.992 (gradually) -245.014 (shrink) -245.985 (the) -245.999 (g) 5.00205 (ap) -245.999 (between) ] TJ 0.98 0 0 1 308.862 200.786 Tm [ (the) -241.981 (lo) 25.9836 (wer) -242.006 (and) -240.994 (upper) -241.986 (bounds) -242.011 (of) -241.011 (global) -242.001 (optimum) -242.016 (by) -241.011 (ef) 24.9858 <026369656e746c79> ] TJ 11.9559 TL T* [ (branching) -237.988 (and) -238.993 (pruning) -237.98 (the) -238.981 (parameter) -237.985 (space\056) -311.008 (Fig\056) ] TJ ET Q 1 0 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.98 0 0 1 499.93 188.83 Tm (1) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.98 0 0 1 507.145 188.83 Tm (illustrates) Tj 1 0 0 1 308.862 176.875 Tm [ (the) -249.99 (optimization) -250.017 (procedure) -249.997 (in) -249.988 (BPGrad\056) ] TJ 0.98 0 0 1 320.817 164.686 Tm [ (In) -230.009 (order) -230.986 (to) -229.996 (branch) -231.009 (and) -229.991 (prune) -230.984 (the) -230.019 (space) -231.014 (we) -230.004 (assume) -230.979 (that) -230.004 (the) ] TJ -12.1991 -11.9551 Td [ (objecti) 26.0111 (v) 15.006 (e) -193.988 (functions) -194.993 (in) -193.988 (DL) -193.995 (are) -194.01 (Lipschitz) -194.995 (continuous) -193.99 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1 0 0 1 508.842 152.731 Tm (8) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 0.98 0 0 1 513.824 152.731 Tm [ (\135\054) -205.983 (or) -195 (can) ] TJ 1.017 0 0 1 308.862 140.776 Tm [ (be) -246 (approximated) -247.009 (by) -246.019 (Lipschitz) -245.987 (functions\056) -307.003 (This) -245.992 (is) -245.992 (moti) 24.019 (v) 25.0166 (ated) ] TJ 1.013 0 0 1 308.862 128.821 Tm [ (by) -245.984 (the) -246 (f) 9.99823 (acts) -245.989 (that) -246.995 (\0501\051) -245.993 (Lipschitz) -245.981 (continuity) -246.015 (pro) 14.984 (vides) -246 (a) -246.01 (natural) ] TJ 1.02 0 0 1 308.503 116.866 Tm [ (w) 10.0089 (ay) -288.012 (to) -287.988 (estimate) -287.003 (the) -288.014 (lo) 23.994 (wer) -287.015 (and) -288.005 (upper) -288.009 (bounds) -288.019 (of) -286.996 (the) -288.014 (global) ] TJ 0.35195 -11.9559 Td [ (optimum) -330.018 (\050see) -330.008 (Sec\056) ] TJ ET Q 1 0 0 rg q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1.02 0 0 1 391.529 104.91 Tm (2\0563\0561) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R34 9.9626 Tf 1.02 0 0 1 411.853 104.91 Tm [ (\051) -330.02 (used) -330 (in) -330.005 (BP) 108.998 (\054) -329.986 (and) -329.981 (\0502\051) -330.981 (it) -330.01 (can) -330.015 (also) ] TJ 0.99 0 0 1 308.862 92.9551 Tm [ (serv) 15.0178 (e) -253.013 (as) -254.005 (re) 14.9955 (gularization\054) -253.996 (if) -252.988 (needed\054) -253.983 (to) -253.993 (smoothe) 1.01488 (n) -254.003 (the) -254.008 (objecti) 25.9959 (v) 14.9955 (e) ] TJ 1 0 0 1 308.862 81 Tm [ (functions) -250.005 (so) -249.985 (that) -249.983 (the) -249.99 (returned) -249.993 (solutions) -249.983 (can) -250.002 (generalize) -249.997 (well\056) ] TJ ET Q Q Q q q 1 1 1 rg /a0 gs 48.406 786.422 515.188 -52.699 re f q /s5 gs /x6 Do Q q /s7 gs /x8 Do Q q /s9 gs /x10 Do Q q /s11 gs /x12 Do Q Q Q Q q 1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET 1 1 1 rg n 270 32 72 14 re f* 0.5 0.5 0.5 rg BT /F2 9 Tf 10.8 TL ET BT 1 0 0 1 297 35 Tm (3301) Tj T* ET Q endstream endobj 14 0 obj << /Filter /FlateDecode /Resources << /ExtGState << /a0 << /CA 1 /ca 1 >> >> /XObject << /x18 15 0 R >> >> /Length 28 /Group << /Type /Group /S /Transparency /CS /DeviceRGB /I true >> /BBox [ 78 746 96 765 ] /Type /XObject /Subtype /Form >> stream x+O4PH/VЯ0Pp 0 endstream endobj 15 0 obj << /Filter /FlateDecode /Resources 16 0 R /Length 107 /Type /XObject /BBox [ 78 746 96 765 ] /Subtype /Form >> stream xe AC̬wʠ =p,?]%+H-
Jc "82w8VSnGW;" endstream endobj 16 0 obj << /ExtGState << /a0 << /CA 1 /ca 1 >> >> >> endobj 17 0 obj << /SMask 18 0 R /Filter /DCTDecode /BitsPerComponent 8 /Height 233 /Length 16647 /ColorSpace /DeviceRGB /Width 560 /Subtype /Image >> stream Adobe d C $, !$4.763.22:ASF:=N>22HbINVX]^]8EfmeZlS[]Y C**Y;2;YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY 0" } !1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w !1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ? *;w2[ՔT QE QE QE QE QE QE QQ