%PDF-1.3 1 0 obj << /Kids [ 3 0 R 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R ] /Type /Pages /Count 9 >> endobj 2 0 obj << /Title (Fast Algorithms for Convolutional Neural Networks) /Producer (PyPDF2) /Author (Andrew Lavin\054 Scott Gray) /Subject (2016 IEEE Conference on Computer Vision and Pattern Recognition) >> endobj 3 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 13 0 R /Resources << /XObject << /x8 14 0 R /x6 17 0 R /x12 20 0 R /x10 23 0 R >> /ExtGState << /s9 26 0 R /s11 29 0 R /a0 << /CA 1 /ca 1 >> /R22 32 0 R /s5 33 0 R /s7 36 0 R >> /Font << /R29 39 0 R /F2 43 0 R /F1 44 0 R /R23 45 0 R /R25 48 0 R /R33 52 0 R /R27 57 0 R /R31 60 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 63 0 R 64 0 R 65 0 R 66 0 R 67 0 R 68 0 R 69 0 R 70 0 R 71 0 R 72 0 R 73 0 R 74 0 R 75 0 R 76 0 R ] >> endobj 4 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 77 0 R /Resources << /ExtGState << /R22 32 0 R >> /Font << /R49 78 0 R /R47 81 0 R /R45 86 0 R /R43 90 0 R /F2 94 0 R /F1 95 0 R /R23 45 0 R /R25 48 0 R /R33 52 0 R /R31 60 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 96 0 R 97 0 R 98 0 R ] >> endobj 5 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 99 0 R /Resources << /ExtGState << /R22 32 0 R >> /Font << /R49 78 0 R /R47 81 0 R /R45 86 0 R /R43 90 0 R /F2 100 0 R /R64 101 0 R /F1 106 0 R /R60 107 0 R /R62 111 0 R /R23 45 0 R /R25 48 0 R /R33 52 0 R /R31 60 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 114 0 R 115 0 R 116 0 R 117 0 R ] >> endobj 6 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 118 0 R /Resources << /ExtGState << /R22 32 0 R >> /Font << /R49 78 0 R /R47 81 0 R /R45 86 0 R /R43 90 0 R /F2 119 0 R /F1 120 0 R /R23 45 0 R /R25 48 0 R /R33 52 0 R /R31 60 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 121 0 R 122 0 R 123 0 R 124 0 R 125 0 R 126 0 R 127 0 R 128 0 R 129 0 R ] >> endobj 7 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 130 0 R /Resources << /ExtGState << /R22 32 0 R >> /Font << /F2 131 0 R /R49 78 0 R /R45 86 0 R /R43 90 0 R /R29 39 0 R /R64 101 0 R /R89 132 0 R /F1 136 0 R /R91 137 0 R /R23 45 0 R /R25 48 0 R /R33 52 0 R /R31 60 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 142 0 R 143 0 R 144 0 R 145 0 R ] >> endobj 8 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 146 0 R /Resources << /ExtGState << /R22 32 0 R >> /Font << /F2 147 0 R /R64 101 0 R /F1 148 0 R /R49 78 0 R /R43 90 0 R /R23 45 0 R /R25 48 0 R /R33 52 0 R /R31 60 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 149 0 R 150 0 R 151 0 R ] >> endobj 9 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 152 0 R /Resources << /ExtGState << /R22 32 0 R >> /Font << /F2 153 0 R /R118 154 0 R /F1 157 0 R /R120 158 0 R /R43 90 0 R /R23 45 0 R /R25 48 0 R /R33 52 0 R /R31 60 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 162 0 R 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R 171 0 R 172 0 R ] >> endobj 10 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 173 0 R /Resources << /ColorSpace << /R135 174 0 R >> /Pattern << /R134 175 0 R >> /ExtGState << /R133 176 0 R /R22 32 0 R >> /Font << /F2 177 0 R /R154 178 0 R /R43 90 0 R /R118 154 0 R /R127 219 0 R /R89 132 0 R /F1 222 0 R /R120 158 0 R /R25 48 0 R /R33 52 0 R /R31 60 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 11 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 223 0 R /Resources << /ExtGState << /R22 32 0 R >> /Font << /F2 224 0 R /R29 39 0 R /F1 225 0 R /R23 45 0 R /R25 48 0 R /R27 57 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 226 0 R 227 0 R 228 0 R 229 0 R 230 0 R 231 0 R 232 0 R 233 0 R 234 0 R 235 0 R 236 0 R 237 0 R 238 0 R 239 0 R 240 0 R 241 0 R 242 0 R 243 0 R 244 0 R 245 0 R 246 0 R 247 0 R 248 0 R 249 0 R 250 0 R 251 0 R 252 0 R 253 0 R 254 0 R 255 0 R 256 0 R 257 0 R ] >> endobj 12 0 obj << /Type /Catalog /Pages 1 0 R >> endobj 13 0 obj << /Length 19375 >> stream q q q 0.10000 0 0 0.10000 0 0 cm /R22 gs 0 g q 10 0 0 10 0 0 cm BT /R23 14.34620 Tf 1 0 0 1 138.41400 675.06700 Tm [ (F) 24.98890 (ast) -249.99800 (Algorithms) -250.01000 (f) 24.99230 (or) -249.99500 (Con) 40.00710 (v) 9.99625 (olutional) -249.99600 (Neural) -250.00800 (Netw) 9.99285 (orks) ] TJ /R25 11.95520 Tf 48.20000 -37.85820 Td [ (Andre) 24.98550 (w) -249.98700 (La) 20.01700 (vin) ] TJ /R27 8.96640 Tf -3.23398 -13.94690 Td (alavin\100acm\056org) Tj /R25 11.95520 Tf 148.93200 13.94690 Td [ (Scott) -250.00600 (Gray) ] TJ -15.78590 -13.94690 Td [ (Nerv) 24.99570 (ana) -250.01600 (Systems) ] TJ /R27 8.96640 Tf -12.27930 -13.94800 Td (sgray\100nervanasys\056com) Tj /R23 11.95520 Tf -158.25200 -41.04610 Td (Abstract) Tj /R29 9.96260 Tf -83.92770 -24.17190 Td [ (Deep) -347.00200 (con) 39.99880 (volutional) -346.99100 (neur) 14.99010 (al) -346.98900 (networks) -347.00600 (tak) 10.00570 (e) -347.00600 (GPU\055days) -346.98400 (of) ] TJ -11.95510 -11.95510 Td [ (computation) -316.01500 (to) -316.01300 (tr) 14.99140 (ain) -314.98900 (on) -316.01600 (lar) 36.99260 (g) 10.00320 (e) -315.99100 (data) -316.00400 (sets\056) -506.99600 (P) 79.99160 (edestrian) -315.99400 (detec\055) ] TJ 11.95510 TL T* [ (tion) -314.00200 (for) -314.00800 (self) -314.01000 (driving) -313.99500 (car) 10.00570 (s) -313.98100 (r) 37.01960 (equir) 36.99260 (es) -313.98600 (very) -313.99400 (low) -314.00200 (latency) 54.98350 (\056) -501.99200 (Ima) 10.00320 (g) 10.00320 (e) ] TJ T* [ (r) 37.01960 (eco) 9.99466 (gnition) -206.98300 (for) -207.00600 (mobile) -205.98200 (pho) -1.00473 (ne) 0.98145 (s) -207.01800 (is) -207.01100 (constr) 15.00240 (ained) -207.00700 (by) -206.98500 (limited) -206.99200 (pr) 44.98510 (o\055) ] TJ T* [ (cessing) -250.99600 (r) 37.01960 (esour) 36.99260 (ces\056) -311.98700 (The) -251.00200 (success) -251.00700 (of) -251.00500 (con) 40 (volutional) -251.00700 (neur) 14.99010 (al) -251.00500 (net\055) ] TJ T* [ (works) -232.01300 (i) 0.98758 (n) -231.99600 (these) -231.99100 (situations) -230.99800 (is) -231.98700 (limited) -230.98800 (by) -232 (how) -230.99800 (fast) -232.01500 (we) -231.01000 (can) -231.99600 (com\055) ] TJ 11.95630 TL T* [ (pute) -282.01800 (them\056) -405.01700 (Con) 40.01660 (ventional) -281.01800 (FFT) -282.00900 (based) -282.01400 (con) 39.99820 (volution) -281.01400 (is) -282.01900 (fast) -282.00700 (for) ] TJ 11.95470 TL T* [ (lar) 36.99260 (g) 10.00320 (e) -353.00400 <026c746572> 10.01920 (s\054) -380.00600 (b) 20.00160 (ut) -352.98800 (state) -353.99900 (of) -352.98800 (the) -352.99300 (art) -354 (con) 40 (volutional) -352.99000 (neur) 14.99010 (al) -354.00700 (net\055) ] TJ T* [ (works) -363.99100 (use) -364.99100 (small\054) ] TJ /R31 9.96260 Tf 72.33400 0 Td (3) Tj /R33 9.96260 Tf 8.03789 0 Td [ <02> -0.80011 ] TJ /R31 9.96260 Tf 10.80700 0 Td (3) Tj /R29 9.96260 Tf 8.61094 0 Td [ <026c746572> 10.01920 (s\056) -652.98000 (W) 91.98590 (e) -363.98300 (intr) 44.99740 (oduce) -364.99100 (a) -364.01300 (ne) 15.01710 (w) -364.00500 (class) ] TJ -99.78980 -11.95510 Td [ (of) -308.01500 (fast) -308.00100 (algorithms) -308.01000 (for) -308.00900 (con) 40 (volutional) -308.01700 (neur) 14.99010 (al) -308.01500 (networks) -307.99300 (using) ] TJ T* [ (W) 55.01230 (ino) 10.01550 (gr) 14.98890 (ad\047) 40 (s) -384.00800 (minimal) -384.01700 <026c746572696e67> -384.01600 (algorithms\056) -712.99900 (The) -384 (algorithms) ] TJ T* [ (compute) -366.98100 (minimal) -367 (comple) 20.00770 (xity) -366.99100 (con) 40 (volution) -367 (o) 10.00320 (ver) -367.98000 (small) -367.00700 (tiles\054) ] TJ 11.95630 TL T* [ (whic) 14.99870 (h) -380.99000 (mak) 10.00200 (es) -380.99300 (them) -380.98400 (fast) -381.00900 (with) -381.00100 (small) -381.00600 <026c746572> 10.02040 (s) -382.01000 (and) -381.02000 (small) -381.00500 (batc) 14.99010 (h) ] TJ 11.95510 TL T* [ (sizes\056) -863.98900 (W) 91.98650 (e) -434.99100 (benc) 15.01830 (hmark) -433.99100 (a) -434.98100 (GPU) -435.01600 (implementation) -433.98600 (of) -435.01300 (our) -435.00800 (al\055) ] TJ T* [ (gorithm) -370.99800 (with) -371.98300 (the) -370.99000 (VGG) -370.98700 (network) -371.00800 (and) -372.00200 (show) -371.00700 (state) -371.01400 (of) -370.98500 (the) -372.00900 (art) ] TJ T* [ (thr) 44.99130 (oughput) -250 (at) -249.98500 (batc) 14.99010 (h) -249.99300 (sizes) -250.01600 (fr) 44.98640 (om) -249.99000 (1) -249.99300 (to) -249.98500 (64\056) ] TJ /R23 11.95520 Tf 36.64800 TL T* [ (1\056) -249.99000 (Intr) 18.01460 (oduction) ] TJ /R25 9.96260 Tf 11.95510 -19.18980 Td [ (Deep) -323.00600 (con) 39.99880 (v) 20.00160 (olutional) -323 (neur) 0.98268 (al) -323.00200 (netw) 10.00940 (orks) -322.99300 (\050con) 39.99580 (vnets\051) -322.99000 (achie) 25.01540 (v) 14.98280 (e) ] TJ -11.95510 -11.95510 Td [ (state) -749 (of) -749.99100 (the) -749.00500 (art) -749.01200 (results) -750.00700 (on) -749.00200 (image) -748.99600 (recognition) -749.99700 (prob\055) ] TJ 11.95630 TL T* [ (lems) -280.01100 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 75.03870 285.02000 Tm (12) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 85.00120 285.02000 Tm (\135\133) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 91.63630 285.02000 Tm (8) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 96.61760 285.02000 Tm [ (\135\056) -400.01300 (The) -279.98800 (netw) 10.00940 (orks) -280.01900 (tak) 10.00570 (e) -279.99700 (se) 25.01670 (v) 14.98280 (eral) -279.99700 (days) -279.98000 (of) -279.99000 (GPU) -280.01200 (time) ] TJ -46.50550 -11.95470 Td [ (to) -403.99900 (train) -404 (and) -404.00500 (require) -404.00900 <7369676e690263616e74> -404.98300 (c) 1.01454 (o) -1.01454 (m) 0.99248 (pute) -405.01800 (resources) -404.00900 (during) ] TJ 11.95510 TL T* [ <636c6173736902636174696f6e> -320.99700 (as) -321.98400 (well\056) -524.99500 (Lar) 17.99330 (ger) -321.00800 (data) -321.98200 (sets) -320.99100 (and) -322.02000 (models) -321.01300 (lead) -321.98300 (to) ] TJ T* [ (better) -277.98500 (accurac) 14.99750 (y) -277.98800 (b) 20.00160 (ut) -277.98100 (also) -277 (increase) -277.99500 (computation) -277.98300 (time\056) -393.98800 (There\055) ] TJ T* [ (fore) -226.00800 (progress) -227.00400 (in) -225.98900 (deep) -226 (neural) -225.99700 (netw) 10.00940 (orks) -227.00900 (is) -225.98900 (limited) -226 (by) -225.99200 (ho) 24.98600 (w) -227.01800 (f) 9.99588 (ast) ] TJ T* [ (the) -249.99000 (netw) 10.00870 (orks) -249.98500 (can) -250.00100 (be) -249.99600 (computed\056) ] TJ 11.95510 -12.21680 Td [ (Lik) 10.01430 (e) 25.01110 (wise) -328.99200 (the) -328.99700 (application) -329.00800 (of) -329.00300 (con) 40 (vnets) -328.98400 (to) -328.99200 (lo) 24.98850 (w) -329.00100 (latenc) 14.98520 (y) -329 (in\055) ] TJ -11.95510 -11.95510 Td [ (ference) -276.01200 (problems\054) -283.01500 (such) -275.98100 (as) -277.01100 (pedestrian) -276.01300 (detection) -276.00800 (in) -275.98100 (self) -277.00800 (dri) 24.98600 (v\055) ] TJ T* [ (ing) -278.01500 (car) -278.00400 (video) -278.01400 (imagery) 64.98670 (\054) -285.00900 (is) -278.01900 (limited) -277.99000 (by) -277.98300 (ho) 24.98600 (w) -277.99000 (f) 9.99588 (ast) -279.00200 (a) -277.99800 (small) -278.00300 (set) -277.98300 (of) ] TJ T* [ (images\054) -250.01200 (possibly) -249.99000 (a) -250.00200 (single) -250.01100 (image\054) -250.01800 (can) -250.00100 (be) -249.99700 <636c617373690265642e> ] TJ 11.95510 -12.21600 Td [ (Distrib) 19.98990 (uted) -195.01400 (training) -196.01600 (of) -194.98600 (con) 40 (vnets) -195.00800 (can) -196.01000 (be) -194.98700 (achie) 25.01540 (v) 14.98280 (ed) -194.98700 (by) -195.99700 (parti\055) ] TJ -11.95510 -11.95510 Td [ (tioning) -229.01800 (each) -228.98900 (batch) -229.01200 (of) -229.01900 (e) 15.01220 (xamples) -228.99400 (across) -229.01600 (the) -229.01300 (nodes) -229.00300 (of) -229.01800 (a) -228.98600 (cluster) ] TJ T* [ (and) -253.99100 (accumulating) -254 (weight) -252.99300 (updates) -254.01600 (across) -253.99200 (the) -253.98900 (nodes\056) -320.98600 (A) -253.99400 (lar) 17.99700 (ge) ] TJ 11.95590 TL T* [ (batch) -261.00700 (size) -261.99100 (adv) 14.98400 (ersely) -261.00900 (af) 25.00810 (fects) -261.99400 (con) 40 (v) 14.98280 (er) 17.98960 (gence) -261.01800 (of) -261.99300 (the) -261.00800 (netw) 10.00810 (ork\054) -265.00500 (so) ] TJ 11.95510 TL T* [ (the) -384.00600 (minimum) -385.01200 (batch) -384.00600 (size) -384.00900 (that) -385.01900 (can) -384.01800 (be) -384.01300 (computed) -385.00200 (ef) 25.00810 <026369656e746c79> ] TJ T* [ (places) -249.99200 (an) -249.99700 (upper) -249.99000 (limit) -249.99600 (on) -249.98800 (cluster) -249.98400 (size) -249.99300 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 198.69400 105.17100 Tm (9) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 203.67500 105.17100 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 208.65600 105.17100 Tm (7) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 213.63800 105.17100 Tm (\135\056) Tj -151.57000 -12.21600 Td [ (State) -278.01200 (of) -279.01000 (the) -277.98500 (art) -277.99300 (con) 40 (vnet) -279 (architectures) -277.98300 (for) -277.99500 (image) -278.99800 (recogni\055) ] TJ -11.95510 -11.95510 Td [ (tion) -339.01700 (use) -340.01500 (deep) -339.00100 (netw) 10.00940 (orks) -340.00900 (of) ] TJ /R31 9.96260 Tf 109.22200 0 Td (3) Tj /R33 9.96260 Tf 7.85586 0 Td [ <02> -0.80011 ] TJ /R31 9.96260 Tf 10.62300 0 Td (3) Tj /R25 9.96260 Tf 8.36328 0 Td [ (con) 40 (v) 20.00160 (olutional) -338.99700 (layers\054) -362.00800 (be\055) ] TJ 122.68600 487.26700 Td [ (cause) -261.99800 (the) 14.98520 (y) -261.99100 (achie) 25.01540 (v) 14.98280 (e) -262 (better) -261.00800 (accurac) 14.99750 (y) -261.99100 (with) -262.01000 (fe) 25.00560 (wer) -262 (weights) -262.00500 (than) ] TJ 11.95510 TL T* [ (shallo) 25.00320 (w) -249.99500 (netw) 10.00810 (orks) -249.98500 (with) -250.01500 (lar) 17.99700 (ger) -250.00200 <026c74657273> -250.00700 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 455.26300 556.31200 Tm (12) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 465.22500 556.31200 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 470.20600 556.31200 Tm (8) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 475.18800 556.31200 Tm (\135\056) Tj -154.37000 -15.20510 Td [ (Therefore) -365.98800 (there) -366.01700 (is) -366.00500 (a) -365.00300 (strong) -365.99300 (need) -366.01700 (for) -366.01700 (f) 9.99343 (ast) -366.00800 (con) 39.99820 (vnet) -366.00300 (algo\055) ] TJ -11.95510 -11.95510 Td [ (rithms) -442.01000 (for) -440.98800 (small) -442.01400 (batch) -441.01700 (sizes) -441.99500 (and) -441.02000 (small) -442.01400 <026c746572732e> -885 (Ho) 24.98600 (we) 25.01540 (v) 14.98280 (er) ] TJ 11.95510 TL T* [ (con) 39.99820 (v) 14.98280 (entional) -262.00800 (con) 39.99820 (vnet) -261.98300 (libraries) -260.99600 (require) -261.99500 (lar) 17.99700 (ge) -261.99500 (batch) -261.98600 (sizes) -261.98600 (and) ] TJ T* [ (lar) 17.99700 (ge) -249.99700 <026c74657273> -250.01000 (for) -250 (f) 9.99588 (ast) -249.98800 (operation\056) ] TJ 11.95510 -15.20510 Td [ (This) -290.00800 (paper) -289.99300 (introduces) -290 (a) -289.01600 (ne) 25.01540 (w) -289.98600 (class) -290.02000 (of) -289.99100 (f) 9.99343 (ast) -289.98100 (algorithms) -289.98100 (for) ] TJ -11.95510 -11.95510 Td [ (con) 39.99820 (v) 20.00160 (olutional) -297.98500 (neural) -297.00400 (netw) 10.00810 (orks) -298.01400 (based) -297.98000 (on) -297 (the) -297.98000 (minimal) -297.98500 <026c746572> 20.01630 (\055) ] TJ T* [ (ing) -296.01200 (algori) 0.99738 (thms) -296 (disco) 14.99750 (v) 14.98280 (ered) -295.99500 (by) -295 (T) 79.99160 (oom) -296.00500 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 458.08200 466.12700 Tm (14) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 468.04500 466.12700 Tm [ (\135) -295.01400 (and) -295.98500 (Cook) -294.98000 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 519.48200 466.12700 Tm (4) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 524.46300 466.12700 Tm [ (\135) -295.99500 (and) ] TJ -215.60100 -11.95470 Td [ (generalized) -309.00200 (by) -310.01700 (W) 39.99330 (inograd) -308.98800 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 416.74700 454.17200 Tm (16) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 426.71000 454.17200 Tm [ (\135\056) -487.99700 (The) -309.00200 (algorithms) -308.99700 (can) -309.99200 (reduce) ] TJ -117.84800 -11.95630 Td [ (the) -253.00900 (arithmetic) -252.98700 (comple) 14.99750 (xity) -252.99200 (of) -253.01700 (a) -252.98200 (con) 39.99820 (vnet) -253.00200 (layer) -253.01700 (by) -253.00700 (up) -253.98700 (to) -253.00700 (a) -252.98200 (f) 9.99343 (ac\055) ] TJ 11.95470 TL T* [ (tor) -312.01700 (of) -311.98500 (4) -311.98200 (compared) -312.01700 (to) -312.01400 (direct) -312.01700 (con) 39.99820 (v) 20.00160 (olution\056) -495.99100 (Almost) -311.98700 (all) -312.01700 (of) -311.98700 (the) ] TJ T* [ (arithmetic) -319.01500 (is) -318.01300 (performed) -318.99600 (by) -318.99600 (dense) -318.99600 (matrix) -318 (multiplies) -318.98600 (of) -319.00600 (suf\055) ] TJ T* [ <026369656e74> -389.98900 (dimensions) -389.01800 (to) -390.00100 (be) -390.01300 (computed) -389.00400 (ef) 25.00560 <026369656e746c79> 65.00880 (\054) -424.98300 (e) 25.01050 (v) 14.98280 (en) -390.01300 (when) ] TJ T* [ (the) -275.98600 (batch) -275.98600 (size) -275.98800 (is) -276.02000 (v) 14.98280 (ery) -275.01500 (small\056) -387.98400 (The) -275.98800 (memory) -276.00800 (requirements) -276.00300 (are) ] TJ 11.95590 TL T* [ (also) -395.99800 (light) -397 (com) 0.99248 (pared) -396.99500 (to) -396 (the) -396.00300 (con) 39.99820 (v) 14.98280 (entional) -397.00200 (FFT) -396.01700 (con) 39.99820 (v) 20.00160 (olution) ] TJ 11.95510 TL T* [ (algorithm\056) -695.99700 (These) -378.00800 (f) 9.99588 (actors) -378.99100 (mak) 10.01300 (e) -377.98100 (practical) -378.99100 (implementations) ] TJ T* [ (possible\056) -358.01900 (Our) -265.99000 (implementation) -266.00500 (for) -267.01400 (NVIDIA) -265.99000 (Maxwell) -266.01000 (GPUs) ] TJ T* [ (achie) 25.01540 (v) 14.98280 (es) -269.99200 (state) -271.00100 (of) -269.99200 (the) -269.98700 (art) -271.01400 (throughput) -269.98900 (for) -271.01400 (all) -269.98400 (batch) -269.98400 (sizes) -271.00400 (mea\055) ] TJ T* [ (sured\054) -201.99600 (from) -189.99800 (1) -190.00300 (to) -189.99500 (64\054) -201.99300 (while) -189.99000 (using) -190.01700 (at) -190.98300 (most) -190.01700 (16MB) -190.01200 (of) -190.00800 (w) 10.00320 (orkspace) ] TJ T* [ (memory) 64.99650 (\056) ] TJ /R23 11.95520 Tf 33.66020 TL T* [ (2\056) -249.98900 (Related) -250.00200 (W) 74.99720 (ork) ] TJ /R25 9.96260 Tf 11.95510 -22.17770 Td [ (The) -259.01100 (FFT) -260.00300 (and) -259.01100 (con) 39.99820 (v) 20.00160 (olution) -259.99600 (theorem) -258.99600 (ha) 19.99670 (v) 14.98280 (e) -258.98100 (been) -259.99600 (used) -259.00600 (to) -259.98600 (re\055) ] TJ -11.95510 -11.95630 Td [ (duce) -360.01600 (the) -360.99100 (arithmetic) -359.98900 (comple) 14.99750 (xity) -361.01300 (of) -360.01800 (con) 39.99820 (vnet) -360.00400 (layers\054) -388.00400 <02727374> -361.01300 (by) ] TJ 11.95470 TL (Mathieu) ' /R29 9.96260 Tf 36.87700 0 Td [ (et) -368.99500 (al) ] TJ /R25 9.96260 Tf 18.62580 0 Td [ (\056) -368.99500 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 373.84900 242.91600 Tm (11) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 383.81200 242.91600 Tm [ (\135\054) -398.01200 (then) -368.98300 <7265026e6564> -368.00200 (by) -368.98700 (V) 111.00600 (asilache) ] TJ /R29 9.96260 Tf 117.42300 0 Td [ (et) -368.99200 (al) ] TJ /R25 9.96260 Tf 18.62580 0 Td [ (\056) -368.99200 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 529.34500 242.91600 Tm (15) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 539.30800 242.91600 Tm (\135\054) Tj -230.44600 -11.95510 Td [ (and) -249.99300 (then) -249.98500 (implemented) -249.98800 (in) -249.98500 (the) -249.98800 (NVIDIA) -249.99300 (cuDNN) -249.98800 (library) -249.98300 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 528.54600 230.96100 Tm (1) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 533.52800 230.96100 Tm (\135\056) Tj -212.71100 -15.20510 Td [ (The) -689.01700 (Strassen) -690.01500 (algorithm) -688.98300 (for) -690.00300 (f) 9.99343 (ast) -689.01200 (matrix) -690.01700 (multiplica\055) ] TJ -11.95510 -11.95510 Td [ (tion) -218.01800 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 329.85400 203.80100 Tm (13) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 339.81600 203.80100 Tm [ (\135) -218.00500 (w) 10 (as) -218 (used) -217.99100 (by) -217.99300 (Cong) -218.01300 (and) -217.99800 (Xiao) -217.99300 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 460.61300 203.80100 Tm (3) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R25 9.96260 Tf 1 0 0 1 465.59400 203.80100 Tm [ (\135) -218.98800 (to) -217.99300 (reduce) -218.00800 (the) -217.99300 (num\055) ] TJ -156.73200 -11.95510 Td [ (ber) -277.01500 (of) -275.99100 (con) 39.99820 (v) 20.00160 (olutions) -277.00500 (in) -277 (a) -275.99800 (con) 39.99820 (vnet) -276.99800 (layer) 39.99330 (\054) -283.00700 (thereby) -277.00300 (reducing) -276.99800 (its) ] TJ 11.95470 TL T* [ (total) -226.01900 (arithmetic) -226.01100 (comple) 14.99750 (xity) 64.99410 (\056) -301.00400 (The) -225.99700 (authors) -225.98200 (also) -225.98700 (suggested) -226.01100 (that) ] TJ 11.95630 TL T* [ (more) -334.99300 (techniques) -335.01500 (from) -334.01300 (arithmetic) -335.01300 (comple) 14.99750 (xity) -335.01800 (theory) -334.98800 (might) ] TJ 11.95510 TL T* [ (be) -249.99700 (applicable) -250.01700 (to) -249.98500 (con) 39.99820 (vnets\056) ] TJ 11.95510 -15.20470 Td [ (V) 111.00400 (arious) -372.98200 (approaches) -372.99200 (ha) 19.99670 (v) 14.98280 (e) -373.00100 (been) -372.99700 (attempted) -373.00600 (to) -372.98700 (reduce) -373.00100 (the) ] TJ -11.95510 -11.95510 Td [ (comple) 14.99750 (xity) -260.99100 (of) -261.01300 (con) 39.99820 (vnets) -260.99600 (by) -261.00500 (quantizing) -261.01500 (or) -261.99500 (otherwise) -260.99600 (approx\055) ] TJ T* [ (imating) -468 (the) -467.99300 (con) 39.99820 (v) 20.00160 (olutional) -469.01500 (layer) 55.01040 (\056) -963.99100 (W) 79.98660 (e) -468.00500 (consider) -469.00500 (thes) 0.99493 (e) -468.98500 (ap\055) ] TJ T* [ (proaches) -210 (as) -208.98200 (orthogonal) -210.00700 (and) -210 (complementary) -210.00400 (to) -209.01400 (those) -209.98500 (that) -209.98900 (e) 15.01220 (x\055) ] TJ T* [ (ploit) -232.98800 (algebraic) -233.99500 (structure\054) -235.98300 (and) -233.99500 (therefore) -232.98600 (declare) -232.99100 (them) -234.02000 (outside) ] TJ T* [ (the) -249.99000 (scope) -249.99000 (of) -249.99500 (this) -250.01200 (paper) 55.00800 (\056) ] TJ -13.74100 -29.88830 Td (1) Tj ET Q Q Q q q 1 1 1 rg /a0 gs 48.40600 786.42200 515.18800 -52.69900 re f q /s5 gs /x6 Do Q q /s7 gs /x8 Do Q q /s9 gs /x10 Do Q q /s11 gs /x12 Do Q Q Q Q q 1 0 0 1 0 0 cm BT /F1 12 Tf 14.40000 TL ET 1 1 1 rg n 270 47 72 14 re f* 0.50000 0.50000 0.50000 rg BT /F2 9 Tf 10.80000 TL ET BT 1 0 0 1 297 50 Tm (4013) Tj T* ET Q endstream endobj 14 0 obj << /Filter /FlateDecode /Resources << /ExtGState << /a0 << /CA 1 /ca 1 >> >> /XObject << /x18 15 0 R >> >> /Length 28 /Group << /Type /Group /S /Transparency /CS /DeviceRGB /I true >> /BBox [ 78 746 96 765 ] /Type /XObject /Subtype /Form >> stream x+O4PH/VЯ0Pp 0 endstream endobj 15 0 obj << /Filter /FlateDecode /Resources 16 0 R /Length 107 /Type /XObject /BBox [ 78 746 96 765 ] /Subtype /Form >> stream xe AC̬wʠ =p,?]%+H-
Jc "82w8VSnGW;"
endstream
endobj
16 0 obj
<<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
>>
endobj
17 0 obj
<<
/Filter /FlateDecode
/Resources <<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
/XObject <<
/x15 18 0 R
>>
>>
/Length 28
/Group <<
/Type /Group
/S /Transparency
/CS /DeviceRGB
/I true
>>
/BBox [ 67 752 84 775 ]
/Type /XObject
/Subtype /Form
>>
stream
x+O4PH/VЯ04Up
0
endstream
endobj
18 0 obj
<<
/Filter /FlateDecode
/Resources 19 0 R
/Length 228
/Type /XObject
/BBox [ 67 752 84 775 ]
/Subtype /Form
>>
stream
xeQKn!s ?FPav6R٪TS.
b];15YyR
{7QL.\:Rv/x9l+L7h%1!}i/AI(kz"U&,YO![R hg{3}4/GyYF:!w}Gn+'xJcO9i뽼_-:`
endstream
endobj
19 0 obj
<<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
>>
endobj
20 0 obj
<<
/Filter /FlateDecode
/Resources <<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
/XObject <<
/x24 21 0 R
>>
>>
/Length 28
/Group <<
/Type /Group
/S /Transparency
/CS /DeviceRGB
/I true
>>
/BBox [ 132 751 480 772 ]
/Type /XObject
/Subtype /Form
>>
stream
x+O4PH/VЯ02Qp
0
endstream
endobj
21 0 obj
<<
/Filter /FlateDecode
/Resources 22 0 R
/Length 53223
/Type /XObject
/BBox [ 132 751 480 772 ]
/Subtype /Form
>>
stream
xtI:6%Q㨈?7rA= u%6 ?Y(WbWo{B>9
x`Znϳ|8{3?0x*z ǃ|,@:w>`c|*ϻⳅKO3`g
:_|}}><.6`Z{{3]#<_o"~:ͺgk7/Ұ@|K yp ]03ʷCmş8˽Y?>(3!Bwqs.Z8,~~=rMT̩y+/*w: uBZ_`ߵp`%M?ɝ1ɳw=vDۉy&xb4Q>d@ sg~lA