%PDF-1.3 1 0 obj << /Kids [ 3 0 R 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R 12 0 R ] /Type /Pages /Count 10 >> endobj 2 0 obj << /Title (EPIC\055Fusion\072 Audio\055Visual Temporal Binding for Egocentric Action Recognition) /Producer (PyPDF2) /Author (Evangelos Kazakos\054 Arsha Nagrani\054 Andrew Zisserman\054 Dima Damen) /Subject (IEEE International Conference on Computer Vision) >> endobj 3 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 14 0 R /Resources << /XObject << /R18 15 0 R /R19 16 0 R /x6 17 0 R >> /ExtGState << /R20 19 0 R /R8 20 0 R /a1 << /CA 0.5 /ca 0.5 >> /a0 << /CA 1 /ca 1 >> >> /Font << /R9 21 0 R /F2 25 0 R /R11 26 0 R /F1 30 0 R /R15 31 0 R /R13 35 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] >> /Group 38 0 R /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 4 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 39 0 R /Resources << /ExtGState << /R8 20 0 R >> /Font << /R9 21 0 R /F2 40 0 R /R11 26 0 R /F1 41 0 R /R15 31 0 R /R27 42 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 5 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 45 0 R /Resources << /XObject << /R34 46 0 R /R35 47 0 R >> /ExtGState << /R20 19 0 R /R8 20 0 R >> /Font << /R9 21 0 R /R46 48 0 R /R44 51 0 R /R42 56 0 R /R40 59 0 R /F2 62 0 R /R11 26 0 R /F1 63 0 R /R15 31 0 R /R38 64 0 R /R36 68 0 R /R48 72 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] >> /Group 38 0 R /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 6 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 76 0 R /Resources << /XObject << /R67 77 0 R /R66 78 0 R >> /ExtGState << /R20 19 0 R /R8 20 0 R >> /Font << /R60 79 0 R /R58 82 0 R /R9 21 0 R /F1 86 0 R /R56 87 0 R /R44 51 0 R /R42 56 0 R /R40 59 0 R /F2 91 0 R /R11 26 0 R /R13 35 0 R /R15 31 0 R /R38 64 0 R /R62 92 0 R /R36 68 0 R /R48 72 0 R /R64 95 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] >> /Group 38 0 R /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 7 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 100 0 R /Resources << /XObject << /R74 101 0 R /R75 102 0 R >> /ExtGState << /R20 19 0 R /R8 20 0 R >> /Font << /F2 103 0 R /R11 26 0 R /F1 104 0 R /R15 31 0 R /R9 21 0 R /R36 68 0 R /R44 51 0 R /R40 59 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] >> /Group 38 0 R /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 8 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 105 0 R /Resources << /XObject << /R83 106 0 R /R82 107 0 R /R85 108 0 R /R84 109 0 R >> /ExtGState << /R20 19 0 R /R8 20 0 R >> /Font << /F2 110 0 R /R11 26 0 R /F1 111 0 R /R40 59 0 R /R9 21 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] >> /Group 38 0 R /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 9 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 112 0 R /Resources << /XObject << /R98 113 0 R /R99 114 0 R /R92 115 0 R /R93 116 0 R /R94 117 0 R /R95 118 0 R /R96 119 0 R /R97 120 0 R >> /ExtGState << /R20 19 0 R /R8 20 0 R >> /Font << /R9 21 0 R /R46 48 0 R /R44 51 0 R /R42 56 0 R /R40 59 0 R /F2 121 0 R /R11 26 0 R /F1 122 0 R /R15 31 0 R /R38 64 0 R /R36 68 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] >> /Group 38 0 R /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 10 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 123 0 R /Resources << /ExtGState << /R8 20 0 R >> /Font << /R44 51 0 R /R48 72 0 R /R9 21 0 R /R42 56 0 R /R40 59 0 R /F2 124 0 R /R11 26 0 R /F1 125 0 R /R15 31 0 R /R38 64 0 R /R36 68 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 11 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 126 0 R /Resources << /ExtGState << /R8 20 0 R >> /Font << /F2 127 0 R /R11 26 0 R /F1 128 0 R /R15 31 0 R /R9 21 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 12 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 129 0 R /Resources << /ExtGState << /R8 20 0 R >> /Font << /F2 130 0 R /R11 26 0 R /F1 131 0 R /R15 31 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ ] >> endobj 13 0 obj << /Type /Catalog /Pages 1 0 R >> endobj 14 0 obj << /Length 18542 >> stream q q q 0.1 0 0 0.1 0 0 cm /R8 gs 0 g q 10 0 0 10 0 0 cm BT /R9 14.3462 Tf 1 0 0 1 155.177 675.067 Tm [ (EPIC\055Fusion\072) -310.005 (A) 50.0017 (udio\055V) 37 (isual) -250.005 (T) 91.9897 (emporal) -249.988 (Binding) ] TJ 38.0242 -17.9332 Td [ (f) 24.9915 (or) -249.995 (Egocentric) -250.008 (Action) -249.996 (Recognition) ] TJ /R11 11.9552 Tf -82.3172 -37.8578 Td [ (Ev) 25.012 (angelos) -250.015 (Kazak) 9.99724 (os) ] TJ /R13 7.9701 Tf 93.5359 4.33867 Td [ (1) -0.30019 ] TJ /R11 11.9552 Tf 16.6871 -4.33867 Td [ (Arsha) -250.006 (Nagrani) ] TJ /R13 7.9701 Tf 70.0449 4.33867 Td [ (2) -0.30019 ] TJ /R11 11.9552 Tf 16.6879 -4.33867 Td [ (Andre) 24.9855 (w) -249.987 (Zisserman) ] TJ /R13 7.9701 Tf 91.0012 4.33867 Td [ (2) -0.30019 ] TJ /R11 11.9552 Tf 16.6879 -4.33867 Td [ (Dima) -249.989 (Damen) ] TJ /R13 7.9701 Tf 64.0793 4.33867 Td [ (1) -0.30019 ] TJ -411.657 -13.9477 Td [ (1) -0.30019 ] TJ /R11 11.9552 Tf 4.73164 -4.33828 Td [ (V) 59.9906 (isual) -250.008 (Information) -250.004 (Lab\054) -250.003 (Uni) 24.9957 (v) 14.9851 (ersity) -249.989 (of) -250.014 (Bristol) ] TJ /R13 7.9701 Tf 228.593 4.33828 Td [ (2) -0.30019 ] TJ /R11 11.9552 Tf 4.73203 -4.33828 Td [ (V) 59.9916 (isual) -250.008 (Geometry) -250.002 (Group\054) -249.993 (Uni) 24.9957 (v) 14.9851 (ersity) -249.989 (of) -250.014 (Oxford) ] TJ /R9 11.9552 Tf -160.013 -41.0461 Td (Abstract) Tj /R15 9.9626 Tf -83.9277 -23.9109 Td [ (W) 91.9865 (e) -434.01 (focus) -433.007 (on) -433.996 (multi\055modal) -433.994 (fusion) -432.99 (for) -433.989 (e) 39.9884 (gocentric) -433.984 (action) ] TJ -11.9551 -11.9551 Td [ (r) 37.0196 (eco) 9.99466 (gnition\054) -501.99 (and) -451.988 (pr) 44.9839 (opose) -451.006 (a) -451.998 (no) 10.0081 (vel) -452.004 (ar) 36.9852 (c) 15.0122 (hit) 1 (ectur) 36.984 (e) -452.008 (for) -451.986 (multi\055) ] TJ 11.9547 TL T* [ (modal) -256.991 (tempor) 15 (al\055binding) 10.0069 (\054) -257.994 (i\056e) 14.9828 (\056) -329.982 (the) -255.989 (combination) -257.006 (of) -257.004 (modalities) ] TJ T* [ (within) -225.996 (a) -225.016 (r) 14.984 (ang) 10.013 (e) -226.006 (of) -225.009 (tempor) 15 (al) -225.989 (of) 18.0092 (fsets\056) -302.004 (W) 91.9859 (e) -226.006 (tr) 14.9901 (ai) 0.99248 (n) -225.997 (the) -225.994 (ar) 36.9865 (c) 15.0122 (hitectur) 37.0036 (e) ] TJ T* [ (with) -319.012 (thr) 36.9926 (ee) -317.995 (mo) -1.01209 (dal) 0.99738 (ities) -318.982 (\226) -319 (RGB\054) -318.992 (Flow) -319.015 (and) -318.011 (A) 20.0016 (udio) -318.984 (\226) -319 (and) -318.991 (com\055) ] TJ 11.9559 TL T* [ (bine) -298.016 (them) -298.018 (with) -299.015 (mid\055le) 14.9926 (vel) -297.991 (fusion) -297.993 (alongside) -297.987 (spar) 9.98118 (se) -299.006 (tempor) 15 (al) ] TJ 11.9551 TL T* [ (sampling) -208.98 (of) -207.992 (fused) -209.006 (r) 37.0196 (epr) 36.9816 (esentations\056) -295.981 (In) -208.984 (contr) 14.9975 (ast) -209.007 (with) -208.992 (pr) 36.9865 (e) 15.0122 (vious) ] TJ T* [ (works\054) -419 (modalities) -384.986 (ar) 36.9852 (e) -385 (fused) -385.015 (befor) 36.9926 (e) -386.019 (tempor) 15 (al) -384.982 (a) 10.0032 (g) 10.0032 (gr) 36.9865 (e) 39.9884 (gation\054) ] TJ T* [ (with) -391 (shar) 36.9963 (ed) -390.993 (modality) -390.997 (and) -391.018 (fusion) -390.997 (weights) -390.994 (o) 10.0032 (ver) -390.996 (time) 14.9901 (\056) -733.012 (Our) ] TJ T* [ (pr) 44.9839 (oposed) -266.992 (ar) 36.9852 (c) 15.0128 (hite) 0.99493 (ctur) 36.9877 (e) -267.019 (is) -267 (tr) 14.9914 (ained) -266.016 (end\055to\055end\054) -270.994 (outperforming) ] TJ T* [ (individual) -249.987 (modalities) -249.989 (as) -249.986 (well) -250.013 (as) -249.986 (late\055fusion) -249.99 (of) -249.985 (modalities\056) ] TJ 11.9551 -11.9563 Td [ (W) 91.9865 (e) -373 (demonstr) 15.011 (ate) -372.009 (the) -372.989 (importance) -371.982 (of) -372.984 (audio) -371.989 (in) -372.984 (e) 39.9884 (gocentric) ] TJ -11.9551 -11.9547 Td [ (vision\054) -262.006 (on) -259.986 (per) 20.004 (\055class) -260.018 (basis\054) -262.003 (for) -260.017 (identifying) -259.988 (actions) -260.003 (as) -259.984 (well) -260.013 (as) ] TJ T* [ (inter) 14.9987 (acting) -367.012 (objects\056) -661.983 (Our) -366.984 (method) -367.996 (ac) 15.0183 (hie) 14.9852 (ves) -367 (state) -367.015 (of) -366.985 (the) -368.01 (art) ] TJ T* [ (r) 37.0196 (esults) -310 (on) -310.017 (both) -310.005 (the) -310.019 (seen) -309.985 (and) -310.012 (unseen) -310.014 (test) -310.01 (sets) -310.012 (of) -310.014 (the) -310.019 (lar) 36.9914 (g) 10.0032 (est) ] TJ T* [ (e) 39.989 (gocentric) -222.019 (dataset\072) -296 (EPIC\055Ki) 1.01576 (tc) 14.9803 (hens\054) -227.986 (on) -221.992 (all) -221.002 (metrics) -221.983 (using) -222.012 (the) ] TJ T* [ (public) -250.012 (leaderboar) 37.0098 (d\056) ] TJ /R9 11.9552 Tf 33.968 TL T* [ (1\056) -249.99 (Intr) 18.0146 (oduction) ] TJ /R11 9.9626 Tf 11.9551 -18.9293 Td [ (W) 39.9939 (ith) -409.99 (the) -410.002 (a) 19.9918 (v) 24.9811 (ailability) -410.005 (of) -410.989 (mul) 1.00473 (ti\055sensor) -411.005 (wearable) -410.015 (de) 25.0154 (vices) ] TJ -11.9551 -11.9547 Td [ (\050e\056g\056) -533.007 (GoPro\054) -603.99 (Google) -532.988 (Glass\054) -603.996 (Microsoft) -533.016 (Hololens\054) -603.98 (Magi\055) ] TJ 11.9551 TL T* [ (cLeap\051\054) -565.989 (e) 15.0128 (gocentric) -503 (audio\055video) -502.002 (recordings) -502.997 (ha) 19.9967 (v) 14.9828 (e) -503.019 (become) ] TJ 11.9559 TL T* [ (popular) -252.997 (in) -253.004 (man) 14.9901 (y) -251.992 (areas) -252.987 (such) -253.004 (as) -253.015 (e) 15.0122 (xtreme) -251.985 (sports\054) -254.007 (health) -253.002 (moni\055) ] TJ 11.9551 TL T* [ (toring\054) -216.994 (life) -209.017 (logging\054) -218 (and) -209.018 (home) -209.006 (automation\056) -295.982 (As) -209.014 (a) -208.99 (result\054) -217.983 (there) ] TJ T* [ (has) -255.99 (been) -255.015 (a) -256 (rene) 25.0081 (wed) -255.013 (interest) -256.015 (from) -255.005 (the) -255.989 (computer) -254.994 (vision) -256.001 (com\055) ] TJ T* [ (munity) -309.985 (on) -310.997 (collecting) -309.996 (lar) 17.997 (ge\055scale) -309.99 (datasets) -310.987 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 220.891 224.462 Tm (8) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 225.872 224.462 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 230.056 224.462 Tm (35) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 240.019 224.462 Tm [ (\135) -309.99 (as) -309.985 (well) -310.995 (as) ] TJ -189.907 -11.9551 Td [ (de) 25.016 (v) 14.9828 (eloping) -369.987 (ne) 25.0167 (w) -370.014 (or) -370.996 (adapting) -369.991 (e) 15.0122 (xisting) -370.013 (methods) -369.98 (to) -370.004 (the) -370.99 <027273742d> ] TJ 11.9551 TL T* [ (person) -249.984 (point\055of\055vie) 25.0081 (w) -249.995 (scenario) -249.99 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 175.192 200.552 Tm (9) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 180.173 200.552 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 183.959 200.552 Tm (17) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 193.922 200.552 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 197.708 200.552 Tm (21) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 207.67 200.552 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 211.456 200.552 Tm (32) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 221.419 200.552 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 225.205 200.552 Tm (44) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 235.167 200.552 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 238.953 200.552 Tm (46) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 248.916 200.552 Tm (\135\056) Tj -186.848 -11.9559 Td [ (In) -360.017 (this) -359.012 (w) 10.0014 (ork\054) -388.004 (we) -359 (e) 15.0122 (xplore) -360.004 (audio) -360 (as) -360.017 (a) -359.004 (prime) -360.001 (modality) -360.013 (to) ] TJ -11.9551 -11.9551 Td [ (pro) 14.9846 (vide) -479.005 (complementary) -480 (information) -479.014 (to) -479.006 (visual) -480.011 (modalities) ] TJ 11.9547 TL T* [ (\050appearance) -349.018 (and) -350.014 (motion\051) -349.002 (in) -348.988 (e) 15.0122 (gocentric) -348.988 (action) -350.005 (recognition\056) ] TJ T* [ (While) -362.002 (audio) -363.02 (has) -362.011 (been) -362.017 (e) 15.0122 (xplored) -363.018 (in) -362.006 (video) -362 (understanding) -362.993 (in) ] TJ T* [ (general) -567.002 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 88.3984 140.776 Tm (2) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 93.3797 140.776 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 97.3848 140.776 Tm (3) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 102.366 140.776 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 106.371 140.776 Tm (5) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 111.352 140.776 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 115.367 140.776 Tm (6) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 120.348 140.776 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 124.354 140.776 Tm (11) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 134.316 140.776 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 138.321 140.776 Tm (23) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 148.284 140.776 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 152.289 140.776 Tm (27) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 162.251 140.776 Tm (\226) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 167.232 140.776 Tm (29) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 177.195 140.776 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 181.21 140.776 Tm (34) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 191.172 140.776 Tm [ (\135) -283.015 (the) -283.004 (e) 15.0122 (gocentric) -284.019 (domain) ] TJ -141.06 -11.9559 Td [ (in) -342.989 (particular) -342.989 (of) 25.0167 (fers) -343.007 (rich) -342.997 (sounds) -343.009 (resulting) -343.007 (from) -342.992 (the) -342.994 (interac\055) ] TJ 11.9551 TL T* [ (tions) -274.002 (between) -273.986 (hands) -274.016 (and) -273.987 (objects\054) -280.007 (as) -273.992 (well) -273.981 (as) -273.991 (the) -273.986 (close) -273.984 (prox\055) ] TJ T* [ (imity) -424.989 (of) -426.005 (the) -425.019 (wearable) -424.994 (microphone) -426.015 (to) -425.015 (the) -425.02 (under) 18.0043 (going) -426.02 (ac\055) ] TJ T* [ (tion\056) -428.019 (Audio) -290.005 (is) -288.997 (a) -289.996 (prime) -288.993 (discriminator) -289.003 (for) -289.993 (some) -288.991 (actions) -290.008 (\050e\056g\056) ] TJ T* [ (\140w) 9.99772 (ash\047\054) -430.981 (\140fry\047\051) -395.004 (as) -394.992 (well) -395.02 (as) -394.011 (objects) -395.01 (within) -394.998 (actions) -395.01 (\050e\056g\056) -394.99 (\140put) ] TJ ET Q q 2362.49 0 0 1205.12 3088.62 4537.34 cm /R19 Do Q q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 308.862 435.403 Tm [ (Figure) -266.01 (1\072) -342.989 (As) -265.985 (the) -267.007 (width) -266.007 (of) -265.995 (the) -267.004 (temporal) -266.01 (binding) -265.995 (windo) 25 (w) -267.009 (in\055) ] TJ 11.9551 TL T* [ (creases) -335.01 (\050left) -335 (t) 0.98758 (o) -334.998 (right\051\054) -356 (modalities) -334.993 (\050appearance\054) -355.995 (motion) -335.003 (and) ] TJ 11.9559 TL T* [ (audio\051) -249.983 (are) -250.01 (fused) -249.99 (with) -250.015 (v) 24.9811 (arying) -249.983 (temporal) -250.012 (shifts\056) ] TJ 40.2031 TL T* [ (plate\047) -271.006 (vs) -271.982 (\140put) -270.999 (bag\047\051\056) -374.014 (At) -271.984 (times\054) -276.983 (the) -271.004 (temporal) -270.989 (progression) -271.999 (\050or) ] TJ 11.9551 TL T* [ (change\051) -269.016 (of) -269.992 (sounds) -268.982 (can) -269.016 (separate) -269.014 (visually) -269.014 (ambiguous) -269.989 (actions) ] TJ 11.9559 TL T* [ (\050e\056g\056) -289.008 (\140open) -289.003 (tap\047) -289.006 (vs) -288.998 (\140close) -287.984 (tap\047\051\056) -426.983 (Audio) -288.986 (can) -289.016 (also) -288.996 (capture) -289.011 (ac\055) ] TJ 11.9551 TL T* [ (tions) -383.985 (that) -383.019 (are) -383.987 (out) -383.997 (of) -382.992 (the) -384.007 (wearable) -383.98 (camera\047) 55 (s) -382.99 <02656c64> -383.99 (of) -384.014 (vie) 24.986 (w) 65.0137 (\054) ] TJ T* [ (b) 20.0016 (ut) -407.998 (audible) -407.991 (\050e\056g\056) -406.988 (\140eat\047) -408.018 (can) -408.015 (be) -408.01 (heard) -408.01 (b) 20.0016 (ut) -407.02 (not) -407.991 (seen\051\056) -783.987 (Con\055) ] TJ T* [ (v) 14.9828 (ersely) 65.0161 (\054) -231.998 (other) -227.991 (actions) -228.018 (are) ] TJ /R15 9.9626 Tf 100.453 0 Td (sound\055less) Tj /R11 9.9626 Tf 44.3332 0 Td [ (\050e\056g\056) -228.001 (\140wipe) -227.991 (hands\047\051) -227.991 (and) ] TJ -144.786 -11.9547 Td [ (the) -340.995 (wearable) -339.987 (sensor) -340.987 (might) -341.007 (capture) -341.002 (irrele) 25.0154 (v) 24.9811 (ant) -340.012 (sounds\054) -363.983 (such) ] TJ 11.9563 TL T* [ (as) -285.989 (talking) -285.999 (or) -287.008 (music) -286.004 (playing) -286.001 (in) -286.021 (the) -287.001 (background\056) -417.984 (The) -287.006 (oppor) 20.0114 (\055) ] TJ 11.9547 TL T* [ (tunities) -247.015 (and) -247.993 (challenges) -248.02 (of) -247.018 (incorporating) -248.003 (audio) -246.998 (in) -247.988 (e) 15.0122 (gocentric) ] TJ T* [ (action) -389.018 (recognition) -390.018 (allo) 24.9909 (w) -388.991 (us) -390.004 (to) -388.984 (e) 15.0122 (xplore) -389.018 (ne) 25.0154 (w) -390.008 (multi\055sensory) ] TJ T* [ (fusion) -264.99 (approaches\054) -267.987 (particularly) -264.99 (related) -265.01 (to) -265.005 (the) -265.005 (potential) ] TJ /R15 9.9626 Tf 218.547 0 Td (tem\055) Tj -218.547 -11.9551 Td [ (por) 14.995 (al) -312.014 (async) 14.9852 (hr) 44.9851 (ony) ] TJ /R11 9.9626 Tf 72.9848 0 Td [ (between) -312.021 (the) -312.017 (action\047) 54.9859 (s) -311.982 (appearance) -312.002 (and) -311.982 (the) ] TJ -72.9848 -11.9551 Td [ (discriminati) 24.9885 (v) 14.9828 (e) -250.002 (audio) -250.02 (signal) -250.012 (\226) -249.993 (the) -249.988 (main) -250.017 (focus) -249.988 (of) -249.997 (our) -249.993 (w) 10.0032 (ork\056) ] TJ 11.9551 -15.3199 Td [ (While) -280.997 (se) 25.0179 (v) 14.9828 (eral) -279.997 (multi\055modal) -280.992 (fusion) -280.007 (architectures) -281.002 (e) 15.0122 (xist) -280.987 (for) ] TJ -11.9551 -11.9551 Td [ (action) -461.006 (recognition\054) -513.995 (current) -461.021 (approaches) -461.996 (perform) -461.011 (temporal) ] TJ 11.9559 TL T* [ (aggre) 15.0147 (g) 4.98446 (ation) ] TJ /R15 9.9626 Tf 52.177 0 Td (within) Tj /R11 9.9626 Tf 30.259 0 Td [ (each) -535.996 (modality) ] TJ /R15 9.9626 Tf 64.3641 0 Td [ (befor) 36.9938 (e) ] TJ /R11 9.9626 Tf 30.4289 0 Td [ (modalities) -536.018 (are) ] TJ -177.229 -11.9551 Td [ (fused) -376.008 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 337.505 176.641 Tm (22) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 347.467 176.641 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 352.09 176.641 Tm (42) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 362.052 176.641 Tm [ (\135) -376.018 (or) -376.013 (embedded) -375.996 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 429.171 176.641 Tm (23) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 439.133 176.641 Tm [ (\135\056) -688.003 (W) 79.9866 (orks) -376 (that) -376 (do) -376.006 (fuse) -376.011 (in\055) ] TJ -130.271 -11.9551 Td [ (puts) -312.985 (before) -313.012 (temporal) -312.982 (aggre) 15.0171 (g) 4.98446 (ation\054) -328.979 (e\056g\056) -313.002 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 469.11 164.686 Tm (10) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 479.073 164.686 Tm [ (\135\054) -329.004 (do) -312.997 (so) -312.997 (with) -312.982 (in\055) ] TJ -170.211 -11.9547 Td [ (puts) -383.992 (synchronised) -384.017 (across) -384.009 (modalities\056) -713.008 (In) -384.014 (Fig\056) ] TJ ET Q 1 0 0 rg q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 497.255 152.731 Tm (1) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R11 9.9626 Tf 1 0 0 1 502.236 152.731 Tm [ (\054) -418.004 (we) -384.014 (sho) 24.9909 (w) ] TJ -193.374 -11.9551 Td [ (an) -265.015 (e) 15.0122 (xample) -264.995 (of) -265.012 (\140breaking) -265.002 (an) -265.015 (e) 15.0122 (gg) -265.005 (into) -264.99 (a) -266 (pan\047) -265.015 (from) -265.005 (the) -265.005 (EPIC\055) ] TJ 11.9551 TL T* [ (Kitchens) -351.015 (dataset\056) -610.981 (The) -350.995 (distinct) -350.02 (sound) -351.015 (of) -351 (cracking) -350.015 (the) -350.99 (e) 15.0122 (gg\054) ] TJ 11.9559 TL T* [ (the) -281.005 (motion) -281.009 (of) -281.009 (separating) -280.992 (the) -281.002 (e) 15.0122 (gg) -281.002 (and) -281.007 (the) -281.002 (change) -281.012 (in) -281.002 (appear) 19.9918 (\055) ] TJ 11.9551 TL T* [ (ance) -234.008 (of) -233.998 (the) -233.993 (e) 15.0122 (gg) -233.01 (occur) -234.003 (at) -233.998 (dif) 24.986 (ferent) -234.005 (frames\057temporal) -234.005 (positions) ] TJ T* [ (within) -446.009 (the) -447.016 (video\056) -899.007 (Approaches) -445.989 (that) -445.989 (fuse) -446 (modalities) -447.014 (with) ] TJ T* [ (synchronised) -410.012 (input) -409.981 (w) 10 (ould) -409.986 (thus) -409.005 (be) -410.01 (limited) -410.01 (in) -410 (their) -410 (ability) ] TJ ET Q /R20 gs Q Q q 1 0 0 -1 0 792 cm q 1 1 1 rg /a0 gs 48.406 3.066 515.188 33.723 re f 0.44706 0.57647 0.77255 rg /a1 gs 77.262 5.789 m 71.715 5.789 67.215 10.68 67.215 16.707 c 67.215 22.738 71.715 27.625 77.262 27.625 c 78.852 27.625 80.355 27.223 81.691 26.508 c 79.777 22.742 l 79.008 23.121 78.16 23.332 77.262 23.332 c 73.895 23.332 71.164 20.363 71.164 16.707 c 71.164 13.051 73.895 10.082 77.262 10.082 c 78.598 10.082 79.828 10.555 80.832 11.348 c 83.789 8.402 l 82.031 6.77 79.75 5.789 77.262 5.789 c h 77.262 5.789 m f 78.059 15.016 m 87.273 33.801 l 95.863 15.016 l 91.531 15.016 l 87.273 24.305 l 82.684 15.016 l h 78.059 15.016 m f 96.422 5.812 m 109.984 5.812 l 109.984 9.465 l 100.875 9.465 l 100.875 14.996 l 105.816 14.996 l 105.816 18.547 l 100.875 18.547 l 100.875 27.707 l 96.449 27.707 l h 96.422 5.812 m f q 1 0 0 1 0 0 cm /a1 gs /x6 Do Q Q Q Q q 1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET 1 1 1 rg n 270 32 72 14 re f* 0.5 0.5 0.5 rg BT /F2 9 Tf 10.8 TL ET BT 1 0 0 1 297 35 Tm (5492) Tj T* ET Q endstream endobj 15 0 obj << /Filter /FlateDecode /BitsPerComponent 8 /Height 530 /Length 45145 /ColorSpace /DeviceGray /DecodeParms << /Columns 1039 /Predictor 15 >> /Width 1039 /Subtype /Image >> stream xu`gǟH Z[awccn])+{6v9z(.K}ޡA}@Q7싎:z;(t(1IA{9l;HS1 M@hѓȈ[jGli> .m>z72$ $Z1**"sjoi| @ၱ0 1u,| 8