%PDF-1.6
%
16 0 obj
<<
/Length 8768
>>
stream
0 g 0 G
0 g 0 G
0 g 0 G
0 g 0 G
0 g 0 G
BT
/F68 14.3462 Tf 133.687 675.067 Td [(BER)40(THop:)-310(An)-250(Effecti)10(v)10(e)-250(V)37(ision-and-Language)-250(Model)]TJ 60.11 -17.933 Td [(f)25(or)-250(Chest)-250(X-ray)-250(Disease)-250(Diagnosis)]TJ/F69 11.9552 Tf -71.025 -37.858 Td [(Masoud)-250(Monajatipoor)]TJ/F37 7.9701 Tf 106.591 4.339 Td [(1)]TJ/F40 7.9701 Tf 4.235 0 Td [(;)]TJ/F37 7.9701 Tf 2.352 0 Td [(3)]TJ/F69 7.9701 Tf 4.732 -1.761 Td [(*)]TJ/F69 11.9552 Tf 0 -2.578 Td [(,)-250(Mozhdeh)-250(Rouhsedaghat)]TJ/F37 7.9701 Tf 121.859 4.339 Td [(2)]TJ/F69 7.9701 Tf 4.732 -1.761 Td [(*)]TJ/F69 11.9552 Tf 4.483 -2.578 Td [(,)-250(Liunian)-250(Harold)-250(Li)]TJ/F37 7.9701 Tf 92.975 4.339 Td [(1)]TJ/F69 11.9552 Tf 4.732 -4.339 Td [(,)]TJ -328.919 -13.947 Td [(Aichi)-250(Chien)]TJ/F37 7.9701 Tf 58.113 4.338 Td [(4)]TJ/F69 11.9552 Tf 4.733 -4.338 Td [(,)-250(C.-C.)-250(Jay)-250(K)15(uo)]TJ/F37 7.9701 Tf 74.205 4.338 Td [(2)]TJ/F69 11.9552 Tf 4.732 -4.338 Td [(,)-250(F)15(abien)-250(Scalzo)]TJ/F37 7.9701 Tf 73.201 4.338 Td [(1)]TJ/F69 11.9552 Tf 7.721 -4.338 Td [(&)-250(Kai-W)80(ei)-250(Chang)]TJ/F37 7.9701 Tf 86.699 4.338 Td [(1)]TJ -247.671 -13.948 Td [(1)]TJ/F69 11.9552 Tf 4.733 -4.338 Td [(Computer)-250(Science)-250(Department,)-250(UCLA)]TJ/F37 7.9701 Tf -16.186 -9.609 Td [(2)]TJ/F69 11.9552 Tf 4.733 -4.339 Td [(Department)-250(of)-250(Electrical)-250(Engineering,)-250(USC)]TJ/F37 7.9701 Tf -9.377 -9.609 Td [(3)]TJ/F69 11.9552 Tf 4.732 -4.338 Td [(Department)-250(of)-250(Electrical)-250(Engineering,)-250(UCLA)]TJ/F37 7.9701 Tf 24.366 -9.61 Td [(4)]TJ/F69 11.9552 Tf 4.733 -4.338 Td [(Department)-250(of)-250(Radiology)65(,)-250(UCLA)]TJ/F71 8.9664 Tf -91.863 -13.948 Td [(monajati@ucla.edu,rouhseda@usc.edu,aichi@ucla.edu,jckou@usc.edu)]TJ/F42 11.9552 Tf 53.885 -13.947 Td [(f)]TJ/F71 8.9664 Tf 5.978 0 Td [(liunian.harold.li,fab,kwchang)]TJ/F58 8.9664 Tf 156.014 0 Td [(g)]TJ/F71 8.9664 Tf 4.608 0 Td [(@cs.ucla.edu)]TJ
0 g 0 G
/F68 11.9552 Tf -202.638 -41.047 Td [(Abstract)]TJ/F73 9.9626 Tf -83.928 -24.384 Td [(V)74(ision-and-langua)10(g)10(e)-286(\050V&L\051)-286(models)-286(tak)10(e)-286(ima)10(g)10(e)-286(and)-286(te)20(xt)]TJ -11.955 -11.955 Td [(as)-192(input)-191(and)-192(learn)-191(to)-192(captur)37(e)-191(the)-192(associations)-191(between)-192(them.)]TJ 0 -11.955 Td [(Prior)-366(studies)-367(show)-366(that)-367(pr)37(e-tr)15(ained)-366(V&L)-367(models)-366(can)-367(sig-)]TJ 0 -11.955 Td [(ni\002cantly)-329(impr)45(o)10(ve)-329(the)-329(model)-329(performance)-330(for)-329(downstr)37(eam)]TJ 0 -11.955 Td [(tasks)-292(suc)15(h)-291(as)-292(V)74(isual)-292(Question)-291(Answering)-292(\050VQA\051.)-292(Howe)15(ver)111(,)]TJ 0 -11.956 Td [(V&L)-283(models)-283(ar)37(e)-284(less)-283(ef)18(fective)-283(when)-283(applied)-283(in)-284(t)1(h)-1(e)-283(medical)]TJ 0 -11.955 Td [(domain)-225(\050e)15(.g)15(.,)-230(on)-225(X-r)15(ay)-225(ima)10(g)10(es)-224(and)-225(clinical)-225(notes\051)-225(due)-225(to)-225(the)]TJ 0 -11.955 Td [(domain)-272(gap.)-376(In)-272(this)-272(paper)111(,)-278(we)-272(in)40(vestigate)-272(the)-272(c)15(halleng)10(es)-272(of)]TJ 0 -11.955 Td [(applying)-299(pr)37(e-tr)15(ained)-299(V&L)-299(models)-299(in)-299(medical)-299(applications.)]TJ 0 -11.955 Td [(In)-356(particular)111(,)-381(we)-356(identify)-356(that)-355(the)-356(visual)-355(r)37(epr)37(esentation)-356(in)]TJ 0 -11.955 Td [(g)10(ener)15(al)-272(V&L)-271(models)-272(is)-271(not)-272(suitable)-272(for)-271(pr)45(ocessing)-272(medical)]TJ 0 -11.956 Td [(data.)-335(T)92(o)-258(o)10(ver)37(come)-259(this)-258(limitation,)-261(we)-258(pr)45(opose)-258(BERTHop,)-261(a)]TJ 0 -11.955 Td [(tr)15(ansformer)20(-based)-311(model)-310(based)-311(on)-310(PixelHop++)-311(and)-311(V)74(isu-)]TJ 0 -11.955 Td [(alBERT)74(,)-331(for)-331(better)-330(capturing)-331(the)-331(associations)-331(between)-331(the)]TJ 0 -11.955 Td [(two)-292(modalities.)-437(Experiments)-293(on)-292(the)-292(OpenI)-292(dataset,)-303(a)-293(com-)]TJ 0 -11.955 Td [(monly)-429(used)-428(thor)15(acic)-429(disease)-429(dia)10(gnosis)-428(benc)15(hmark,)-474(show)]TJ 0 -11.955 Td [(that)-290(BERTHop)-290(ac)15(hie)15(ves)-290(an)-290(aver)15(a)10(g)10(e)-290(Ar)37(ea)-290(Under)-290(the)-290(Curve)]TJ 0 -11.956 Td [(\050A)50(UC\051)-292(of)-292(98.12%)-292(whic)15(h)-292(is)-292(1.62%)-292(higher)-292(than)-292(state-of-the-)]TJ 0 -11.955 Td [(art)-250(\050SO)40(T)50(A\051)-250(while)-250(it)-250(is)-250(tr)15(ained)-250(on)-250(a)-250(9\327)-250(smaller)-250(dataset.)]TJ/F68 11.9552 Tf 0 -37.287 Td [(1.)-250(Intr)18(oduction)]TJ/F69 9.9626 Tf 11.955 -19.403 Td [(Computer)20(-Aided)-285(Diagnosis)-285(\050CADx\051)-285([14])-285(systems)-285(could)]TJ -11.955 -11.955 Td [(pro)15(vide)-190(v)25(aluable)-190(bene\002ts)-191(for)-190(disease)-190(diagnosis)-190(including)-191(b)20(ut)]TJ 0 -11.955 Td [(not)-280(limited)-280(to)-281(impro)15(ving)-280(the)-280(quality)-280(and)-280(consistenc)15(y)-280(of)-281(the)]TJ 0 -11.955 Td [(predictions)-329(and)-329(reducing)-329(medical)-329(mistak)10(es)-329(as)-329(the)15(y)-329(are)-329(not)]TJ 0 -11.955 Td [(subject)-286(to)-287(human)-286(error)55(.)-419(Although)-287(most)-286(e)15(xisting)-286(studies)-287(fo-)]TJ 0 -11.955 Td [(cus)-255(on)-255(diagnosis)-255(based)-255(on)-255(medical)-255(images)-255(such)-256(as)-255(chest)-255(X-)]TJ 0 -11.956 Td [(ray)-230(\050CXR\051)-230(i)1(mages)-230([4,)-230(2,)-230(1],)-234(the)-229(radiology)-230(reports)-230(often)-230(con-)]TJ 0 -11.955 Td [(tain)-286(subs)1(tantial)-286(information)-286(\050e)1(.g)-1(.)-416(patient)-286(history)-285(and)-286(pre)25(vi-)]TJ
0 g 0 G
ET
q
1 0 0 1 50.112 90.415 cm
[]0 d 0 J 0.398 w 0 0 m 94.499 0 l S
Q
BT
/F69 5.9776 Tf 60.971 82.492 Td [(*)]TJ/F69 7.9701 Tf 3.487 -1.492 Td [(equal)-250(contrib)20(ution)]TJ
0 g 0 G
0 g 0 G
0 g 0 G
0 g 0 G
ET
1 0 0 1 308.862 357.669 cm
q
.2461 0 0 .2461 0 0 cm
q
1 0 0 1 0 0 cm
/Im1 Do
Q
Q
1 0 0 1 -308.862 -357.669 cm
BT
/F69 8.9664 Tf 308.862 346.71 Td [(Figure)-296(1.)-296(An)-296(o)15(v)15(ervie)25(w)-295(of)-296(BER)60(THop.)-448(BER)60(THop)-296(tak)10(es)-296(X-ray)-295(im-)]TJ 0 -10.959 Td [(age)-384(and)-383(clinical)-384(report)-384(as)-383(input.)-711(It)-384(\002rst)-384(enc)1(odes)-384(the)-384(image)-383(and)]TJ 0 -10.959 Td [(te)15(xt)-391(and)-390(e)15(xtracts)-391(potential)-390(features)-391(from)-390(both)-391(modalities.)-731(Then)]TJ 0 -10.959 Td [(a)-331(transformer)20(-based)-332(model)-331(learns)-331(the)-332(associations)-331(between)-331(these)]TJ 0 -10.959 Td [(tw)10(o)-254(modalities.)-321(By)-254(applying)-253(appropriate)-254(vision)-254(and)-254(te)15(xt)-253(e)15(xtractor)40(,)]TJ 0 -10.959 Td [(the)-335(model)-335(is)-335(capable)-336(to)-335(identify)-335(the)-335(abnormality)-335(and)-335(associate)-335(it)]TJ 0 -10.959 Td [(with)-250(the)-250(te)15(xt)-250(labels.)]TJ
0 g 0 G
/F69 9.9626 Tf 0 -31.727 Td [(ous)-269(studies\051)-268(that)-269(are)-269(dif)25(\002cult)-268(to)-269(be)-269(detected)-268(from)-269(the)-269(image)]TJ 0 -11.955 Td [(alone.)-318(Besides,)-254(diagnosis)-253(from)-253(both)-252(image)-253(and)-253(te)15(xt)-253(is)-253(more)]TJ 0 -11.955 Td [(closely)-378(aligned)-378(with)-379(disease)-378(diagnosis)-378(by)-378(human)-379(e)15(xperts.)]TJ 0 -11.955 Td [(Therefore,)-350(V&L)-331(models)-330(that)-330(tak)10(e)-330(both)-331(images)-330(and)-330(te)15(xt)-331(as)]TJ 0 -11.955 Td [(input)-311(can)-311(be)-311(potentially)-311(more)-311(accurate)-311(for)-312(CADx)-311(and)-311(se)25(v-)]TJ 0 -11.956 Td [(eral)-250(attempts)-250(ha)20(v)15(e)-250(been)-250(made)-250(in)-250(this)-250(direction)-250([40,)-250(42,)-250(23].)]TJ 11.955 -12.812 Td [(Ho)25(we)25(v)15(er)40(,)-332(the)-316(s)1(hortage)-316(of)-316(annotated)-315(data)-316(in)-315(the)-316(medical)]TJ -11.955 -11.955 Td [(domain)-350(mak)10(es)-351(utilizing)-350(V&L)-351(models)-350(challenging.)-612(Anno-)]TJ 0 -11.955 Td [(tating)-382(medical)-382(data)-383(is)-382(an)-382(e)15(xpensi)25(v)15(e)-382(process)-382(as)-383(it)-382(requires)]TJ 0 -11.955 Td [(human)-426(e)15(xperts.)-837(Although)-426(a)-425(couple)-426(of)-426(recent)-426(lar)18(ge-scale)]TJ 0 -11.955 Td [(auto-labeled)-388(datasets)-387(ha)20(v)15(e)-388(been)-388(pro)15(vided)-387(for)-388(some)-388(medi-)]TJ 0 -11.956 Td [(cal)-284(tasks,)-293(e.g.,)-292(chest)-284(X-ray)-285([39,)-284(6,)-284(19],)-292(the)15(y)-284(are)-285(often)-284(noisy)]TJ 0 -11.955 Td [(\050lo)25(w-quality\051)-291(and)-290(de)15(grade)-291(the)-291(performance)-291(of)-290(models.)-433(Be-)]TJ 0 -11.955 Td [(sides,)-216(such)-207(datasets)-207(are)-208(not)-207(a)20(v)25(ailable)-207(for)-207(most)-207(medical)-208(tasks.)]TJ 0 -11.955 Td [(Therefore,)-532(training)-476(V&L)-475(models)-476(with)-476(limited)-476(annotated)]TJ
0 g 0 G
0 g 0 G
ET
endstream
endobj
11 0 obj
<<
/Type /XObject
/Subtype /Form
/FormType 1
/PTEX.FileName (./overview_fig3.pdf)
/PTEX.PageNumber 1
/PTEX.InfoDict 31 0 R
/BBox [0 0 960 540]
/Group 28 0 R
/Resources <<
/ExtGState <<
/GS5 32 0 R
/GS17 33 0 R
/GS19 34 0 R
/GS25 35 0 R
>>/XObject <<
/Image6 36 0 R
/Image7 37 0 R
/Image8 38 0 R
/Image9 39 0 R
/Image10 40 0 R
/Image11 41 0 R
/Image12 42 0 R
/Image13 43 0 R
/Image14 44 0 R
/Image18 45 0 R
/Image26 46 0 R
/Image33 47 0 R
>>/Font << /F1 48 0 R/F2 49 0 R>>
/Pattern <<
/P32 50 0 R
>>/ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
>>
/Length 2076
/Filter /FlateDecode
>>
stream
xY[k#G~7?ԣ_`\0IXAѴmAKJ,I}Sեu;Yu9u:Si_,W .J Ϣ|6WlJfжGn?7on&e>$Nn3pg]_ԧZrÖ([ZFg~^.1|ֺT-??O<"pcpӵ<O}"Ud#NCV煚zD-8TȽk:("Ά%3{"PTO<谓C\_ܭGa H ̼՜֜ u:,p"N|SVCMk}S34LsĐFO#̻;~wjof*U+VRH>$38$߃)"z 7JFk͝di#gy>1k-M,tyW|z(P$jMc|e>Lhi_-H$'cNMkK.-q3y5ǝ^м\t#w<^L:=ޮr7zV-~xK5lg
mJl\E Yѯ9pbUB\:\(̶3&gD*~|u\c'>&f6'^tQ.+8)T?
xC{ّKKe}_4!rK1fr L}jsO*\-<Ϫl~^ԄtE
_0uYWg