From 2ef6abab8ca0ea6784657ae36d444c0261144796 Mon Sep 17 00:00:00 2001 From: maxzupfer <114431311+maxzupfer@users.noreply.github.com> Date: Mon, 12 Dec 2022 12:00:19 -0600 Subject: [PATCH] Add files via upload --- Age Distribution of Patients.pdf | Bin 0 -> 5190 bytes All_Data_Both.csv | 39889 ++++++++++++++++++ Country Y .txt to .csv Files/screen_120.csv | 498 + Country Y .txt to .csv Files/screen_121.csv | 485 + Country Y .txt to .csv Files/screen_122.csv | 480 + Country Y .txt to .csv Files/screen_123.csv | 253 + Country Y .txt to .csv Files/screen_124.csv | 480 + Country Y .txt to .csv Files/screen_125.csv | 251 + Country Y .txt to .csv Files/screen_126.csv | 288 + Country Y .txt to .csv Files/screen_127.csv | 250 + Country Y .txt to .csv Files/screen_128.csv | 360 + Country Y .txt to .csv Files/screen_129.csv | 252 + Country Y .txt to .csv Files/screen_130.csv | 222 + Country Y .txt to .csv Files/screen_131.csv | 225 + Country Y .txt to .csv Files/screen_132.csv | 241 + Country Y .txt to .csv Files/screen_133.csv | 276 + Country Y .txt to .csv Files/screen_134.csv | 225 + Country Y .txt to .csv Files/screen_135.csv | 398 + Country Y .txt to .csv Files/screen_136.csv | 469 + Country Y .txt to .csv Files/screen_137.csv | 302 + Country Y .txt to .csv Files/screen_138.csv | 463 + Country Y .txt to .csv Files/screen_139.csv | 378 + Country Y .txt to .csv Files/screen_140.csv | 418 + Country Y .txt to .csv Files/screen_141.csv | 452 + Country Y .txt to .csv Files/screen_142.csv | 434 + Country Y .txt to .csv Files/screen_143.csv | 314 + Country Y .txt to .csv Files/screen_144.csv | 217 + Country Y .txt to .csv Files/screen_145.csv | 260 + Country Y .txt to .csv Files/screen_146.csv | 448 + Country Y .txt to .csv Files/screen_147.csv | 326 + Country Y .txt to .csv Files/screen_148.csv | 368 + Country Y .txt to .csv Files/screen_149.csv | 251 + Country Y .txt to .csv Files/screen_150.csv | 297 + Country Y .txt to .csv Files/screen_151.csv | 330 + Country Y .txt to 
.csv Files/screen_152.csv | 389 + Country Y .txt to .csv Files/screen_153.csv | 477 + Country Y .txt to .csv Files/screen_154.csv | 272 + Country Y .txt to .csv Files/screen_155.csv | 420 + Country Y .txt to .csv Files/screen_156.csv | 369 + Country Y .txt to .csv Files/screen_157.csv | 269 + Country Y .txt to .csv Files/screen_158.csv | 370 + Country Y .txt to .csv Files/screen_159.csv | 468 + Country Y .txt to .csv Files/screen_160.csv | 480 + Country Y .txt to .csv Files/screen_161.csv | 309 + Country Y .txt to .csv Files/screen_162.csv | 435 + Country Y .txt to .csv Files/screen_163.csv | 248 + Country Y .txt to .csv Files/screen_164.csv | 302 + Country Y .txt to .csv Files/screen_165.csv | 309 + Country Y .txt to .csv Files/screen_166.csv | 349 + Country Y .txt to .csv Files/screen_167.csv | 489 + Country Y .txt to .csv Files/screen_168.csv | 498 + Country Y .txt to .csv Files/screen_169.csv | 241 + Country Y .txt to .csv Files/screen_170.csv | 443 + Country Y .txt to .csv Files/screen_171.csv | 451 + Country Y .txt to .csv Files/screen_172.csv | 331 + Country Y .txt to .csv Files/screen_173.csv | 298 + Country Y .txt to .csv Files/screen_174.csv | 390 + Country Y .txt to .csv Files/screen_175.csv | 364 + Infections Per Country Over Time Graph.pdf | Bin 0 -> 5252 bytes Vaccine Graph.pdf | Bin 0 -> 4871 bytes analysis.R | 103 + supportingFunctions.R | 141 + 62 files changed, 60015 insertions(+) create mode 100644 Age Distribution of Patients.pdf create mode 100644 All_Data_Both.csv create mode 100644 Country Y .txt to .csv Files/screen_120.csv create mode 100644 Country Y .txt to .csv Files/screen_121.csv create mode 100644 Country Y .txt to .csv Files/screen_122.csv create mode 100644 Country Y .txt to .csv Files/screen_123.csv create mode 100644 Country Y .txt to .csv Files/screen_124.csv create mode 100644 Country Y .txt to .csv Files/screen_125.csv create mode 100644 Country Y .txt to .csv Files/screen_126.csv create mode 100644 Country Y .txt to .csv 
Files/screen_127.csv create mode 100644 Country Y .txt to .csv Files/screen_128.csv create mode 100644 Country Y .txt to .csv Files/screen_129.csv create mode 100644 Country Y .txt to .csv Files/screen_130.csv create mode 100644 Country Y .txt to .csv Files/screen_131.csv create mode 100644 Country Y .txt to .csv Files/screen_132.csv create mode 100644 Country Y .txt to .csv Files/screen_133.csv create mode 100644 Country Y .txt to .csv Files/screen_134.csv create mode 100644 Country Y .txt to .csv Files/screen_135.csv create mode 100644 Country Y .txt to .csv Files/screen_136.csv create mode 100644 Country Y .txt to .csv Files/screen_137.csv create mode 100644 Country Y .txt to .csv Files/screen_138.csv create mode 100644 Country Y .txt to .csv Files/screen_139.csv create mode 100644 Country Y .txt to .csv Files/screen_140.csv create mode 100644 Country Y .txt to .csv Files/screen_141.csv create mode 100644 Country Y .txt to .csv Files/screen_142.csv create mode 100644 Country Y .txt to .csv Files/screen_143.csv create mode 100644 Country Y .txt to .csv Files/screen_144.csv create mode 100644 Country Y .txt to .csv Files/screen_145.csv create mode 100644 Country Y .txt to .csv Files/screen_146.csv create mode 100644 Country Y .txt to .csv Files/screen_147.csv create mode 100644 Country Y .txt to .csv Files/screen_148.csv create mode 100644 Country Y .txt to .csv Files/screen_149.csv create mode 100644 Country Y .txt to .csv Files/screen_150.csv create mode 100644 Country Y .txt to .csv Files/screen_151.csv create mode 100644 Country Y .txt to .csv Files/screen_152.csv create mode 100644 Country Y .txt to .csv Files/screen_153.csv create mode 100644 Country Y .txt to .csv Files/screen_154.csv create mode 100644 Country Y .txt to .csv Files/screen_155.csv create mode 100644 Country Y .txt to .csv Files/screen_156.csv create mode 100644 Country Y .txt to .csv Files/screen_157.csv create mode 100644 Country Y .txt to .csv Files/screen_158.csv create mode 100644 
Country Y .txt to .csv Files/screen_159.csv create mode 100644 Country Y .txt to .csv Files/screen_160.csv create mode 100644 Country Y .txt to .csv Files/screen_161.csv create mode 100644 Country Y .txt to .csv Files/screen_162.csv create mode 100644 Country Y .txt to .csv Files/screen_163.csv create mode 100644 Country Y .txt to .csv Files/screen_164.csv create mode 100644 Country Y .txt to .csv Files/screen_165.csv create mode 100644 Country Y .txt to .csv Files/screen_166.csv create mode 100644 Country Y .txt to .csv Files/screen_167.csv create mode 100644 Country Y .txt to .csv Files/screen_168.csv create mode 100644 Country Y .txt to .csv Files/screen_169.csv create mode 100644 Country Y .txt to .csv Files/screen_170.csv create mode 100644 Country Y .txt to .csv Files/screen_171.csv create mode 100644 Country Y .txt to .csv Files/screen_172.csv create mode 100644 Country Y .txt to .csv Files/screen_173.csv create mode 100644 Country Y .txt to .csv Files/screen_174.csv create mode 100644 Country Y .txt to .csv Files/screen_175.csv create mode 100644 Infections Per Country Over Time Graph.pdf create mode 100644 Vaccine Graph.pdf create mode 100644 analysis.R create mode 100644 supportingFunctions.R diff --git a/Age Distribution of Patients.pdf b/Age Distribution of Patients.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f9ce4435e3cc19f84741fb4e0216e818002e5cb6 GIT binary patch literal 5190 zcmZ`-XFyX~utpG&s32XGazQ|l5)wk_(u*P@y_XmQL{l(DLhrq-SP+mRMG#O?X;KAg zDnjUjh$tXcKsr+NCGP6(+kNl;xXJnMoS8ZEz8X{k3WY!wR3LCDL_t{^1UALH|G(d?&?F3P zmjz%%KzMthkwn0ZO!6j^XxlCj@a|+Jia`GglZVPfXxPtKcse8vI(-_1!nxD4fd0+~ z09*NbqX4iTf`q`}Jpu3qgeQs!DAEipfWJ2=|7Qc;j&4mr;Yfd(gZ1z@5^YHYRDNy; zTcC(|G66~R2m6;ly1}n=hE^PX6cX=_qUTK{(Q1YT zQ8Fr0P0hrWk3=o)$pl2`3D&;OZf}%KzQ!ei$QQKnH-^KD1k$E>!v%j@;^oBj#PwrzL(SE+0B%dTzq(?#+F1uy@V9-c9#fvx;KQO|TpBjleT*Fk<`i#G9PMa{ei7mOLE&g&4 zThBDWu>BNndM-`W)xJwLtSLb8ru}t^Nn#jD?On`|}5zDoAzN=Z3fm0;Muf_Y0 
zv~-mZU2U|@tt&ZG5dxn`&^Fu0{Gw7NL5Js-z9vh4fiUkJO&q1*!p{0n-DLTUd3zr^03A$>Ok(HGt`ht z>BEYR%@bw9p;DEd&HL@fm|puaguN{D`rb5%vxj|`l2h8#;c4*N3XiG0v{7U->Nzlz z7ukRvn9P$B5fUAF!GvTyPyEA4)Vq0>S`^aPf1;4FMH*)m*1_tc$RQ|4NvvC&jShFk z989M`JQ>m{e$g9P9qy&-Y})@hiz}><%bV@aLutmLx`XJUPEQ0o8 z?97e5-NR>E_rB6=JhL&M_PQ@8bo7znioMaeuVAsHOP%fAOXCKS-JHwI+NWk<%g3*X zHx!1Q=Uz%Zy_QZf7hOx=O1vlOQoxu_X)l$DNZxW0-EX>UC>T2(;w_$K*_6D+u5|Gj z2;7oP;cGV5VC69!-#IA6#K{ruz-Kf*3{7ewYF`o0V#;M=nitJRi)YEvvOUqDs}X-< z8Ixu-E=2py@Dz^aA4zKQ>EM>H-z#x-9Sv~4NnPHa>tDBakrmmHxo;%~3pRUuT#H}5 z;a=q*m2Q*72IC(LB;T>o-szro3k|I)i7vLj=!-Ri3&L;G)Rk*@W_L#GmZlG(Z%U_M zb(BF>uHAeW+eVsAK*OVMB<8dxltDlfOmZQ^}rwy|9(UpjCcCg9lryptZ4_5b8%T_Fvo)Q%j$~ql(>nhQIsC3A5M_HTX`~e*4 zGMiW68EF5M9sOvdcDjM`FX*R4~UT_XangvTU;~ zB$+f12Lx|^no=4&xy5-W_(XvGkHTfq3VdhZ=f1-B+sG9S2_q}jN2TAt30WLVoZQBC z^ww4T2YsWQhZqvCZceKXjAaD+Rr~wF%&Mkt6aB!cWm+zaIAPZ;>R8(70RISNk9A;+ zI7egXzsT+{uBKD*pH%satN)~EunF3o2si?CYPJBJX%vnp)2Nunp{6KzG(s0o0UQ-* z-(U(bKoO$o{0}y#v+*zXr8NztC5i+%f@y7w76C;eIfKB4G$sc$G{A;X077^2vloC3 zC?q3G61{6_Xwco!LSpa)OK$}7Cjur~80i8^zs&#c9(0Hv&k=k^Pfr&?M7aZSx|Q>< zenemWI-?G~0pRFC!1}1GXe5g66nvi6W59oP3aFAQ{Qv9}^qhWoip&%rE6ZWNXllK3 z{e|mC;%;_d_Of6!K3s2{EPo2ZcI0g1H3l)bzLmGWd2+!4sMN=hJiX*6VsVlC%%9Bf z7trs&;>~kB<&677fw4N@rOrEJBAY>%v+Jp&o=bs)H4N8w!4vttW6lw-&9X}UEg zG2YXSb>E?{3``h?kKScvx_n!PjA@JkLkXBP?k)cM8g}5@&Z$1FgeHa`**B~0#UKn9 zQWK_EwuYSq_+P1L+Ga>pnY~J_gqe0Job3{`4};p=Ag;(d25h9EI8;}m5gGy=#IAAF z*U9%MClXb;8WNJ-1o)>9Y*j-WB$45yGf~=Y_{k>%0q5ZiQrF+hPZ2HMF>=W3QqVBD z315bf#;;tz_JZx+P`NueTD#sj$a9Go#t29{SwiCC}8EV4O0EV&bxQ6Z(dDti3na4&==Wi-mncyJIwI7 z097%(9gRivGC0rgyHL9q-RTBqhfjz`2wP$CunO%xSt&+FRM_TO7^b^M|WbV&83XcBj}e-V;5h9VE;kb&3;U^LLl& zUJ5+eX2kTZGrlJ+HFJj=_C3({*}3m8`NV^hkVg8@XI$o8U($CS7-0Z|fi}~e_VF1e z?#BIPW@xzv2e@7{+sLpFY6P6H;;a=*_T)ZYS@vkneKB!r|UWNukJ( zFcnt$&aMvL*EX*o>j?$6f+mN@oEYXZH#3z3NfG=_ zuf9T6MC8M&8gF?s1sqw7Fl)a3k<$*O6pUt6w{oQCo%q96<-PlCnjmxD(hOMHuUzLdfM@?>1C1d1gY<5$9Tu zIp5-)8}cm5?u>@MhMAy5gM_)Uhd5k1R9qA*x^Oa6Y*efsxbe!+or9oZf9``WPBL3` 
z?BsLvyQl0Vj_sKj?YDdab3TYKlY=*L|?kD#f&e_kY z9M)4TVD7d+-z&#^g_le7#>HixEkB!eHu3cpUTLA6<2_6>vun_8#5Q?b-CEAt%(`Jt zasWOcQ(QVQb>H{?^1#{tcLRp~ABq(F1N-~>tBQPz#`?6;spyEylb62qsFW;|*s)0+ zf{Pi~Rimfh4@M|;!cM?!y%zffyE6uR2Uq%D_ZIi6<*i%Yd9;v|SzNET6S0xcZryHb zbhk`zI=>4U1!}&L+;!i54N^c-BJYtUYSwGQYZU$3{LK9#{TBQ-*VwjY_r&*vw(GYo zS=3lWS(sTKvQ!;jj*@NWX?Bjy zI(d+?41MG0FEB|riSNksZCl-1JvNg3J5;4#RgbgqarQ~_58q+p z2;|5zy3rm{SmaqetwZ&68FUdVWiNe;@!UAOA&qYtq1LwecSnzNf3{p8j@eR`hpPsB zUQ<8dBgdqJ%Y(1|fd4qY&tc#Dkg1R&?e@l7p@BO2cbRntbU4B~!wi}7U8<%j3=C&b^c8+D>V|*=~${c+s}aC9W@qyM^42oXDHFGhWEQE&IOOTchc8j>@NP zu?Yj;czM}DyNrgdEv{CH3<)Eyo>)k%T8h-N^sw38>R$bSan;NFARqi@Woenn~{>Vq?(V z3DzAEH5MTj^=QH98nC+aqCl2%zaCC4N#8sk1lxP!JO^l=p3_wO>edvlj(q>*q!#MOT2S4L z=r=@bM3?^Ri4miA$-t9fEtf6I?wP?GX@#v?GC!Pl-b@brJg4kJ6?oN^zB{e$_Lbgo zz5jgZNADtPi8QpfXDd+Z;c~mqJec@VZXWxiHr0n)v3lvrl8jbr(9E}^Ys-s;31-EMXyUjPWKkM{6D(XB04V?v2uR`g1HhG( z6qNuE;5P=Psb%yB!2QM`aN0fSe`63hgm$s`H>OIHmi~pQ(2wBkor}K1Ox(xfWcrHh%{JE7y#14I{d$9rYHiM>}3qp z!ol5~Q3yQHfJks763A|bIIIH^fy7ZxrNklPU^4bA7M21D1Fl^IATbVhIxhPnum-{3+e1)YNK_ zx3q1)$ldU<(OE%R>GW3IsYVo%MsCium(Xx_kq{H)XD*J=X}&Q@YeqeG-wfatd##sL z&D>BPkhLPNQk1|wyJ@7E#;D!v%X-WB_5EFi7vb|s*XKhZ!O3bZ#s=34ow)?fM5FK0 zKlMri(B+C=%w@63nbt}%+hAudQl|YX#^-2AZP|fl!GfqyCTdP)MR!J3sX8u-4O~*N zhQlZ4O?6JGIZQqVBChoYi)fA714OP@J{A_oPZBjra$gLO1M)ZnJ%RREt?RISdpTppU=* zjfHgK`lCAgKCGT`>h_$mgS^FxY5cw@N$g$QooL4ZecuNQ(TfY^9S4e$Z?V;b``%df z1`U;mKv@Q*EkVD@sDS94BtOz^t@9dC#D1Jcecq!toJUZk{s6A(zHMU{KRgi3e+BAP zmT5BSdB89}j#VEDIBj-t@99{HTItjo^8;6SgFUW5Tw}*V+$xv`?kGJQd(}y^o|rV6fLRF#<(g*KsgbW)`Mb&%d&YwOPH#&w`)-WYH~M6zJ}8}4tz>R zyV7@|Uk15m2z$KUVAVLDHDQ8-$!BS4{GJD~p}yltBBprdWqxmv0r|5cqu8<2TiG0J+IMc%IO zZbsjs#~wy}{it76WNcoq(RSkl6{=OdW!ODDyG{KjdM6t8QOL&J+@SiW6WjAQIdNKO zRr}pon{v#%(d**lqMUqD=yZ%5Brdl->6#rpbHRMwcxnz1_pG||IVioy?N->qM2QYu zF|yYJQ1CKGe}J?u`^i2NtLT)D-9V+3lvCA!F_+D^{q+mmdF<{!0j#F^W%TquQ}Nq%tIp#c8Ig`@9%`DEJc6h>ON(GhT;lR@bUd zOMTH65o+0uI=`8)h-&igxA$02Y2BQ`gfYG7hHa|UbLQ=3MOVxnB+f)9uqhq=NKc!L 
zWZ9+X4Bd=F934m_KY#!5VdzyMETofHprVim=Ei;ECAZUy8KoTW-~)1h|Fc#8+36{b{EtTayVL*C;6b`52RzUUNNMcGKpQdvV2R{L z4hDkskPaxgD%J~VB|$!umXrXJ8?(*dxPa2jfA?-OPQb10!g_Pq)%oj(4Ra5k(Pu0pFBg! z=`Wsnoa||8GIl1Sr%tBMFo8MhL9dImF}3#TI_-GzD_{m@owxU?_@U~iZt6zy`6nPk z(?Pjv@umDxVaJS0S7^D^2i@J+&nI7ZDCU}a-m0?QE!q(oHH5X^QtRgI3u+pGC<+YSvw}ehN|IIrP^F;iXsFmOM zR3ww!HY8Mms{`LXi`xIA}Hj2X_p2+C$&Qs%OYy5sz)Jddh zV!~SYSf-0GXmOpC$rz|+=htIg|7ra*+-o~gHh}Z|k0Q+duUsGh&bT z_zxn_8J0tV`bf|N4fT%v8^9aM6;^F*k#wo5;mUiNkM@|Fe&mf+C*K|4W9dz{pnk%4 zPC0;wO6Up;kipkMq<7u_WQ!Kfephr~YRcn-o;yGMVO2MNw4D(Mj74avL#kMfx>p|^ zT2e~^sWg;nmX*&h(y+ayef^fDii&BPhFV1^lGfcGJbj1#{xNkRCwovu*4YPlitR5y zr6*>wNQFS%&JuzV(^9g>#e+NSX(x^uOE6{x32y*8m6=1s+L$?vBM3mXHtttp7WdE3 z)6l807;-;2A*iORP0gXal4bmqyD)q?+i}rqQHXG^hU&ChWtNdMD~ZGB_(chZ{6@={ zENHqidjr`Mv7wxe?HdqTUhzAXZf(uxJm|3poL`$%od8erw z?eippmwt08}_wfX|ih>*dG%pZM#5iS3#W zOgM#`HzrjvmooXLE;kruU9q@AE40CGE;1-YxMFSOl^+R{GTkw?ex6cdk?p1KuD$t; z)wW&xZt;5~%O|F}1kwE^zl^x^*Dt$`2yh5c7^znP}J_?rE= zuuknD^m`toS*xB_R+-vDUN<5f(3BY8{nTLxoKKJ@9uWm=zSV@(Ncyz+82Nj?6=2tu_Hb}^dvvI`iz}|mdWUXi@@h05nUHL>Auu5zcu_8o3n}Y0AjnSI8|)q zb<6j0>n-c4>Y@dY9)FAzrnT}T!gC1zS7}M`sfw_ z!qxGyr>eZST(QU1FFV6yjW<{QM)x^6^K~ETMt9_Tw`}iipIf+ByN^A`+c;-~Npwi=HJ8a8e!dNnbcKWXKT^RfRV^8gS-` zw$CPC@l&uKnX$?t&ksG5*sv+#!1BQRKcPR*AG16*5%eYKnR4sfq+oxQyevAEAr+=O zU3WAZ75Ut>?c2WJuiZYqaB{&j>`re`@7L_zcQH-LVg9_a3`{3W7y{42*^HR`8iH!< z+wUaPmGIZ`ckr*Pr>bAi@yX@LWy?v+wQKciebB0nD82=2v5gvtWP49+Ma<>SrOg&_ zl0+w}-4q(HWy{Wx_~$gdqs2vsEz;}v_E?(*(*?Cy`|g78irrn8%TA~7-sul|QlVQ= zgJ&sLS{{UJr5U9SCbXifyLvJ!eA$2yxIDX6Y?HSXP83m+*;V1R_C-pvN zeiv4}RHsPuFh0fIRGTHA;Q&%0xebXZ>nw{Z%j-yg_|+OJ2w;^)s*Odzs%@4U>lqAs zul~vw@@OvEBysza%++1(3|xi;*1mddORwYdmj8BxZLnpdO`lbk)yvoRgeMChNM^l- zPl|kCD<$HViuEnnuHcH$3Y1HrP`S7$$gf)5(Z(R&yzfy?>9>Ftt7wiWjb<5W75eGN z$Z?@j*(%gpexV+(u9qOEZn3V2&J^|wrhYG}3zlT(Kb}yIeA$_e?lJE)pBY~HI8k{) z2qxYG;{C|;st$h~q17JyQ2ycS8$&qEs51Jc?kinOF6%KHq+vzHvOiJhoAy1Jo^P?^ z*ZlF3TJPGD)y>fX^8z!N;}4bYbx{Y8cFzY$9hKTF0~N0=D_-5OZw!$~ 
zOq5<$LjK$dc(cg24{r|bR^Pt(QLFWm|7DPp?Vi`+_2IWCqaehHn3R)Ry~e3wo`XD2xgQkNOP&KbID$wS zbrC2Wg6QgqMtT831QgnV47tJ)I4lMTx`jhh49T8w1cEHi06-2Xawt3s4+M#0umlIB zBM^l9l_Y{Z`J>VBzv0eAj1wG3bVb971Rx0OgvB6T$kE`xZpKq4cbmVdO220P--Gxc zveK_j0AxbEMfjyMQ9$yFp$~T@r$pWke%0j<8*L=o14%$3-~iC?x&SE^{ksa~g+n?5 zB!FNrK;o|t2$hzWlmlF@IrTD0vV1UknVDA@4r_z+|B0b>$zJtPEK# z`v)c`L0(G!7n2}Mc>l)a{)I_F$(jAbS4vj;Z@Ca~a1MbV;$+=vh; zpakGOe}iaiLqL5Z8cD|BaRx{-8jvy2gek#bP#6>ng{#0}>aq~f0`K+z-q~ZwSh|-j zU`#|3d@(2zU_~JlC}g^uH4*PcL7|C^TNOo^B9xAO&xL0|vXGM}A!wWzqYBv1Y5>qa zh=2w_eIyx)#rpuDHPQ!70+i_nw!l9QYQH!z>=@QWG>-hYIjE1vk?BVgp#I$*v_+Hf z6e5ZqPvw8&F${jR;|v<-L-qwUl$8O{2!kcl9{?I*>Af*Pqwrp6M%g4Xy=8t7YF4GA zIbK-pU?Z($!hhqi!?3HPkm(*~w!z@2;|GWYVWEUq<9Y6^y1QYf7?Hu?w2Vust=%6& zTQBn(+6rG_2Wp}t>Q7m2?o{orZYbBh4vaCAz4zzN=Re|T;N|A@>5?f-B!0YpBX0s_ zOlWDUPvLE10_(p@gMBi?90vu-kOjP^x{x;0`F?}ge&bRX_|2p# z+I3xge=E)@*YQ}QgPy=?2hQVwbALB*$jlU3v3FIoS)^bQ2|Jpw&#KB+4b!Y2&~3aH z+$<$m)v4;dW=T-}sAV7B7{HOnVjJ@*BuH%cRck`7WUT7eKv)HHJv23t9cn)^Fc=vt zc?Rl9l+X(^HTQ%?KDVi1IVkR5B*RxVys z%e=fco#q*?g?7k>hL7>%c%DBf+@c*kij!~Ic4t4W@L!s5*MDUN5vw-8zbs=B_{uy@ z(AMNy98c@BQ9+WOGQ_lW0R+l7xF3o7z+jDuoI<)Ue-s z79&vh#BPc^Yf0R)Mz!`3Clx$-GFF;jT`go1v6Nvbdn54S{PUfoIm>=!TOqwPB!yJs zyCVV@Fi$K|$rsdYxS2w2((PT&cps{}x~bIZBe2?)eRp0s*R^kYrC7OjI(=2@vRL2p z#r9C*B{{WDf%Mhoh8_j~SWw|ZTGYT@!IQQUZ`?%vlIBXU!`lsqx9q$uTxQigOxknP zFYiP-o~stcEo>M!XtqRme=6@5l1oqWdLVa_H)QEOzkA6ST-@{Lt<#y~t^TaxE9R{% z(mTn?-6R1ud`_i#QVtViBy1y^K8%hneuyBEF=s^i&9{v9S9K~wk7Y(kz9NqQ}unGhajXzuG) zwHeSiX?OLBu;j&$((BqsR|}iYMTAhxSWLMr(=RUtvC>NZIMtHSl@>9+>tSS)K&P0$ zf7idshQTF&{p&x-<}Z!`%`sjiz!hL{i!Fela}b_F=MpFYTA;l!NIg6ia8;t;sVXZ0 z%1~v*zvzfTCqL+e&Uz?2G#PLO>AXwNfTofW5YUKDMS!+8XaobGj3D313^b&YjqS(` z+SS%(grld#;)!+yBR_rG@c)zR7&ZOmy6hW)_I9KEakK`t2J5Sb6R-83 z_qE+?a;U*1RZ$MgcG#@>3X?e8z@A`WlUjHHCNmLPpr2YMo_KMOP1)Q&L4#p`0?&EQXELFC$`9g`A)oVr`zK%CHZ);?m4)` z#EfN{Ae5Ku@oyUK&Db0IA{m>(vnkL}#{qoVmK!>i+`_b*d#%=49Li*UGkJPtbJR^x zpi5oHF-xl2s_SNzibbE2S)aJ`1(-t`Y2}1#=z0d4Q)3ktqb=A&>YGF_O^r#vyQIP0 
zn4Ib%C@_6svliBP92HGAjXm9gpDGg!wS+UtTpd&VK(h0~Dxj{)z%D4f3u2lu={mPG z2s%BZ@$_)E_dRn_Y z7vCePt7+<2$xwZFHJo>Fp+VczzDdHDgCBC^Hbq&#QAGH;zDhR6gVziVdJ4|~XKqxw zKIMtupP?6fdNcRdCTHunJEOHX5?*d{_1|#Xdspn}>2MJy896S1EyPQ{e=&@`!PYwHD|s(<(G^|K^YssYPfCsKHd82vR4IQGCa*Zds=9QnWuSQRWnx& z6Xyi;UR{~^ef}QMi3_|}_80&Hypff8N3LBc_K?2ELdoM&iG~x7kfTr&D(b9?Q9T~} z#`f4MapXkGE0v(GI5MFBRQSdMPvXb81FbB(=Ql<(r??Hl?B1a$Gpt`V>9J5Tx;rEd@RxVsAOo$k%%+G$N9?lmJ5~(4W(#Y6zoVw7M3~v zt~nw+mw2e1Sn7#|RzX4zhj54N*GdVCmY5_Hs?^W9Xa>EY+gEBd|bQ%Nb554;v{N2pLwl^JDw}{M!Lx+PtHkh zpUe`kgZ!`zS zKimttxBT2}_~mn>;n(+-hr@=4hO6%f-hVT68gmm9bAIZ#PXp=?m&qJ{DLujqSywgU zrpHENRC`q_c4O@0Bnk-^0 z`&o)vst+y4o@nK5Ma1sJmRw4{Q{txxNaSHwXmi&52EJ;FVxQEg*TCwO9mqCSS1$Kq{ z6S>ZUPVKIK<+IgWzJB;@+_-O8XRIfWubuA@YV~1phQgm!9qTXMR@~myKAaAqh2eZ~ zoz<^UfkQdJiF!7*H2(Q5I=UeGX?`zqaG)7FP&Ts6lGFh~`FUpt>S+aO6bER2SQ{B_ zasCwg;){Slq4_QIq@IGHj@8Z8qtlmaXKRUWbBMdRgVhGr4LI9CL|{rt^fohR7-x=g zT4zkreV+%@x-=j65qI$tj*>sIKI>-dviP=FwEDJ?{vCS`yUZ<76*ki#UShu{k`@6j|rdb>YVT67dG{9`Qwk3B7gx6-_*2D_lRz>~v@XT@J#>;|W; zBx{dfm)0>0uy~!LoiUkXo4R7(%x}-0Q?ykya<|cv-Qrd5x~$Ie1|8<~)SLeHCR~MV z+ptRIRajj4v+~69J3U$H^KNh{2)8O)e>CY)eY?siZ8);c;E_A**1H>aX{*Q8G&f9g zh&f7lkJ`}{iyrBfu+=8_D3=z*fNP#>X_W{0?sWIo+5QK29|k+ll_DilwEp|y(M2&+_wfm^rBK8Me_Ch;X2wyVKwu=ifYzm$2QUV~XEd|)AJPL&cc zFE*DqdxQUgYuvov>v;WK*vqRG=+bBTSenB#hqohhug0p6$v7&~K+#tsj~Ym1l>jvXzR+#*)WRPXOl~g79qvn^Mx<}tPiR$`Lz6E$f4-0 zqss=a#AC5HxM{18r64vj>}Ht8wg}uvZ9SrQtFG3x;C{u&*6_FCAEV-#!+KMCdlhae zpo<9)?@(4ep0xD5p+*dicVHjli>lK__QpxZi9}>>&VO#zjSIP4no<$wvx)Fytt`01 zGwb$RDSb^b2fkCUz^m3HFVW~nQ_=E zo~|b0Xk*SKfw1bSoWX{~?OPi{;VL_2h*?1A@<6+X^Dfdn4SJRQSjH32a^1E)^&!$ELo2Xx5N_<+X-`v)|4wYn{yVvw$ zcW?o{NPe-l<0x3-?VkSFCW17fFz2^he>0FrxpuK^QU27;@R=`1)|MBFlC5&XnKxoq zw|^P6HGYnvkiT`kA&#!0F*v%gXN;}?oE$MYT^tGX1Au@A7;kShUCcAox+|cf3i@Gi z6cV5T2mL8{G8&8aCPSbqDxephzJDN*FaYR7Lfa`0E~|Qdl5nyRRCy*2}aZ33xK9LZ#-R{G5P`8;_+mFu|@jc8xk2wB!BOW PimIA2L`urg$_Vlw%8*fe literal 0 HcmV?d00001 diff --git a/analysis.R b/analysis.R new file mode 100644 index 0000000..d5e0afe --- /dev/null +++ 
b/analysis.R
@@ -0,0 +1,86 @@
+# Analysis script for the R project
+# Max Zupfer, Roman Fresquez, Rey Ortiz Bautista
+# Introduction to Biocomputing
+
+# Project directory on the authors' computer. Edit this single line to run the
+# analysis elsewhere; every other path below is built from it.
+project_dir <- "/Users/maxwellzupfer/Desktop/Biocomputing/Rproject"
+setwd(project_dir)
+
+# Load plotting packages
+library(ggplot2)
+library(cowplot)
+
+# Source the supportingFunctions.R script
+source("supportingFunctions.R")
+
+# Change all country Y files from .txt to .csv.
+# General usage: TxtToCsv(directory)
+TxtToCsv(file.path(project_dir, "countryY"))
+
+# Make the large CSV for both countries; the user decides what to do with NAs.
+# General usage: Combined_CSV(directory1, directory2, directory3)
+Combined_CSV(
+  file.path(project_dir, "countryX"),
+  file.path(project_dir, "countryY"),
+  project_dir
+)
+
+# Load the combined data that Combined_CSV wrote so the steps below can use it
+All_Data_Both <- read.csv(file.path(project_dir, "All_Data_Both.csv"))
+
+# Create a summary of the data from both countries.
+# General usage: Data_Summary(file)
+Data_Summary(All_Data_Both)
+
+# Question 1: In which country (X or Y) did the disease outbreak likely begin?
+# We determined that the disease originated in Country X.
+# This conclusion was drawn from the graph created by the code that follows,
+# which shows the cumulative number of infected patients in each country over
+# time. For the first twenty days of testing, Country Y screenings returned no
+# positive tests, while Country X had already reported ~4000 cases by then.
+# This leads us to believe that the disease outbreak began in Country X.
+
+# Code to make the graph that demonstrates the answer to question 1.
+# Marker columns are found by name so the code does not depend on their position.
+marker_cols <- Marker_Columns(All_Data_Both)
+# A patient counts as infected when at least one marker is present
+marker_totals <- rowSums(All_Data_Both[, marker_cols, drop = FALSE], na.rm = TRUE)
+is_infected <- marker_totals > 0
+# Count the infected patients for every day of the year in each country
+outbreak <- aggregate(
+  data.frame(Patients = is_infected),
+  by = list(dayofyear = All_Data_Both$dayofyear, Country = All_Data_Both$country),
+  FUN = sum
+)
+# Order the days within each country so the running total is chronological
+outbreak <- outbreak[order(outbreak$Country, outbreak$dayofyear), ]
+# Cumulative number of infected patients per country
+outbreak$totalPatients <- ave(outbreak$Patients, outbreak$Country, FUN = cumsum)
+
+# Generate the ggplot
+ggplot(
+  data = outbreak,
+  aes(x = dayofyear, y = totalPatients, group = Country, color = Country)
+) +
+  geom_line(linewidth = 0.5) +
+  theme_classic() +
+  ggtitle("Cumulative Infections Over Time") +
+  xlab("Day Number") +
+  ylab("Total Infected Patients")
+# Plot is saved under "Infections Per Country Over Time Graph.pdf"
+
+# Question 2: If Country Y develops a vaccine for the disease, is it likely to
+# work for citizens of Country X?
+# We have reason to believe that a vaccine manufactured for Country Y would not
+# be effective in Country X. This assumption follows from the graph created by
+# the function Vaccine_Question, which shows that most infected people in
+# Country X carry markers 1-5 while most in Country Y carry markers 6-10.
+# This is evidence that the disease-causing agents in the two countries contain
+# different proteins, so a vaccine from Country Y would raise an immune response
+# to proteins that aren't as prevalent in the disease-causing agent in Country X.
+# General usage: Vaccine_Question(file)
+Vaccine_Question(All_Data_Both)
+# The graphical support for this question is "Vaccine Graph.pdf", which shows
+# that markers 01-05 are dominated by the Country X outbreak and markers 06-10
+# by the Country Y outbreak.
diff --git a/supportingFunctions.R b/supportingFunctions.R
new file mode 100644
index 0000000..8e60a05
--- /dev/null
+++ b/supportingFunctions.R
@@ -0,0 +1,166 @@
+# Supporting functions for the analysis of the country disease outbreak R project
+# Max Zupfer, Roman Fresquez, Rey Ortiz Bautista
+# Introduction to Biocomputing
+
+# Convert every .txt file in a directory to a comma-delimited .csv file.
+#   directory: folder holding the whitespace-delimited .txt files; each .csv is
+#              written next to its .txt source.
+# Returns the paths of the .csv files written (invisibly).
+TxtToCsv <- function(directory) {
+  # Anchored pattern so only file names ending in ".txt" are matched
+  txt_files <- list.files(directory, pattern = "\\.txt$", full.names = TRUE)
+  csv_files <- sub("\\.txt$", ".csv", txt_files)
+  # seq_along() skips the loop cleanly when the directory has no .txt files
+  for (i in seq_along(txt_files)) {
+    screen <- read.table(file = txt_files[i], header = TRUE, sep = "")
+    write.table(
+      screen,
+      file = csv_files[i], row.names = FALSE, quote = FALSE, sep = ","
+    )
+  }
+  invisible(csv_files)
+}
+
+# Read every .csv screen file in a directory into one data frame.
+#   directory: folder holding the screen_NNN.csv files of one country.
+#   country:   label stored in the country column ("X" or "Y").
+# The day of year is the first run of digits in each file name (screen_120.csv
+# gives 120). Returns NULL when the directory holds no .csv files.
+Read_Country_Screens <- function(directory, country) {
+  screen_files <- list.files(directory, pattern = "\\.csv$", full.names = TRUE)
+  screens <- lapply(screen_files, function(path) {
+    screen <- read.csv(path)
+    day <- as.numeric(sub("^[^0-9]*([0-9]+).*$", "\\1", basename(path)))
+    # rep() keeps the assignment valid for a screen file with no data rows
+    screen$country <- rep(country, nrow(screen))
+    screen$dayofyear <- rep(day, nrow(screen))
+    screen
+  })
+  # Bind once at the end instead of growing a data frame inside a loop
+  do.call(rbind, screens)
+}
+
+# Combine the screens of Country X and Country Y into one large .csv file.
+#   directory1: folder holding the Country X .csv screens.
+#   directory2: folder holding the Country Y .csv screens.
+#   directory3: folder in which All_Data_Both.csv is written.
+#   na_action:  what to do with rows containing NA: 1 removes them, 2 keeps
+#               them and warns when any are present, 3 keeps them silently.
+#               When NULL (the default) the user is asked to type the number.
+# Returns the combined data frame (invisibly).
+Combined_CSV <- function(directory1, directory2, directory3, na_action = NULL) {
+  All_DataX <- Read_Country_Screens(directory1, "X")
+  All_DataY <- Read_Country_Screens(directory2, "Y")
+  All_Data_Both <- rbind(All_DataX, All_DataY)
+  if (is.null(All_Data_Both)) {
+    stop("No .csv screen files were found in directory1 or directory2", call. = FALSE)
+  }
+  # Allow the user to decide what to do with NA rows
+  if (is.null(na_action)) {
+    print("What would you like to do with variables with NA in dataset?")
+    print("Type 1 to remove NA rows")
+    print("Type 2 to keep NA rows but display a warning")
+    print("Type 3 to keep the rows with NA without warning")
+    na_action <- readline(prompt = "Type Number: ")
+  }
+  na_action <- trimws(as.character(na_action))
+  # Reject any other answer instead of silently writing no file
+  if (length(na_action) != 1 || !na_action %in% c("1", "2", "3")) {
+    stop("The NA option must be 1, 2 or 3", call. = FALSE)
+  }
+  if (na_action == "1") {
+    All_Data_Both <- na.omit(All_Data_Both)
+  } else if (na_action == "2" && anyNA(All_Data_Both)) {
+    warning("Rows with no data are present", call. = FALSE)
+  }
+  # row.names = FALSE keeps a meaningless row-number column out of the file
+  write.csv(
+    All_Data_Both,
+    file = file.path(directory3, "All_Data_Both.csv"), row.names = FALSE
+  )
+  invisible(All_Data_Both)
+}
+
+# Names of the marker columns (marker01 ... marker10) of a data frame.
+# Selecting them by name keeps the functions below correct whether or not the
+# data carries an extra leading row-number column.
+Marker_Columns <- function(file) {
+  marker_cols <- grep("^marker[0-9]+$", names(file), value = TRUE)
+  if (length(marker_cols) == 0) {
+    stop("The data has no marker columns", call. = FALSE)
+  }
+  marker_cols
+}
+
+# Data summary: prints the total screens, the screens per country, the male and
+# female patients and the percentage infected, then draws a histogram of the
+# age distribution of patients.
+#   file: data frame of all screens (gender, age, marker, country columns).
+Data_Summary <- function(file) {
+  # Remove all people over the age of 120 as that is likely impossible; rows
+  # with a missing age are kept instead of being turned into all-NA rows
+  file <- file[is.na(file$age) | file$age <= 120, ]
+  marker_cols <- Marker_Columns(file)
+  # Total number of screens run, overall and for each country
+  Total_Screens <- nrow(file)
+  TotalX_Screens <- sum(file$country == "X", na.rm = TRUE)
+  TotalY_Screens <- sum(file$country == "Y", na.rm = TRUE)
+  # Number of male and female patients screened
+  Male_Patients <- sum(file$gender == "male", na.rm = TRUE)
+  Female_Patients <- sum(file$gender == "female", na.rm = TRUE)
+  # A screen is infected when at least one marker is present
+  marker_totals <- rowSums(file[, marker_cols, drop = FALSE], na.rm = TRUE)
+  total_infected <- sum(marker_totals >= 1)
+  # Multiply by 100 so the printed value really is a percentage
+  Percentage_Infected <- if (Total_Screens > 0) {
+    100 * total_infected / Total_Screens
+  } else {
+    NA_real_
+  }
+  # Plot the filtered data that was passed in rather than a global object
+  Age_Distribution_Plot <- ggplot2::ggplot(file, ggplot2::aes(x = age)) +
+    ggplot2::geom_histogram(binwidth = 1) +
+    ggplot2::theme_classic() +
+    ggplot2::ggtitle("Age Distribution of Patients") +
+    ggplot2::xlab("Age") +
+    ggplot2::ylab("Count")
+
+  print(paste("The Total Screens taken was", Total_Screens))
+  print(paste("The total screens in Country X was", TotalX_Screens))
+  print(paste("The total screens in Country Y was", TotalY_Screens))
+  print(paste("The total females screened was", Female_Patients))
+  print(paste("The total males screened was", Male_Patients))
+  print(paste("The total screens infected was", total_infected))
+  print(paste("The percentage of screens infected was", Percentage_Infected))
+  print(Age_Distribution_Plot)
+}
+
+# Function that answers question 2: counts, for each country, the patients that
+# were positive for each marker and draws the counts as a stacked bar chart.
+#   file: data frame of all screens (marker and country columns).
+# Returns the ggplot object.
+Vaccine_Question <- function(file) {
+  marker_cols <- Marker_Columns(file)
+  # Sum of every marker column over the rows of one country
+  count_markers <- function(country) {
+    rows <- which(file$country == country)
+    unname(colSums(file[rows, marker_cols, drop = FALSE], na.rm = TRUE))
+  }
+  # One row for each marker in each country
+  Marker_Counts <- data.frame(
+    Marker = rep(marker_cols, times = 2),
+    Count = c(count_markers("X"), count_markers("Y")),
+    Country = rep(c("X", "Y"), each = length(marker_cols))
+  )
+  # We chose to display the data as one bar for each marker as we believe it
+  # showed which country dominated for each marker better
+  ggplot2::ggplot(
+    data = Marker_Counts,
+    ggplot2::aes(x = Marker, y = Count, fill = Country)
+  ) +
+    ggplot2::geom_bar(stat = "identity") +
+    ggplot2::ggtitle("Markers Found in Each Country") +
+    ggplot2::xlab("Marker Numbers") +
+    ggplot2::ylab("Number of Patients") +
+    ggplot2::theme_classic()
+}