From 4d7be916c31036bf859d2704e806f2eced083c64 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 9 Jan 2019 06:42:06 +0000 Subject: [PATCH 001/828] initial commit --- .gitignore | 36 +++ LICENSE | 674 +++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 2 + 3 files changed, 712 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..26fad6fa --- /dev/null +++ b/.gitignore @@ -0,0 +1,36 @@ +# History files +.Rhistory +.Rapp.history + +# Session Data files +.RData + +# Example code in package build process +*-Ex.R + +# Output files from R CMD build +/*.tar.gz + +# Output files from R CMD check +/*.Rcheck/ + +# RStudio files +.Rproj.user/ + +# produced vignettes +vignettes/*.html +vignettes/*.pdf + +# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 +.httr-oauth + +# knitr and R markdown default cache directories +/*_cache/ +/cache/ + +# Temporary files created by R markdown +*.utf8.md +*.knit.md + +# Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html +rsconnect/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..f288702d --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. diff --git a/README.md b/README.md new file mode 100644 index 00000000..9b3e57ea --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +# epichains +Methods for analysing the distribution of epidemiological chain sizes and lengths From 172fef08b94d1443390d6c28ea42e94f06c11c5f Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 9 Jan 2019 06:45:32 +0000 Subject: [PATCH 002/828] function code --- R/borel.r | 25 ++++++++ R/likelihoods.R | 153 ++++++++++++++++++++++++++++++++++++++++++++++++ R/simulate.r | 43 ++++++++++++++ 3 files changed, 221 insertions(+) create mode 100644 R/borel.r create mode 100644 R/likelihoods.R create mode 100644 R/simulate.r diff --git a/R/borel.r b/R/borel.r new file mode 100644 index 00000000..a03e34f3 --- /dev/null +++ b/R/borel.r @@ -0,0 +1,25 @@ +##' Density of the Borel distribution +##' +##' @param x vector of integers. +##' @param mu mu parameter. +##' @param log logical; if TRUE, probabilities p are given as log(p). +##' @return probability mass. +##' @author Sebastian Funk +dborel <- function(x, mu, log=FALSE) { + if (x < 1) stop("'x' must be greater than 0") + ld <- -mu * x + (x - 1) * log(mu * x) - lgamma(x + 1) + if (!log) ld <- exp(ld) + return(ld) +} + +##' Generate random numbers from the Borel distribution +##' +##' Random numbers are generated by simulating from a Poisson branching process +##' @param n number of random variates to generate. +##' @param mu mu parameter. +##' @param infinite any number to treat as infinite; simulations will be stopped if this number is reached +##' @return vector of random numbers +##' @author Sebastian Funk +rborel <- function(n, mu, infinite=Inf) { + chain_sim(n, "pois", "size", infinite=infinite, lambda=mu) +} diff --git a/R/likelihoods.R b/R/likelihoods.R new file mode 100644 index 00000000..364ca519 --- /dev/null +++ b/R/likelihoods.R @@ -0,0 +1,153 @@ +##' Likelihood of the size of chains with Poisson offspring distribution +##' +##' @param x vector of sizes +##' @param lambda rate of the Poisson distributino +##' @return log-likelihood values +##' @author Sebastian Funk +pois_size_ll <- function(x, lambda) +{ + (x - 1) * log(lambda) - lambda * x + (x - 2) * log(x) - lgamma(x) +} + +##' Likelihood of the size of chains with Negative-Binomial offspring distribution +##' +##' @param x vector of sizes +##' @param size the dispersion parameter (often called \code{k} in ecological applications) +##' @param prob probability of success (in the parameterisation with \code{prob}, see also \code{\link[stats]{NegBinomial}}) +##' @param mu mean parameter +##' @return log-likelihood values +##' @author Sebastian Funk +nbinom_size_ll <- function(x, size, prob, mu) +{ + if (!missing(prob)) { + if (!missing(mu)) stop("'prob' and 'mu' both specified") + mu <- size * (1 - prob) / prob + } + lgamma(size * x + (x - 1)) - (lgamma(size * x) + lgamma(x + 1)) + + (x - 1) * log (mu / size) - + (size * x + (x - 1)) * log(1 + mu / size) +} + +##' Likelihood of the size of chains with gamma-Borel offspring distribution +##' +##' @param x vector of sizes +##' @param size the dispersion parameter (often called \code{k} in ecological applications) +##' @param prob probability of success (in the parameterisation with \code{prob}, see also \code{\link[stats]{NegBinomial}}) +##' @param mu mean parameter +##' @return log-likelihood values +##' @author Sebastian Funk +gborel_size_ll <- function(x, size, prob, mu) { + if (!missing(prob)) { + if (!missing(mu)) stop("'prob' and 'mu' both specified") + mu <- size * (1 - prob) / prob + } + lgamma(size + x - 1) - (lgamma(x + 1) + lgamma(size)) - size * log(mu / size) + + (x - 1) * log(x) - (size + x - 1) * log(x + size / mu) +} + +##' Likelihood of the length of chains with Poisson offspring distribution +##' +##' @param x vector of sizes +##' @param lambda rate of the Poisson distributino +##' @return log-likelihood values +##' @author Sebastian Funk +pois_length_ll <- function(x, lambda) { + + ## iterated exponential function + arg <- exp(lambda * exp(-lambda)) + itex <- 1 + for (i in seq_len(max(x))) itex <- c(itex, arg ^ itex[i]) + + Gk <- c(0, exp(-lambda) * itex) ## set G_{0}=1 + + log(Gk[x + 1] - Gk[x]) +} + +##' Likelihood of the length of chains with geometric offspring distribution +##' +##' @param x vector of sizes +##' @param prob probability of the geometric distribution with mean \code{1/prob} +##' @return log-likelihood values +##' @author Sebastian Funkgeom_length_ll <- function(x, prob) { +geom_length_ll <- function(x, prob) { + + lambda <- 1 / prob + ## G(k) - G(k - 1) + GkmGkm1 <- (1 - lambda ^ (x)) / (1 - lambda ^ (x + 1)) - + (1 - lambda ^ (x - 1)) / (1 - lambda ^ (x)) + + log(GkmGkm1) +} + +##' Likelihood of the length of chains with generic offspring distribution +##' +##' The likelihoods are calculated with a crude approximation using simulated +##' chains by linearly approximating any missing values in the empirical +##' cumulative distribution function (ecdf). +##' @param x vector of sizes +##' @param ... any paramaters to pass to \code{\link{chain_sim}} +##' @return log-likelihood values +##' @author Sebastian Funkgeom_length_ll <- function(x, prob) { +##' @inheritParams chain_ll chain_sim +offspring_ll <- function(x, offspring, stat, n=100, ...) { + + dist <- chain_sim(n, offspring, stat, ...) + + ## linear approximation + f <- ecdf(dist) + acdf <- diff(c(0, approx(unique(dist), f(unique(dist)), seq_len(max(dist[is.finite(dist)])))$y)) + lik <- acdf[x] + lik[is.na(lik)] <- 0 + log(lik) +} + +##' Likelihood for the outcome of a branching process +##' +##' @param x vector of sizes or lengths of transmission chains +##' @param stat statistic given as \code{x} ("size" or "length" of chains) +##' @param infinite any chains of this size/length will be treated as infinite +##' @param exclude any sizes/lengths to exclude from the likelihood calculation +##' @param ... parameters for the offspring distribution +##' @return likelihood +##' @inheritParams chain_sim +##' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll geom_length_ll offspring_ll +##' @author Sebastian Funk +chain_ll <- function(x, offspring, stat=c("size", "length"), infinite = Inf, exclude, ...) +{ + stat <- match.arg(stat) + + if (any(x >= infinite)) { + calc_sizes <- seq_len(infinite) + } else { + calc_sizes <- unique(c(1, x)) + } + + ## first, get likelihood function as given by `offspring` and `stat`` + likelihoods <- c() + ll_func <- paste(offspring, stat, "ll", sep="_") + if (exists(ll_func)) { + func <- get(ll_func) + if (!is.function(func)) stop("'", ll_func, "' is not a function.") + likelihoods[calc_sizes] <- func(calc_sizes, ...) + } else { + likelihoods[calc_sizes] <- offspring_ll(calc_sizes, offspring, stat, ...) + } + + if (!missing(exclude)) { + likelihoods <- likelihoods - log(-expm1(sum(likelihoods[exclude]))) + likelihoods[exclude] <- -Inf + } + + sexpl <- sum(exp(likelihoods), na.rm = TRUE) + if (sexpl < 1) { + maxl <- log(1 - sum(exp(likelihoods), na.rm = TRUE)) + } else { + maxl <- -Inf + } + likelihoods <- c(likelihoods, maxl) + + x[x > infinite] <- infinite + 1 + chain_likelihoods <- likelihoods[x] + + return(sum(chain_likelihoods)) +} diff --git a/R/simulate.r b/R/simulate.r new file mode 100644 index 00000000..f43e02e8 --- /dev/null +++ b/R/simulate.r @@ -0,0 +1,43 @@ +##' Simulate chains using a branching process +##' +##' @param n number of simulations to run. +##' @param offspring offspring distribution as character string, e.g. "pois" for +##' the Poisson offspring distribution. +##' @param stat statistic to calculate ("size" or "length" of chains) +##' @param infinite a size or length from which the size/length is to be considered infinite +##' @param ... parameters of the offspring distribution +##' @return a vector of sizes/lengths +##' @author Sebastian Funk +chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ...) { + + stat <- match.arg(stat) + + ## first, get random function as given by `offspring` + random_func <- paste0("r", offspring) + if (!exists(random_func)) stop("Random sampling function '", random_func, "' does not exist.") + func <- get(random_func) + if (!is.function(func)) stop("'", random_func, "' is not a function.") + + ## next, simulate n chains + dist <- c() + for (i in seq_len(n)) { + stat_track <- 1 ## variable to track length or size (depending on `stat`) + state <- 1 + while (state > 0 && state < infinite) { + offspring <- sum(func(n=state, ...)) + if (stat=="size") { + stat_track <- stat_track + offspring + } else if (stat=="length"){ + if (offspring > 0) stat_track <- stat_track + 1 + } else { + stop("Unknown statistic: '", stat, "'.") + } + state <- offspring + } + if (state >= infinite) stat_track <- Inf + dist[i] <- stat_track + } + + return(dist) +} + From 9e0d38d5ad6bbf2a5cd51a2c156390e28442a33a Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 10 Jan 2019 23:30:34 +0000 Subject: [PATCH 003/828] use more robust log1p for maximal likelihood --- R/likelihoods.R | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index 364ca519..24828c29 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -138,15 +138,11 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), infinite = Inf, exc likelihoods[exclude] <- -Inf } - sexpl <- sum(exp(likelihoods), na.rm = TRUE) - if (sexpl < 1) { - maxl <- log(1 - sum(exp(likelihoods), na.rm = TRUE)) - } else { - maxl <- -Inf + if (any(x >= infinite)) { + maxl <- log1p(-sum(exp(likelihoods), na.rm = TRUE)) + likelihoods <- c(likelihoods, maxl) + x[x > infinite] <- infinite + 1 } - likelihoods <- c(likelihoods, maxl) - - x[x > infinite] <- infinite + 1 chain_likelihoods <- likelihoods[x] return(sum(chain_likelihoods)) From 2ec446e259aba5f6e456b176557dd7475d074b11 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 10 Jan 2019 23:30:56 +0000 Subject: [PATCH 004/828] convert vectors of parameters to lists in chain_ll --- R/likelihoods.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index 24828c29..0db365f0 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -125,12 +125,15 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), infinite = Inf, exc ## first, get likelihood function as given by `offspring` and `stat`` likelihoods <- c() ll_func <- paste(offspring, stat, "ll", sep="_") + pars <- as.list(unlist(list(...))) ## converts vectors to lists if (exists(ll_func)) { func <- get(ll_func) if (!is.function(func)) stop("'", ll_func, "' is not a function.") - likelihoods[calc_sizes] <- func(calc_sizes, ...) + likelihoods[calc_sizes] <- do.call(func, c(list(x=calc_sizes), pars)) } else { - likelihoods[calc_sizes] <- offspring_ll(calc_sizes, offspring, stat, ...) + likelihoods[calc_sizes] <- + do.call(offspring_ll, + c(list(x=calc_sizes, offspring=offspring, stat=stat), pars)) } if (!missing(exclude)) { From b0931e6d9ad602692e60dc2a2af48a2db5d41a1d Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 10 Jan 2019 23:31:25 +0000 Subject: [PATCH 005/828] add documentation --- DESCRIPTION | 10 ++++++++++ NAMESPACE | 2 ++ R/likelihoods.R | 11 ++++++----- man/chain_ll.Rd | 35 +++++++++++++++++++++++++++++++++++ man/chain_sim.Rd | 30 ++++++++++++++++++++++++++++++ man/dborel.Rd | 24 ++++++++++++++++++++++++ man/gborel_size_ll.Rd | 26 ++++++++++++++++++++++++++ man/geom_length_ll.Rd | 22 ++++++++++++++++++++++ man/nbinom_size_ll.Rd | 26 ++++++++++++++++++++++++++ man/offspring_ll.Rd | 31 +++++++++++++++++++++++++++++++ man/pois_length_ll.Rd | 22 ++++++++++++++++++++++ man/pois_size_ll.Rd | 22 ++++++++++++++++++++++ man/rborel.Rd | 24 ++++++++++++++++++++++++ 13 files changed, 280 insertions(+), 5 deletions(-) create mode 100644 DESCRIPTION create mode 100644 NAMESPACE create mode 100644 man/chain_ll.Rd create mode 100644 man/chain_sim.Rd create mode 100644 man/dborel.Rd create mode 100644 man/gborel_size_ll.Rd create mode 100644 man/geom_length_ll.Rd create mode 100644 man/nbinom_size_ll.Rd create mode 100644 man/offspring_ll.Rd create mode 100644 man/pois_length_ll.Rd create mode 100644 man/pois_size_ll.Rd create mode 100644 man/rborel.Rd diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 00000000..28fca7e4 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,10 @@ +Package: epichains +Version: 0.1 +Title: Analysis of transmission chains +Authors@R: c(person("Sebastian", "Funk", email = "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre"))) +Description: Performs analysis of chain sizes +License: GPL-3 +URL: https://github.com/sbfnk/epichains +BugReports: https://github.com/sbfnk/epichains/issues +NeedsCompilation: no +RoxygenNote: 6.1.1 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 00000000..6ae92683 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,2 @@ +# Generated by roxygen2: do not edit by hand + diff --git a/R/likelihoods.R b/R/likelihoods.R index 0db365f0..d47da195 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -68,7 +68,7 @@ pois_length_ll <- function(x, lambda) { ##' @param x vector of sizes ##' @param prob probability of the geometric distribution with mean \code{1/prob} ##' @return log-likelihood values -##' @author Sebastian Funkgeom_length_ll <- function(x, prob) { +##' @author Sebastian Funk geom_length_ll <- function(x, prob) { lambda <- 1 / prob @@ -87,8 +87,9 @@ geom_length_ll <- function(x, prob) { ##' @param x vector of sizes ##' @param ... any paramaters to pass to \code{\link{chain_sim}} ##' @return log-likelihood values -##' @author Sebastian Funkgeom_length_ll <- function(x, prob) { -##' @inheritParams chain_ll chain_sim +##' @author Sebastian Funk +##' @inheritParams chain_ll +##' @inheritParams chain_sim offspring_ll <- function(x, offspring, stat, n=100, ...) { dist <- chain_sim(n, offspring, stat, ...) @@ -104,15 +105,15 @@ offspring_ll <- function(x, offspring, stat, n=100, ...) { ##' Likelihood for the outcome of a branching process ##' ##' @param x vector of sizes or lengths of transmission chains +##' @param ... parameters for the offspring distribution ##' @param stat statistic given as \code{x} ("size" or "length" of chains) ##' @param infinite any chains of this size/length will be treated as infinite ##' @param exclude any sizes/lengths to exclude from the likelihood calculation -##' @param ... parameters for the offspring distribution ##' @return likelihood ##' @inheritParams chain_sim ##' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll geom_length_ll offspring_ll ##' @author Sebastian Funk -chain_ll <- function(x, offspring, stat=c("size", "length"), infinite = Inf, exclude, ...) +chain_ll <- function(x, offspring, ..., stat=c("size", "length"), infinite = Inf, exclude) { stat <- match.arg(stat) diff --git a/man/chain_ll.Rd b/man/chain_ll.Rd new file mode 100644 index 00000000..450be841 --- /dev/null +++ b/man/chain_ll.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods.R +\name{chain_ll} +\alias{chain_ll} +\title{Likelihood for the outcome of a branching process} +\usage{ +chain_ll(x, offspring, stat = c("size", "length"), infinite = Inf, + exclude, ...) +} +\arguments{ +\item{x}{vector of sizes or lengths of transmission chains} + +\item{offspring}{offspring distribution as character string, e.g. "pois" for +the Poisson offspring distribution.} + +\item{stat}{statistic given as \code{x} ("size" or "length" of chains)} + +\item{infinite}{any chains of this size/length will be treated as infinite} + +\item{exclude}{any sizes/lengths to exclude from the likelihood calculation} + +\item{...}{parameters for the offspring distribution} +} +\value{ +likelihood +} +\description{ +Likelihood for the outcome of a branching process +} +\seealso{ +pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll geom_length_ll offspring_ll +} +\author{ +Sebastian Funk +} diff --git a/man/chain_sim.Rd b/man/chain_sim.Rd new file mode 100644 index 00000000..e3d09a24 --- /dev/null +++ b/man/chain_sim.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate.r +\name{chain_sim} +\alias{chain_sim} +\title{Simulate chains using a branching process} +\usage{ +chain_sim(n, offspring, stat = c("size", "length"), infinite = Inf, + ...) +} +\arguments{ +\item{n}{number of simulations to run.} + +\item{offspring}{offspring distribution as character string, e.g. "pois" for +the Poisson offspring distribution.} + +\item{stat}{statistic to calculate ("size" or "length" of chains)} + +\item{infinite}{a size or length from which the size/length is to be considered infinite} + +\item{...}{parameters of the offspring distribution} +} +\value{ +a vector of sizes/lengths +} +\description{ +Simulate chains using a branching process +} +\author{ +Sebastian Funk +} diff --git a/man/dborel.Rd b/man/dborel.Rd new file mode 100644 index 00000000..14d269d0 --- /dev/null +++ b/man/dborel.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/borel.r +\name{dborel} +\alias{dborel} +\title{Density of the Borel distribution} +\usage{ +dborel(x, mu, log = FALSE) +} +\arguments{ +\item{x}{vector of integers.} + +\item{mu}{mu parameter.} + +\item{log}{logical; if TRUE, probabilities p are given as log(p).} +} +\value{ +probability mass. +} +\description{ +Density of the Borel distribution +} +\author{ +Sebastian Funk +} diff --git a/man/gborel_size_ll.Rd b/man/gborel_size_ll.Rd new file mode 100644 index 00000000..1e6c2fc4 --- /dev/null +++ b/man/gborel_size_ll.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods.R +\name{gborel_size_ll} +\alias{gborel_size_ll} +\title{Likelihood of the size of chains with gamma-Borel offspring distribution} +\usage{ +gborel_size_ll(x, size, prob, mu) +} +\arguments{ +\item{x}{vector of sizes} + +\item{size}{the dispersion parameter (often called \code{k} in ecological applications)} + +\item{prob}{probability of success (in the parameterisation with \code{prob}, see also \code{\link[stats]{NegBinomial}})} + +\item{mu}{mean parameter} +} +\value{ +log-likelihood values +} +\description{ +Likelihood of the size of chains with gamma-Borel offspring distribution +} +\author{ +Sebastian Funk +} diff --git a/man/geom_length_ll.Rd b/man/geom_length_ll.Rd new file mode 100644 index 00000000..428bd355 --- /dev/null +++ b/man/geom_length_ll.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods.R +\name{geom_length_ll} +\alias{geom_length_ll} +\title{Likelihood of the length of chains with geometric offspring distribution} +\usage{ +geom_length_ll(x, prob) +} +\arguments{ +\item{x}{vector of sizes} + +\item{prob}{probability of the geometric distribution with mean \code{1/prob}} +} +\value{ +log-likelihood values +} +\description{ +Likelihood of the length of chains with geometric offspring distribution +} +\author{ +Sebastian Funk +} diff --git a/man/nbinom_size_ll.Rd b/man/nbinom_size_ll.Rd new file mode 100644 index 00000000..4d58ee7f --- /dev/null +++ b/man/nbinom_size_ll.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods.R +\name{nbinom_size_ll} +\alias{nbinom_size_ll} +\title{Likelihood of the size of chains with Negative-Binomial offspring distribution} +\usage{ +nbinom_size_ll(x, size, prob, mu) +} +\arguments{ +\item{x}{vector of sizes} + +\item{size}{the dispersion parameter (often called \code{k} in ecological applications)} + +\item{prob}{probability of success (in the parameterisation with \code{prob}, see also \code{\link[stats]{NegBinomial}})} + +\item{mu}{mean parameter} +} +\value{ +log-likelihood values +} +\description{ +Likelihood of the size of chains with Negative-Binomial offspring distribution +} +\author{ +Sebastian Funk +} diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd new file mode 100644 index 00000000..aab921f4 --- /dev/null +++ b/man/offspring_ll.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods.R +\name{offspring_ll} +\alias{offspring_ll} +\title{Likelihood of the length of chains with generic offspring distribution} +\usage{ +offspring_ll(x, offspring, stat, n = 100, ...) +} +\arguments{ +\item{x}{vector of sizes} + +\item{offspring}{offspring distribution as character string, e.g. "pois" for +the Poisson offspring distribution.} + +\item{stat}{statistic given as \code{x} ("size" or "length" of chains)} + +\item{n}{number of simulations to run.} + +\item{...}{any paramaters to pass to \code{\link{chain_sim}}} +} +\value{ +log-likelihood values +} +\description{ +The likelihoods are calculated with a crude approximation using simulated + chains by linearly approximating any missing values in the empirical + cumulative distribution function (ecdf). +} +\author{ +Sebastian Funk +} diff --git a/man/pois_length_ll.Rd b/man/pois_length_ll.Rd new file mode 100644 index 00000000..4d80bda1 --- /dev/null +++ b/man/pois_length_ll.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods.R +\name{pois_length_ll} +\alias{pois_length_ll} +\title{Likelihood of the length of chains with Poisson offspring distribution} +\usage{ +pois_length_ll(x, lambda) +} +\arguments{ +\item{x}{vector of sizes} + +\item{lambda}{rate of the Poisson distributino} +} +\value{ +log-likelihood values +} +\description{ +Likelihood of the length of chains with Poisson offspring distribution +} +\author{ +Sebastian Funk +} diff --git a/man/pois_size_ll.Rd b/man/pois_size_ll.Rd new file mode 100644 index 00000000..c5c0bd28 --- /dev/null +++ b/man/pois_size_ll.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods.R +\name{pois_size_ll} +\alias{pois_size_ll} +\title{Likelihood of the size of chains with Poisson offspring distribution} +\usage{ +pois_size_ll(x, lambda) +} +\arguments{ +\item{x}{vector of sizes} + +\item{lambda}{rate of the Poisson distributino} +} +\value{ +log-likelihood values +} +\description{ +Likelihood of the size of chains with Poisson offspring distribution +} +\author{ +Sebastian Funk +} diff --git a/man/rborel.Rd b/man/rborel.Rd new file mode 100644 index 00000000..8923dc65 --- /dev/null +++ b/man/rborel.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/borel.r +\name{rborel} +\alias{rborel} +\title{Generate random numbers from the Borel distribution} +\usage{ +rborel(n, mu, infinite = Inf) +} +\arguments{ +\item{n}{number of random variates to generate.} + +\item{mu}{mu parameter.} + +\item{infinite}{any number to treat as infinite; simulations will be stopped if this number is reached} +} +\value{ +vector of random numbers +} +\description{ +Random numbers are generated by simulating from a Poisson branching process +} +\author{ +Sebastian Funk +} From f14c22aca963e040996fe022fe6b732441a95ecc Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 15 Jan 2019 17:09:39 +0000 Subject: [PATCH 006/828] set infinite as lower limit for infinite outbreak sizes --- R/likelihoods.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index d47da195..c3e44381 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -118,15 +118,18 @@ chain_ll <- function(x, offspring, ..., stat=c("size", "length"), infinite = Inf stat <- match.arg(stat) if (any(x >= infinite)) { - calc_sizes <- seq_len(infinite) + calc_sizes <- seq_len(infinite-1) + x[x >= infinite] <- infinite } else { - calc_sizes <- unique(c(1, x)) + calc_sizes <- unique(x) } ## first, get likelihood function as given by `offspring` and `stat`` likelihoods <- c() ll_func <- paste(offspring, stat, "ll", sep="_") pars <- as.list(unlist(list(...))) ## converts vectors to lists + + ## calculate likelihoods if (exists(ll_func)) { func <- get(ll_func) if (!is.function(func)) stop("'", ll_func, "' is not a function.") @@ -145,7 +148,6 @@ chain_ll <- function(x, offspring, ..., stat=c("size", "length"), infinite = Inf if (any(x >= infinite)) { maxl <- log1p(-sum(exp(likelihoods), na.rm = TRUE)) likelihoods <- c(likelihoods, maxl) - x[x > infinite] <- infinite + 1 } chain_likelihoods <- likelihoods[x] From a7d49c619ebcb256c6184aa1cbf32a63b4c3bdb4 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 15 Jan 2019 21:11:31 +0000 Subject: [PATCH 007/828] catch machine precision errors --- R/likelihoods.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index c3e44381..09fc9e31 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -146,7 +146,8 @@ chain_ll <- function(x, offspring, ..., stat=c("size", "length"), infinite = Inf } if (any(x >= infinite)) { - maxl <- log1p(-sum(exp(likelihoods), na.rm = TRUE)) + maxl <- + tryCatch(log1p(-sum(exp(likelihoods), na.rm = TRUE)), error=function(e) -Inf) likelihoods <- c(likelihoods, maxl) } chain_likelihoods <- likelihoods[x] From 31e87ca60b8d6b1a4276c0c896643933a019b9c0 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 15 Jan 2019 21:11:52 +0000 Subject: [PATCH 008/828] export functions --- NAMESPACE | 2 ++ R/simulate.r | 1 + 2 files changed, 3 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 6ae92683..ddaddf44 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,4 @@ # Generated by roxygen2: do not edit by hand +export(chain_ll) +export(chain_sim) diff --git a/R/simulate.r b/R/simulate.r index f43e02e8..70d09b39 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -8,6 +8,7 @@ ##' @param ... parameters of the offspring distribution ##' @return a vector of sizes/lengths ##' @author Sebastian Funk +##' @export chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ...) { stat <- match.arg(stat) From 218f7a9165783cae4e074763654715ad00a0c81a Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 15 Jan 2019 23:38:35 +0000 Subject: [PATCH 009/828] doc update --- R/likelihoods.R | 1 + man/chain_ll.Rd | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index 09fc9e31..3af7537c 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -113,6 +113,7 @@ offspring_ll <- function(x, offspring, stat, n=100, ...) { ##' @inheritParams chain_sim ##' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll geom_length_ll offspring_ll ##' @author Sebastian Funk +##' @export chain_ll <- function(x, offspring, ..., stat=c("size", "length"), infinite = Inf, exclude) { stat <- match.arg(stat) diff --git a/man/chain_ll.Rd b/man/chain_ll.Rd index 450be841..b91ebdbb 100644 --- a/man/chain_ll.Rd +++ b/man/chain_ll.Rd @@ -4,8 +4,8 @@ \alias{chain_ll} \title{Likelihood for the outcome of a branching process} \usage{ -chain_ll(x, offspring, stat = c("size", "length"), infinite = Inf, - exclude, ...) +chain_ll(x, offspring, ..., stat = c("size", "length"), infinite = Inf, + exclude) } \arguments{ \item{x}{vector of sizes or lengths of transmission chains} @@ -13,13 +13,13 @@ chain_ll(x, offspring, stat = c("size", "length"), infinite = Inf, \item{offspring}{offspring distribution as character string, e.g. "pois" for the Poisson offspring distribution.} +\item{...}{parameters for the offspring distribution} + \item{stat}{statistic given as \code{x} ("size" or "length" of chains)} \item{infinite}{any chains of this size/length will be treated as infinite} \item{exclude}{any sizes/lengths to exclude from the likelihood calculation} - -\item{...}{parameters for the offspring distribution} } \value{ likelihood From 199372a881db3e6575af54aefa0b5024d63b0194 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 15 Jan 2019 17:07:10 +0000 Subject: [PATCH 010/828] observation probabilities <1 --- R/likelihoods.R | 42 +++++++++++++++++++++++++++--------- R/utils.r | 24 +++++++++++++++++++++ man/chain_ll.Rd | 6 ++++-- man/complementary_logprob.Rd | 21 ++++++++++++++++++ man/rbinom_size.Rd | 26 ++++++++++++++++++++++ 5 files changed, 107 insertions(+), 12 deletions(-) create mode 100644 R/utils.r create mode 100644 man/complementary_logprob.Rd create mode 100644 man/rbinom_size.Rd diff --git a/R/likelihoods.R b/R/likelihoods.R index 3af7537c..4f872d16 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -106,6 +106,8 @@ offspring_ll <- function(x, offspring, stat, n=100, ...) { ##' ##' @param x vector of sizes or lengths of transmission chains ##' @param ... parameters for the offspring distribution +##' @param obs_prob observation probability (assumed constant) +##' @param n number of samples for estimating the likelihood if obs_prob < 1 ##' @param stat statistic given as \code{x} ("size" or "length" of chains) ##' @param infinite any chains of this size/length will be treated as infinite ##' @param exclude any sizes/lengths to exclude from the likelihood calculation @@ -114,15 +116,27 @@ offspring_ll <- function(x, offspring, stat, n=100, ...) { ##' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll geom_length_ll offspring_ll ##' @author Sebastian Funk ##' @export -chain_ll <- function(x, offspring, ..., stat=c("size", "length"), infinite = Inf, exclude) +chain_ll <- function(x, offspring, ..., obs_prob=1, n, stat=c("size", "length"), infinite = Inf, exclude) { stat <- match.arg(stat) - if (any(x >= infinite)) { - calc_sizes <- seq_len(infinite-1) + ## checks + if (obs_prob <= 0 || obs_prob > 1) stop("'obs_prob' must be within (0,1]") + if (obs_prob < 1) { + if (missing(n)) stop("'n' must be specified if 'obs_prob' is <1") + sampled_x <- replicate(n, pmin(rbinom_size(length(x), x, obs_prob), infinite)) + size_x <- unlist(sampled_x) + if (!is.finite(infinite)) infinite <- max(size_x) + 1 + } else { x[x >= infinite] <- infinite + size_x <- x + } + + ## determine for which sizes to calculate the likelihood (for true chain size) + if (any(size_x == infinite)) { + calc_sizes <- seq_len(infinite-1) } else { - calc_sizes <- unique(x) + calc_sizes <- unique(size_x) } ## first, get likelihood function as given by `offspring` and `stat`` @@ -141,17 +155,25 @@ chain_ll <- function(x, offspring, ..., stat=c("size", "length"), infinite = Inf c(list(x=calc_sizes, offspring=offspring, stat=stat), pars)) } + ## assign probabilities to infinite outbreak sizes + if (any(size_x == infinite)) { + likelihoods[infinite] <- complementary_logprob(likelihoods) + } + if (!missing(exclude)) { likelihoods <- likelihoods - log(-expm1(sum(likelihoods[exclude]))) likelihoods[exclude] <- -Inf } - if (any(x >= infinite)) { - maxl <- - tryCatch(log1p(-sum(exp(likelihoods), na.rm = TRUE)), error=function(e) -Inf) - likelihoods <- c(likelihoods, maxl) + ## adjust for binomial observation probabilities + if (obs_prob < 1) { + chains_likelihood <- mean(apply(sampled_x, 2, function(sx) { + sum(likelihoods[sx]) + })) + } else { + chains_likelihood <- sum(likelihoods[x]) } - chain_likelihoods <- likelihoods[x] - return(sum(chain_likelihoods)) + return(chains_likelihood) } + diff --git a/R/utils.r b/R/utils.r new file mode 100644 index 00000000..6876b962 --- /dev/null +++ b/R/utils.r @@ -0,0 +1,24 @@ +##' Calculates the complementary log-probability +##' +##' Given x and norm, this calculates log(1-sum(exp(x))) +##' @param x log-probabilities +##' @return value +##' @author Sebastian Funk +##' @keywords internal +complementary_logprob <- function(x) { + tryCatch(log1p(-sum(exp(x))), error=function(e) -Inf) +} + +##' Samples size (the number of trials) of a binomial distribution +##' +##' Samples the size parameter from the binomial distribution with fixed x +##' (number of sucesses) and p (sucess probability) +##' @param n number of samples to generate +##' @param x number of successes +##' @param prob probability of success +##' @return a sampled size +##' @author Sebastian Funk +##' @keywords internal +rbinom_size <- function(n, x, prob) { + x + rnbinom(n, x, prob) + rnbinom(n, 1, prob) +} diff --git a/man/chain_ll.Rd b/man/chain_ll.Rd index b91ebdbb..0621031d 100644 --- a/man/chain_ll.Rd +++ b/man/chain_ll.Rd @@ -4,8 +4,8 @@ \alias{chain_ll} \title{Likelihood for the outcome of a branching process} \usage{ -chain_ll(x, offspring, ..., stat = c("size", "length"), infinite = Inf, - exclude) +chain_ll(x, offspring, ..., obs_prob = 1, stat = c("size", "length"), + infinite = Inf, exclude) } \arguments{ \item{x}{vector of sizes or lengths of transmission chains} @@ -15,6 +15,8 @@ the Poisson offspring distribution.} \item{...}{parameters for the offspring distribution} +\item{obs_prob}{observation probability (assumed constant)} + \item{stat}{statistic given as \code{x} ("size" or "length" of chains)} \item{infinite}{any chains of this size/length will be treated as infinite} diff --git a/man/complementary_logprob.Rd b/man/complementary_logprob.Rd new file mode 100644 index 00000000..221bccb0 --- /dev/null +++ b/man/complementary_logprob.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.r +\name{complementary_logprob} +\alias{complementary_logprob} +\title{Calculates the complementary log-probability} +\usage{ +complementary_logprob(x) +} +\arguments{ +\item{x}{log-probabilities} +} +\value{ +value +} +\description{ +Given x and norm, this calculates log(1-sum(exp(x))) +} +\author{ +Sebastian Funk +} +\keyword{internal} diff --git a/man/rbinom_size.Rd b/man/rbinom_size.Rd new file mode 100644 index 00000000..c50027b4 --- /dev/null +++ b/man/rbinom_size.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.r +\name{rbinom_size} +\alias{rbinom_size} +\title{Samples size (the number of trials) of a binomial distribution} +\usage{ +rbinom_size(n, x, prob) +} +\arguments{ +\item{n}{number of samples to generate} + +\item{x}{number of successes} + +\item{prob}{probability of success} +} +\value{ +a sampled size +} +\description{ +Samples the size parameter from the binomial distribution with fixed x +(number of sucesses) and p (sucess probability) +} +\author{ +Sebastian Funk +} +\keyword{internal} From a99bb79f3ededd5b5d20d833a3bf8ca884cd4771 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 08:22:59 +0000 Subject: [PATCH 011/828] update DESCRIPTION --- DESCRIPTION | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 28fca7e4..21988041 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ -Package: epichains +Package: bpmodels Version: 0.1 -Title: Analysis of transmission chains +Title: Analysing chain statistics using branching process models Authors@R: c(person("Sebastian", "Funk", email = "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre"))) -Description: Performs analysis of chain sizes +Description: Provides methods to analyse and simulate the size and length of branching processes with an arbitrary offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks, as discussed in Farrington et al. (2003) . +Imports: matrixStats License: GPL-3 -URL: https://github.com/sbfnk/epichains -BugReports: https://github.com/sbfnk/epichains/issues -NeedsCompilation: no +URL: https://github.com/sbfnk/bpmodels +BugReports: https://github.com/sbfnk/bpmodels RoxygenNote: 6.1.1 From 91da15a555354b285b341bae032dd56cb7a01e79 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 08:23:45 +0000 Subject: [PATCH 012/828] update order of chain_ll parameters --- R/likelihoods.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index 4f872d16..69d69d6a 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -105,18 +105,17 @@ offspring_ll <- function(x, offspring, stat, n=100, ...) { ##' Likelihood for the outcome of a branching process ##' ##' @param x vector of sizes or lengths of transmission chains -##' @param ... parameters for the offspring distribution -##' @param obs_prob observation probability (assumed constant) -##' @param n number of samples for estimating the likelihood if obs_prob < 1 ##' @param stat statistic given as \code{x} ("size" or "length" of chains) +##' @param obs_prob observation probability (assumed constant) ##' @param infinite any chains of this size/length will be treated as infinite ##' @param exclude any sizes/lengths to exclude from the likelihood calculation +##' @param ... parameters for the offspring distribution ##' @return likelihood ##' @inheritParams chain_sim ##' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll geom_length_ll offspring_ll ##' @author Sebastian Funk ##' @export -chain_ll <- function(x, offspring, ..., obs_prob=1, n, stat=c("size", "length"), infinite = Inf, exclude) +chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, infinite = Inf, exclude, ...) { stat <- match.arg(stat) From 293e2b111496935727f5f1e5d19586d8d90bf7f9 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 08:23:59 +0000 Subject: [PATCH 013/828] update documentation (internals and examples) --- DESCRIPTION | 2 +- R/likelihoods.R | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 21988041..164e4615 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: bpmodels -Version: 0.1 +Version: 0.1.0 Title: Analysing chain statistics using branching process models Authors@R: c(person("Sebastian", "Funk", email = "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre"))) Description: Provides methods to analyse and simulate the size and length of branching processes with an arbitrary offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks, as discussed in Farrington et al. (2003) . diff --git a/R/likelihoods.R b/R/likelihoods.R index 69d69d6a..ee5b8489 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -4,6 +4,7 @@ ##' @param lambda rate of the Poisson distributino ##' @return log-likelihood values ##' @author Sebastian Funk +##' @keywords internal pois_size_ll <- function(x, lambda) { (x - 1) * log(lambda) - lambda * x + (x - 2) * log(x) - lgamma(x) @@ -17,6 +18,7 @@ pois_size_ll <- function(x, lambda) ##' @param mu mean parameter ##' @return log-likelihood values ##' @author Sebastian Funk +##' @keywords internal nbinom_size_ll <- function(x, size, prob, mu) { if (!missing(prob)) { @@ -36,6 +38,7 @@ nbinom_size_ll <- function(x, size, prob, mu) ##' @param mu mean parameter ##' @return log-likelihood values ##' @author Sebastian Funk +##' @keywords internal gborel_size_ll <- function(x, size, prob, mu) { if (!missing(prob)) { if (!missing(mu)) stop("'prob' and 'mu' both specified") @@ -51,6 +54,7 @@ gborel_size_ll <- function(x, size, prob, mu) { ##' @param lambda rate of the Poisson distributino ##' @return log-likelihood values ##' @author Sebastian Funk +##' @keywords internal pois_length_ll <- function(x, lambda) { ## iterated exponential function @@ -69,6 +73,7 @@ pois_length_ll <- function(x, lambda) { ##' @param prob probability of the geometric distribution with mean \code{1/prob} ##' @return log-likelihood values ##' @author Sebastian Funk +##' @keywords internal geom_length_ll <- function(x, prob) { lambda <- 1 / prob @@ -90,6 +95,7 @@ geom_length_ll <- function(x, prob) { ##' @author Sebastian Funk ##' @inheritParams chain_ll ##' @inheritParams chain_sim +##' @keywords internal offspring_ll <- function(x, offspring, stat, n=100, ...) { dist <- chain_sim(n, offspring, stat, ...) @@ -115,6 +121,9 @@ offspring_ll <- function(x, offspring, stat, n=100, ...) { ##' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll geom_length_ll offspring_ll ##' @author Sebastian Funk ##' @export +##' @examples +##' chain_sizes <- c(1,1,4,7) # example of observed chain sizes +##' chain_ll(chain_sizes, "pois", "size", lambda=0.5) chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, infinite = Inf, exclude, ...) { stat <- match.arg(stat) From dc7d0a1e3732a03a332d23e8fa566f81dba77787 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 09:21:33 +0000 Subject: [PATCH 014/828] chain_ll: update parameters --- R/likelihoods.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index ee5b8489..12dca021 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -90,15 +90,16 @@ geom_length_ll <- function(x, prob) { ##' chains by linearly approximating any missing values in the empirical ##' cumulative distribution function (ecdf). ##' @param x vector of sizes +##' @param nsim_offspring number of simulations of the offspring distribution for approximation the size/length distribution ##' @param ... any paramaters to pass to \code{\link{chain_sim}} ##' @return log-likelihood values ##' @author Sebastian Funk ##' @inheritParams chain_ll ##' @inheritParams chain_sim ##' @keywords internal -offspring_ll <- function(x, offspring, stat, n=100, ...) { +offspring_ll <- function(x, offspring, stat, nsim_offspring=100, ...) { - dist <- chain_sim(n, offspring, stat, ...) + dist <- chain_sim(nsim_offspring, offspring, stat, ...) ## linear approximation f <- ecdf(dist) @@ -115,6 +116,7 @@ offspring_ll <- function(x, offspring, stat, n=100, ...) { ##' @param obs_prob observation probability (assumed constant) ##' @param infinite any chains of this size/length will be treated as infinite ##' @param exclude any sizes/lengths to exclude from the likelihood calculation +##' @param nsim_obs number of simulations if the likelihood is to be approximated for imperfect observations ##' @param ... parameters for the offspring distribution ##' @return likelihood ##' @inheritParams chain_sim @@ -124,15 +126,15 @@ offspring_ll <- function(x, offspring, stat, n=100, ...) { ##' @examples ##' chain_sizes <- c(1,1,4,7) # example of observed chain sizes ##' chain_ll(chain_sizes, "pois", "size", lambda=0.5) -chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, infinite = Inf, exclude, ...) +chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, infinite = Inf, exclude, nsim_obs, ...) { stat <- match.arg(stat) ## checks if (obs_prob <= 0 || obs_prob > 1) stop("'obs_prob' must be within (0,1]") if (obs_prob < 1) { - if (missing(n)) stop("'n' must be specified if 'obs_prob' is <1") - sampled_x <- replicate(n, pmin(rbinom_size(length(x), x, obs_prob), infinite)) + if (missing(nsim_obs)) stop("'nsim_obs' must be specified if 'obs_prob' is <1") + sampled_x <- replicate(nsim_obs, pmin(rbinom_size(length(x), x, obs_prob), infinite)) size_x <- unlist(sampled_x) if (!is.finite(infinite)) infinite <- max(size_x) + 1 } else { @@ -160,7 +162,8 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, infinit } else { likelihoods[calc_sizes] <- do.call(offspring_ll, - c(list(x=calc_sizes, offspring=offspring, stat=stat), pars)) + c(list(x=calc_sizes, offspring=offspring, + stat=stat, infinite=infinite), pars)) } ## assign probabilities to infinite outbreak sizes @@ -175,9 +178,9 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, infinit ## adjust for binomial observation probabilities if (obs_prob < 1) { - chains_likelihood <- mean(apply(sampled_x, 2, function(sx) { + chains_likelihood <- apply(sampled_x, 2, function(sx) { sum(likelihoods[sx]) - })) + }) } else { chains_likelihood <- sum(likelihoods[x]) } From 44efdd630dcf48af4517fa650089c35ba6849759 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 09:22:10 +0000 Subject: [PATCH 015/828] Roxygen documentation update --- man/chain_ll.Rd | 16 +++++++++++----- man/gborel_size_ll.Rd | 1 + man/geom_length_ll.Rd | 1 + man/nbinom_size_ll.Rd | 1 + man/offspring_ll.Rd | 5 +++-- man/pois_length_ll.Rd | 1 + man/pois_size_ll.Rd | 1 + 7 files changed, 19 insertions(+), 7 deletions(-) diff --git a/man/chain_ll.Rd b/man/chain_ll.Rd index 0621031d..71f39d7e 100644 --- a/man/chain_ll.Rd +++ b/man/chain_ll.Rd @@ -4,8 +4,8 @@ \alias{chain_ll} \title{Likelihood for the outcome of a branching process} \usage{ -chain_ll(x, offspring, ..., obs_prob = 1, stat = c("size", "length"), - infinite = Inf, exclude) +chain_ll(x, offspring, stat = c("size", "length"), obs_prob = 1, + infinite = Inf, exclude, nsim_obs, ...) } \arguments{ \item{x}{vector of sizes or lengths of transmission chains} @@ -13,15 +13,17 @@ chain_ll(x, offspring, ..., obs_prob = 1, stat = c("size", "length"), \item{offspring}{offspring distribution as character string, e.g. "pois" for the Poisson offspring distribution.} -\item{...}{parameters for the offspring distribution} +\item{stat}{statistic given as \code{x} ("size" or "length" of chains)} \item{obs_prob}{observation probability (assumed constant)} -\item{stat}{statistic given as \code{x} ("size" or "length" of chains)} - \item{infinite}{any chains of this size/length will be treated as infinite} \item{exclude}{any sizes/lengths to exclude from the likelihood calculation} + +\item{nsim_obs}{number of simulations if the likelihood is to be approximated for imperfect observations} + +\item{...}{parameters for the offspring distribution} } \value{ likelihood @@ -29,6 +31,10 @@ likelihood \description{ Likelihood for the outcome of a branching process } +\examples{ +chain_sizes <- c(1,1,4,7) # example of observed chain sizes +chain_ll(chain_sizes, "pois", "size", lambda=0.5) +} \seealso{ pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll geom_length_ll offspring_ll } diff --git a/man/gborel_size_ll.Rd b/man/gborel_size_ll.Rd index 1e6c2fc4..13ee9646 100644 --- a/man/gborel_size_ll.Rd +++ b/man/gborel_size_ll.Rd @@ -24,3 +24,4 @@ Likelihood of the size of chains with gamma-Borel offspring distribution \author{ Sebastian Funk } +\keyword{internal} diff --git a/man/geom_length_ll.Rd b/man/geom_length_ll.Rd index 428bd355..98015fe7 100644 --- a/man/geom_length_ll.Rd +++ b/man/geom_length_ll.Rd @@ -20,3 +20,4 @@ Likelihood of the length of chains with geometric offspring distribution \author{ Sebastian Funk } +\keyword{internal} diff --git a/man/nbinom_size_ll.Rd b/man/nbinom_size_ll.Rd index 4d58ee7f..974b5916 100644 --- a/man/nbinom_size_ll.Rd +++ b/man/nbinom_size_ll.Rd @@ -24,3 +24,4 @@ Likelihood of the size of chains with Negative-Binomial offspring distribution \author{ Sebastian Funk } +\keyword{internal} diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index aab921f4..d2cd9b8f 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -4,7 +4,7 @@ \alias{offspring_ll} \title{Likelihood of the length of chains with generic offspring distribution} \usage{ -offspring_ll(x, offspring, stat, n = 100, ...) +offspring_ll(x, offspring, stat, nsim_offspring = 100, ...) } \arguments{ \item{x}{vector of sizes} @@ -14,7 +14,7 @@ the Poisson offspring distribution.} \item{stat}{statistic given as \code{x} ("size" or "length" of chains)} -\item{n}{number of simulations to run.} +\item{nsim_offspring}{number of simulations of the offspring distribution for approximation the size/length distribution} \item{...}{any paramaters to pass to \code{\link{chain_sim}}} } @@ -29,3 +29,4 @@ The likelihoods are calculated with a crude approximation using simulated \author{ Sebastian Funk } +\keyword{internal} diff --git a/man/pois_length_ll.Rd b/man/pois_length_ll.Rd index 4d80bda1..8bcf37d4 100644 --- a/man/pois_length_ll.Rd +++ b/man/pois_length_ll.Rd @@ -20,3 +20,4 @@ Likelihood of the length of chains with Poisson offspring distribution \author{ Sebastian Funk } +\keyword{internal} diff --git a/man/pois_size_ll.Rd b/man/pois_size_ll.Rd index c5c0bd28..19163265 100644 --- a/man/pois_size_ll.Rd +++ b/man/pois_size_ll.Rd @@ -20,3 +20,4 @@ Likelihood of the size of chains with Poisson offspring distribution \author{ Sebastian Funk } +\keyword{internal} From 54f60997c644cb0a0e030152803965d410f36cca Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 09:22:21 +0000 Subject: [PATCH 016/828] tests --- tests/testthat.R | 4 ++++ tests/testthat/tests-borel.r | 15 +++++++++++++++ tests/testthat/tests-ll.r | 33 +++++++++++++++++++++++++++++++++ tests/testthat/tests-sim.r | 15 +++++++++++++++ 4 files changed, 67 insertions(+) create mode 100644 tests/testthat.R create mode 100644 tests/testthat/tests-borel.r create mode 100644 tests/testthat/tests-ll.r create mode 100644 tests/testthat/tests-sim.r diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 00000000..b9a1b439 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(bpmodels) + +test_check("bpmodels") diff --git a/tests/testthat/tests-borel.r b/tests/testthat/tests-borel.r new file mode 100644 index 00000000..266997e3 --- /dev/null +++ b/tests/testthat/tests-borel.r @@ -0,0 +1,15 @@ +context("The Borel distribution is implemented") + +test_that("We can calculate probabilities and sample", +{ + expect_gt(dborel(1, 0.5), 0) + expect_equal(dborel(1, 0.5, log=TRUE), -0.5) + expect_length(rborel(2, 0.9), 2) +}) + +test_that("Errors are thrown", +{ + expect_error(dborel(0, 0.5), "greater than 0") +}) + + diff --git a/tests/testthat/tests-ll.r b/tests/testthat/tests-ll.r new file mode 100644 index 00000000..f1dfdfea --- /dev/null +++ b/tests/testthat/tests-ll.r @@ -0,0 +1,33 @@ +context("Calculating the likelihood from a branching process model") + +chains <- c(1,1,4,7) + +test_that("Likelihoods can be calculated", +{ + expect_lt(chain_ll(chains, "pois", "size", lambda=0.5), 0) + expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, exclude=1), 0) + expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, infinite = 5), 0) + expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, obs_prob = 0.5, nsim_obs=1), 0) + expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, infinite = 5, obs_prob = 0.5, nsim_obs=1), 0) + expect_lt(chain_ll(chains, "binom", "size", size=1, prob=0.5), 0) +}) + +test_that("Analytical size/length distributions are implemented", +{ + expect_true(all(pois_size_ll(chains, lambda=0.5) < 0)) + expect_true(all(nbinom_size_ll(chains, mu=0.5, size=0.2) < 0)) + expect_true(all(nbinom_size_ll(chains, prob=0.5, size=0.2) < 0)) + expect_true(all(gborel_size_ll(chains, prob=0.5, size=0.2) < 0)) + expect_true(all(gborel_size_ll(chains, prob=0.5, size=0.2) < 0)) + expect_true(all(pois_length_ll(chains, lambda=0.5) < 0)) + expect_true(all(geom_length_ll(chains, prob=0.5) < 0)) +}) + +test_that("Errors are thrown", +{ + expect_error(chain_ll(chain_sizes, "pois", "size", lambda=0.5, obs_prob = 3), "must be within") + expect_error(chain_ll(chain_sizes, "pois", "size", lambda=0.5, obs_prob = 0.5), "must be specified") + expect_error(nbinom_size_ll(chains, mu=0.5, size=0.2, prob=0.1), "both specified") + expect_error(gborel_size_ll(chains, mu=0.5, size=0.2, prob=0.1), "both specified") + expect_error(chain_sim(n=2, "test"), "is not a function") +}) diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r new file mode 100644 index 00000000..8cb7795a --- /dev/null +++ b/tests/testthat/tests-sim.r @@ -0,0 +1,15 @@ +context("Simulating from a branching process model") + +test_that("Chains can be simulated", +{ + expect_length(chain_sim(n=2, "pois", lambda=0.5), 2) + expect_length(chain_sim(n=2, "pois", "length", lambda=0.5), 2) + expect_false(any(is.finite(chain_sim(n=2, "pois", "length", lambda=0.5, infinite=1)))) +}) + +test_that("Errors are thrown", +{ + rtest <- 0 + expect_error(chain_sim(n=2, "dummy"), "does not exist") + expect_error(chain_sim(n=2, "test"), "is not a function") +}) From 7ab18338ac78c8b192d114f834b0c5db2c494a57 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 09:22:27 +0000 Subject: [PATCH 017/828] vignette --- vignettes/introduction.Rmd | 73 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 vignettes/introduction.Rmd diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd new file mode 100644 index 00000000..24abe368 --- /dev/null +++ b/vignettes/introduction.Rmd @@ -0,0 +1,73 @@ +--- +title: "Analysing chain statistics using branching process models" +author: "Sebastian Funk" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Analysing chain statistics using branching process models} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +library('knitr') +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +[bpmodels](https://github.com/sbfnk/bpmodels) is an `R` package to analyse and simulate the size and length of branching processes with an arbitrary offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks. + +# Usage + +To load the package, use +```{r eval=FALSE} +library('bpmodels') +``` +```{r echo=FALSE} +suppressWarnings(library('bpmodels')) +``` + +At the heart of the `bpmodels` package are the `chains_ll` and `chains_sim` functions. The `chains_ll` function calculates the log-likelihood of a distribution of chain sizes or lengths given an offspring distribution and associated parameters. For example, to get the log-likelihood for a given observed distribution of chain sizes assuming a mean number of 0.5 Poisson-distributed offspring per generation, use + +```{r} +chain_sizes <- c(1,1,4,7) # example of observed chain sizes +chain_ll(chain_sizes, "pois", "size", lambda=0.5) +``` + +The first argument of `chain_ll` is the size (or length) distribution to analyse. The second argument (called `offspring`) specifies the offspring distribution. This is given as a character string that refers to the function used to generate random offspring. It can be any probability distribution implemented in R, that is, one that has a corresponding function for generating random numbers beginning with the letter `r`. In the case of the example above, since random Poisson numbers are generated in R using a function called `rpois`, "pois" is the corresponding string to pass to the `offspring` argument. + +The third argument (called `stat`) determines whether to analyse chain sizes ("size", the default if this argument is not specified) or lengths ("length"). Lastly, any named arguments not recognised by `chain_ll` are interpreted as parameters of the corresponding probability distribution, here `lambda=0.5` as the mean of the Poisson distribution (see the R help page for the Poisson distribution for more information). + +You can use the `R` help to find out about usage of the `chains_ll` function, + +```{r eval=FALSE} +?chains_ll +``` + +To simulate from a branching process, use the `chain_sim` function, which follows the same syntax as the `chain_ll` function: + +```{r} +chain_sim(n=5, "pois", "size", lambda=0.5) +``` + +# Methodology + +If the probability distribution of chain sizes or lengths has an analytical solution, this will be used (size distribution: Poisson and negative binomial; length distribution: Poisson and geometric). If not, simulations are used to approximate this probability distributions (using a linear approximation to the cumulative distribution for unobserved sizes/lengths), requiring an additional parameter `nsim_offspring` for the number of simulations to be used for this approximation. + +# Imperfect observations + +The `chain_ll` function has an `obs_prob` parameter that can be used to determine the likelihood if observations are imperfect. This only works when analysing chain sizes (`stat="size"`). In that case, true chain sizes are simulated repeatedly (the number of times given by the `nsim_obs` argument) and the likelihood calculated for each of these simulations. For example, if the probability of observing each case is 30%, use + +```{r} +ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda=0.5, nsim_obs=10) +summary(ll) +``` + +This returns `nsim_obs=10` likelihood values which can be averaged to come up with an overall likelihood estimate. + +# References + +* Farrington, C.P., Kanaan, M.N. and Gay, N.J. (2003). [Branching process models for surveillance of infectious diseases controlled by mass vaccination](https://doi.org/10.1093/biostatistics/4.2.279). +* Blumberg, S. and Lloyd-Smith, J.O. (2013). [Comparing methods for estimating R0 from the size distribution of subcritical transmission chains](https://doi.org/10.1016/j.epidem.2013.05.002). From 99f1adad871ee24832676fb79a23d4c06bbb777e Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 09:22:36 +0000 Subject: [PATCH 018/828] update package structure --- .Rbuildignore | 4 + .gitignore | 10 +- .travis.yml | 5 + CODE_OF_CONDUCT.md | 25 ++ DESCRIPTION | 5 + LICENSE | 674 -------------------------------------------- NEWS.md | 3 + README.md | 11 +- appveyor.yml | 41 +++ man/offspring_ll.Rd | 3 +- 10 files changed, 96 insertions(+), 685 deletions(-) create mode 100644 .Rbuildignore create mode 100644 .travis.yml create mode 100644 CODE_OF_CONDUCT.md delete mode 100644 LICENSE create mode 100644 NEWS.md create mode 100644 appveyor.yml diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 00000000..f0fea78d --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,4 @@ +^CODE_OF_CONDUCT\.md$ +^appveyor\.yml$ +^\.travis\.yml$ +cran-comments.md diff --git a/.gitignore b/.gitignore index 26fad6fa..57133000 100644 --- a/.gitignore +++ b/.gitignore @@ -1,36 +1,28 @@ +inst/doc # History files .Rhistory .Rapp.history # Session Data files .RData - # Example code in package build process *-Ex.R - # Output files from R CMD build /*.tar.gz - # Output files from R CMD check /*.Rcheck/ - # RStudio files .Rproj.user/ - # produced vignettes vignettes/*.html vignettes/*.pdf - # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 .httr-oauth - # knitr and R markdown default cache directories /*_cache/ /cache/ - # Temporary files created by R markdown *.utf8.md *.knit.md - # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html rsconnect/ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..8d139ac6 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,5 @@ +# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r + +language: R +sudo: false +cache: packages diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..24aa0a3c --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,25 @@ +# Contributor Code of Conduct + +As contributors and maintainers of this project, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating documentation, +submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free experience for +everyone, regardless of level of experience, gender, gender identity and expression, +sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. + +Examples of unacceptable behavior by participants include the use of sexual language or +imagery, derogatory comments or personal attacks, trolling, public or private harassment, +insults, or other unprofessional conduct. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, +commits, code, wiki edits, issues, and other contributions that are not aligned to this +Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed +from the project team. + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by +opening an issue or contacting one or more of the project maintainers. + +This Code of Conduct is adapted from the Contributor Covenant +(http://contributor-covenant.org), version 1.0.0, available at +http://contributor-covenant.org/version/1/0/0/ diff --git a/DESCRIPTION b/DESCRIPTION index 164e4615..16c0fb0a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,12 @@ Title: Analysing chain statistics using branching process models Authors@R: c(person("Sebastian", "Funk", email = "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre"))) Description: Provides methods to analyse and simulate the size and length of branching processes with an arbitrary offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks, as discussed in Farrington et al. (2003) . Imports: matrixStats +Suggests: + testthat, + knitr, + rmarkdown License: GPL-3 URL: https://github.com/sbfnk/bpmodels BugReports: https://github.com/sbfnk/bpmodels RoxygenNote: 6.1.1 +VignetteBuilder: knitr diff --git a/LICENSE b/LICENSE deleted file mode 100644 index f288702d..00000000 --- a/LICENSE +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 00000000..1d3b8bbd --- /dev/null +++ b/NEWS.md @@ -0,0 +1,3 @@ +# bpmodels 0.1.0 + +* initial release diff --git a/README.md b/README.md index 9b3e57ea..fede0da4 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,11 @@ -# epichains +# bpmodels + Methods for analysing the distribution of epidemiological chain sizes and lengths + +The latest development version of the `bpmodels` package can be installed via + +```{r eval=FALSE} +devtools::install_github('sbfnk/bpmodels') +``` + +Please note that the 'bpmodels' project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000..057d78b3 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,41 @@ +# DO NOT CHANGE the "init" and "install" sections below + +# Download script file from GitHub +init: + ps: | + $ErrorActionPreference = "Stop" + Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" + Import-Module '..\appveyor-tool.ps1' +install: + ps: Bootstrap + +# Adapt as necessary starting from here + +build_script: + - travis-tool.sh install_deps + +test_script: + - travis-tool.sh run_tests + +on_failure: + - 7z a failure.zip *.Rcheck\* + - appveyor PushArtifact failure.zip + +artifacts: + - path: '*.Rcheck\**\*.log' + name: Logs + + - path: '*.Rcheck\**\*.out' + name: Logs + + - path: '*.Rcheck\**\*.fail' + name: Logs + + - path: '*.Rcheck\**\*.Rout' + name: Logs + + - path: '\*_*.tar.gz' + name: Bits + + - path: '\*_*.zip' +name: Bits diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index d2cd9b8f..d9827f27 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -14,7 +14,8 @@ the Poisson offspring distribution.} \item{stat}{statistic given as \code{x} ("size" or "length" of chains)} -\item{nsim_offspring}{number of simulations of the offspring distribution for approximation the size/length distribution} +\item{nsim_offspring}{number of simulations of the offspring distribution +for approximation the size/length distribution} \item{...}{any paramaters to pass to \code{\link{chain_sim}}} } From 97679dc2936881031457cac0010da3fc21e904ae Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 09:51:26 +0000 Subject: [PATCH 019/828] remove spurious tests --- tests/testthat/tests-ll.r | 1 - tests/testthat/tests-sim.r | 2 -- 2 files changed, 3 deletions(-) diff --git a/tests/testthat/tests-ll.r b/tests/testthat/tests-ll.r index f1dfdfea..e12b72ce 100644 --- a/tests/testthat/tests-ll.r +++ b/tests/testthat/tests-ll.r @@ -29,5 +29,4 @@ test_that("Errors are thrown", expect_error(chain_ll(chain_sizes, "pois", "size", lambda=0.5, obs_prob = 0.5), "must be specified") expect_error(nbinom_size_ll(chains, mu=0.5, size=0.2, prob=0.1), "both specified") expect_error(gborel_size_ll(chains, mu=0.5, size=0.2, prob=0.1), "both specified") - expect_error(chain_sim(n=2, "test"), "is not a function") }) diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index 8cb7795a..697886ab 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -9,7 +9,5 @@ test_that("Chains can be simulated", test_that("Errors are thrown", { - rtest <- 0 expect_error(chain_sim(n=2, "dummy"), "does not exist") - expect_error(chain_sim(n=2, "test"), "is not a function") }) From fe3718868677481622fd110f7cb534c899c99d71 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 09:52:22 +0000 Subject: [PATCH 020/828] qualify stats functions --- R/likelihoods.R | 9 ++++++--- R/utils.r | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index 12dca021..34911a31 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -90,7 +90,8 @@ geom_length_ll <- function(x, prob) { ##' chains by linearly approximating any missing values in the empirical ##' cumulative distribution function (ecdf). ##' @param x vector of sizes -##' @param nsim_offspring number of simulations of the offspring distribution for approximation the size/length distribution +##' @param nsim_offspring number of simulations of the offspring distribution +##' for approximation the size/length distribution ##' @param ... any paramaters to pass to \code{\link{chain_sim}} ##' @return log-likelihood values ##' @author Sebastian Funk @@ -102,8 +103,10 @@ offspring_ll <- function(x, offspring, stat, nsim_offspring=100, ...) { dist <- chain_sim(nsim_offspring, offspring, stat, ...) ## linear approximation - f <- ecdf(dist) - acdf <- diff(c(0, approx(unique(dist), f(unique(dist)), seq_len(max(dist[is.finite(dist)])))$y)) + f <- stats::ecdf(dist) + acdf <- + diff(c(0, stats::approx(unique(dist), f(unique(dist)), + seq_len(max(dist[is.finite(dist)])))$y)) lik <- acdf[x] lik[is.na(lik)] <- 0 log(lik) diff --git a/R/utils.r b/R/utils.r index 6876b962..86fe3f7d 100644 --- a/R/utils.r +++ b/R/utils.r @@ -20,5 +20,5 @@ complementary_logprob <- function(x) { ##' @author Sebastian Funk ##' @keywords internal rbinom_size <- function(n, x, prob) { - x + rnbinom(n, x, prob) + rnbinom(n, 1, prob) + x + stats::rnbinom(n, x, prob) + stats::rnbinom(n, 1, prob) } From c6d27e42895d10a4fa2b389d400d3264bf08e0a6 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 09:56:30 +0000 Subject: [PATCH 021/828] update travis.yml for osx and codecov --- .travis.yml | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8d139ac6..a7279d6d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,26 @@ # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r - -language: R -sudo: false +language: r cache: packages + +matrix: + include: + - os: linux + r: release + env: + - R_CODECOV=true + - os: linux + r: devel + - os: linux + r: oldrel + - os: osx + osx_image: xcode8.3 + +warnings_are_errors: true + +notifications: + email: + on_success: change + on_failure: change + +after_success: +- if [[ "${R_CODECOV}" ]]; then Rscript -e 'covr::codecov()'; fi From 4925cccf9bb0aaf5b1b9222556c22cba982eb33d Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 09:58:18 +0000 Subject: [PATCH 022/828] fix typo in appveyor.yml --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 057d78b3..bc46a87c 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -38,4 +38,4 @@ artifacts: name: Bits - path: '\*_*.zip' -name: Bits + name: Bits From b7346c161f35ae970018a7a5b55a52579ad5ad57 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 10:15:22 +0000 Subject: [PATCH 023/828] README badges --- DESCRIPTION | 3 ++- README.md | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 16c0fb0a..5020a6e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,8 @@ Imports: matrixStats Suggests: testthat, knitr, - rmarkdown + rmarkdown, + covr License: GPL-3 URL: https://github.com/sbfnk/bpmodels BugReports: https://github.com/sbfnk/bpmodels diff --git a/README.md b/README.md index fede0da4..028c7a9e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # bpmodels +[![Travis-CI Build Status](https://travis-ci.org/sbfnk/bpmodels.svg?branch=master)](https://travis-ci.org/sbfnk/bpmodels) +[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/github/sbfnk)](https://ci.appveyor.com/project/sbfnk/bpmodels) +[![codecov](https://codecov.io/github/sbfnk/bpmodels/branch/master/graphs/badge.svg)](https://codecov.io/github/sbfnk/bpmodels) + Methods for analysing the distribution of epidemiological chain sizes and lengths The latest development version of the `bpmodels` package can be installed via From d8f3e186b003bebd28ad9bae9b449af9c6f946e0 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 10:23:14 +0000 Subject: [PATCH 024/828] remove obsolete test --- R/simulate.r | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/simulate.r b/R/simulate.r index 70d09b39..8e13b68a 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -28,10 +28,8 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, offspring <- sum(func(n=state, ...)) if (stat=="size") { stat_track <- stat_track + offspring - } else if (stat=="length"){ + } else if (stat=="length") { if (offspring > 0) stat_track <- stat_track + 1 - } else { - stop("Unknown statistic: '", stat, "'.") } state <- offspring } From cb5258b65688ea4e0e6318a77aa30d25053d0378 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 10:28:45 +0000 Subject: [PATCH 025/828] give chain length test higher chance of finding length > 1 --- tests/testthat/tests-sim.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index 697886ab..88116f63 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -3,7 +3,7 @@ context("Simulating from a branching process model") test_that("Chains can be simulated", { expect_length(chain_sim(n=2, "pois", lambda=0.5), 2) - expect_length(chain_sim(n=2, "pois", "length", lambda=0.5), 2) + expect_length(chain_sim(n=10, "pois", "length", lambda=0.9), 2) expect_false(any(is.finite(chain_sim(n=2, "pois", "length", lambda=0.5, infinite=1)))) }) From 2308ff3e57081f74075e0d1cdfb7937f825ded80 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 10:42:52 +0000 Subject: [PATCH 026/828] fix simulation test --- tests/testthat/tests-sim.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index 88116f63..093e5a6b 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -3,7 +3,7 @@ context("Simulating from a branching process model") test_that("Chains can be simulated", { expect_length(chain_sim(n=2, "pois", lambda=0.5), 2) - expect_length(chain_sim(n=10, "pois", "length", lambda=0.9), 2) + expect_length(chain_sim(n=10, "pois", "length", lambda=0.9), 10) expect_false(any(is.finite(chain_sim(n=2, "pois", "length", lambda=0.5, infinite=1)))) }) From 5555ffbc750cd4ef7e9e12bd9f8f57506d6e9310 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 11:16:01 +0000 Subject: [PATCH 027/828] use actual function to generate offspring instead of name --- DESCRIPTION | 1 - R/borel.r | 5 ++-- R/likelihoods.R | 57 +++++++++++++++++++++++++------------- R/simulate.r | 30 +++++++++++--------- man/chain_ll.Rd | 13 +++++---- man/chain_sim.Rd | 11 ++++++-- man/gborel_size_ll.Rd | 6 ++-- man/geom_length_ll.Rd | 3 +- man/nbinom_size_ll.Rd | 12 +++++--- man/offspring_ll.Rd | 5 ++-- man/rborel.Rd | 3 +- tests/testthat/tests-ll.r | 28 ++++++++++++------- tests/testthat/tests-sim.r | 9 +++--- vignettes/introduction.Rmd | 14 ++++++---- 14 files changed, 125 insertions(+), 72 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5020a6e7..bf73caf5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,6 @@ Version: 0.1.0 Title: Analysing chain statistics using branching process models Authors@R: c(person("Sebastian", "Funk", email = "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre"))) Description: Provides methods to analyse and simulate the size and length of branching processes with an arbitrary offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks, as discussed in Farrington et al. (2003) . -Imports: matrixStats Suggests: testthat, knitr, diff --git a/R/borel.r b/R/borel.r index a03e34f3..9e97df5d 100644 --- a/R/borel.r +++ b/R/borel.r @@ -17,9 +17,10 @@ dborel <- function(x, mu, log=FALSE) { ##' Random numbers are generated by simulating from a Poisson branching process ##' @param n number of random variates to generate. ##' @param mu mu parameter. -##' @param infinite any number to treat as infinite; simulations will be stopped if this number is reached +##' @param infinite any number to treat as infinite; simulations will be stopped +##' if this number is reached ##' @return vector of random numbers ##' @author Sebastian Funk rborel <- function(n, mu, infinite=Inf) { - chain_sim(n, "pois", "size", infinite=infinite, lambda=mu) + chain_sim(n, stats::rpois, "size", infinite=infinite, lambda=mu) } diff --git a/R/likelihoods.R b/R/likelihoods.R index 34911a31..f84b383f 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -10,11 +10,14 @@ pois_size_ll <- function(x, lambda) (x - 1) * log(lambda) - lambda * x + (x - 2) * log(x) - lgamma(x) } -##' Likelihood of the size of chains with Negative-Binomial offspring distribution +##' Likelihood of the size of chains with Negative-Binomial offspring +##' distribution ##' ##' @param x vector of sizes -##' @param size the dispersion parameter (often called \code{k} in ecological applications) -##' @param prob probability of success (in the parameterisation with \code{prob}, see also \code{\link[stats]{NegBinomial}}) +##' @param size the dispersion parameter (often called \code{k} in ecological +##' applications) +##' @param prob probability of success (in the parameterisation with +##' \code{prob}, see also \code{\link[stats]{NegBinomial}}) ##' @param mu mean parameter ##' @return log-likelihood values ##' @author Sebastian Funk @@ -33,8 +36,10 @@ nbinom_size_ll <- function(x, size, prob, mu) ##' Likelihood of the size of chains with gamma-Borel offspring distribution ##' ##' @param x vector of sizes -##' @param size the dispersion parameter (often called \code{k} in ecological applications) -##' @param prob probability of success (in the parameterisation with \code{prob}, see also \code{\link[stats]{NegBinomial}}) +##' @param size the dispersion parameter (often called \code{k} in ecological +##' applications) +##' @param prob probability of success (in the parameterisation with +##' \code{prob}, see also \code{\link[stats]{NegBinomial}}) ##' @param mu mean parameter ##' @return log-likelihood values ##' @author Sebastian Funk @@ -44,7 +49,8 @@ gborel_size_ll <- function(x, size, prob, mu) { if (!missing(mu)) stop("'prob' and 'mu' both specified") mu <- size * (1 - prob) / prob } - lgamma(size + x - 1) - (lgamma(x + 1) + lgamma(size)) - size * log(mu / size) + + lgamma(size + x - 1) - + (lgamma(x + 1) + lgamma(size)) - size * log(mu / size) + (x - 1) * log(x) - (size + x - 1) * log(x + size / mu) } @@ -70,7 +76,8 @@ pois_length_ll <- function(x, lambda) { ##' Likelihood of the length of chains with geometric offspring distribution ##' ##' @param x vector of sizes -##' @param prob probability of the geometric distribution with mean \code{1/prob} +##' @param prob probability of the geometric distribution with mean +##' \code{1/prob} ##' @return log-likelihood values ##' @author Sebastian Funk ##' @keywords internal @@ -91,7 +98,7 @@ geom_length_ll <- function(x, prob) { ##' cumulative distribution function (ecdf). ##' @param x vector of sizes ##' @param nsim_offspring number of simulations of the offspring distribution -##' for approximation the size/length distribution +##' for approximation the size/length distribution ##' @param ... any paramaters to pass to \code{\link{chain_sim}} ##' @return log-likelihood values ##' @author Sebastian Funk @@ -119,25 +126,30 @@ offspring_ll <- function(x, offspring, stat, nsim_offspring=100, ...) { ##' @param obs_prob observation probability (assumed constant) ##' @param infinite any chains of this size/length will be treated as infinite ##' @param exclude any sizes/lengths to exclude from the likelihood calculation -##' @param nsim_obs number of simulations if the likelihood is to be approximated for imperfect observations +##' @param nsim_obs number of simulations if the likelihood is to be +##' approximated for imperfect observations ##' @param ... parameters for the offspring distribution ##' @return likelihood ##' @inheritParams chain_sim -##' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll geom_length_ll offspring_ll +##' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll +##' geom_length_ll offspring_ll ##' @author Sebastian Funk ##' @export ##' @examples ##' chain_sizes <- c(1,1,4,7) # example of observed chain sizes -##' chain_ll(chain_sizes, "pois", "size", lambda=0.5) -chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, infinite = Inf, exclude, nsim_obs, ...) -{ +##' chain_ll(chain_sizes, rpois, "size", lambda=0.5) +chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, + infinite = Inf, exclude, nsim_obs, ...) { stat <- match.arg(stat) ## checks if (obs_prob <= 0 || obs_prob > 1) stop("'obs_prob' must be within (0,1]") if (obs_prob < 1) { - if (missing(nsim_obs)) stop("'nsim_obs' must be specified if 'obs_prob' is <1") - sampled_x <- replicate(nsim_obs, pmin(rbinom_size(length(x), x, obs_prob), infinite)) + if (missing(nsim_obs)) { + stop("'nsim_obs' must be specified if 'obs_prob' is <1") + } + sampled_x <- + replicate(nsim_obs, pmin(rbinom_size(length(x), x, obs_prob), infinite)) size_x <- unlist(sampled_x) if (!is.finite(infinite)) infinite <- max(size_x) + 1 } else { @@ -152,15 +164,22 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, infinit calc_sizes <- unique(size_x) } - ## first, get likelihood function as given by `offspring` and `stat`` + ## get random function as given by `offspring` + if (!is.function(offspring)) { + stop("object passed as 'offspring' is not a function.") + } + + ## get likelihood function as given by `offspring` and `stat`` likelihoods <- c() - ll_func <- paste(offspring, stat, "ll", sep="_") + ## get offspring distribution by stripping first letter from offspring + ## function + offspring_dist <- sub("^.", "", deparse(substitute(offspring))) + ll_func <- paste(offspring_dist, stat, "ll", sep="_") pars <- as.list(unlist(list(...))) ## converts vectors to lists ## calculate likelihoods - if (exists(ll_func)) { + if (exists(ll_func, where=asNamespace('bpmodels'), mode='function')) { func <- get(ll_func) - if (!is.function(func)) stop("'", ll_func, "' is not a function.") likelihoods[calc_sizes] <- do.call(func, c(list(x=calc_sizes), pars)) } else { likelihoods[calc_sizes] <- diff --git a/R/simulate.r b/R/simulate.r index 8e13b68a..6ce66d1f 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -1,37 +1,41 @@ ##' Simulate chains using a branching process ##' ##' @param n number of simulations to run. -##' @param offspring offspring distribution as character string, e.g. "pois" for -##' the Poisson offspring distribution. +##' @param offspring offspring distribution, given as the function used to +##' generate the number of offspring in each generation, e.g. `rpois` for +##' Poisson distributed offspring ##' @param stat statistic to calculate ("size" or "length" of chains) -##' @param infinite a size or length from which the size/length is to be considered infinite +##' @param infinite a size or length from which the size/length is to be +##' considered infinite ##' @param ... parameters of the offspring distribution ##' @return a vector of sizes/lengths ##' @author Sebastian Funk ##' @export -chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ...) { +##' @examples +##' chain_sim(n=5, rpois, "size", lambda=0.5) +chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, + ...) { stat <- match.arg(stat) ## first, get random function as given by `offspring` - random_func <- paste0("r", offspring) - if (!exists(random_func)) stop("Random sampling function '", random_func, "' does not exist.") - func <- get(random_func) - if (!is.function(func)) stop("'", random_func, "' is not a function.") + if (!is.function(offspring)) { + stop("object passed as 'offspring' is not a function.") + } ## next, simulate n chains dist <- c() for (i in seq_len(n)) { - stat_track <- 1 ## variable to track length or size (depending on `stat`) + stat_track <- 1 ## track length or size (depending on `stat`) state <- 1 while (state > 0 && state < infinite) { - offspring <- sum(func(n=state, ...)) + n_offspring <- sum(offspring(n=state, ...)) if (stat=="size") { - stat_track <- stat_track + offspring + stat_track <- stat_track + n_offspring } else if (stat=="length") { - if (offspring > 0) stat_track <- stat_track + 1 + if (n_offspring > 0) stat_track <- stat_track + 1 } - state <- offspring + state <- n_offspring } if (state >= infinite) stat_track <- Inf dist[i] <- stat_track diff --git a/man/chain_ll.Rd b/man/chain_ll.Rd index 71f39d7e..0ed5fa56 100644 --- a/man/chain_ll.Rd +++ b/man/chain_ll.Rd @@ -10,8 +10,9 @@ chain_ll(x, offspring, stat = c("size", "length"), obs_prob = 1, \arguments{ \item{x}{vector of sizes or lengths of transmission chains} -\item{offspring}{offspring distribution as character string, e.g. "pois" for -the Poisson offspring distribution.} +\item{offspring}{offspring distribution, given as the function used to +generate the number of offspring in each generation, e.g. `rpois` for +Poisson distributed offspring} \item{stat}{statistic given as \code{x} ("size" or "length" of chains)} @@ -21,7 +22,8 @@ the Poisson offspring distribution.} \item{exclude}{any sizes/lengths to exclude from the likelihood calculation} -\item{nsim_obs}{number of simulations if the likelihood is to be approximated for imperfect observations} +\item{nsim_obs}{number of simulations if the likelihood is to be +approximated for imperfect observations} \item{...}{parameters for the offspring distribution} } @@ -33,10 +35,11 @@ Likelihood for the outcome of a branching process } \examples{ chain_sizes <- c(1,1,4,7) # example of observed chain sizes -chain_ll(chain_sizes, "pois", "size", lambda=0.5) +chain_ll(chain_sizes, rpois, "size", lambda=0.5) } \seealso{ -pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll geom_length_ll offspring_ll +pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll + geom_length_ll offspring_ll } \author{ Sebastian Funk diff --git a/man/chain_sim.Rd b/man/chain_sim.Rd index e3d09a24..9030121b 100644 --- a/man/chain_sim.Rd +++ b/man/chain_sim.Rd @@ -10,12 +10,14 @@ chain_sim(n, offspring, stat = c("size", "length"), infinite = Inf, \arguments{ \item{n}{number of simulations to run.} -\item{offspring}{offspring distribution as character string, e.g. "pois" for -the Poisson offspring distribution.} +\item{offspring}{offspring distribution, given as the function used to +generate the number of offspring in each generation, e.g. `rpois` for +Poisson distributed offspring} \item{stat}{statistic to calculate ("size" or "length" of chains)} -\item{infinite}{a size or length from which the size/length is to be considered infinite} +\item{infinite}{a size or length from which the size/length is to be +considered infinite} \item{...}{parameters of the offspring distribution} } @@ -25,6 +27,9 @@ a vector of sizes/lengths \description{ Simulate chains using a branching process } +\examples{ +chain_sim(n=5, rpois, "size", lambda=0.5) +} \author{ Sebastian Funk } diff --git a/man/gborel_size_ll.Rd b/man/gborel_size_ll.Rd index 13ee9646..221bf270 100644 --- a/man/gborel_size_ll.Rd +++ b/man/gborel_size_ll.Rd @@ -9,9 +9,11 @@ gborel_size_ll(x, size, prob, mu) \arguments{ \item{x}{vector of sizes} -\item{size}{the dispersion parameter (often called \code{k} in ecological applications)} +\item{size}{the dispersion parameter (often called \code{k} in ecological +applications)} -\item{prob}{probability of success (in the parameterisation with \code{prob}, see also \code{\link[stats]{NegBinomial}})} +\item{prob}{probability of success (in the parameterisation with +\code{prob}, see also \code{\link[stats]{NegBinomial}})} \item{mu}{mean parameter} } diff --git a/man/geom_length_ll.Rd b/man/geom_length_ll.Rd index 98015fe7..bdc6082d 100644 --- a/man/geom_length_ll.Rd +++ b/man/geom_length_ll.Rd @@ -9,7 +9,8 @@ geom_length_ll(x, prob) \arguments{ \item{x}{vector of sizes} -\item{prob}{probability of the geometric distribution with mean \code{1/prob}} +\item{prob}{probability of the geometric distribution with mean +\code{1/prob}} } \value{ log-likelihood values diff --git a/man/nbinom_size_ll.Rd b/man/nbinom_size_ll.Rd index 974b5916..363ecd30 100644 --- a/man/nbinom_size_ll.Rd +++ b/man/nbinom_size_ll.Rd @@ -2,16 +2,19 @@ % Please edit documentation in R/likelihoods.R \name{nbinom_size_ll} \alias{nbinom_size_ll} -\title{Likelihood of the size of chains with Negative-Binomial offspring distribution} +\title{Likelihood of the size of chains with Negative-Binomial offspring +distribution} \usage{ nbinom_size_ll(x, size, prob, mu) } \arguments{ \item{x}{vector of sizes} -\item{size}{the dispersion parameter (often called \code{k} in ecological applications)} +\item{size}{the dispersion parameter (often called \code{k} in ecological +applications)} -\item{prob}{probability of success (in the parameterisation with \code{prob}, see also \code{\link[stats]{NegBinomial}})} +\item{prob}{probability of success (in the parameterisation with +\code{prob}, see also \code{\link[stats]{NegBinomial}})} \item{mu}{mean parameter} } @@ -19,7 +22,8 @@ nbinom_size_ll(x, size, prob, mu) log-likelihood values } \description{ -Likelihood of the size of chains with Negative-Binomial offspring distribution +Likelihood of the size of chains with Negative-Binomial offspring +distribution } \author{ Sebastian Funk diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index d9827f27..19d8fee4 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -9,8 +9,9 @@ offspring_ll(x, offspring, stat, nsim_offspring = 100, ...) \arguments{ \item{x}{vector of sizes} -\item{offspring}{offspring distribution as character string, e.g. "pois" for -the Poisson offspring distribution.} +\item{offspring}{offspring distribution, given as the function used to +generate the number of offspring in each generation, e.g. `rpois` for +Poisson distributed offspring} \item{stat}{statistic given as \code{x} ("size" or "length" of chains)} diff --git a/man/rborel.Rd b/man/rborel.Rd index 8923dc65..e32484ed 100644 --- a/man/rborel.Rd +++ b/man/rborel.Rd @@ -11,7 +11,8 @@ rborel(n, mu, infinite = Inf) \item{mu}{mu parameter.} -\item{infinite}{any number to treat as infinite; simulations will be stopped if this number is reached} +\item{infinite}{any number to treat as infinite; simulations will be stopped +if this number is reached} } \value{ vector of random numbers diff --git a/tests/testthat/tests-ll.r b/tests/testthat/tests-ll.r index e12b72ce..51c67a89 100644 --- a/tests/testthat/tests-ll.r +++ b/tests/testthat/tests-ll.r @@ -4,12 +4,14 @@ chains <- c(1,1,4,7) test_that("Likelihoods can be calculated", { - expect_lt(chain_ll(chains, "pois", "size", lambda=0.5), 0) - expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, exclude=1), 0) - expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, infinite = 5), 0) - expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, obs_prob = 0.5, nsim_obs=1), 0) - expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, infinite = 5, obs_prob = 0.5, nsim_obs=1), 0) - expect_lt(chain_ll(chains, "binom", "size", size=1, prob=0.5), 0) + expect_lt(chain_ll(chains, rpois, "size", lambda=0.5), 0) + expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, exclude=1), 0) + expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, infinite = 5), 0) + expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, obs_prob = 0.5, + nsim_obs=1), 0) + expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, infinite = 5, + obs_prob = 0.5, nsim_obs=1), 0) + expect_lt(chain_ll(chains, rbinom, "size", size=1, prob=0.5), 0) }) test_that("Analytical size/length distributions are implemented", @@ -25,8 +27,14 @@ test_that("Analytical size/length distributions are implemented", test_that("Errors are thrown", { - expect_error(chain_ll(chain_sizes, "pois", "size", lambda=0.5, obs_prob = 3), "must be within") - expect_error(chain_ll(chain_sizes, "pois", "size", lambda=0.5, obs_prob = 0.5), "must be specified") - expect_error(nbinom_size_ll(chains, mu=0.5, size=0.2, prob=0.1), "both specified") - expect_error(gborel_size_ll(chains, mu=0.5, size=0.2, prob=0.1), "both specified") + expect_error(chain_ll(chains, "dummy", "size", lambda=0.5), + "not a function") + expect_error(chain_ll(chains, rpois, "size", lambda=0.5, obs_prob = 3), + "must be within") + expect_error(chain_ll(chains, rpois, "size", lambda=0.5, obs_prob = 0.5), + "must be specified") + expect_error(nbinom_size_ll(chains, mu=0.5, size=0.2, prob=0.1), + "both specified") + expect_error(gborel_size_ll(chains, mu=0.5, size=0.2, prob=0.1), + "both specified") }) diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index 093e5a6b..c543e497 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -2,12 +2,13 @@ context("Simulating from a branching process model") test_that("Chains can be simulated", { - expect_length(chain_sim(n=2, "pois", lambda=0.5), 2) - expect_length(chain_sim(n=10, "pois", "length", lambda=0.9), 10) - expect_false(any(is.finite(chain_sim(n=2, "pois", "length", lambda=0.5, infinite=1)))) + expect_length(chain_sim(n=2, rpois, lambda=0.5), 2) + expect_length(chain_sim(n=10, rpois, "length", lambda=0.9), 10) + expect_false(any(is.finite(chain_sim(n=2, rpois, "length", lambda=0.5, + infinite=1)))) }) test_that("Errors are thrown", { - expect_error(chain_sim(n=2, "dummy"), "does not exist") + expect_error(chain_sim(n=2, "dummy"), "is not a function") }) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 24abe368..07e9cada 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -33,10 +33,10 @@ At the heart of the `bpmodels` package are the `chains_ll` and `chains_sim` func ```{r} chain_sizes <- c(1,1,4,7) # example of observed chain sizes -chain_ll(chain_sizes, "pois", "size", lambda=0.5) +chain_ll(chain_sizes, rpois, "size", lambda=0.5) ``` -The first argument of `chain_ll` is the size (or length) distribution to analyse. The second argument (called `offspring`) specifies the offspring distribution. This is given as a character string that refers to the function used to generate random offspring. It can be any probability distribution implemented in R, that is, one that has a corresponding function for generating random numbers beginning with the letter `r`. In the case of the example above, since random Poisson numbers are generated in R using a function called `rpois`, "pois" is the corresponding string to pass to the `offspring` argument. +The first argument of `chain_ll` is the size (or length) distribution to analyse. The second argument (called `offspring`) specifies the offspring distribution. This is given as a the function used to generate random offspring. It can be any probability distribution implemented in R, that is, one that has a corresponding function for generating random numbers beginning with the letter `r`. In the case of the example above, since random Poisson numbers are generated in R using a function called `rpois`, this is the function to pass as the `offspring` argument. The third argument (called `stat`) determines whether to analyse chain sizes ("size", the default if this argument is not specified) or lengths ("length"). Lastly, any named arguments not recognised by `chain_ll` are interpreted as parameters of the corresponding probability distribution, here `lambda=0.5` as the mean of the Poisson distribution (see the R help page for the Poisson distribution for more information). @@ -49,19 +49,23 @@ You can use the `R` help to find out about usage of the `chains_ll` function, To simulate from a branching process, use the `chain_sim` function, which follows the same syntax as the `chain_ll` function: ```{r} -chain_sim(n=5, "pois", "size", lambda=0.5) +chain_sim(n=5, rpois, "size", lambda=0.5) ``` # Methodology -If the probability distribution of chain sizes or lengths has an analytical solution, this will be used (size distribution: Poisson and negative binomial; length distribution: Poisson and geometric). If not, simulations are used to approximate this probability distributions (using a linear approximation to the cumulative distribution for unobserved sizes/lengths), requiring an additional parameter `nsim_offspring` for the number of simulations to be used for this approximation. +If the probability distribution of chain sizes or lengths has an analytical solution, this will be used (size distribution: Poisson and negative binomial; length distribution: Poisson and geometric). If not, simulations are used to approximate this probability distributions (using a linear approximation to the cumulative distribution for unobserved sizes/lengths), requiring an additional parameter `nsim_offspring` for the number of simulations to be used for this approximation. For example, to get offspring drawn from a binomial distribution with probability `p=0.5`. + +```{r} +chain_ll(chain_sizes, rbinom, "size", size=1, prob=0.5, nsim_offspring=100) +``` # Imperfect observations The `chain_ll` function has an `obs_prob` parameter that can be used to determine the likelihood if observations are imperfect. This only works when analysing chain sizes (`stat="size"`). In that case, true chain sizes are simulated repeatedly (the number of times given by the `nsim_obs` argument) and the likelihood calculated for each of these simulations. For example, if the probability of observing each case is 30%, use ```{r} -ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda=0.5, nsim_obs=10) +ll <- chain_ll(chain_sizes, rpois, "size", obs_prob = 0.3, lambda=0.5, nsim_obs=10) summary(ll) ``` From 48f83ce782d90019da048b88af46c93ccac99316 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 11:32:26 +0000 Subject: [PATCH 028/828] update link for Appveyor badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 028c7a9e..2654ac3e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # bpmodels [![Travis-CI Build Status](https://travis-ci.org/sbfnk/bpmodels.svg?branch=master)](https://travis-ci.org/sbfnk/bpmodels) -[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/github/sbfnk)](https://ci.appveyor.com/project/sbfnk/bpmodels) +[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/y37i8x0wo9o8s2wf?svg=true)](https://ci.appveyor.com/project/sbfnk/bpmodels) [![codecov](https://codecov.io/github/sbfnk/bpmodels/branch/master/graphs/badge.svg)](https://codecov.io/github/sbfnk/bpmodels) Methods for analysing the distribution of epidemiological chain sizes and lengths From 0ae93936d50883c4f4cb150dac0716869a6874a9 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 11:36:20 +0000 Subject: [PATCH 029/828] fix typos --- R/likelihoods.R | 6 +++--- R/utils.r | 2 +- man/offspring_ll.Rd | 2 +- man/pois_length_ll.Rd | 2 +- man/pois_size_ll.Rd | 2 +- man/rbinom_size.Rd | 2 +- vignettes/introduction.Rmd | 8 ++++---- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index f84b383f..cbcc04ec 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -1,7 +1,7 @@ ##' Likelihood of the size of chains with Poisson offspring distribution ##' ##' @param x vector of sizes -##' @param lambda rate of the Poisson distributino +##' @param lambda rate of the Poisson distribution ##' @return log-likelihood values ##' @author Sebastian Funk ##' @keywords internal @@ -57,7 +57,7 @@ gborel_size_ll <- function(x, size, prob, mu) { ##' Likelihood of the length of chains with Poisson offspring distribution ##' ##' @param x vector of sizes -##' @param lambda rate of the Poisson distributino +##' @param lambda rate of the Poisson distribution ##' @return log-likelihood values ##' @author Sebastian Funk ##' @keywords internal @@ -99,7 +99,7 @@ geom_length_ll <- function(x, prob) { ##' @param x vector of sizes ##' @param nsim_offspring number of simulations of the offspring distribution ##' for approximation the size/length distribution -##' @param ... any paramaters to pass to \code{\link{chain_sim}} +##' @param ... any parameters to pass to \code{\link{chain_sim}} ##' @return log-likelihood values ##' @author Sebastian Funk ##' @inheritParams chain_ll diff --git a/R/utils.r b/R/utils.r index 86fe3f7d..a1b74e9f 100644 --- a/R/utils.r +++ b/R/utils.r @@ -12,7 +12,7 @@ complementary_logprob <- function(x) { ##' Samples size (the number of trials) of a binomial distribution ##' ##' Samples the size parameter from the binomial distribution with fixed x -##' (number of sucesses) and p (sucess probability) +##' (number of successes) and p (success probability) ##' @param n number of samples to generate ##' @param x number of successes ##' @param prob probability of success diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index 19d8fee4..7bfe36c6 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -18,7 +18,7 @@ Poisson distributed offspring} \item{nsim_offspring}{number of simulations of the offspring distribution for approximation the size/length distribution} -\item{...}{any paramaters to pass to \code{\link{chain_sim}}} +\item{...}{any parameters to pass to \code{\link{chain_sim}}} } \value{ log-likelihood values diff --git a/man/pois_length_ll.Rd b/man/pois_length_ll.Rd index 8bcf37d4..4a767a99 100644 --- a/man/pois_length_ll.Rd +++ b/man/pois_length_ll.Rd @@ -9,7 +9,7 @@ pois_length_ll(x, lambda) \arguments{ \item{x}{vector of sizes} -\item{lambda}{rate of the Poisson distributino} +\item{lambda}{rate of the Poisson distribution} } \value{ log-likelihood values diff --git a/man/pois_size_ll.Rd b/man/pois_size_ll.Rd index 19163265..931b1430 100644 --- a/man/pois_size_ll.Rd +++ b/man/pois_size_ll.Rd @@ -9,7 +9,7 @@ pois_size_ll(x, lambda) \arguments{ \item{x}{vector of sizes} -\item{lambda}{rate of the Poisson distributino} +\item{lambda}{rate of the Poisson distribution} } \value{ log-likelihood values diff --git a/man/rbinom_size.Rd b/man/rbinom_size.Rd index c50027b4..89b2e539 100644 --- a/man/rbinom_size.Rd +++ b/man/rbinom_size.Rd @@ -18,7 +18,7 @@ a sampled size } \description{ Samples the size parameter from the binomial distribution with fixed x -(number of sucesses) and p (sucess probability) +(number of successes) and p (success probability) } \author{ Sebastian Funk diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 07e9cada..8a456e68 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -17,7 +17,7 @@ knitr::opts_chunk$set( ) ``` -[bpmodels](https://github.com/sbfnk/bpmodels) is an `R` package to analyse and simulate the size and length of branching processes with an arbitrary offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks. +[bpmodels](https://github.com/sbfnk/bpmodels) is an `R` package to analyse and simulate the size and length of branching processes with a given offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks. # Usage @@ -29,7 +29,7 @@ library('bpmodels') suppressWarnings(library('bpmodels')) ``` -At the heart of the `bpmodels` package are the `chains_ll` and `chains_sim` functions. The `chains_ll` function calculates the log-likelihood of a distribution of chain sizes or lengths given an offspring distribution and associated parameters. For example, to get the log-likelihood for a given observed distribution of chain sizes assuming a mean number of 0.5 Poisson-distributed offspring per generation, use +At the heart of the package are the `chains_ll` and `chains_sim` functions. The `chains_ll` function calculates the log-likelihood of a distribution of chain sizes or lengths given an offspring distribution and associated parameters. For example, to get the log-likelihood for a given observed distribution of chain sizes assuming a mean number of 0.5 Poisson-distributed offspring per generation, use ```{r} chain_sizes <- c(1,1,4,7) # example of observed chain sizes @@ -38,7 +38,7 @@ chain_ll(chain_sizes, rpois, "size", lambda=0.5) The first argument of `chain_ll` is the size (or length) distribution to analyse. The second argument (called `offspring`) specifies the offspring distribution. This is given as a the function used to generate random offspring. It can be any probability distribution implemented in R, that is, one that has a corresponding function for generating random numbers beginning with the letter `r`. In the case of the example above, since random Poisson numbers are generated in R using a function called `rpois`, this is the function to pass as the `offspring` argument. -The third argument (called `stat`) determines whether to analyse chain sizes ("size", the default if this argument is not specified) or lengths ("length"). Lastly, any named arguments not recognised by `chain_ll` are interpreted as parameters of the corresponding probability distribution, here `lambda=0.5` as the mean of the Poisson distribution (see the R help page for the Poisson distribution for more information). +The third argument (called `stat`) determines whether to analyse chain sizes (`"size"`, the default if this argument is not specified) or lengths (`"length"`). Lastly, any named arguments not recognised by `chain_ll` are interpreted as parameters of the corresponding probability distribution, here `lambda=0.5` as the mean of the Poisson distribution (see the R help page for the Poisson distribution for more information). You can use the `R` help to find out about usage of the `chains_ll` function, @@ -54,7 +54,7 @@ chain_sim(n=5, rpois, "size", lambda=0.5) # Methodology -If the probability distribution of chain sizes or lengths has an analytical solution, this will be used (size distribution: Poisson and negative binomial; length distribution: Poisson and geometric). If not, simulations are used to approximate this probability distributions (using a linear approximation to the cumulative distribution for unobserved sizes/lengths), requiring an additional parameter `nsim_offspring` for the number of simulations to be used for this approximation. For example, to get offspring drawn from a binomial distribution with probability `p=0.5`. +If the probability distribution of chain sizes or lengths has an analytical solution, this will be used (size distribution: Poisson and negative binomial; length distribution: Poisson and geometric). If not, simulations are used to approximate this probability distributions (using a linear approximation to the cumulative distribution for unobserved sizes/lengths), requiring an additional parameter `nsim_offspring` for the number of simulations to be used for this approximation. For example, to get offspring drawn from a binomial distribution with probability `prob=0.5`. ```{r} chain_ll(chain_sizes, rbinom, "size", size=1, prob=0.5, nsim_offspring=100) From addfa091979f6195685b5932215a0c2c0c7319f6 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Wed, 16 Jan 2019 11:37:57 +0000 Subject: [PATCH 030/828] set seed in vignette --- vignettes/introduction.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 8a456e68..0a237859 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -27,6 +27,7 @@ library('bpmodels') ``` ```{r echo=FALSE} suppressWarnings(library('bpmodels')) +set.seed(13) ``` At the heart of the package are the `chains_ll` and `chains_sim` functions. The `chains_ll` function calculates the log-likelihood of a distribution of chain sizes or lengths given an offspring distribution and associated parameters. For example, to get the log-likelihood for a given observed distribution of chain sizes assuming a mean number of 0.5 Poisson-distributed offspring per generation, use From dd31949ae1af69d03cc4ef169fc1542ec5bb6a1a Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 17 Jan 2019 12:28:50 +0000 Subject: [PATCH 031/828] fix for obs_size with only one chain --- R/likelihoods.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/likelihoods.R b/R/likelihoods.R index cbcc04ec..9e60a688 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -150,6 +150,7 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, } sampled_x <- replicate(nsim_obs, pmin(rbinom_size(length(x), x, obs_prob), infinite)) + if (length(x) == 1) sampled_x <- matrix(sampled_x, nrow=1) size_x <- unlist(sampled_x) if (!is.finite(infinite)) infinite <- max(size_x) + 1 } else { From 67bc19d7158f06701e96ee55dd477927eea79a5b Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 17 Jan 2019 12:29:21 +0000 Subject: [PATCH 032/828] fix typo --- R/utils.r | 2 +- man/rbinom_size.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utils.r b/R/utils.r index a1b74e9f..a03ffdd5 100644 --- a/R/utils.r +++ b/R/utils.r @@ -16,7 +16,7 @@ complementary_logprob <- function(x) { ##' @param n number of samples to generate ##' @param x number of successes ##' @param prob probability of success -##' @return a sampled size +##' @return sampled sizes ##' @author Sebastian Funk ##' @keywords internal rbinom_size <- function(n, x, prob) { diff --git a/man/rbinom_size.Rd b/man/rbinom_size.Rd index 89b2e539..5e19360d 100644 --- a/man/rbinom_size.Rd +++ b/man/rbinom_size.Rd @@ -14,7 +14,7 @@ rbinom_size(n, x, prob) \item{prob}{probability of success} } \value{ -a sampled size +sampled sizes } \description{ Samples the size parameter from the binomial distribution with fixed x From 43d5c883666a35b51330381c006f524b235a0778 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 17 Jan 2019 12:29:34 +0000 Subject: [PATCH 033/828] imperfect observation for chain lengths --- R/likelihoods.R | 9 +++++++-- R/utils.r | 15 +++++++++++++++ man/rgen_length.Rd | 27 +++++++++++++++++++++++++++ tests/testthat/tests-ll.r | 2 ++ vignettes/introduction.Rmd | 2 +- 5 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 man/rgen_length.Rd diff --git a/R/likelihoods.R b/R/likelihoods.R index 9e60a688..5da0966c 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -148,8 +148,13 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, if (missing(nsim_obs)) { stop("'nsim_obs' must be specified if 'obs_prob' is <1") } + if (stat=="size") { + sample_func <- rbinom_size + } else if (stat=="length"){ + sample_func <- rgen_length + } sampled_x <- - replicate(nsim_obs, pmin(rbinom_size(length(x), x, obs_prob), infinite)) + replicate(nsim_obs, pmin(sample_func(length(x), x, obs_prob), infinite)) if (length(x) == 1) sampled_x <- matrix(sampled_x, nrow=1) size_x <- unlist(sampled_x) if (!is.finite(infinite)) infinite <- max(size_x) + 1 @@ -173,7 +178,7 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, ## get likelihood function as given by `offspring` and `stat`` likelihoods <- c() ## get offspring distribution by stripping first letter from offspring - ## function + ## function offspring_dist <- sub("^.", "", deparse(substitute(offspring))) ll_func <- paste(offspring_dist, stat, "ll", sep="_") pars <- as.list(unlist(list(...))) ## converts vectors to lists diff --git a/R/utils.r b/R/utils.r index a03ffdd5..233e1b9d 100644 --- a/R/utils.r +++ b/R/utils.r @@ -22,3 +22,18 @@ complementary_logprob <- function(x) { rbinom_size <- function(n, x, prob) { x + stats::rnbinom(n, x, prob) + stats::rnbinom(n, 1, prob) } + +##' Samples chain lengths with given observation probabilities +##' +##' Samples the length of a transmission chain where each individual element is +##' observed with binomial probability +##' (number of successes) and p (success probability) +##' @param n number of samples to generate +##' @param x observed chain lengths +##' @param prob probability of observation +##' @return sampled lengths +##' @author Sebastian Funk +##' @keywords internal +rgen_length <- function(n, x, prob) { + x + ceiling(log(stats::runif(n, 0, 1)) / log(1 - prob) - 1) +} diff --git a/man/rgen_length.Rd b/man/rgen_length.Rd new file mode 100644 index 00000000..14ebbb17 --- /dev/null +++ b/man/rgen_length.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.r +\name{rgen_length} +\alias{rgen_length} +\title{Samples chain lengths with given observation probabilities} +\usage{ +rgen_length(n, x, prob) +} +\arguments{ +\item{n}{number of samples to generate} + +\item{x}{observed chain lengths} + +\item{prob}{probability of observation} +} +\value{ +sampled lengths +} +\description{ +Samples the length of a transmission chain where each individual element is +observed with binomial probability +(number of successes) and p (success probability) +} +\author{ +Sebastian Funk +} +\keyword{internal} diff --git a/tests/testthat/tests-ll.r b/tests/testthat/tests-ll.r index 51c67a89..65d10719 100644 --- a/tests/testthat/tests-ll.r +++ b/tests/testthat/tests-ll.r @@ -9,6 +9,8 @@ test_that("Likelihoods can be calculated", expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, infinite = 5), 0) expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, obs_prob = 0.5, nsim_obs=1), 0) + expect_lt(chain_ll(chains, rpois, "length", lambda=0.5, obs_prob = 0.5, + nsim_obs=1), 0) expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, infinite = 5, obs_prob = 0.5, nsim_obs=1), 0) expect_lt(chain_ll(chains, rbinom, "size", size=1, prob=0.5), 0) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 0a237859..a56b810d 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -63,7 +63,7 @@ chain_ll(chain_sizes, rbinom, "size", size=1, prob=0.5, nsim_offspring=100) # Imperfect observations -The `chain_ll` function has an `obs_prob` parameter that can be used to determine the likelihood if observations are imperfect. This only works when analysing chain sizes (`stat="size"`). In that case, true chain sizes are simulated repeatedly (the number of times given by the `nsim_obs` argument) and the likelihood calculated for each of these simulations. For example, if the probability of observing each case is 30%, use +The `chain_ll` function has an `obs_prob` parameter that can be used to determine the likelihood if observations are imperfect. In that case, true chain sizes or lengths are simulated repeatedly (the number of times given by the `nsim_obs` argument) and the likelihood calculated for each of these simulations. For example, if the probability of observing each case is 30%, use ```{r} ll <- chain_ll(chain_sizes, rpois, "size", obs_prob = 0.3, lambda=0.5, nsim_obs=10) From 42d742a691e9e3fdf49e8931f8863e0f0b337fa7 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 18 Jan 2019 07:53:01 +0000 Subject: [PATCH 034/828] simpler negative binomial sampling --- R/utils.r | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/utils.r b/R/utils.r index 233e1b9d..3dd3cc87 100644 --- a/R/utils.r +++ b/R/utils.r @@ -20,7 +20,7 @@ complementary_logprob <- function(x) { ##' @author Sebastian Funk ##' @keywords internal rbinom_size <- function(n, x, prob) { - x + stats::rnbinom(n, x, prob) + stats::rnbinom(n, 1, prob) + x + stats::rnbinom(n, x + 1, prob) - 1 } ##' Samples chain lengths with given observation probabilities @@ -35,5 +35,7 @@ rbinom_size <- function(n, x, prob) { ##' @author Sebastian Funk ##' @keywords internal rgen_length <- function(n, x, prob) { - x + ceiling(log(stats::runif(n, 0, 1)) / log(1 - prob) - 1) + x + + ceiling(log(stats::runif(n, 0, 1)) / log(1 - prob) - 1) + + ceiling(log(stats::runif(n, 0, 1)) / log(1 - prob) - 1) } From 444b4dc7659a8d57dccb342cabb01fa05dca94fa Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 18 Jan 2019 07:54:16 +0000 Subject: [PATCH 035/828] sample missed generations at the beginning and end of chains --- R/utils.r | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utils.r b/R/utils.r index 3dd3cc87..b5efaebc 100644 --- a/R/utils.r +++ b/R/utils.r @@ -26,8 +26,8 @@ rbinom_size <- function(n, x, prob) { ##' Samples chain lengths with given observation probabilities ##' ##' Samples the length of a transmission chain where each individual element is -##' observed with binomial probability -##' (number of successes) and p (success probability) +##' observed with binomial probability (number of successes) and p (success +##' probability) ##' @param n number of samples to generate ##' @param x observed chain lengths ##' @param prob probability of observation From b8d4145b8709b37a12a9a24e94a2475ed9ffa89a Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 18 Jan 2019 08:09:47 +0000 Subject: [PATCH 036/828] rbinom_size fix (no need to subtract 1) --- R/utils.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils.r b/R/utils.r index b5efaebc..015b091b 100644 --- a/R/utils.r +++ b/R/utils.r @@ -20,7 +20,7 @@ complementary_logprob <- function(x) { ##' @author Sebastian Funk ##' @keywords internal rbinom_size <- function(n, x, prob) { - x + stats::rnbinom(n, x + 1, prob) - 1 + x + stats::rnbinom(n, x + 1, prob) } ##' Samples chain lengths with given observation probabilities From 773048931d8d67f69eeb0ccf5c6b5db20db41bf5 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 22 Jan 2019 11:40:59 +0000 Subject: [PATCH 037/828] throw error for non-integer offspring distributions --- R/simulate.r | 3 +++ tests/testthat/tests-sim.r | 1 + 2 files changed, 4 insertions(+) diff --git a/R/simulate.r b/R/simulate.r index 6ce66d1f..f5ede015 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -30,6 +30,9 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, state <- 1 while (state > 0 && state < infinite) { n_offspring <- sum(offspring(n=state, ...)) + if (n_offspring %% 1 > 0) { + stop("Offspring distribution must return integers") + } if (stat=="size") { stat_track <- stat_track + n_offspring } else if (stat=="length") { diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index c543e497..98eac794 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -11,4 +11,5 @@ test_that("Chains can be simulated", test_that("Errors are thrown", { expect_error(chain_sim(n=2, "dummy"), "is not a function") + expect_error(chain_sim(n=2, rlnorm, meanlog=log(1.6)), "integer") }) From bf2d11825b1f20b51244f2a264f209f5a0b6d26d Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 7 Feb 2019 12:40:46 +0900 Subject: [PATCH 038/828] pre-allocate dist Growing a vector in R tends to be slow. This process will run faster due to the pre-allocation: ```r Unit: relative expr min lq mean median uq max neval cld { slow <- c() for (i in seq(1e+06)) slow[i] <- 1 } 5.156572 5.071423 5.084367 5.023986 5.271332 3.261024 100 b { fast <- integer(1e+06) for (i in seq(1e+06)) fast[i] <- 1 } 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 100 a ``` --- R/simulate.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/simulate.r b/R/simulate.r index f5ede015..5237efc9 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -24,7 +24,7 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, } ## next, simulate n chains - dist <- c() + dist <- integer(n) for (i in seq_len(n)) { stat_track <- 1 ## track length or size (depending on `stat`) state <- 1 From f65e40f11d739480818898029baaee186b107bfc Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 7 Feb 2019 08:14:22 +0000 Subject: [PATCH 039/828] vectorise chain simulations --- R/simulate.r | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/R/simulate.r b/R/simulate.r index 5237efc9..4be6f8d6 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -23,27 +23,37 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, stop("object passed as 'offspring' is not a function.") } + stat_track <- rep(1, n) ## track length or size (depending on `stat`) + n_offspring <- rep(1, n) ## current number of offspring + sim <- seq_len(n) ## track chains that are still being simulated + ## next, simulate n chains - dist <- integer(n) - for (i in seq_len(n)) { - stat_track <- 1 ## track length or size (depending on `stat`) - state <- 1 - while (state > 0 && state < infinite) { - n_offspring <- sum(offspring(n=state, ...)) - if (n_offspring %% 1 > 0) { - stop("Offspring distribution must return integers") - } - if (stat=="size") { - stat_track <- stat_track + n_offspring - } else if (stat=="length") { - if (n_offspring > 0) stat_track <- stat_track + 1 - } - state <- n_offspring + while (length(sim) > 0) { + ## simulate next generation + next_gen <- offspring(n=sum(n_offspring[sim]), ...) + if (any(next_gen %% 1 > 0)) { + stop("Offspring distribution must return integers") + } + ## record indices corresponding the number of offspring of last + ## iteration, for the tapply call below + indices <- rep(sim, n_offspring[sim]) + ## initialise number of offspring + n_offspring <- rep(0, n) + ## assign offspring sum to indices still being simulated + n_offspring[sim] <- tapply(next_gen, indices, sum) + ## track size/length + if (stat=="size") { + stat_track <- stat_track + n_offspring + } else if (stat=="length") { + stat_track <- stat_track + pmin(1, n_offspring) } - if (state >= infinite) stat_track <- Inf - dist[i] <- stat_track + ## only continue to simulate chains that offspring and aren't of + ## infinite size/length + sim <- which(n_offspring > 0 & stat_track < infinite) } - return(dist) + stat_track[stat_track >= infinite] <- Inf + + return(stat_track) } From e22374d44a0ac007e76e1d4169118a0cc32d92e4 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 7 Feb 2019 08:18:58 +0000 Subject: [PATCH 040/828] documentation update --- R/utils.r | 4 ++-- man/rgen_length.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/utils.r b/R/utils.r index 015b091b..f5d85fce 100644 --- a/R/utils.r +++ b/R/utils.r @@ -26,8 +26,8 @@ rbinom_size <- function(n, x, prob) { ##' Samples chain lengths with given observation probabilities ##' ##' Samples the length of a transmission chain where each individual element is -##' observed with binomial probability (number of successes) and p (success -##' probability) +##' observed with binomial probability with parameters n (number of successes) +##' and p (success probability) ##' @param n number of samples to generate ##' @param x observed chain lengths ##' @param prob probability of observation diff --git a/man/rgen_length.Rd b/man/rgen_length.Rd index 14ebbb17..21a6359e 100644 --- a/man/rgen_length.Rd +++ b/man/rgen_length.Rd @@ -18,8 +18,8 @@ sampled lengths } \description{ Samples the length of a transmission chain where each individual element is -observed with binomial probability -(number of successes) and p (success probability) +observed with binomial probability with parameters n (number of successes) +and p (success probability) } \author{ Sebastian Funk From f0480f0285650b8d3035ca9c5a3776926c87b2a6 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 7 Feb 2019 08:25:13 +0000 Subject: [PATCH 041/828] update DESCRIPTION and NEWS --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bf73caf5..c8f4b3c1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: bpmodels Version: 0.1.0 Title: Analysing chain statistics using branching process models -Authors@R: c(person("Sebastian", "Funk", email = "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre"))) +Authors@R: c(person("Sebastian", "Funk", email = "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre")), person("Zhian N.", "Kamvar", email = "zkamvar@gmail.com", role = c("ctb"))) Description: Provides methods to analyse and simulate the size and length of branching processes with an arbitrary offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks, as discussed in Farrington et al. (2003) . Suggests: testthat, diff --git a/NEWS.md b/NEWS.md index 1d3b8bbd..0e74623f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# bpmodels 0.1.9999 + +* faster, vectorised chain simulations + # bpmodels 0.1.0 * initial release From 466bcbd48f0d3641e74e1fab78b79e4722a6ec82 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 7 Feb 2019 10:36:49 +0000 Subject: [PATCH 042/828] simulate trees --- R/simulate.r | 50 ++++++++++++++++++++++++++++++++------ tests/testthat/tests-sim.r | 2 ++ 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/R/simulate.r b/R/simulate.r index 4be6f8d6..5709ac26 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -7,14 +7,18 @@ ##' @param stat statistic to calculate ("size" or "length" of chains) ##' @param infinite a size or length from which the size/length is to be ##' considered infinite +##' @param tree return the tree of infectors ##' @param ... parameters of the offspring distribution -##' @return a vector of sizes/lengths +##' @return a vector of sizes/lengths (if \code{tree==FALSE}), or a data frame +##' with columns `n` (simulation ID), `id` (a unique ID within each +##' simulation for each individual element of the chain), `ancestor` (the ID +##' of the ancestor of each element) and `generation`. ##' @author Sebastian Funk ##' @export ##' @examples ##' chain_sim(n=5, rpois, "size", lambda=0.5) chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, - ...) { + tree=FALSE, ...) { stat <- match.arg(stat) @@ -27,6 +31,17 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, n_offspring <- rep(1, n) ## current number of offspring sim <- seq_len(n) ## track chains that are still being simulated + ## initialise data frame to hold the trees + if (tree) { + generation <- 1L + tdf <- + data.frame(n=seq_len(n), + id=1L, + ancestor=NA_integer_, + generation=generation) + ancestor_ids <- rep(1, n) + } + ## next, simulate n chains while (length(sim) > 0) { ## simulate next generation @@ -34,26 +49,47 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, if (any(next_gen %% 1 > 0)) { stop("Offspring distribution must return integers") } - ## record indices corresponding the number of offspring of last - ## iteration, for the tapply call below + + ## record indices corresponding the number of offspring indices <- rep(sim, n_offspring[sim]) + ## initialise number of offspring n_offspring <- rep(0, n) ## assign offspring sum to indices still being simulated n_offspring[sim] <- tapply(next_gen, indices, sum) + ## track size/length if (stat=="size") { stat_track <- stat_track + n_offspring } else if (stat=="length") { stat_track <- stat_track + pmin(1, n_offspring) } + + ## record ancestors (if tree==TRUE) + if (tree && sum(n_offspring[sim]) > 0) { + ancestors <- rep(ancestor_ids, next_gen) + ids <- ancestors + unlist(lapply(n_offspring[sim], seq_len)) + generation <- generation + 1L + ## record indices corresponding the number of offspring + new_df <- + data.frame(n=rep(sim, n_offspring[sim]), + id=ids, + ancestor=ancestors, + generation=generation) + tdf <- rbind(tdf, new_df) + } + ## only continue to simulate chains that offspring and aren't of ## infinite size/length sim <- which(n_offspring > 0 & stat_track < infinite) + if (tree) ancestor_ids <- unlist(lapply(n_offspring[sim], seq_len)) } - stat_track[stat_track >= infinite] <- Inf - - return(stat_track) + if (tree) { + return(tdf) + } else { + stat_track[stat_track >= infinite] <- Inf + return(stat_track) + } } diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index 98eac794..240b743d 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -4,6 +4,8 @@ test_that("Chains can be simulated", { expect_length(chain_sim(n=2, rpois, lambda=0.5), 2) expect_length(chain_sim(n=10, rpois, "length", lambda=0.9), 10) + expect_true(is.data.frame(chain_sim(n=10, rpois, lambda=2, tree=TRUE, + infinite=10))) expect_false(any(is.finite(chain_sim(n=2, rpois, "length", lambda=0.5, infinite=1)))) }) From b0f022515271fd38cfd90cbf47314700619a8123 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 7 Feb 2019 12:43:43 +0000 Subject: [PATCH 043/828] update `chain_sim` documentation --- man/chain_sim.Rd | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/man/chain_sim.Rd b/man/chain_sim.Rd index 9030121b..c7c2de75 100644 --- a/man/chain_sim.Rd +++ b/man/chain_sim.Rd @@ -5,7 +5,7 @@ \title{Simulate chains using a branching process} \usage{ chain_sim(n, offspring, stat = c("size", "length"), infinite = Inf, - ...) + tree = FALSE, ...) } \arguments{ \item{n}{number of simulations to run.} @@ -19,10 +19,15 @@ Poisson distributed offspring} \item{infinite}{a size or length from which the size/length is to be considered infinite} +\item{tree}{return the tree of infectors} + \item{...}{parameters of the offspring distribution} } \value{ -a vector of sizes/lengths +a vector of sizes/lengths (if \code{tree==FALSE}), or a data frame + with columns `n` (simulation ID), `id` (a unique ID within each + simulation for each individual element of the chain), `ancestor` (the ID + of the ancestor of each element) and `generation`. } \description{ Simulate chains using a branching process From bd05e51aef62ae4a7ade3f58b27050dd555dea0f Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 7 Feb 2019 13:23:22 +0000 Subject: [PATCH 044/828] tree simulations: fix IDs --- R/simulate.r | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/simulate.r b/R/simulate.r index 5709ac26..fdffe01a 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -40,6 +40,7 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ancestor=NA_integer_, generation=generation) ancestor_ids <- rep(1, n) + current_max_id <- rep(1, n) } ## next, simulate n chains @@ -68,11 +69,14 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ## record ancestors (if tree==TRUE) if (tree && sum(n_offspring[sim]) > 0) { ancestors <- rep(ancestor_ids, next_gen) - ids <- ancestors + unlist(lapply(n_offspring[sim], seq_len)) + current_max_id <- unname(tapply(ancestor_ids, indices, max)) + indices <- rep(sim, n_offspring[sim]) + ids <- rep(current_max_id, n_offspring[sim]) + + unlist(lapply(n_offspring[sim], seq_len)) generation <- generation + 1L ## record indices corresponding the number of offspring new_df <- - data.frame(n=rep(sim, n_offspring[sim]), + data.frame(n=indices, id=ids, ancestor=ancestors, generation=generation) @@ -82,7 +86,7 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ## only continue to simulate chains that offspring and aren't of ## infinite size/length sim <- which(n_offspring > 0 & stat_track < infinite) - if (tree) ancestor_ids <- unlist(lapply(n_offspring[sim], seq_len)) + if (tree) ancestor_ids <- ids[indices %in% sim] } if (tree) { From 16dfadf691b171a090f84980d4139b8caa1ceec2 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 7 Mar 2019 13:31:11 +0000 Subject: [PATCH 045/828] improved handling of excluded sizes --- R/likelihoods.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index 5da0966c..bd908666 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -167,7 +167,7 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, if (any(size_x == infinite)) { calc_sizes <- seq_len(infinite-1) } else { - calc_sizes <- unique(size_x) + calc_sizes <- unique(c(size_x, exclude)) } ## get random function as given by `offspring` From 09ef8cbec7aeeb81287a91c9a9e6efb9d2c82eb9 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Thu, 7 Mar 2019 13:31:31 +0000 Subject: [PATCH 046/828] get the correct function name, even in optim etc. --- R/likelihoods.R | 4 ++-- R/utils.r | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index bd908666..758b7626 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -139,7 +139,7 @@ offspring_ll <- function(x, offspring, stat, nsim_offspring=100, ...) { ##' chain_sizes <- c(1,1,4,7) # example of observed chain sizes ##' chain_ll(chain_sizes, rpois, "size", lambda=0.5) chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, - infinite = Inf, exclude, nsim_obs, ...) { + infinite = Inf, exclude=c(), nsim_obs, ...) { stat <- match.arg(stat) ## checks @@ -179,7 +179,7 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, likelihoods <- c() ## get offspring distribution by stripping first letter from offspring ## function - offspring_dist <- sub("^.", "", deparse(substitute(offspring))) + offspring_dist <- sub("^.", "", find_function_name(offspring)) ll_func <- paste(offspring_dist, stat, "ll", sep="_") pars <- as.list(unlist(list(...))) ## converts vectors to lists diff --git a/R/utils.r b/R/utils.r index f5d85fce..9f912db2 100644 --- a/R/utils.r +++ b/R/utils.r @@ -39,3 +39,21 @@ rgen_length <- function(n, x, prob) { ceiling(log(stats::runif(n, 0, 1)) / log(1 - prob) - 1) + ceiling(log(stats::runif(n, 0, 1)) / log(1 - prob) - 1) } + +##' Finds the name of a function passed as an argument +##' +##' This works even when a function is passed multiple times (e.g., when used +##' inside an \code{\link{optim}} call). +##' See https://stackoverflow.com/a/46740314/10886760 +##' @param fun function of which the name is to be determined +##' @return function name +##' @author Sebastian Funk +##' @keywords internal +find_function_name <- function(fun) { + objects <- ls(envir = environment(fun)) + for (i in objects) { + if (identical(fun, get(i, envir = environment(fun)))) { + return(i) + } + } +} From d512afbe8555eb44cc52fd6125ce2c91bcccc96b Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Sat, 17 Aug 2019 17:46:20 +0100 Subject: [PATCH 047/828] exclude sizes in likelihood as desired --- R/likelihoods.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index 758b7626..88064f11 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -207,10 +207,10 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, ## adjust for binomial observation probabilities if (obs_prob < 1) { chains_likelihood <- apply(sampled_x, 2, function(sx) { - sum(likelihoods[sx]) + sum(likelihoods[sx[!(sx %in% exclude)]]) }) } else { - chains_likelihood <- sum(likelihoods[x]) + chains_likelihood <- sum(likelihoods[x[!(x %in% exclude)]]) } return(chains_likelihood) From 3617f47fe57b9aa551af8fe1314ca0ae7027237a Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 19 Aug 2019 14:31:33 +0100 Subject: [PATCH 048/828] Likelihood: allow passing string for offspring This is useful if the random-number-generating function does not exist, but a closed form does --- R/likelihoods.R | 24 +++++++++++++++++++----- man/chain_ll.Rd | 6 ++---- man/find_function_name.Rd | 23 +++++++++++++++++++++++ man/offspring_ll.Rd | 4 +--- tests/testthat/tests-ll.r | 5 +++++ 5 files changed, 50 insertions(+), 12 deletions(-) create mode 100644 man/find_function_name.Rd diff --git a/R/likelihoods.R b/R/likelihoods.R index 88064f11..f61c5e97 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -122,6 +122,7 @@ offspring_ll <- function(x, offspring, stat, nsim_offspring=100, ...) { ##' Likelihood for the outcome of a branching process ##' ##' @param x vector of sizes or lengths of transmission chains +##' @param offspring offspring distribution: either a function (e.g., \code{rpois} for Poisson) or a character string (e.g., "pois" for Poisson) ##' @param stat statistic given as \code{x} ("size" or "length" of chains) ##' @param obs_prob observation probability (assumed constant) ##' @param infinite any chains of this size/length will be treated as infinite @@ -143,6 +144,9 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, stat <- match.arg(stat) ## checks + if (!is.function(offspring) && !is.character(offspring)) { + stop("object passed as 'offspring' is not a function or character string.") + } if (obs_prob <= 0 || obs_prob > 1) stop("'obs_prob' must be within (0,1]") if (obs_prob < 1) { if (missing(nsim_obs)) { @@ -171,15 +175,16 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, } ## get random function as given by `offspring` - if (!is.function(offspring)) { - stop("object passed as 'offspring' is not a function.") + if (is.character(offspring)) { + offspring_dist <- offspring + } else { + ## get offspring distribution by stripping first letter from offspring + ## function + offspring_dist <- sub("^.", "", find_function_name(offspring)) } ## get likelihood function as given by `offspring` and `stat`` likelihoods <- c() - ## get offspring distribution by stripping first letter from offspring - ## function - offspring_dist <- sub("^.", "", find_function_name(offspring)) ll_func <- paste(offspring_dist, stat, "ll", sep="_") pars <- as.list(unlist(list(...))) ## converts vectors to lists @@ -188,6 +193,15 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, func <- get(ll_func) likelihoods[calc_sizes] <- do.call(func, c(list(x=calc_sizes), pars)) } else { + if (is.character(offspring)) { + roffspring_name <- paste0("r", offspring) + if (exists(roffspring_name)) { + offspring <- get(roffspring_name) + if (!is.function(offspring)) stop(roffspring_name, " is not a function.") + } else { + stop("Function ", roffspring_name, " does not exist.") + } + } likelihoods[calc_sizes] <- do.call(offspring_ll, c(list(x=calc_sizes, offspring=offspring, diff --git a/man/chain_ll.Rd b/man/chain_ll.Rd index 0ed5fa56..a1619dc8 100644 --- a/man/chain_ll.Rd +++ b/man/chain_ll.Rd @@ -5,14 +5,12 @@ \title{Likelihood for the outcome of a branching process} \usage{ chain_ll(x, offspring, stat = c("size", "length"), obs_prob = 1, - infinite = Inf, exclude, nsim_obs, ...) + infinite = Inf, exclude = c(), nsim_obs, ...) } \arguments{ \item{x}{vector of sizes or lengths of transmission chains} -\item{offspring}{offspring distribution, given as the function used to -generate the number of offspring in each generation, e.g. `rpois` for -Poisson distributed offspring} +\item{offspring}{offspring distribution: either a function (e.g., \code{rpois} for Poisson) or a character string (e.g., "pois" for Poisson)} \item{stat}{statistic given as \code{x} ("size" or "length" of chains)} diff --git a/man/find_function_name.Rd b/man/find_function_name.Rd new file mode 100644 index 00000000..d330baed --- /dev/null +++ b/man/find_function_name.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.r +\name{find_function_name} +\alias{find_function_name} +\title{Finds the name of a function passed as an argument} +\usage{ +find_function_name(fun) +} +\arguments{ +\item{fun}{function of which the name is to be determined} +} +\value{ +function name +} +\description{ +This works even when a function is passed multiple times (e.g., when used +inside an \code{\link{optim}} call). +See https://stackoverflow.com/a/46740314/10886760 +} +\author{ +Sebastian Funk +} +\keyword{internal} diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index 7bfe36c6..cc55a913 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -9,9 +9,7 @@ offspring_ll(x, offspring, stat, nsim_offspring = 100, ...) \arguments{ \item{x}{vector of sizes} -\item{offspring}{offspring distribution, given as the function used to -generate the number of offspring in each generation, e.g. `rpois` for -Poisson distributed offspring} +\item{offspring}{offspring distribution: either a function (e.g., \code{rpois} for Poisson) or a character string (e.g., "pois" for Poisson)} \item{stat}{statistic given as \code{x} ("size" or "length" of chains)} diff --git a/tests/testthat/tests-ll.r b/tests/testthat/tests-ll.r index 65d10719..cff7b997 100644 --- a/tests/testthat/tests-ll.r +++ b/tests/testthat/tests-ll.r @@ -1,6 +1,7 @@ context("Calculating the likelihood from a branching process model") chains <- c(1,1,4,7) +rtest <- "test" test_that("Likelihoods can be calculated", { @@ -30,6 +31,10 @@ test_that("Analytical size/length distributions are implemented", test_that("Errors are thrown", { expect_error(chain_ll(chains, "dummy", "size", lambda=0.5), + "does not exist") + expect_error(chain_ll(chains, list(), "size", lambda=0.5), + "not a function or") + expect_error(chain_ll(chains, "test", "size", lambda=0.5), "not a function") expect_error(chain_ll(chains, rpois, "size", lambda=0.5, obs_prob = 3), "must be within") From 4311dd222df3498b1de9f3fa10ba4938485271e6 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Mon, 19 Aug 2019 17:39:19 +0100 Subject: [PATCH 049/828] likelihoods: don't allow passing offspring functions This is really, really slow --- R/borel.r | 2 +- R/likelihoods.R | 52 +++++++++++++------------------------- R/simulate.r | 19 +++++++++----- man/chain_ll.Rd | 12 ++++++--- man/chain_sim.Rd | 8 +++--- man/offspring_ll.Rd | 4 ++- tests/testthat/tests-ll.r | 25 ++++++++---------- tests/testthat/tests-sim.r | 12 ++++----- vignettes/introduction.Rmd | 10 ++++---- 9 files changed, 67 insertions(+), 77 deletions(-) diff --git a/R/borel.r b/R/borel.r index 9e97df5d..56dc4331 100644 --- a/R/borel.r +++ b/R/borel.r @@ -22,5 +22,5 @@ dborel <- function(x, mu, log=FALSE) { ##' @return vector of random numbers ##' @author Sebastian Funk rborel <- function(n, mu, infinite=Inf) { - chain_sim(n, stats::rpois, "size", infinite=infinite, lambda=mu) + chain_sim(n, "pois", "size", infinite=infinite, lambda=mu) } diff --git a/R/likelihoods.R b/R/likelihoods.R index f61c5e97..bdebd49a 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -122,15 +122,15 @@ offspring_ll <- function(x, offspring, stat, nsim_offspring=100, ...) { ##' Likelihood for the outcome of a branching process ##' ##' @param x vector of sizes or lengths of transmission chains -##' @param offspring offspring distribution: either a function (e.g., \code{rpois} for Poisson) or a character string (e.g., "pois" for Poisson) ##' @param stat statistic given as \code{x} ("size" or "length" of chains) ##' @param obs_prob observation probability (assumed constant) ##' @param infinite any chains of this size/length will be treated as infinite ##' @param exclude any sizes/lengths to exclude from the likelihood calculation +##' @param individual if TRUE, a vector of individual log-likelihood contributions will be returned rather than the sum ##' @param nsim_obs number of simulations if the likelihood is to be ##' approximated for imperfect observations ##' @param ... parameters for the offspring distribution -##' @return likelihood +##' @return likelihood, or vector of likelihoods (if \code{obs_prob} < 1), or a list of individual likelihood contributions (if \code{individual=TRUE}) ##' @inheritParams chain_sim ##' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll ##' geom_length_ll offspring_ll @@ -138,14 +138,14 @@ offspring_ll <- function(x, offspring, stat, nsim_offspring=100, ...) { ##' @export ##' @examples ##' chain_sizes <- c(1,1,4,7) # example of observed chain sizes -##' chain_ll(chain_sizes, rpois, "size", lambda=0.5) +##' chain_ll(chain_sizes, "pois", "size", lambda=0.5) chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, - infinite = Inf, exclude=c(), nsim_obs, ...) { + infinite = Inf, exclude=c(), individual=FALSE, nsim_obs, ...) { stat <- match.arg(stat) ## checks - if (!is.function(offspring) && !is.character(offspring)) { - stop("object passed as 'offspring' is not a function or character string.") + if (!is.character(offspring)) { + stop("object passed as 'offspring' is not a character string.") } if (obs_prob <= 0 || obs_prob > 1) stop("'obs_prob' must be within (0,1]") if (obs_prob < 1) { @@ -158,13 +158,13 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, sample_func <- rgen_length } sampled_x <- - replicate(nsim_obs, pmin(sample_func(length(x), x, obs_prob), infinite)) - if (length(x) == 1) sampled_x <- matrix(sampled_x, nrow=1) + replicate(nsim_obs, pmin(sample_func(length(x), x, obs_prob), infinite), simplify = FALSE) size_x <- unlist(sampled_x) if (!is.finite(infinite)) infinite <- max(size_x) + 1 } else { x[x >= infinite] <- infinite size_x <- x + sampled_x <- list(x) } ## determine for which sizes to calculate the likelihood (for true chain size) @@ -174,18 +174,9 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, calc_sizes <- unique(c(size_x, exclude)) } - ## get random function as given by `offspring` - if (is.character(offspring)) { - offspring_dist <- offspring - } else { - ## get offspring distribution by stripping first letter from offspring - ## function - offspring_dist <- sub("^.", "", find_function_name(offspring)) - } - ## get likelihood function as given by `offspring` and `stat`` likelihoods <- c() - ll_func <- paste(offspring_dist, stat, "ll", sep="_") + ll_func <- paste(offspring, stat, "ll", sep="_") pars <- as.list(unlist(list(...))) ## converts vectors to lists ## calculate likelihoods @@ -193,15 +184,6 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, func <- get(ll_func) likelihoods[calc_sizes] <- do.call(func, c(list(x=calc_sizes), pars)) } else { - if (is.character(offspring)) { - roffspring_name <- paste0("r", offspring) - if (exists(roffspring_name)) { - offspring <- get(roffspring_name) - if (!is.function(offspring)) stop(roffspring_name, " is not a function.") - } else { - stop("Function ", roffspring_name, " does not exist.") - } - } likelihoods[calc_sizes] <- do.call(offspring_ll, c(list(x=calc_sizes, offspring=offspring, @@ -216,17 +198,19 @@ chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, if (!missing(exclude)) { likelihoods <- likelihoods - log(-expm1(sum(likelihoods[exclude]))) likelihoods[exclude] <- -Inf - } - ## adjust for binomial observation probabilities - if (obs_prob < 1) { - chains_likelihood <- apply(sampled_x, 2, function(sx) { - sum(likelihoods[sx[!(sx %in% exclude)]]) + sampled_x <- lapply(sampled_x, function(y) { + y[!(y %in% exclude)] }) - } else { - chains_likelihood <- sum(likelihoods[x[!(x %in% exclude)]]) } + ## assign likelihoods + chains_likelihood <- lapply(sampled_x, function(sx) { + likelihoods[sx[!(sx %in% exclude)]] + }) + + if (!individual) chains_likelihood <- vapply(chains_likelihood, sum, 0) + return(chains_likelihood) } diff --git a/R/simulate.r b/R/simulate.r index fdffe01a..1d041eda 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -1,9 +1,9 @@ ##' Simulate chains using a branching process ##' ##' @param n number of simulations to run. -##' @param offspring offspring distribution, given as the function used to -##' generate the number of offspring in each generation, e.g. `rpois` for -##' Poisson distributed offspring +##' @param offspring offspring distribution: a character string corresponding to +##' the R distribution function (e.g., "pois" for Poisson, where +##' \code{\link{rpois}} is the R function to generate Poisson random numbers) ##' @param stat statistic to calculate ("size" or "length" of chains) ##' @param infinite a size or length from which the size/length is to be ##' considered infinite @@ -16,15 +16,20 @@ ##' @author Sebastian Funk ##' @export ##' @examples -##' chain_sim(n=5, rpois, "size", lambda=0.5) +##' chain_sim(n=5, "pois", "size", lambda=0.5) chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, tree=FALSE, ...) { stat <- match.arg(stat) ## first, get random function as given by `offspring` - if (!is.function(offspring)) { - stop("object passed as 'offspring' is not a function.") + if (!is.character(offspring)) { + stop("object passed as 'offspring' is not a character string.") + } + + roffspring_name <- paste0("r", offspring) + if (!(exists(roffspring_name)) || !is.function(get(roffspring_name))) { + stop("Function ", roffspring_name, " does not exist.") } stat_track <- rep(1, n) ## track length or size (depending on `stat`) @@ -46,7 +51,7 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ## next, simulate n chains while (length(sim) > 0) { ## simulate next generation - next_gen <- offspring(n=sum(n_offspring[sim]), ...) + next_gen <- get(roffspring_name)(n=sum(n_offspring[sim]), ...) if (any(next_gen %% 1 > 0)) { stop("Offspring distribution must return integers") } diff --git a/man/chain_ll.Rd b/man/chain_ll.Rd index a1619dc8..82276f74 100644 --- a/man/chain_ll.Rd +++ b/man/chain_ll.Rd @@ -5,12 +5,14 @@ \title{Likelihood for the outcome of a branching process} \usage{ chain_ll(x, offspring, stat = c("size", "length"), obs_prob = 1, - infinite = Inf, exclude = c(), nsim_obs, ...) + infinite = Inf, exclude = c(), individual = FALSE, nsim_obs, ...) } \arguments{ \item{x}{vector of sizes or lengths of transmission chains} -\item{offspring}{offspring distribution: either a function (e.g., \code{rpois} for Poisson) or a character string (e.g., "pois" for Poisson)} +\item{offspring}{offspring distribution: a character string corresponding to +the R distribution function (e.g., "pois" for Poisson, where +\code{\link{rpois}} is the R function to generate Poisson random numbers)} \item{stat}{statistic given as \code{x} ("size" or "length" of chains)} @@ -20,20 +22,22 @@ chain_ll(x, offspring, stat = c("size", "length"), obs_prob = 1, \item{exclude}{any sizes/lengths to exclude from the likelihood calculation} +\item{individual}{if TRUE, a vector of individual log-likelihood contributions will be returned rather than the sum} + \item{nsim_obs}{number of simulations if the likelihood is to be approximated for imperfect observations} \item{...}{parameters for the offspring distribution} } \value{ -likelihood +likelihood, or vector of likelihoods (if \code{obs_prob} < 1), or a list of individual likelihood contributions (if \code{individual=TRUE}) } \description{ Likelihood for the outcome of a branching process } \examples{ chain_sizes <- c(1,1,4,7) # example of observed chain sizes -chain_ll(chain_sizes, rpois, "size", lambda=0.5) +chain_ll(chain_sizes, "pois", "size", lambda=0.5) } \seealso{ pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll diff --git a/man/chain_sim.Rd b/man/chain_sim.Rd index c7c2de75..e1a546d5 100644 --- a/man/chain_sim.Rd +++ b/man/chain_sim.Rd @@ -10,9 +10,9 @@ chain_sim(n, offspring, stat = c("size", "length"), infinite = Inf, \arguments{ \item{n}{number of simulations to run.} -\item{offspring}{offspring distribution, given as the function used to -generate the number of offspring in each generation, e.g. `rpois` for -Poisson distributed offspring} +\item{offspring}{offspring distribution: a character string corresponding to +the R distribution function (e.g., "pois" for Poisson, where +\code{\link{rpois}} is the R function to generate Poisson random numbers)} \item{stat}{statistic to calculate ("size" or "length" of chains)} @@ -33,7 +33,7 @@ a vector of sizes/lengths (if \code{tree==FALSE}), or a data frame Simulate chains using a branching process } \examples{ -chain_sim(n=5, rpois, "size", lambda=0.5) +chain_sim(n=5, "pois", "size", lambda=0.5) } \author{ Sebastian Funk diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index cc55a913..260f36cd 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -9,7 +9,9 @@ offspring_ll(x, offspring, stat, nsim_offspring = 100, ...) \arguments{ \item{x}{vector of sizes} -\item{offspring}{offspring distribution: either a function (e.g., \code{rpois} for Poisson) or a character string (e.g., "pois" for Poisson)} +\item{offspring}{offspring distribution: a character string corresponding to +the R distribution function (e.g., "pois" for Poisson, where +\code{\link{rpois}} is the R function to generate Poisson random numbers)} \item{stat}{statistic given as \code{x} ("size" or "length" of chains)} diff --git a/tests/testthat/tests-ll.r b/tests/testthat/tests-ll.r index cff7b997..7f2f638f 100644 --- a/tests/testthat/tests-ll.r +++ b/tests/testthat/tests-ll.r @@ -1,20 +1,19 @@ context("Calculating the likelihood from a branching process model") chains <- c(1,1,4,7) -rtest <- "test" test_that("Likelihoods can be calculated", { - expect_lt(chain_ll(chains, rpois, "size", lambda=0.5), 0) - expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, exclude=1), 0) - expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, infinite = 5), 0) - expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, obs_prob = 0.5, + expect_lt(chain_ll(chains, "pois", "size", lambda=0.5), 0) + expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, exclude=1), 0) + expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, infinite = 5), 0) + expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, obs_prob = 0.5, nsim_obs=1), 0) - expect_lt(chain_ll(chains, rpois, "length", lambda=0.5, obs_prob = 0.5, + expect_lt(chain_ll(chains, "pois", "length", lambda=0.5, obs_prob = 0.5, nsim_obs=1), 0) - expect_lt(chain_ll(chains, rpois, "size", lambda=0.5, infinite = 5, + expect_lt(chain_ll(chains, "pois", "size", lambda=0.5, infinite = 5, obs_prob = 0.5, nsim_obs=1), 0) - expect_lt(chain_ll(chains, rbinom, "size", size=1, prob=0.5), 0) + expect_lt(chain_ll(chains, "binom", "size", size=1, prob=0.5), 0) }) test_that("Analytical size/length distributions are implemented", @@ -30,15 +29,11 @@ test_that("Analytical size/length distributions are implemented", test_that("Errors are thrown", { - expect_error(chain_ll(chains, "dummy", "size", lambda=0.5), - "does not exist") expect_error(chain_ll(chains, list(), "size", lambda=0.5), - "not a function or") - expect_error(chain_ll(chains, "test", "size", lambda=0.5), - "not a function") - expect_error(chain_ll(chains, rpois, "size", lambda=0.5, obs_prob = 3), + "not a character") + expect_error(chain_ll(chains, "pois", "size", lambda=0.5, obs_prob = 3), "must be within") - expect_error(chain_ll(chains, rpois, "size", lambda=0.5, obs_prob = 0.5), + expect_error(chain_ll(chains, "pois", "size", lambda=0.5, obs_prob = 0.5), "must be specified") expect_error(nbinom_size_ll(chains, mu=0.5, size=0.2, prob=0.1), "both specified") diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index 240b743d..a30e2f7a 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -2,16 +2,16 @@ context("Simulating from a branching process model") test_that("Chains can be simulated", { - expect_length(chain_sim(n=2, rpois, lambda=0.5), 2) - expect_length(chain_sim(n=10, rpois, "length", lambda=0.9), 10) - expect_true(is.data.frame(chain_sim(n=10, rpois, lambda=2, tree=TRUE, + expect_length(chain_sim(n=2, "pois", lambda=0.5), 2) + expect_length(chain_sim(n=10, "pois", "length", lambda=0.9), 10) + expect_true(is.data.frame(chain_sim(n=10, "pois", lambda=2, tree=TRUE, infinite=10))) - expect_false(any(is.finite(chain_sim(n=2, rpois, "length", lambda=0.5, + expect_false(any(is.finite(chain_sim(n=2, "pois", "length", lambda=0.5, infinite=1)))) }) test_that("Errors are thrown", { - expect_error(chain_sim(n=2, "dummy"), "is not a function") - expect_error(chain_sim(n=2, rlnorm, meanlog=log(1.6)), "integer") + expect_error(chain_sim(n=2, "dummy"), "does not exist") + expect_error(chain_sim(n=2, "lnorm", meanlog=log(1.6)), "integer") }) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index a56b810d..5d3e0c67 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -34,10 +34,10 @@ At the heart of the package are the `chains_ll` and `chains_sim` functions. The ```{r} chain_sizes <- c(1,1,4,7) # example of observed chain sizes -chain_ll(chain_sizes, rpois, "size", lambda=0.5) +chain_ll(chain_sizes, "pois", "size", lambda=0.5) ``` -The first argument of `chain_ll` is the size (or length) distribution to analyse. The second argument (called `offspring`) specifies the offspring distribution. This is given as a the function used to generate random offspring. It can be any probability distribution implemented in R, that is, one that has a corresponding function for generating random numbers beginning with the letter `r`. In the case of the example above, since random Poisson numbers are generated in R using a function called `rpois`, this is the function to pass as the `offspring` argument. +The first argument of `chain_ll` is the size (or length) distribution to analyse. The second argument (called `offspring`) specifies the offspring distribution. This is given as a the function used to generate random offspring. It can be any probability distribution implemented in R, that is, one that has a corresponding function for generating random numbers beginning with the letter `r`. In the case of the example above, since random Poisson numbers are generated in R using a function called `rpois`, the string to pass to the `offspring` argument is `"pois"`. The third argument (called `stat`) determines whether to analyse chain sizes (`"size"`, the default if this argument is not specified) or lengths (`"length"`). Lastly, any named arguments not recognised by `chain_ll` are interpreted as parameters of the corresponding probability distribution, here `lambda=0.5` as the mean of the Poisson distribution (see the R help page for the Poisson distribution for more information). @@ -50,7 +50,7 @@ You can use the `R` help to find out about usage of the `chains_ll` function, To simulate from a branching process, use the `chain_sim` function, which follows the same syntax as the `chain_ll` function: ```{r} -chain_sim(n=5, rpois, "size", lambda=0.5) +chain_sim(n=5, "pois", "size", lambda=0.5) ``` # Methodology @@ -58,7 +58,7 @@ chain_sim(n=5, rpois, "size", lambda=0.5) If the probability distribution of chain sizes or lengths has an analytical solution, this will be used (size distribution: Poisson and negative binomial; length distribution: Poisson and geometric). If not, simulations are used to approximate this probability distributions (using a linear approximation to the cumulative distribution for unobserved sizes/lengths), requiring an additional parameter `nsim_offspring` for the number of simulations to be used for this approximation. For example, to get offspring drawn from a binomial distribution with probability `prob=0.5`. ```{r} -chain_ll(chain_sizes, rbinom, "size", size=1, prob=0.5, nsim_offspring=100) +chain_ll(chain_sizes, "binom", "size", size=1, prob=0.5, nsim_offspring=100) ``` # Imperfect observations @@ -66,7 +66,7 @@ chain_ll(chain_sizes, rbinom, "size", size=1, prob=0.5, nsim_offspring=100) The `chain_ll` function has an `obs_prob` parameter that can be used to determine the likelihood if observations are imperfect. In that case, true chain sizes or lengths are simulated repeatedly (the number of times given by the `nsim_obs` argument) and the likelihood calculated for each of these simulations. For example, if the probability of observing each case is 30%, use ```{r} -ll <- chain_ll(chain_sizes, rpois, "size", obs_prob = 0.3, lambda=0.5, nsim_obs=10) +ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda=0.5, nsim_obs=10) summary(ll) ``` From fd5c0d854755ccdce518f1fb556e3fef245fef3e Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 24 Jan 2020 09:59:55 +0000 Subject: [PATCH 050/828] simulate serial intervals --- R/simulate.r | 73 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/R/simulate.r b/R/simulate.r index 1d041eda..85715f34 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -8,17 +8,26 @@ ##' @param infinite a size or length from which the size/length is to be ##' considered infinite ##' @param tree return the tree of infectors +##' @param serial the serial interval; a function that takes one parameter +##' (`n`), the number of serial intervals to randomly sample; if this parameter +##' is set, `chain_sim` returns times of infection, too; implies (`tree`=TRUE) +##' @param t0 start time (if serial interval is given); either a single value (0 +##' by default for all simulations, or a vector of length `n` with initial +##' times) +##' @param tf end time (if serial interval is given) ##' @param ... parameters of the offspring distribution -##' @return a vector of sizes/lengths (if \code{tree==FALSE}), or a data frame -##' with columns `n` (simulation ID), `id` (a unique ID within each -##' simulation for each individual element of the chain), `ancestor` (the ID -##' of the ancestor of each element) and `generation`. +##' @return a vector of sizes/lengths (if \code{tree==FALSE} and no serial +##' interval given), or a data frame with columns `n` (simulation ID), `time` +##' (if the serial interval is given) and (if \code{tree==TRUE}) `id` (a +##' unique ID within each simulation for each individual element of the +##' chain), `ancestor` (the ID of the ancestor of each element) and +##' `generation`. ##' @author Sebastian Funk ##' @export ##' @examples ##' chain_sim(n=5, "pois", "size", lambda=0.5) chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, - tree=FALSE, ...) { + tree = FALSE, serial, init_time, t0 = 0, tf = Inf, ...) { stat <- match.arg(stat) @@ -32,6 +41,18 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, stop("Function ", roffspring_name, " does not exist.") } + if (!missing(serial)) { + if (!is.function(serial)) { + stop("The `serial` argument must be a function.") + } + if (!missing(tree) && tree == FALSE) { + stop("The `serial` argument can't be used with `tree==FALSE`.") + } + tree <- TRUE + } else if (!missing(tf)) { + stop("The `tf` argument needs a `serial` argument.") + } + stat_track <- rep(1, n) ## track length or size (depending on `stat`) n_offspring <- rep(1, n) ## current number of offspring sim <- seq_len(n) ## track chains that are still being simulated @@ -40,12 +61,16 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, if (tree) { generation <- 1L tdf <- - data.frame(n=seq_len(n), - id=1L, - ancestor=NA_integer_, - generation=generation) + data.frame(n = seq_len(n), + id = 1L, + ancestor = NA_integer_, + generation = generation) + ancestor_ids <- rep(1, n) - current_max_id <- rep(1, n) + if (!missing(serial)) { + tdf$time <- t0 + times <- tdf$time + } } ## next, simulate n chains @@ -71,7 +96,7 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, stat_track <- stat_track + pmin(1, n_offspring) } - ## record ancestors (if tree==TRUE) + ## record times/ancestors (if tree==TRUE) if (tree && sum(n_offspring[sim]) > 0) { ancestors <- rep(ancestor_ids, next_gen) current_max_id <- unname(tapply(ancestor_ids, indices, max)) @@ -79,22 +104,36 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ids <- rep(current_max_id, n_offspring[sim]) + unlist(lapply(n_offspring[sim], seq_len)) generation <- generation + 1L - ## record indices corresponding the number of offspring new_df <- - data.frame(n=indices, - id=ids, - ancestor=ancestors, - generation=generation) + data.frame(n = indices, + id = ids, + ancestor = ancestors, + generation = generation) + if (!missing(serial)) { + times <- rep(times, next_gen) + serial(sum(n_offspring)) + current_min_time <- unname(tapply(times, indices, min)) + new_df$time <- times + } tdf <- rbind(tdf, new_df) } ## only continue to simulate chains that offspring and aren't of ## infinite size/length sim <- which(n_offspring > 0 & stat_track < infinite) - if (tree) ancestor_ids <- ids[indices %in% sim] + if (tree) { + if (!missing(serial)) { + sim <- sim[current_min_time < tf] + times <- times[indices %in% sim] + } + ancestor_ids <- ids[indices %in% sim] + } } if (tree) { + if (!missing(tf)) { + tdf <- tdf[tdf$time < tf, ] + } + rownames(tdf) <- NULL return(tdf) } else { stat_track[stat_track >= infinite] <- Inf From e84891ed1d49552b09e962c860bd0100fde5ccb2 Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 24 Jan 2020 13:31:45 +0000 Subject: [PATCH 051/828] fix bug in `infinite` argument --- R/simulate.r | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/simulate.r b/R/simulate.r index 85715f34..605f62d3 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -27,7 +27,7 @@ ##' @examples ##' chain_sim(n=5, "pois", "size", lambda=0.5) chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, - tree = FALSE, serial, init_time, t0 = 0, tf = Inf, ...) { + tree = FALSE, serial, t0 = 0, tf = Inf, ...) { stat <- match.arg(stat) @@ -120,9 +120,12 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ## only continue to simulate chains that offspring and aren't of ## infinite size/length sim <- which(n_offspring > 0 & stat_track < infinite) + if (!missing(serial)) { + ## only continue to simulate chains that don't go beyond tf + sim <- intersect(sim, unique(indices)[current_min_time < tf]) + } if (tree) { if (!missing(serial)) { - sim <- sim[current_min_time < tf] times <- times[indices %in% sim] } ancestor_ids <- ids[indices %in% sim] From e7c6d6ff8a3b0c99f16ce0ab3281954d2d96532b Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 24 Jan 2020 13:31:57 +0000 Subject: [PATCH 052/828] roxygen update --- DESCRIPTION | 2 +- man/chain_ll.Rd | 13 +++++++++++-- man/chain_sim.Rd | 33 +++++++++++++++++++++++++++------ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c8f4b3c1..faa891a9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,5 +11,5 @@ Suggests: License: GPL-3 URL: https://github.com/sbfnk/bpmodels BugReports: https://github.com/sbfnk/bpmodels -RoxygenNote: 6.1.1 +RoxygenNote: 7.0.2 VignetteBuilder: knitr diff --git a/man/chain_ll.Rd b/man/chain_ll.Rd index 82276f74..8dda43e3 100644 --- a/man/chain_ll.Rd +++ b/man/chain_ll.Rd @@ -4,8 +4,17 @@ \alias{chain_ll} \title{Likelihood for the outcome of a branching process} \usage{ -chain_ll(x, offspring, stat = c("size", "length"), obs_prob = 1, - infinite = Inf, exclude = c(), individual = FALSE, nsim_obs, ...) +chain_ll( + x, + offspring, + stat = c("size", "length"), + obs_prob = 1, + infinite = Inf, + exclude = c(), + individual = FALSE, + nsim_obs, + ... +) } \arguments{ \item{x}{vector of sizes or lengths of transmission chains} diff --git a/man/chain_sim.Rd b/man/chain_sim.Rd index e1a546d5..08593cab 100644 --- a/man/chain_sim.Rd +++ b/man/chain_sim.Rd @@ -4,8 +4,17 @@ \alias{chain_sim} \title{Simulate chains using a branching process} \usage{ -chain_sim(n, offspring, stat = c("size", "length"), infinite = Inf, - tree = FALSE, ...) +chain_sim( + n, + offspring, + stat = c("size", "length"), + infinite = Inf, + tree = FALSE, + serial, + t0 = 0, + tf = Inf, + ... +) } \arguments{ \item{n}{number of simulations to run.} @@ -21,13 +30,25 @@ considered infinite} \item{tree}{return the tree of infectors} +\item{serial}{the serial interval; a function that takes one parameter +(`n`), the number of serial intervals to randomly sample; if this parameter + is set, `chain_sim` returns times of infection, too; implies (`tree`=TRUE)} + +\item{t0}{start time (if serial interval is given); either a single value (0 +by default for all simulations, or a vector of length `n` with initial +times)} + +\item{tf}{end time (if serial interval is given)} + \item{...}{parameters of the offspring distribution} } \value{ -a vector of sizes/lengths (if \code{tree==FALSE}), or a data frame - with columns `n` (simulation ID), `id` (a unique ID within each - simulation for each individual element of the chain), `ancestor` (the ID - of the ancestor of each element) and `generation`. +a vector of sizes/lengths (if \code{tree==FALSE} and no serial + interval given), or a data frame with columns `n` (simulation ID), `time` + (if the serial interval is given) and (if \code{tree==TRUE}) `id` (a + unique ID within each simulation for each individual element of the + chain), `ancestor` (the ID of the ancestor of each element) and + `generation`. } \description{ Simulate chains using a branching process From aadf40ee16bf8f788040bc19dd9bf68d991cea9d Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Fri, 24 Jan 2020 14:18:56 +0000 Subject: [PATCH 053/828] fix for error where 'current_min_time' is not found --- R/simulate.r | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/simulate.r b/R/simulate.r index 605f62d3..89f62cde 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -120,15 +120,17 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ## only continue to simulate chains that offspring and aren't of ## infinite size/length sim <- which(n_offspring > 0 & stat_track < infinite) - if (!missing(serial)) { - ## only continue to simulate chains that don't go beyond tf - sim <- intersect(sim, unique(indices)[current_min_time < tf]) - } - if (tree) { + if (length(sim) > 0) { if (!missing(serial)) { - times <- times[indices %in% sim] + ## only continue to simulate chains that don't go beyond tf + sim <- intersect(sim, unique(indices)[current_min_time < tf]) + } + if (tree) { + if (!missing(serial)) { + times <- times[indices %in% sim] + } + ancestor_ids <- ids[indices %in% sim] } - ancestor_ids <- ids[indices %in% sim] } } From 6de58a2fe7c24541f488fe58b0a2dfe04f45b58f Mon Sep 17 00:00:00 2001 From: Sebastian Funk Date: Tue, 28 Jan 2020 10:00:28 +0000 Subject: [PATCH 054/828] rendered vignette --- vignettes/introduction.md | 95 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 vignettes/introduction.md diff --git a/vignettes/introduction.md b/vignettes/introduction.md new file mode 100644 index 00000000..641d09d0 --- /dev/null +++ b/vignettes/introduction.md @@ -0,0 +1,95 @@ +[bpmodels](https://github.com/sbfnk/bpmodels) is an `R` package to +analyse and simulate the size and length of branching processes with a +given offspring distribution. These can be used, for example, to analyse +the distribution of chain sizes or length of infectious disease +outbreaks. + +Usage +===== + +To load the package, use + + library('bpmodels') + +At the heart of the package are the `chains_ll` and `chains_sim` +functions. The `chains_ll` function calculates the log-likelihood of a +distribution of chain sizes or lengths given an offspring distribution +and associated parameters. For example, to get the log-likelihood for a +given observed distribution of chain sizes assuming a mean number of 0.5 +Poisson-distributed offspring per generation, use + + chain_sizes <- c(1,1,4,7) # example of observed chain sizes + chain_ll(chain_sizes, "pois", "size", lambda=0.5) + #> [1] -8.607196 + +The first argument of `chain_ll` is the size (or length) distribution to +analyse. The second argument (called `offspring`) specifies the +offspring distribution. This is given as a the function used to generate +random offspring. It can be any probability distribution implemented in +R, that is, one that has a corresponding function for generating random +numbers beginning with the letter `r`. In the case of the example above, +since random Poisson numbers are generated in R using a function called +`rpois`, the string to pass to the `offspring` argument is `"pois"`. + +The third argument (called `stat`) determines whether to analyse chain +sizes (`"size"`, the default if this argument is not specified) or +lengths (`"length"`). Lastly, any named arguments not recognised by +`chain_ll` are interpreted as parameters of the corresponding +probability distribution, here `lambda=0.5` as the mean of the Poisson +distribution (see the R help page for the Poisson distribution for more +information). + +You can use the `R` help to find out about usage of the `chains_ll` +function, + + ?chains_ll + +To simulate from a branching process, use the `chain_sim` function, +which follows the same syntax as the `chain_ll` function: + + chain_sim(n=5, "pois", "size", lambda=0.5) + #> [1] 2 1 1 1 5 + +Methodology +=========== + +If the probability distribution of chain sizes or lengths has an +analytical solution, this will be used (size distribution: Poisson and +negative binomial; length distribution: Poisson and geometric). If not, +simulations are used to approximate this probability distributions +(using a linear approximation to the cumulative distribution for +unobserved sizes/lengths), requiring an additional parameter +`nsim_offspring` for the number of simulations to be used for this +approximation. For example, to get offspring drawn from a binomial +distribution with probability `prob=0.5`. + + chain_ll(chain_sizes, "binom", "size", size=1, prob=0.5, nsim_offspring=100) + #> [1] -8.477588 + +Imperfect observations +====================== + +The `chain_ll` function has an `obs_prob` parameter that can be used to +determine the likelihood if observations are imperfect. In that case, +true chain sizes or lengths are simulated repeatedly (the number of +times given by the `nsim_obs` argument) and the likelihood calculated +for each of these simulations. For example, if the probability of +observing each case is 30%, use + + ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda=0.5, nsim_obs=10) + summary(ll) + #> Min. 1st Qu. Median Mean 3rd Qu. Max. + #> -35.30 -25.68 -23.23 -24.19 -20.89 -18.91 + +This returns `nsim_obs=10` likelihood values which can be averaged to +come up with an overall likelihood estimate. + +References +========== + +- Farrington, C.P., Kanaan, M.N. and Gay, N.J. (2003). [Branching + process models for surveillance of infectious diseases controlled by + mass vaccination](https://doi.org/10.1093/biostatistics/4.2.279). +- Blumberg, S. and Lloyd-Smith, J.O. (2013). [Comparing methods for + estimating R0 from the size distribution of subcritical transmission + chains](https://doi.org/10.1016/j.epidem.2013.05.002). From f39cc7fd243bca0cb49a77a5122dbd0b6fa9b0dc Mon Sep 17 00:00:00 2001 From: ffinger <12323626+ffinger@users.noreply.github.com> Date: Wed, 4 Mar 2020 21:32:54 +0100 Subject: [PATCH 055/828] simulator function accounting for susceptible depletion corrected error in time checking corrected bug minor fixes changed function name update doc --- DESCRIPTION | 10 ++- NAMESPACE | 2 + R/globals.R | 2 + R/simulate_susceptibles.R | 166 ++++++++++++++++++++++++++++++++++++++ R/utils.r | 23 +++++- man/chain_ll.Rd | 13 +-- man/chain_sim.Rd | 13 +-- man/chain_sim_susc.Rd | 56 +++++++++++++ man/rbinom_size.Rd | 12 ++- man/rnbinom_mean_disp.Rd | 29 +++++++ 10 files changed, 300 insertions(+), 26 deletions(-) create mode 100644 R/globals.R create mode 100644 R/simulate_susceptibles.R create mode 100644 man/chain_sim_susc.Rd create mode 100644 man/rnbinom_mean_disp.Rd diff --git a/DESCRIPTION b/DESCRIPTION index faa891a9..df1508bb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,19 @@ Package: bpmodels Version: 0.1.0 Title: Analysing chain statistics using branching process models -Authors@R: c(person("Sebastian", "Funk", email = "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre")), person("Zhian N.", "Kamvar", email = "zkamvar@gmail.com", role = c("ctb"))) +Authors@R: c( + person("Sebastian", "Funk", email = "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre")), + person("Zhian N.", "Kamvar", email = "zkamvar@gmail.com", role = c("ctb")), + person("Flavio", "Finger", email = "flavio.finger@epicentre.msf.org", role = c("aut")) + ) Description: Provides methods to analyse and simulate the size and length of branching processes with an arbitrary offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks, as discussed in Farrington et al. (2003) . Suggests: testthat, knitr, rmarkdown, - covr + covr, + extraDistr, + truncdist License: GPL-3 URL: https://github.com/sbfnk/bpmodels BugReports: https://github.com/sbfnk/bpmodels diff --git a/NAMESPACE b/NAMESPACE index ddaddf44..1b73b8d9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,3 +2,5 @@ export(chain_ll) export(chain_sim) +export(chain_sim_susc) +export(rnbinom_mean_disp) diff --git a/R/globals.R b/R/globals.R new file mode 100644 index 00000000..75781a1a --- /dev/null +++ b/R/globals.R @@ -0,0 +1,2 @@ +## avoid "no visible bindings" warning for dplyr verbs +utils::globalVariables(c("generation", "time", "id")) \ No newline at end of file diff --git a/R/simulate_susceptibles.R b/R/simulate_susceptibles.R new file mode 100644 index 00000000..df7cb793 --- /dev/null +++ b/R/simulate_susceptibles.R @@ -0,0 +1,166 @@ +##' Simulate a single chain using a branching process while accounting +##' for depletion of susceptibles. +##' +##' @param offspring offspring distribution: a character string corresponding to +##' the R distribution function. Currently only "pois" & "nbinom" are +##' supported. Internally truncated distributions are used to avoid infecting +##' more people than susceptibles available. +##' @param mn_offspring the average number of secondary cases for each case +##' @param disp_offspring the dispersion coefficient (var/mean) of the number of +##' secondary cases. Ignored if offspring == "pois". Must be > 1. +##' @param serial the serial interval. A function that takes one parameter +##' (`n`), the number of serial intervals to randomly sample. +##' Value must be >= 0. +##' @param t0 start time +##' @param tf end time +##' @param pop the population +##' @param initial_immune the number of initial immunes in the population +##' @return a data frame with columns `time`, `id` (a unique ID for each +##' individual element of the chain), `ancestor` (the ID of the ancestor +##' of each element), and `generation`. +##' +##' @details This function has a couple of key differences with chain_sim: +##' it can only simulate one chain at a time, +##' it can only handle implemented offspring distributions +##' ("pois" and "nbinom"), +##' it always tracks and returns a data frame containing the entire tree, +##' the maximal length of chains is limited with pop instead of infinite. +##' +##' @author Flavio Finger +##' @export +##' @examples +##' chain_sim_susc("pois", mn_offspring=0.5, serial = function(x) 3, pop = 100) +chain_sim_susc <- function( + offspring = c("pois", "nbinom"), + mn_offspring, + disp_offspring, + serial, + t0 = 0, + tf = Inf, + pop, + initial_immune = 0 +) { + + offspring <- match.arg(offspring) + + if (missing(pop)) { + stop("Argument pop required.") + } + + if (missing(mn_offspring)) { + stop("Argument mn_offspring reequired.") + } + + if (offspring == "pois") { + if (!missing(disp_offspring)) { + warning("argument disp_offspring not used for + poisson offspring distribution.") + } + + ## using a right truncated poisson distribution + ## to avoid more cases than susceptibles + offspring_fun <- function(n, susc) { + extraDistr::rtpois( + n, + lambda = mn_offspring * susc / pop, + b = susc) + } + + } else if (offspring == "nbinom") { + + if (missing(disp_offspring) | disp_offspring <= 1) { ## dispersion index + stop("Offspring distribution 'nbinom' requires argument + disp_offspring > 1. Use 'pois' if there is no overdispersion.") + } + + offspring_fun <- function(n, susc) { + ## get distribution params from mean and dispersion + ## see ?rnbinom for parameter definition + new_mn <- mn_offspring * susc / pop ##apply susceptibility + size <- new_mn / (disp_offspring - 1) + + ## using a right truncated nbinom distribution + ## to avoid more cases than susceptibles + truncdist::rtrunc( + n, + spec = "nbinom", + b = susc, + mu = new_mn, + size = size) + } + } + + ## initializations + tdf <- data.frame( + id = 1L, + ancestor = NA_integer_, + generation = 1L, + time = t0, + offspring_generated = FALSE + ) + + susc <- pop - initial_immune - 1L + t <- t0 + + ## continue if any unsimulated has t <= tf + ## AND there is still susceptibles left + while ( + any(tdf$time[!tdf$offspring_generated] <= tf) & + susc > 0 + ) { + + ## select from which case to generate offspring + t <- min(tdf$time[!tdf$offspring_generated]) #lowest unsimulated t + + ## index of the first in df with t, extract vars + idx <- which(tdf$time == t & !tdf$offspring_generated)[1] + id_parent <- tdf$id[idx] + t_parent <- tdf$time[idx] + gen_parent <- tdf$generation[idx] + + ## generate it + current_max_id <- max(tdf$id) + n_offspring <- offspring_fun(1, susc) + + if (n_offspring %% 1 > 0) { + stop("Offspring distribution must return integers") + } + + ## mark as done + tdf$offspring_generated[idx] <- TRUE + + ## add to df + if (n_offspring > 0) { + ## draw times + new_times <- serial(n_offspring) + + if (any(new_times < 0)) { + stop("Serial interval must be >= 0.") + } + + new_df <- data.frame( + id = current_max_id + seq_len(n_offspring), + time = new_times + t_parent, + ancestor = id_parent, + generation = gen_parent + 1L, + offspring_generated = FALSE + ) + + ## add new cases to tdf + tdf <- rbind(tdf, new_df) + } + + ## adjust susceptibles + susc <- susc - n_offspring + } + + ## remove cases with time > tf that could + ## have been generated in the last generation + tdf <- tdf[tdf$time <= tf, ] + + ## sort output and remove columns not needed + tdf <- tdf[order(tdf$time, tdf$id), ] + tdf$offspring_generated <- NULL + + return(tdf) +} \ No newline at end of file diff --git a/R/utils.r b/R/utils.r index 9f912db2..7e9f6f46 100644 --- a/R/utils.r +++ b/R/utils.r @@ -13,7 +13,13 @@ complementary_logprob <- function(x) { ##' ##' Samples the size parameter from the binomial distribution with fixed x ##' (number of successes) and p (success probability) -##' @param n number of samples to generate +##' @param n number of samples to generate##' secondary cases. Ignored if offspring == "pois". Must be > 1. +##' @param serial the serial interval. A function that takes one parameter +##' (`n`), the number of serial intervals to randomly sample. +##' Value must be >= 0. +##' @param t0 start time +##' @param tf end time +##' @param pop the population ##' @param x number of successes ##' @param prob probability of success ##' @return sampled sizes @@ -57,3 +63,18 @@ find_function_name <- function(fun) { } } } + +##' Negative binomial random numbers parametrized +##' in terms of mean and dispersion coefficient +##' @param n number of samples to draw +##' @param mn mean of distribution +##' @param disp dispersion coefficient (var/mean) +##' @return vector containing the random numbers +##' @author Flavio Finger +##' @export +##' @examples +##' rnbinom_mean_disp(n = 5, mn = 4, disp = 2) +rnbinom_mean_disp <- function(n, mn, disp) { + size <- mn / (disp - 1) + stats::rnbinom(n, size = size, mu = mn) + } \ No newline at end of file diff --git a/man/chain_ll.Rd b/man/chain_ll.Rd index 8dda43e3..82276f74 100644 --- a/man/chain_ll.Rd +++ b/man/chain_ll.Rd @@ -4,17 +4,8 @@ \alias{chain_ll} \title{Likelihood for the outcome of a branching process} \usage{ -chain_ll( - x, - offspring, - stat = c("size", "length"), - obs_prob = 1, - infinite = Inf, - exclude = c(), - individual = FALSE, - nsim_obs, - ... -) +chain_ll(x, offspring, stat = c("size", "length"), obs_prob = 1, + infinite = Inf, exclude = c(), individual = FALSE, nsim_obs, ...) } \arguments{ \item{x}{vector of sizes or lengths of transmission chains} diff --git a/man/chain_sim.Rd b/man/chain_sim.Rd index 08593cab..5d2cf74d 100644 --- a/man/chain_sim.Rd +++ b/man/chain_sim.Rd @@ -4,17 +4,8 @@ \alias{chain_sim} \title{Simulate chains using a branching process} \usage{ -chain_sim( - n, - offspring, - stat = c("size", "length"), - infinite = Inf, - tree = FALSE, - serial, - t0 = 0, - tf = Inf, - ... -) +chain_sim(n, offspring, stat = c("size", "length"), infinite = Inf, + tree = FALSE, serial, t0 = 0, tf = Inf, ...) } \arguments{ \item{n}{number of simulations to run.} diff --git a/man/chain_sim_susc.Rd b/man/chain_sim_susc.Rd new file mode 100644 index 00000000..09b5858b --- /dev/null +++ b/man/chain_sim_susc.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_susceptibles.R +\name{chain_sim_susc} +\alias{chain_sim_susc} +\title{Simulate a single chain using a branching process while accounting +for depletion of susceptibles.} +\usage{ +chain_sim_susc(offspring = c("pois", "nbinom"), mn_offspring, + disp_offspring, serial, t0 = 0, tf = Inf, pop, initial_immune = 0) +} +\arguments{ +\item{offspring}{offspring distribution: a character string corresponding to +the R distribution function. Currently only "pois" & "nbinom" are +supported. Internally truncated distributions are used to avoid infecting +more people than susceptibles available.} + +\item{mn_offspring}{the average number of secondary cases for each case} + +\item{disp_offspring}{the dispersion coefficient (var/mean) of the number of +secondary cases. Ignored if offspring == "pois". Must be > 1.} + +\item{serial}{the serial interval. A function that takes one parameter +(`n`), the number of serial intervals to randomly sample. +Value must be >= 0.} + +\item{t0}{start time} + +\item{tf}{end time} + +\item{pop}{the population} + +\item{initial_immune}{the number of initial immunes in the population} +} +\value{ +a data frame with columns `time`, `id` (a unique ID for each + individual element of the chain), `ancestor` (the ID of the ancestor + of each element), and `generation`. +} +\description{ +Simulate a single chain using a branching process while accounting +for depletion of susceptibles. +} +\details{ +This function has a couple of key differences with chain_sim: + it can only simulate one chain at a time, + it can only handle implemented offspring distributions + ("pois" and "nbinom"), + it always tracks and returns a data frame containing the entire tree, + the maximal length of chains is limited with pop instead of infinite. +} +\examples{ +chain_sim_susc("pois", mn_offspring=0.5, serial = function(x) 3, pop = 100) +} +\author{ +Flavio Finger +} diff --git a/man/rbinom_size.Rd b/man/rbinom_size.Rd index 5e19360d..4be7a76a 100644 --- a/man/rbinom_size.Rd +++ b/man/rbinom_size.Rd @@ -7,11 +7,21 @@ rbinom_size(n, x, prob) } \arguments{ -\item{n}{number of samples to generate} +\item{n}{number of samples to generate##' secondary cases. Ignored if offspring == "pois". Must be > 1.} \item{x}{number of successes} \item{prob}{probability of success} + +\item{serial}{the serial interval. A function that takes one parameter +(`n`), the number of serial intervals to randomly sample. +Value must be >= 0.} + +\item{t0}{start time} + +\item{tf}{end time} + +\item{pop}{the population} } \value{ sampled sizes diff --git a/man/rnbinom_mean_disp.Rd b/man/rnbinom_mean_disp.Rd new file mode 100644 index 00000000..698836d6 --- /dev/null +++ b/man/rnbinom_mean_disp.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.r +\name{rnbinom_mean_disp} +\alias{rnbinom_mean_disp} +\title{Negative binomial random numbers parametrized +in terms of mean and dispersion coefficient} +\usage{ +rnbinom_mean_disp(n, mn, disp) +} +\arguments{ +\item{n}{number of samples to draw} + +\item{mn}{mean of distribution} + +\item{disp}{dispersion coefficient (var/mean)} +} +\value{ +vector containing the random numbers +} +\description{ +Negative binomial random numbers parametrized +in terms of mean and dispersion coefficient +} +\examples{ +rnbinom_mean_disp(n = 5, mn = 4, disp = 2) +} +\author{ +Flavio Finger +} From da842ca24863b165de386b30ff72fdec31cc271e Mon Sep 17 00:00:00 2001 From: ffinger <12323626+ffinger@users.noreply.github.com> Date: Sat, 7 Mar 2020 22:32:18 +0100 Subject: [PATCH 056/828] implemented tests and improved argument checking fix doc fix doc remove flawed test remove globals (unused) --- R/globals.R | 2 - R/simulate_susceptibles.R | 10 +--- R/utils.r | 8 +-- man/rbinom_size.Rd | 12 +---- tests/testthat/tests-sim.r | 103 +++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 29 deletions(-) delete mode 100644 R/globals.R diff --git a/R/globals.R b/R/globals.R deleted file mode 100644 index 75781a1a..00000000 --- a/R/globals.R +++ /dev/null @@ -1,2 +0,0 @@ -## avoid "no visible bindings" warning for dplyr verbs -utils::globalVariables(c("generation", "time", "id")) \ No newline at end of file diff --git a/R/simulate_susceptibles.R b/R/simulate_susceptibles.R index df7cb793..f6fd180c 100644 --- a/R/simulate_susceptibles.R +++ b/R/simulate_susceptibles.R @@ -43,14 +43,6 @@ chain_sim_susc <- function( offspring <- match.arg(offspring) - if (missing(pop)) { - stop("Argument pop required.") - } - - if (missing(mn_offspring)) { - stop("Argument mn_offspring reequired.") - } - if (offspring == "pois") { if (!missing(disp_offspring)) { warning("argument disp_offspring not used for @@ -68,7 +60,7 @@ chain_sim_susc <- function( } else if (offspring == "nbinom") { - if (missing(disp_offspring) | disp_offspring <= 1) { ## dispersion index + if (disp_offspring <= 1) { ## dispersion index stop("Offspring distribution 'nbinom' requires argument disp_offspring > 1. Use 'pois' if there is no overdispersion.") } diff --git a/R/utils.r b/R/utils.r index 7e9f6f46..79c225e9 100644 --- a/R/utils.r +++ b/R/utils.r @@ -13,13 +13,7 @@ complementary_logprob <- function(x) { ##' ##' Samples the size parameter from the binomial distribution with fixed x ##' (number of successes) and p (success probability) -##' @param n number of samples to generate##' secondary cases. Ignored if offspring == "pois". Must be > 1. -##' @param serial the serial interval. A function that takes one parameter -##' (`n`), the number of serial intervals to randomly sample. -##' Value must be >= 0. -##' @param t0 start time -##' @param tf end time -##' @param pop the population +##' @param n number of samples to generate ##' @param x number of successes ##' @param prob probability of success ##' @return sampled sizes diff --git a/man/rbinom_size.Rd b/man/rbinom_size.Rd index 4be7a76a..5e19360d 100644 --- a/man/rbinom_size.Rd +++ b/man/rbinom_size.Rd @@ -7,21 +7,11 @@ rbinom_size(n, x, prob) } \arguments{ -\item{n}{number of samples to generate##' secondary cases. Ignored if offspring == "pois". Must be > 1.} +\item{n}{number of samples to generate} \item{x}{number of successes} \item{prob}{probability of success} - -\item{serial}{the serial interval. A function that takes one parameter -(`n`), the number of serial intervals to randomly sample. -Value must be >= 0.} - -\item{t0}{start time} - -\item{tf}{end time} - -\item{pop}{the population} } \value{ sampled sizes diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index a30e2f7a..5f28b6a1 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -15,3 +15,106 @@ test_that("Errors are thrown", expect_error(chain_sim(n=2, "dummy"), "does not exist") expect_error(chain_sim(n=2, "lnorm", meanlog=log(1.6)), "integer") }) + +context("Simulating from a branching process model + accounting for depletion of susceptibles") + + +test_that("Chains can be simulated", +{ + expect_true( + is.data.frame( + chain_sim_susc( + "pois", + mn_offspring = 2, + serial = function(x) 3, + pop = 100 + ) + ) + ) + + expect_true( + is.data.frame( + chain_sim_susc( + "nbinom", + mn_offspring = 2, + disp = 1.5, + serial = function(x) 3, + pop = 100 + ) + ) + ) + + expect_true( + nrow( + chain_sim_susc( + "pois", + mn_offspring = 2, + serial = function(x) 3, + pop = 1 + ) + ) == 1 + ) + + expect_true( + nrow( + chain_sim_susc( + "pois", + mn_offspring = 100, + tf = 2, + serial = function(x) 3, + pop = 999 + ) + ) == 1 + ) + + expect_true( + nrow( + chain_sim_susc( + "pois", + mn_offspring = 100, + serial = function(x) 3, + pop = 999, + initial_immune = 998 + ) + ) == 1 + ) + +}) + +test_that("Errors are thrown", +{ + expect_error( + chain_sim_susc( + "dummy", + mn_offspring = 3, + serial = function(x) 3, + pop = 100), + "'arg' should be one of \"pois\", \"nbinom\"") + expect_error( + chain_sim_susc( + "nbinom", + mn_offspring = 3, + disp_offspring = 1, + serial = function(x) 3, + pop = 100 + ), + "Offspring distribution 'nbinom' requires argument + disp_offspring > 1. Use 'pois' if there is no overdispersion.") + expect_error( + chain_sim_susc( + "nbinom", + mn_offspring = 3, + serial = function(x) 3, + pop = 100 + ), + "argument \"disp_offspring\" is missing, with no default") + expect_error( + chain_sim_susc( + "pois", + mn_offspring = 3, + serial = function(x) -3, + pop = 100), + "Serial interval must be >= 0.") + +}) \ No newline at end of file From 3d48c609bafe413f5d00f5ec7ede4d32e5d7618a Mon Sep 17 00:00:00 2001 From: ffinger <12323626+ffinger@users.noreply.github.com> Date: Tue, 10 Mar 2020 11:32:48 +0100 Subject: [PATCH 057/828] removed extraDistr dependency and test that sometimes fails --- DESCRIPTION | 1 - R/simulate_susceptibles.R | 3 ++- tests/testthat/tests-sim.r | 7 ------- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index df1508bb..53a89a8a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,6 @@ Suggests: knitr, rmarkdown, covr, - extraDistr, truncdist License: GPL-3 URL: https://github.com/sbfnk/bpmodels diff --git a/R/simulate_susceptibles.R b/R/simulate_susceptibles.R index f6fd180c..b292d5f0 100644 --- a/R/simulate_susceptibles.R +++ b/R/simulate_susceptibles.R @@ -52,8 +52,9 @@ chain_sim_susc <- function( ## using a right truncated poisson distribution ## to avoid more cases than susceptibles offspring_fun <- function(n, susc) { - extraDistr::rtpois( + truncdist::rtrunc( n, + spec = "pois", lambda = mn_offspring * susc / pop, b = susc) } diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index 5f28b6a1..348c4bf3 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -109,12 +109,5 @@ test_that("Errors are thrown", pop = 100 ), "argument \"disp_offspring\" is missing, with no default") - expect_error( - chain_sim_susc( - "pois", - mn_offspring = 3, - serial = function(x) -3, - pop = 100), - "Serial interval must be >= 0.") }) \ No newline at end of file From c46eb565880184d89d8125ffe200ef37bf27b158 Mon Sep 17 00:00:00 2001 From: James Azam Date: Wed, 25 Jan 2023 16:31:11 +0000 Subject: [PATCH 058/828] Replace travis CI with GHA (#24) * removed appveyor file * removed travis yml * removed all references to travis and appveyor * ignore .Rproj * add GHA badge to README * GHA setup * ignore .Rproj files * Edited the README badge URLs to redirect to the Epiverse TRACE org --- .Rbuildignore | 5 +-- .github/.gitignore | 1 + .github/workflows/R-CMD-check.yaml | 49 +++++++++++++++++++++++++++ .github/workflows/pkgdown.yaml | 46 +++++++++++++++++++++++++ .github/workflows/test-coverage.yaml | 50 ++++++++++++++++++++++++++++ .gitignore | 1 + .travis.yml | 26 --------------- README.md | 10 +++--- appveyor.yml | 41 ----------------------- 9 files changed, 155 insertions(+), 74 deletions(-) create mode 100644 .github/.gitignore create mode 100644 .github/workflows/R-CMD-check.yaml create mode 100644 .github/workflows/pkgdown.yaml create mode 100644 .github/workflows/test-coverage.yaml delete mode 100644 .travis.yml delete mode 100644 appveyor.yml diff --git a/.Rbuildignore b/.Rbuildignore index f0fea78d..0a6b13a1 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,4 +1,5 @@ ^CODE_OF_CONDUCT\.md$ -^appveyor\.yml$ -^\.travis\.yml$ cran-comments.md +^\.github$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 00000000..2d19fc76 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 00000000..a3ac6182 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,49 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 00000000..087f0b05 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,46 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 00000000..2c5bb502 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr + needs: coverage + + - name: Test coverage + run: | + covr::codecov( + quiet = FALSE, + clean = FALSE, + install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + ) + shell: Rscript {0} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore index 57133000..97ff1c51 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ inst/doc /*.Rcheck/ # RStudio files .Rproj.user/ +*.Rproj # produced vignettes vignettes/*.html vignettes/*.pdf diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index a7279d6d..00000000 --- a/.travis.yml +++ /dev/null @@ -1,26 +0,0 @@ -# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r -language: r -cache: packages - -matrix: - include: - - os: linux - r: release - env: - - R_CODECOV=true - - os: linux - r: devel - - os: linux - r: oldrel - - os: osx - osx_image: xcode8.3 - -warnings_are_errors: true - -notifications: - email: - on_success: change - on_failure: change - -after_success: -- if [[ "${R_CODECOV}" ]]; then Rscript -e 'covr::codecov()'; fi diff --git a/README.md b/README.md index 2654ac3e..37b7901a 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ # bpmodels - -[![Travis-CI Build Status](https://travis-ci.org/sbfnk/bpmodels.svg?branch=master)](https://travis-ci.org/sbfnk/bpmodels) -[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/y37i8x0wo9o8s2wf?svg=true)](https://ci.appveyor.com/project/sbfnk/bpmodels) -[![codecov](https://codecov.io/github/sbfnk/bpmodels/branch/master/graphs/badge.svg)](https://codecov.io/github/sbfnk/bpmodels) + +[![R-CMD-check](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml) +[![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/master/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) + Methods for analysing the distribution of epidemiological chain sizes and lengths The latest development version of the `bpmodels` package can be installed via ```{r eval=FALSE} -devtools::install_github('sbfnk/bpmodels') +devtools::install_github('epiverse-trace/bpmodels') ``` Please note that the 'bpmodels' project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index bc46a87c..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,41 +0,0 @@ -# DO NOT CHANGE the "init" and "install" sections below - -# Download script file from GitHub -init: - ps: | - $ErrorActionPreference = "Stop" - Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" - Import-Module '..\appveyor-tool.ps1' -install: - ps: Bootstrap - -# Adapt as necessary starting from here - -build_script: - - travis-tool.sh install_deps - -test_script: - - travis-tool.sh run_tests - -on_failure: - - 7z a failure.zip *.Rcheck\* - - appveyor PushArtifact failure.zip - -artifacts: - - path: '*.Rcheck\**\*.log' - name: Logs - - - path: '*.Rcheck\**\*.out' - name: Logs - - - path: '*.Rcheck\**\*.fail' - name: Logs - - - path: '*.Rcheck\**\*.Rout' - name: Logs - - - path: '\*_*.tar.gz' - name: Bits - - - path: '\*_*.zip' - name: Bits From 45a482219211ba52bc09080aafa3ab035ea155ea Mon Sep 17 00:00:00 2001 From: James Azam Date: Thu, 26 Jan 2023 14:07:55 +0000 Subject: [PATCH 059/828] Revised chain_sim() documentation (#14) * Added a bib file for the references * added UTF-8 encoding to DESCRIPTION to fix Roxygen warning * Changed the comment tags * gitignore * Changed the comment tags * Updated the documentation of chain_sim() * Converted the introduction vignette from md to rmd format * Edited the README.md file * Deleted this after converting to rmd format * gitignore * Starting to convert README to rmd format to provide more flexibility in adding code and output * revised the chain_sim() doc * Updated the DESCRIPTION and turned on markdown support * Revised chain_sim() documentation * Revised chain_sim() documentation * added yours truly as an author and maintainer * updated chain_sim() documentation * Rendered README.md * Fixed an issue with the author file * Added an example * Added our assumptions about the serial interval * .Rd files generated * Update chain_sim Co-authored-by: Sebastian Funk * Update chain_sim() documentation Co-authored-by: Sebastian Funk * Update chain_sim() documentation Co-authored-by: Sebastian Funk * Update chain_sim() documentation Co-authored-by: Sebastian Funk * Update chain_sim() documentation Co-authored-by: Sebastian Funk * updated the README * deleted unwanted file byproduct * ignore .bib.sav files * ignore README.Rmd * rebuilt the documentation * added bookdown to Suggests * fixed badges * added the MIT license Co-authored-by: Sebastian Funk --- .Rbuildignore | 2 + .gitignore | 8 ++ DESCRIPTION | 35 ++++--- LICENSE | 2 + LICENSE.md | 21 ++++ R/likelihoods.R | 158 +++++++++++++++---------------- R/simulate.r | 132 ++++++++++++++++++++------ R/simulate_susceptibles.R | 64 ++++++------- R/utils.r | 94 +++++++++--------- README.Rmd | 32 +++++++ README.md | 21 ++-- man/chain_ll.Rd | 17 +++- man/chain_sim.Rd | 133 +++++++++++++++++++++----- man/chain_sim_susc.Rd | 30 +++--- man/offspring_ll.Rd | 6 +- vignettes/introduction.R | 31 ++++++ vignettes/introduction.Rmd | 61 ++++++++---- vignettes/introduction.md | 95 ------------------- vignettes/projecting_incidence.R | 120 +++++++++++++++++++++++ vignettes/references.bib | 23 +++++ 20 files changed, 719 insertions(+), 366 deletions(-) create mode 100644 LICENSE create mode 100644 LICENSE.md create mode 100644 README.Rmd create mode 100644 vignettes/introduction.R delete mode 100644 vignettes/introduction.md create mode 100644 vignettes/projecting_incidence.R create mode 100644 vignettes/references.bib diff --git a/.Rbuildignore b/.Rbuildignore index 0a6b13a1..2bd66f52 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,3 +3,5 @@ cran-comments.md ^\.github$ ^.*\.Rproj$ ^\.Rproj\.user$ +^README\.Rmd$ +^LICENSE\.md$ diff --git a/.gitignore b/.gitignore index 97ff1c51..8b89fb81 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,11 @@ vignettes/*.pdf *.knit.md # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html rsconnect/ +/doc/ +/Meta/ + +.Rbuildignore + +*.Rproj + +*.bib.sav diff --git a/DESCRIPTION b/DESCRIPTION index 53a89a8a..86bd2ff4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,20 +1,29 @@ Package: bpmodels -Version: 0.1.0 Title: Analysing chain statistics using branching process models +Version: 0.1.0 Authors@R: c( - person("Sebastian", "Funk", email = "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre")), - person("Zhian N.", "Kamvar", email = "zkamvar@gmail.com", role = c("ctb")), - person("Flavio", "Finger", email = "flavio.finger@epicentre.msf.org", role = c("aut")) - ) -Description: Provides methods to analyse and simulate the size and length of branching processes with an arbitrary offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks, as discussed in Farrington et al. (2003) . + person("Sebastian", "Funk", , "sebastian.funk@lshtm.ac.uk", role = c("aut", "cre")), + person("Zhian N.", "Kamvar", , "zkamvar@gmail.com", role = "ctb"), + person("Flavio", "Finger", , "flavio.finger@epicentre.msf.org", role = "aut"), + person("James", "Azam", "Mba", "james.azam@lshtm.ac.uk", role = c("aut")) + ) +Description: Provides methods to analyse and simulate the size and length + of branching processes with an arbitrary offspring distribution. These + can be used, for example, to analyse the distribution of chain sizes + or length of infectious disease outbreaks, as discussed in Farrington + et al. (2003) . +License: MIT + file LICENSE +URL: https://github.com/sbfnk/bpmodels +BugReports: https://github.com/sbfnk/bpmodels/issues Suggests: - testthat, + covr, knitr, rmarkdown, - covr, + bookdown, + testthat, truncdist -License: GPL-3 -URL: https://github.com/sbfnk/bpmodels -BugReports: https://github.com/sbfnk/bpmodels -RoxygenNote: 7.0.2 -VignetteBuilder: knitr +VignetteBuilder: + knitr +Encoding: UTF-8 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.2.3 diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..bad553b7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2023 +COPYRIGHT HOLDER: bpmodels authors diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 00000000..9293f3eb --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2023 bpmodels authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/R/likelihoods.R b/R/likelihoods.R index bdebd49a..70778346 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -1,27 +1,27 @@ -##' Likelihood of the size of chains with Poisson offspring distribution -##' -##' @param x vector of sizes -##' @param lambda rate of the Poisson distribution -##' @return log-likelihood values -##' @author Sebastian Funk -##' @keywords internal +#' Likelihood of the size of chains with Poisson offspring distribution +#' +#' @param x vector of sizes +#' @param lambda rate of the Poisson distribution +#' @return log-likelihood values +#' @author Sebastian Funk +#' @keywords internal pois_size_ll <- function(x, lambda) { (x - 1) * log(lambda) - lambda * x + (x - 2) * log(x) - lgamma(x) } -##' Likelihood of the size of chains with Negative-Binomial offspring -##' distribution -##' -##' @param x vector of sizes -##' @param size the dispersion parameter (often called \code{k} in ecological -##' applications) -##' @param prob probability of success (in the parameterisation with -##' \code{prob}, see also \code{\link[stats]{NegBinomial}}) -##' @param mu mean parameter -##' @return log-likelihood values -##' @author Sebastian Funk -##' @keywords internal +#' Likelihood of the size of chains with Negative-Binomial offspring +#' distribution +#' +#' @param x vector of sizes +#' @param size the dispersion parameter (often called \code{k} in ecological +#' applications) +#' @param prob probability of success (in the parameterisation with +#' \code{prob}, see also \code{\link[stats]{NegBinomial}}) +#' @param mu mean parameter +#' @return log-likelihood values +#' @author Sebastian Funk +#' @keywords internal nbinom_size_ll <- function(x, size, prob, mu) { if (!missing(prob)) { @@ -33,17 +33,17 @@ nbinom_size_ll <- function(x, size, prob, mu) (size * x + (x - 1)) * log(1 + mu / size) } -##' Likelihood of the size of chains with gamma-Borel offspring distribution -##' -##' @param x vector of sizes -##' @param size the dispersion parameter (often called \code{k} in ecological -##' applications) -##' @param prob probability of success (in the parameterisation with -##' \code{prob}, see also \code{\link[stats]{NegBinomial}}) -##' @param mu mean parameter -##' @return log-likelihood values -##' @author Sebastian Funk -##' @keywords internal +#' Likelihood of the size of chains with gamma-Borel offspring distribution +#' +#' @param x vector of sizes +#' @param size the dispersion parameter (often called \code{k} in ecological +#' applications) +#' @param prob probability of success (in the parameterisation with +#' \code{prob}, see also \code{\link[stats]{NegBinomial}}) +#' @param mu mean parameter +#' @return log-likelihood values +#' @author Sebastian Funk +#' @keywords internal gborel_size_ll <- function(x, size, prob, mu) { if (!missing(prob)) { if (!missing(mu)) stop("'prob' and 'mu' both specified") @@ -54,13 +54,13 @@ gborel_size_ll <- function(x, size, prob, mu) { (x - 1) * log(x) - (size + x - 1) * log(x + size / mu) } -##' Likelihood of the length of chains with Poisson offspring distribution -##' -##' @param x vector of sizes -##' @param lambda rate of the Poisson distribution -##' @return log-likelihood values -##' @author Sebastian Funk -##' @keywords internal +#' Likelihood of the length of chains with Poisson offspring distribution +#' +#' @param x vector of sizes +#' @param lambda rate of the Poisson distribution +#' @return log-likelihood values +#' @author Sebastian Funk +#' @keywords internal pois_length_ll <- function(x, lambda) { ## iterated exponential function @@ -73,14 +73,14 @@ pois_length_ll <- function(x, lambda) { log(Gk[x + 1] - Gk[x]) } -##' Likelihood of the length of chains with geometric offspring distribution -##' -##' @param x vector of sizes -##' @param prob probability of the geometric distribution with mean -##' \code{1/prob} -##' @return log-likelihood values -##' @author Sebastian Funk -##' @keywords internal +#' Likelihood of the length of chains with geometric offspring distribution +#' +#' @param x vector of sizes +#' @param prob probability of the geometric distribution with mean +#' \code{1/prob} +#' @return log-likelihood values +#' @author Sebastian Funk +#' @keywords internal geom_length_ll <- function(x, prob) { lambda <- 1 / prob @@ -91,20 +91,20 @@ geom_length_ll <- function(x, prob) { log(GkmGkm1) } -##' Likelihood of the length of chains with generic offspring distribution -##' -##' The likelihoods are calculated with a crude approximation using simulated -##' chains by linearly approximating any missing values in the empirical -##' cumulative distribution function (ecdf). -##' @param x vector of sizes -##' @param nsim_offspring number of simulations of the offspring distribution -##' for approximation the size/length distribution -##' @param ... any parameters to pass to \code{\link{chain_sim}} -##' @return log-likelihood values -##' @author Sebastian Funk -##' @inheritParams chain_ll -##' @inheritParams chain_sim -##' @keywords internal +#' Likelihood of the length of chains with generic offspring distribution +#' +#' The likelihoods are calculated with a crude approximation using simulated +#' chains by linearly approximating any missing values in the empirical +#' cumulative distribution function (ecdf). +#' @param x vector of sizes +#' @param nsim_offspring number of simulations of the offspring distribution +#' for approximation the size/length distribution +#' @param ... any parameters to pass to \code{\link{chain_sim}} +#' @return log-likelihood values +#' @author Sebastian Funk +#' @inheritParams chain_ll +#' @inheritParams chain_sim +#' @keywords internal offspring_ll <- function(x, offspring, stat, nsim_offspring=100, ...) { dist <- chain_sim(nsim_offspring, offspring, stat, ...) @@ -119,26 +119,26 @@ offspring_ll <- function(x, offspring, stat, nsim_offspring=100, ...) { log(lik) } -##' Likelihood for the outcome of a branching process -##' -##' @param x vector of sizes or lengths of transmission chains -##' @param stat statistic given as \code{x} ("size" or "length" of chains) -##' @param obs_prob observation probability (assumed constant) -##' @param infinite any chains of this size/length will be treated as infinite -##' @param exclude any sizes/lengths to exclude from the likelihood calculation -##' @param individual if TRUE, a vector of individual log-likelihood contributions will be returned rather than the sum -##' @param nsim_obs number of simulations if the likelihood is to be -##' approximated for imperfect observations -##' @param ... parameters for the offspring distribution -##' @return likelihood, or vector of likelihoods (if \code{obs_prob} < 1), or a list of individual likelihood contributions (if \code{individual=TRUE}) -##' @inheritParams chain_sim -##' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll -##' geom_length_ll offspring_ll -##' @author Sebastian Funk -##' @export -##' @examples -##' chain_sizes <- c(1,1,4,7) # example of observed chain sizes -##' chain_ll(chain_sizes, "pois", "size", lambda=0.5) +#' Likelihood for the outcome of a branching process +#' +#' @param x vector of sizes or lengths of transmission chains +#' @param stat statistic given as \code{x} ("size" or "length" of chains) +#' @param obs_prob observation probability (assumed constant) +#' @param infinite any chains of this size/length will be treated as infinite +#' @param exclude any sizes/lengths to exclude from the likelihood calculation +#' @param individual if TRUE, a vector of individual log-likelihood contributions will be returned rather than the sum +#' @param nsim_obs number of simulations if the likelihood is to be +#' approximated for imperfect observations +#' @param ... parameters for the offspring distribution +#' @return likelihood, or vector of likelihoods (if \code{obs_prob} < 1), or a list of individual likelihood contributions (if \code{individual=TRUE}) +#' @inheritParams chain_sim +#' @seealso pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll +#' geom_length_ll offspring_ll +#' @author Sebastian Funk +#' @export +#' @examples +#' chain_sizes <- c(1,1,4,7) # example of observed chain sizes +#' chain_ll(chain_sizes, "pois", "size", lambda=0.5) chain_ll <- function(x, offspring, stat=c("size", "length"), obs_prob=1, infinite = Inf, exclude=c(), individual=FALSE, nsim_obs, ...) { stat <- match.arg(stat) diff --git a/R/simulate.r b/R/simulate.r index 89f62cde..24f66ba8 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -1,31 +1,100 @@ -##' Simulate chains using a branching process -##' -##' @param n number of simulations to run. -##' @param offspring offspring distribution: a character string corresponding to -##' the R distribution function (e.g., "pois" for Poisson, where -##' \code{\link{rpois}} is the R function to generate Poisson random numbers) -##' @param stat statistic to calculate ("size" or "length" of chains) -##' @param infinite a size or length from which the size/length is to be -##' considered infinite -##' @param tree return the tree of infectors -##' @param serial the serial interval; a function that takes one parameter -##' (`n`), the number of serial intervals to randomly sample; if this parameter -##' is set, `chain_sim` returns times of infection, too; implies (`tree`=TRUE) -##' @param t0 start time (if serial interval is given); either a single value (0 -##' by default for all simulations, or a vector of length `n` with initial -##' times) -##' @param tf end time (if serial interval is given) -##' @param ... parameters of the offspring distribution -##' @return a vector of sizes/lengths (if \code{tree==FALSE} and no serial -##' interval given), or a data frame with columns `n` (simulation ID), `time` -##' (if the serial interval is given) and (if \code{tree==TRUE}) `id` (a -##' unique ID within each simulation for each individual element of the -##' chain), `ancestor` (the ID of the ancestor of each element) and -##' `generation`. -##' @author Sebastian Funk -##' @export -##' @examples -##' chain_sim(n=5, "pois", "size", lambda=0.5) +#' Simulate transmission chains using a branching process +#' @description \code{chain_sim()} is a stochastic simulator for generating +#' transmission chain data given information on the offspring distribution, +#' serial interval, time since the first case, etc. +#' @param n Number of simulations to run. +#' @param offspring Offspring distribution: a character string corresponding to +#' the R distribution function (e.g., "pois" for Poisson, where +#' \code{\link{rpois}} is the R function to generate Poisson random numbers) +#' @param stat String; Statistic to calculate. Can be one of: +#' \itemize{ +#' \item "size": the total number of offspring. +#' \item "length": the total number of ancestors. +#' } +#' @param infinite A size or length above which the simulation results should be +#' set to `Inf`. Defaults to `Inf`, resulting in no results ever set to `Inf` +#' @param tree Logical. Should the transmission tree be returned? Defaults to `FALSE`. +#' @param serial The serial interval generator function; the name of a user-defined +#' named or anonymous function with only one argument `n`, representing the number +#' of serial intervals to generate. +#' @param t0 Start time (if serial interval is given); either a single value or a +#' vector of length `n` (number of simulations) with initial times. Defaults to 0. +#' @param tf End time (if serial interval is given). +#' @param ... Parameters of the offspring distribution as required by R. +#' @return Either: +#' \itemize{ +#' \item{A vector of sizes/lengths (if \code{tree == FALSE} OR serial +#' interval function not specified, since that implies \code{tree == FALSE})}, or +#' \item {a data frame with +#' columns `n` (simulation ID), `time` (if the serial interval is given) and +#' (if \code{tree == TRUE}), `id` (a unique ID within each simulation for each +#' individual element of the chain), `ancestor` (the ID of the ancestor of each +#' element), and `generation`.} +#' } +#' @author Sebastian Funk, James M. Azam +#' @export +#' @details +#' `chain_sim()` either returns a vector or a data.frame. The output is either a +#' vector if `serial` is not provided, which automatically sets \code{tree = FALSE}, +#' or a `data.frame`, which means that `serial` was provided as a function. When `serial` +#' is provided, it means \code{tree = TRUE} automatically. However, setting +#' \code{tree = TRUE} would require providing a function for `serial`. +#' +#' # The serial interval (`serial`): +#' +#' ## Assumptions/disambiguation +#' +#' In epidemiology, the generation interval is the duration between successive +#' infectious events in a chain of transmission. Similarly, the serial interval is the +#' duration between observed symptom onset times between successive +#' cases in a transmission chain. The generation interval is often hard to observe +#' because exact times of infection are hard to measure hence, the serial interval +#' is often used instead. Here, we use the serial interval to represent what would +#' normally be called the generation interval, that is, the time between successive +#' cases. +#' +#' ## Specifying `serial` in `chain_sim()` +#' +#' `serial` must be specified as a named or +#' [anonymous/inline/unnamed function](https://en.wikipedia.org/wiki/Anonymous_function#R) +#' with one argument. +#' +#' If `serial` is specified, `chain_sim()` returns times of +#' infection as a column in the output. Moreover, specifying a function for `serial` implies +#' \code{tree = TRUE} and a tree of infectors (`ancestor`) and infectees (`id`) +#' will be generated in the output. +#' +#' For example, assuming we want to specify the serial interval +#' generator as a random log-normally distributed variable with `meanlog = 0.58` +#' and `sdlog = 1.58`, we could define a named function, let's call it +#' "serial_interval", with only one argument representing the number of serial +#' intervals to sample: \code{serial_interval <- function(n){rlnorm(n, 0.58, 1.38)}}, +#' and assign the name of the function to serial in `chain_sim()` like so +#' \code{chain_sim(..., serial = serial_interval)}, +#' where `...` are the other arguments to `chain_sim()`. Alternatively, we +#' could assign an anonymous function to serial in the `chain_sim()` call like so +#' \code{chain_sim(..., serial = function(n){rlnorm(n, 0.58, 1.38)})}, +#' where `...` are the other arguments to `chain_sim()`. +#' @examples +#' # Specifying no `serial` and `tree == FALSE` (default) returns a vector +#' set.seed(123) +#' chain_sim(n = 5, offspring = "pois", stat = "size", lambda = 0.5, tree = FALSE) +#' +#' # Specifying `serial` without specifying `tree` will set `tree = TRUE` internally. +#' +#' # We'll first define the serial function +#' set.seed(123) +#' serial_interval <- function(n){rlnorm(n, meanlog = 0.58, sdlog = 1.58)} +#' chain_sim(n = 5, offspring = 'pois', lambda = 0.5, stat = 'length', infinite = 100, +#' serial = serial_interval) +#' +#' # Specifying `serial` and `tree = FALSE` will throw an error +#' set.seed(123) +#' \dontrun{ +#' try(chain_sim(n = 10, serial = function(x) 3, offspring = "pois", lambda = 2, +#' infinite = 10, tree = FALSE) +#' ) +#' } chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, tree = FALSE, serial, t0 = 0, tf = Inf, ...) { @@ -33,7 +102,8 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, ## first, get random function as given by `offspring` if (!is.character(offspring)) { - stop("object passed as 'offspring' is not a character string.") + stop("object passed as 'offspring' is not a character string. Did you forget + to enclose it in quotes?") } roffspring_name <- paste0("r", offspring) @@ -43,7 +113,7 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, if (!missing(serial)) { if (!is.function(serial)) { - stop("The `serial` argument must be a function.") + stop("The `serial` argument must be a function (see details in ?chain_sim()).") } if (!missing(tree) && tree == FALSE) { stop("The `serial` argument can't be used with `tree==FALSE`.") @@ -81,7 +151,7 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, stop("Offspring distribution must return integers") } - ## record indices corresponding the number of offspring + ## record indices corresponding to the number of offspring indices <- rep(sim, n_offspring[sim]) ## initialise number of offspring diff --git a/R/simulate_susceptibles.R b/R/simulate_susceptibles.R index b292d5f0..1ece7158 100644 --- a/R/simulate_susceptibles.R +++ b/R/simulate_susceptibles.R @@ -1,35 +1,35 @@ -##' Simulate a single chain using a branching process while accounting -##' for depletion of susceptibles. -##' -##' @param offspring offspring distribution: a character string corresponding to -##' the R distribution function. Currently only "pois" & "nbinom" are -##' supported. Internally truncated distributions are used to avoid infecting -##' more people than susceptibles available. -##' @param mn_offspring the average number of secondary cases for each case -##' @param disp_offspring the dispersion coefficient (var/mean) of the number of -##' secondary cases. Ignored if offspring == "pois". Must be > 1. -##' @param serial the serial interval. A function that takes one parameter -##' (`n`), the number of serial intervals to randomly sample. -##' Value must be >= 0. -##' @param t0 start time -##' @param tf end time -##' @param pop the population -##' @param initial_immune the number of initial immunes in the population -##' @return a data frame with columns `time`, `id` (a unique ID for each -##' individual element of the chain), `ancestor` (the ID of the ancestor -##' of each element), and `generation`. -##' -##' @details This function has a couple of key differences with chain_sim: -##' it can only simulate one chain at a time, -##' it can only handle implemented offspring distributions -##' ("pois" and "nbinom"), -##' it always tracks and returns a data frame containing the entire tree, -##' the maximal length of chains is limited with pop instead of infinite. -##' -##' @author Flavio Finger -##' @export -##' @examples -##' chain_sim_susc("pois", mn_offspring=0.5, serial = function(x) 3, pop = 100) +#' Simulate a single chain using a branching process while accounting +#' for depletion of susceptibles. +#' +#' @param offspring offspring distribution: a character string corresponding to +#' the R distribution function. Currently only "pois" & "nbinom" are +#' supported. Internally truncated distributions are used to avoid infecting +#' more people than susceptibles available. +#' @param mn_offspring the average number of secondary cases for each case +#' @param disp_offspring the dispersion coefficient (var/mean) of the number of +#' secondary cases. Ignored if offspring == "pois". Must be > 1. +#' @param serial the serial interval. A function that takes one parameter +#' (`n`), the number of serial intervals to randomly sample. +#' Value must be >= 0. +#' @param t0 start time +#' @param tf end time +#' @param pop the population +#' @param initial_immune the number of initial immunes in the population +#' @return a data frame with columns `time`, `id` (a unique ID for each +#' individual element of the chain), `ancestor` (the ID of the ancestor +#' of each element), and `generation`. +#' +#' @details This function has a couple of key differences with chain_sim: +#' it can only simulate one chain at a time, +#' it can only handle implemented offspring distributions +#' ("pois" and "nbinom"), +#' it always tracks and returns a data frame containing the entire tree, +#' the maximal length of chains is limited with pop instead of infinite. +#' +#' @author Flavio Finger +#' @export +#' @examples +#' chain_sim_susc("pois", mn_offspring=0.5, serial = function(x) 3, pop = 100) chain_sim_susc <- function( offspring = c("pois", "nbinom"), mn_offspring, diff --git a/R/utils.r b/R/utils.r index 79c225e9..573e342c 100644 --- a/R/utils.r +++ b/R/utils.r @@ -1,54 +1,54 @@ -##' Calculates the complementary log-probability -##' -##' Given x and norm, this calculates log(1-sum(exp(x))) -##' @param x log-probabilities -##' @return value -##' @author Sebastian Funk -##' @keywords internal +#' Calculates the complementary log-probability +#' +#' Given x and norm, this calculates log(1-sum(exp(x))) +#' @param x log-probabilities +#' @return value +#' @author Sebastian Funk +#' @keywords internal complementary_logprob <- function(x) { tryCatch(log1p(-sum(exp(x))), error=function(e) -Inf) } -##' Samples size (the number of trials) of a binomial distribution -##' -##' Samples the size parameter from the binomial distribution with fixed x -##' (number of successes) and p (success probability) -##' @param n number of samples to generate -##' @param x number of successes -##' @param prob probability of success -##' @return sampled sizes -##' @author Sebastian Funk -##' @keywords internal +#' Samples size (the number of trials) of a binomial distribution +#' +#' Samples the size parameter from the binomial distribution with fixed x +#' (number of successes) and p (success probability) +#' @param n number of samples to generate +#' @param x number of successes +#' @param prob probability of success +#' @return sampled sizes +#' @author Sebastian Funk +#' @keywords internal rbinom_size <- function(n, x, prob) { x + stats::rnbinom(n, x + 1, prob) } -##' Samples chain lengths with given observation probabilities -##' -##' Samples the length of a transmission chain where each individual element is -##' observed with binomial probability with parameters n (number of successes) -##' and p (success probability) -##' @param n number of samples to generate -##' @param x observed chain lengths -##' @param prob probability of observation -##' @return sampled lengths -##' @author Sebastian Funk -##' @keywords internal +#' Samples chain lengths with given observation probabilities +#' +#' Samples the length of a transmission chain where each individual element is +#' observed with binomial probability with parameters n (number of successes) +#' and p (success probability) +#' @param n number of samples to generate +#' @param x observed chain lengths +#' @param prob probability of observation +#' @return sampled lengths +#' @author Sebastian Funk +#' @keywords internal rgen_length <- function(n, x, prob) { x + ceiling(log(stats::runif(n, 0, 1)) / log(1 - prob) - 1) + ceiling(log(stats::runif(n, 0, 1)) / log(1 - prob) - 1) } -##' Finds the name of a function passed as an argument -##' -##' This works even when a function is passed multiple times (e.g., when used -##' inside an \code{\link{optim}} call). -##' See https://stackoverflow.com/a/46740314/10886760 -##' @param fun function of which the name is to be determined -##' @return function name -##' @author Sebastian Funk -##' @keywords internal +#' Finds the name of a function passed as an argument +#' +#' This works even when a function is passed multiple times (e.g., when used +#' inside an \code{\link{optim}} call). +#' See https://stackoverflow.com/a/46740314/10886760 +#' @param fun function of which the name is to be determined +#' @return function name +#' @author Sebastian Funk +#' @keywords internal find_function_name <- function(fun) { objects <- ls(envir = environment(fun)) for (i in objects) { @@ -58,16 +58,16 @@ find_function_name <- function(fun) { } } -##' Negative binomial random numbers parametrized -##' in terms of mean and dispersion coefficient -##' @param n number of samples to draw -##' @param mn mean of distribution -##' @param disp dispersion coefficient (var/mean) -##' @return vector containing the random numbers -##' @author Flavio Finger -##' @export -##' @examples -##' rnbinom_mean_disp(n = 5, mn = 4, disp = 2) +#' Negative binomial random numbers parametrized +#' in terms of mean and dispersion coefficient +#' @param n number of samples to draw +#' @param mn mean of distribution +#' @param disp dispersion coefficient (var/mean) +#' @return vector containing the random numbers +#' @author Flavio Finger +#' @export +#' @examples +#' rnbinom_mean_disp(n = 5, mn = 4, disp = 2) rnbinom_mean_disp <- function(n, mn, disp) { size <- mn / (disp - 1) stats::rnbinom(n, size = size, mu = mn) diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 00000000..38d313ed --- /dev/null +++ b/README.Rmd @@ -0,0 +1,32 @@ +--- +output: github_document +bibliography: vignettes/references.bib +link-citations: true +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" +) +``` + +[![R-CMD-check](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml) +[![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/master/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +`bpmodels` is an R package to simulate and analyse the size and length of branching processes with a given offspring distribution. + +# Installation +The latest development version of the `bpmodels` package can be installed via + +```{r eval=FALSE} +devtools::install_github('sbfnk/bpmodels') +``` + +Please note that the 'bpmodels' project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. diff --git a/README.md b/README.md index 37b7901a..f64355ad 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,22 @@ -# bpmodels + + [![R-CMD-check](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml) -[![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/master/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) +[![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/master/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) -Methods for analysing the distribution of epidemiological chain sizes and lengths +`bpmodels` is an R package to simulate and analyse the size and length +of branching processes with a given offspring distribution. + +# Installation -The latest development version of the `bpmodels` package can be installed via +The latest development version of the `bpmodels` package can be +installed via -```{r eval=FALSE} -devtools::install_github('epiverse-trace/bpmodels') +``` r +devtools::install_github('sbfnk/bpmodels') ``` -Please note that the 'bpmodels' project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. +Please note that the ‘bpmodels’ project is released with a [Contributor +Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, +you agree to abide by its terms. diff --git a/man/chain_ll.Rd b/man/chain_ll.Rd index 82276f74..b110fdce 100644 --- a/man/chain_ll.Rd +++ b/man/chain_ll.Rd @@ -4,13 +4,22 @@ \alias{chain_ll} \title{Likelihood for the outcome of a branching process} \usage{ -chain_ll(x, offspring, stat = c("size", "length"), obs_prob = 1, - infinite = Inf, exclude = c(), individual = FALSE, nsim_obs, ...) +chain_ll( + x, + offspring, + stat = c("size", "length"), + obs_prob = 1, + infinite = Inf, + exclude = c(), + individual = FALSE, + nsim_obs, + ... +) } \arguments{ \item{x}{vector of sizes or lengths of transmission chains} -\item{offspring}{offspring distribution: a character string corresponding to +\item{offspring}{Offspring distribution: a character string corresponding to the R distribution function (e.g., "pois" for Poisson, where \code{\link{rpois}} is the R function to generate Poisson random numbers)} @@ -41,7 +50,7 @@ chain_ll(chain_sizes, "pois", "size", lambda=0.5) } \seealso{ pois_size_ll nbinom_size_ll gborel_size_ll pois_length_ll - geom_length_ll offspring_ll +geom_length_ll offspring_ll } \author{ Sebastian Funk diff --git a/man/chain_sim.Rd b/man/chain_sim.Rd index 5d2cf74d..fb86c0bf 100644 --- a/man/chain_sim.Rd +++ b/man/chain_sim.Rd @@ -2,51 +2,132 @@ % Please edit documentation in R/simulate.r \name{chain_sim} \alias{chain_sim} -\title{Simulate chains using a branching process} +\title{Simulate transmission chains using a branching process} \usage{ -chain_sim(n, offspring, stat = c("size", "length"), infinite = Inf, - tree = FALSE, serial, t0 = 0, tf = Inf, ...) +chain_sim( + n, + offspring, + stat = c("size", "length"), + infinite = Inf, + tree = FALSE, + serial, + t0 = 0, + tf = Inf, + ... +) } \arguments{ -\item{n}{number of simulations to run.} +\item{n}{Number of simulations to run.} -\item{offspring}{offspring distribution: a character string corresponding to +\item{offspring}{Offspring distribution: a character string corresponding to the R distribution function (e.g., "pois" for Poisson, where \code{\link{rpois}} is the R function to generate Poisson random numbers)} -\item{stat}{statistic to calculate ("size" or "length" of chains)} +\item{stat}{String; Statistic to calculate. Can be one of: +\itemize{ +\item "size": the total number of offspring. +\item "length": the total number of ancestors. +}} -\item{infinite}{a size or length from which the size/length is to be -considered infinite} +\item{infinite}{A size or length above which the simulation results should be +set to \code{Inf}. Defaults to \code{Inf}, resulting in no results ever set to \code{Inf}} -\item{tree}{return the tree of infectors} +\item{tree}{Logical. Should the transmission tree be returned? Defaults to \code{FALSE}.} -\item{serial}{the serial interval; a function that takes one parameter -(`n`), the number of serial intervals to randomly sample; if this parameter - is set, `chain_sim` returns times of infection, too; implies (`tree`=TRUE)} +\item{serial}{The serial interval generator function; the name of a user-defined +named or anonymous function with only one argument \code{n}, representing the number +of serial intervals to generate.} -\item{t0}{start time (if serial interval is given); either a single value (0 -by default for all simulations, or a vector of length `n` with initial -times)} +\item{t0}{Start time (if serial interval is given); either a single value or a +vector of length \code{n} (number of simulations) with initial times. Defaults to 0.} -\item{tf}{end time (if serial interval is given)} +\item{tf}{End time (if serial interval is given).} -\item{...}{parameters of the offspring distribution} +\item{...}{Parameters of the offspring distribution as required by R.} } \value{ -a vector of sizes/lengths (if \code{tree==FALSE} and no serial - interval given), or a data frame with columns `n` (simulation ID), `time` - (if the serial interval is given) and (if \code{tree==TRUE}) `id` (a - unique ID within each simulation for each individual element of the - chain), `ancestor` (the ID of the ancestor of each element) and - `generation`. +Either: +\itemize{ +\item{A vector of sizes/lengths (if \code{tree == FALSE} OR serial +interval function not specified, since that implies \code{tree == FALSE})}, or +\item {a data frame with +columns \code{n} (simulation ID), \code{time} (if the serial interval is given) and +(if \code{tree == TRUE}), \code{id} (a unique ID within each simulation for each +individual element of the chain), \code{ancestor} (the ID of the ancestor of each +element), and \code{generation}.} +} } \description{ -Simulate chains using a branching process +\code{chain_sim()} is a stochastic simulator for generating +transmission chain data given information on the offspring distribution, +serial interval, time since the first case, etc. +} +\details{ +\code{chain_sim()} either returns a vector or a data.frame. The output is either a +vector if \code{serial} is not provided, which automatically sets \code{tree = FALSE}, +or a \code{data.frame}, which means that \code{serial} was provided as a function. When \code{serial} +is provided, it means \code{tree = TRUE} automatically. However, setting +\code{tree = TRUE} would require providing a function for \code{serial}. +} +\section{The serial interval (\code{serial}):}{ +\subsection{Assumptions/disambiguation}{ + +In epidemiology, the generation interval is the duration between successive +infectious events in a chain of transmission. Similarly, the serial interval is the +duration between observed symptom onset times between successive +cases in a transmission chain. The generation interval is often hard to observe +because exact times of infection are hard to measure hence, the serial interval +is often used instead. Here, we use the serial interval to represent what would +normally be called the generation interval, that is, the time between successive +cases. +} + +\subsection{Specifying \code{serial} in \code{chain_sim()}}{ + +\code{serial} must be specified as a named or +\href{https://en.wikipedia.org/wiki/Anonymous_function#R}{anonymous/inline/unnamed function} +with one argument. + +If \code{serial} is specified, \code{chain_sim()} returns times of +infection as a column in the output. Moreover, specifying a function for \code{serial} implies +\code{tree = TRUE} and a tree of infectors (\code{ancestor}) and infectees (\code{id}) +will be generated in the output. + +For example, assuming we want to specify the serial interval +generator as a random log-normally distributed variable with \code{meanlog = 0.58} +and \code{sdlog = 1.58}, we could define a named function, let's call it +"serial_interval", with only one argument representing the number of serial +intervals to sample: \code{serial_interval <- function(n){rlnorm(n, 0.58, 1.38)}}, +and assign the name of the function to serial in \code{chain_sim()} like so +\code{chain_sim(..., serial = serial_interval)}, +where \code{...} are the other arguments to \code{chain_sim()}. Alternatively, we +could assign an anonymous function to serial in the \code{chain_sim()} call like so +\code{chain_sim(..., serial = function(n){rlnorm(n, 0.58, 1.38)})}, +where \code{...} are the other arguments to \code{chain_sim()}. } +} + \examples{ -chain_sim(n=5, "pois", "size", lambda=0.5) +# Specifying no `serial` and `tree == FALSE` (default) returns a vector +set.seed(123) +chain_sim(n = 5, offspring = "pois", stat = "size", lambda = 0.5, tree = FALSE) + +# Specifying `serial` without specifying `tree` will set `tree = TRUE` internally. + +# We'll first define the serial function +set.seed(123) +serial_interval <- function(n){rlnorm(n, meanlog = 0.58, sdlog = 1.58)} +chain_sim(n = 5, offspring = 'pois', lambda = 0.5, stat = 'length', infinite = 100, +serial = serial_interval) + +# Specifying `serial` and `tree = FALSE` will throw an error +set.seed(123) +\dontrun{ +try(chain_sim(n = 10, serial = function(x) 3, offspring = "pois", lambda = 2, +infinite = 10, tree = FALSE) +) +} } \author{ -Sebastian Funk +Sebastian Funk, James M. Azam } diff --git a/man/chain_sim_susc.Rd b/man/chain_sim_susc.Rd index 09b5858b..c06e52f1 100644 --- a/man/chain_sim_susc.Rd +++ b/man/chain_sim_susc.Rd @@ -5,8 +5,16 @@ \title{Simulate a single chain using a branching process while accounting for depletion of susceptibles.} \usage{ -chain_sim_susc(offspring = c("pois", "nbinom"), mn_offspring, - disp_offspring, serial, t0 = 0, tf = Inf, pop, initial_immune = 0) +chain_sim_susc( + offspring = c("pois", "nbinom"), + mn_offspring, + disp_offspring, + serial, + t0 = 0, + tf = Inf, + pop, + initial_immune = 0 +) } \arguments{ \item{offspring}{offspring distribution: a character string corresponding to @@ -20,7 +28,7 @@ more people than susceptibles available.} secondary cases. Ignored if offspring == "pois". Must be > 1.} \item{serial}{the serial interval. A function that takes one parameter -(`n`), the number of serial intervals to randomly sample. +(\code{n}), the number of serial intervals to randomly sample. Value must be >= 0.} \item{t0}{start time} @@ -32,9 +40,9 @@ Value must be >= 0.} \item{initial_immune}{the number of initial immunes in the population} } \value{ -a data frame with columns `time`, `id` (a unique ID for each - individual element of the chain), `ancestor` (the ID of the ancestor - of each element), and `generation`. +a data frame with columns \code{time}, \code{id} (a unique ID for each +individual element of the chain), \code{ancestor} (the ID of the ancestor +of each element), and \code{generation}. } \description{ Simulate a single chain using a branching process while accounting @@ -42,11 +50,11 @@ for depletion of susceptibles. } \details{ This function has a couple of key differences with chain_sim: - it can only simulate one chain at a time, - it can only handle implemented offspring distributions - ("pois" and "nbinom"), - it always tracks and returns a data frame containing the entire tree, - the maximal length of chains is limited with pop instead of infinite. +it can only simulate one chain at a time, +it can only handle implemented offspring distributions +("pois" and "nbinom"), +it always tracks and returns a data frame containing the entire tree, +the maximal length of chains is limited with pop instead of infinite. } \examples{ chain_sim_susc("pois", mn_offspring=0.5, serial = function(x) 3, pop = 100) diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index 260f36cd..427eb61a 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -9,7 +9,7 @@ offspring_ll(x, offspring, stat, nsim_offspring = 100, ...) \arguments{ \item{x}{vector of sizes} -\item{offspring}{offspring distribution: a character string corresponding to +\item{offspring}{Offspring distribution: a character string corresponding to the R distribution function (e.g., "pois" for Poisson, where \code{\link{rpois}} is the R function to generate Poisson random numbers)} @@ -25,8 +25,8 @@ log-likelihood values } \description{ The likelihoods are calculated with a crude approximation using simulated - chains by linearly approximating any missing values in the empirical - cumulative distribution function (ecdf). +chains by linearly approximating any missing values in the empirical +cumulative distribution function (ecdf). } \author{ Sebastian Funk diff --git a/vignettes/introduction.R b/vignettes/introduction.R new file mode 100644 index 00000000..af96b639 --- /dev/null +++ b/vignettes/introduction.R @@ -0,0 +1,31 @@ +## ----setup, include = FALSE--------------------------------------------------- +library('knitr') +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +## ----eval=FALSE--------------------------------------------------------------- +# library('bpmodels') + +## ----echo=FALSE--------------------------------------------------------------- +suppressWarnings(library('bpmodels')) +set.seed(13) + +## ----------------------------------------------------------------------------- +chain_sizes <- c(1, 1, 4, 7) # example of observed chain sizes +chain_ll(chain_sizes, "pois", "size", lambda = 0.5) + +## ----eval=FALSE--------------------------------------------------------------- +# ?chains_ll + +## ----------------------------------------------------------------------------- +chain_sim(n = 5, "pois", "size", lambda = 0.5) + +## ----------------------------------------------------------------------------- +chain_ll(chain_sizes, "binom", "size", size = 1, prob = 0.5, nsim_offspring = 100) + +## ----------------------------------------------------------------------------- +ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda = 0.5, nsim_obs = 10) +summary(ll) + diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 5d3e0c67..c66bb4f4 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -1,14 +1,23 @@ --- title: "Analysing chain statistics using branching process models" author: "Sebastian Funk" -date: "`r Sys.Date()`" -output: rmarkdown::html_vignette +output: + bookdown::html_vignette2: + fig_caption: yes + code_folding: show +pkgdown: + as_is: true +bibliography: references.bib +link-citations: true vignette: > %\VignetteIndexEntry{Analysing chain statistics using branching process models} - %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console --- + ```{r setup, include = FALSE} library('knitr') knitr::opts_chunk$set( @@ -17,9 +26,9 @@ knitr::opts_chunk$set( ) ``` -[bpmodels](https://github.com/sbfnk/bpmodels) is an `R` package to analyse and simulate the size and length of branching processes with a given offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks. +[bpmodels](https://github.com/sbfnk/bpmodels) is an `R` package to simulate and analyse the size and length of branching processes with a given offspring distribution. These can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks. -# Usage +# Quick start To load the package, use ```{r eval=FALSE} @@ -30,47 +39,63 @@ suppressWarnings(library('bpmodels')) set.seed(13) ``` -At the heart of the package are the `chains_ll` and `chains_sim` functions. The `chains_ll` function calculates the log-likelihood of a distribution of chain sizes or lengths given an offspring distribution and associated parameters. For example, to get the log-likelihood for a given observed distribution of chain sizes assuming a mean number of 0.5 Poisson-distributed offspring per generation, use +At the heart of the package are the `chains_ll()` and `chains_sim()` functions. + +## Calculating log-likelihoods + +The `chains_ll()` function calculates the log-likelihood of a distribution of chain sizes or lengths given an offspring distribution and its associated parameters. + +If we have observed a distribution of chains of sizes $1, 1, 4, 7$, we can calculate the log-likelihood of this observed chain by assuming the offspring per generation is Poisson distributed with a mean number of 0.5. + +To do this, we run ```{r} -chain_sizes <- c(1,1,4,7) # example of observed chain sizes -chain_ll(chain_sizes, "pois", "size", lambda=0.5) +chain_sizes <- c(1, 1, 4, 7) # example of observed chain sizes +chain_ll(chain_sizes, "pois", "size", lambda = 0.5) ``` -The first argument of `chain_ll` is the size (or length) distribution to analyse. The second argument (called `offspring`) specifies the offspring distribution. This is given as a the function used to generate random offspring. It can be any probability distribution implemented in R, that is, one that has a corresponding function for generating random numbers beginning with the letter `r`. In the case of the example above, since random Poisson numbers are generated in R using a function called `rpois`, the string to pass to the `offspring` argument is `"pois"`. +The first argument of `chain_ll()` is the size (or length) distribution to analyse. The second argument (called `offspring`) specifies the offspring distribution. This is given as a function used to generate random offspring. It can be any probability distribution implemented in `R`, that is, one that has a corresponding function for generating random numbers beginning with the letter `r`. In the case of the example above, since random Poisson numbers are generated in `R` using a function called `rpois()`, the string to pass to the `offspring` argument is `"pois"`. -The third argument (called `stat`) determines whether to analyse chain sizes (`"size"`, the default if this argument is not specified) or lengths (`"length"`). Lastly, any named arguments not recognised by `chain_ll` are interpreted as parameters of the corresponding probability distribution, here `lambda=0.5` as the mean of the Poisson distribution (see the R help page for the Poisson distribution for more information). +The third argument (called `stat`) determines whether to analyse chain sizes (`"size"`, the default if this argument is not specified) or lengths (`"length"`). Lastly, any named arguments not recognised by `chain_ll()` are interpreted as parameters of the corresponding probability distribution, here `lambda = 0.5` as the mean of the Poisson distribution (see the `R` help page for the [Poisson distribution](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/Poisson.html) for more information). -You can use the `R` help to find out about usage of the `chains_ll` function, +To find out about usage of the `chains_ll()` function, you can use the `R` help file ```{r eval=FALSE} ?chains_ll ``` -To simulate from a branching process, use the `chain_sim` function, which follows the same syntax as the `chain_ll` function: +## Simulating branching processes + +To simulate a branching process, we use the `chain_sim()` function. This function follows the same syntax as `chain_ll()`, that is: ```{r} -chain_sim(n=5, "pois", "size", lambda=0.5) +chain_sim(n = 5, "pois", "size", lambda = 0.5) ``` # Methodology -If the probability distribution of chain sizes or lengths has an analytical solution, this will be used (size distribution: Poisson and negative binomial; length distribution: Poisson and geometric). If not, simulations are used to approximate this probability distributions (using a linear approximation to the cumulative distribution for unobserved sizes/lengths), requiring an additional parameter `nsim_offspring` for the number of simulations to be used for this approximation. For example, to get offspring drawn from a binomial distribution with probability `prob=0.5`. +If the probability distribution of chain sizes or lengths has an analytical solution, this will be used (size distribution: Poisson and negative binomial; length distribution: Poisson and geometric). + +If an analytical solution does not exist, simulations are used to approximate this probability distributions (using a linear approximation to the cumulative distribution for unobserved sizes/lengths). The argument `nsim_offspring` is used to specify the number of simulations to be used for this approximation. + +For example, to get offspring drawn from a binomial distribution with probability `prob = 0.5`, we run ```{r} -chain_ll(chain_sizes, "binom", "size", size=1, prob=0.5, nsim_offspring=100) +chain_ll(chain_sizes, "binom", "size", size = 1, prob = 0.5, nsim_offspring = 100) ``` # Imperfect observations -The `chain_ll` function has an `obs_prob` parameter that can be used to determine the likelihood if observations are imperfect. In that case, true chain sizes or lengths are simulated repeatedly (the number of times given by the `nsim_obs` argument) and the likelihood calculated for each of these simulations. For example, if the probability of observing each case is 30%, use +If observations are imperfect, the `chain_ll()` function has an `obs_prob` argument that can be used to determine the likelihood. In that case, true chain sizes or lengths are simulated repeatedly (the number of times given by the `nsim_obs` argument), and the likelihood calculated for each of these simulations. + +For example, if the probability of observing each case is $30%$, we use ```{r} -ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda=0.5, nsim_obs=10) +ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda = 0.5, nsim_obs = 10) summary(ll) ``` -This returns `nsim_obs=10` likelihood values which can be averaged to come up with an overall likelihood estimate. +This returns `nsim_obs = 10` likelihood values which can be averaged to come up with an overall likelihood estimate. # References diff --git a/vignettes/introduction.md b/vignettes/introduction.md deleted file mode 100644 index 641d09d0..00000000 --- a/vignettes/introduction.md +++ /dev/null @@ -1,95 +0,0 @@ -[bpmodels](https://github.com/sbfnk/bpmodels) is an `R` package to -analyse and simulate the size and length of branching processes with a -given offspring distribution. These can be used, for example, to analyse -the distribution of chain sizes or length of infectious disease -outbreaks. - -Usage -===== - -To load the package, use - - library('bpmodels') - -At the heart of the package are the `chains_ll` and `chains_sim` -functions. The `chains_ll` function calculates the log-likelihood of a -distribution of chain sizes or lengths given an offspring distribution -and associated parameters. For example, to get the log-likelihood for a -given observed distribution of chain sizes assuming a mean number of 0.5 -Poisson-distributed offspring per generation, use - - chain_sizes <- c(1,1,4,7) # example of observed chain sizes - chain_ll(chain_sizes, "pois", "size", lambda=0.5) - #> [1] -8.607196 - -The first argument of `chain_ll` is the size (or length) distribution to -analyse. The second argument (called `offspring`) specifies the -offspring distribution. This is given as a the function used to generate -random offspring. It can be any probability distribution implemented in -R, that is, one that has a corresponding function for generating random -numbers beginning with the letter `r`. In the case of the example above, -since random Poisson numbers are generated in R using a function called -`rpois`, the string to pass to the `offspring` argument is `"pois"`. - -The third argument (called `stat`) determines whether to analyse chain -sizes (`"size"`, the default if this argument is not specified) or -lengths (`"length"`). Lastly, any named arguments not recognised by -`chain_ll` are interpreted as parameters of the corresponding -probability distribution, here `lambda=0.5` as the mean of the Poisson -distribution (see the R help page for the Poisson distribution for more -information). - -You can use the `R` help to find out about usage of the `chains_ll` -function, - - ?chains_ll - -To simulate from a branching process, use the `chain_sim` function, -which follows the same syntax as the `chain_ll` function: - - chain_sim(n=5, "pois", "size", lambda=0.5) - #> [1] 2 1 1 1 5 - -Methodology -=========== - -If the probability distribution of chain sizes or lengths has an -analytical solution, this will be used (size distribution: Poisson and -negative binomial; length distribution: Poisson and geometric). If not, -simulations are used to approximate this probability distributions -(using a linear approximation to the cumulative distribution for -unobserved sizes/lengths), requiring an additional parameter -`nsim_offspring` for the number of simulations to be used for this -approximation. For example, to get offspring drawn from a binomial -distribution with probability `prob=0.5`. - - chain_ll(chain_sizes, "binom", "size", size=1, prob=0.5, nsim_offspring=100) - #> [1] -8.477588 - -Imperfect observations -====================== - -The `chain_ll` function has an `obs_prob` parameter that can be used to -determine the likelihood if observations are imperfect. In that case, -true chain sizes or lengths are simulated repeatedly (the number of -times given by the `nsim_obs` argument) and the likelihood calculated -for each of these simulations. For example, if the probability of -observing each case is 30%, use - - ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda=0.5, nsim_obs=10) - summary(ll) - #> Min. 1st Qu. Median Mean 3rd Qu. Max. - #> -35.30 -25.68 -23.23 -24.19 -20.89 -18.91 - -This returns `nsim_obs=10` likelihood values which can be averaged to -come up with an overall likelihood estimate. - -References -========== - -- Farrington, C.P., Kanaan, M.N. and Gay, N.J. (2003). [Branching - process models for surveillance of infectious diseases controlled by - mass vaccination](https://doi.org/10.1093/biostatistics/4.2.279). -- Blumberg, S. and Lloyd-Smith, J.O. (2013). [Comparing methods for - estimating R0 from the size distribution of subcritical transmission - chains](https://doi.org/10.1016/j.epidem.2013.05.002). diff --git a/vignettes/projecting_incidence.R b/vignettes/projecting_incidence.R new file mode 100644 index 00000000..f46303b5 --- /dev/null +++ b/vignettes/projecting_incidence.R @@ -0,0 +1,120 @@ +## ----setup, include=FALSE----------------------------------------------------- +knitr::opts_chunk$set(echo = TRUE, + message = FALSE, + warning = FALSE, + collapse = TRUE, + comment = "#>" + ) + + +## ----loading_packages, include=TRUE------------------------------------------- +library("bpmodels") +library('dplyr') +library('ggplot2') +library('lubridate') + +## ----data_generation, message=FALSE------------------------------------------- +set.seed(12) +cases_df <- data.frame(date = as.Date('2023-01-01') + seq_len(12), + cases = rnbinom(12, size = 7.5, mu = 5) + ) +head(cases_df) + +ggplot(cases_df, + aes(x = date, y = cases) + ) + + geom_col(fill = 'tomato3', size = 1) + +## ----input_prep, message=FALSE------------------------------------------------ +# We will create a vector of starting times for each case, using the time of the index cases as the reference point +cases_df$days_since_index <- as.integer(cases_df$date - min(cases_df$date)) + +#'Disaggregate the time series +case_times <- unlist(mapply(function(x, y) rep(x, times = ifelse(y == 0, 1, y)), + cases_df$days_since_index, + cases_df$cases + ) + ) + + + +#' Date to end simulation (14 day projection in this case) +projection_window <- 14 #2 week ahead projection +project_to_date <- max(cases_df$days_since_index) + projection_window + + +#' Number of simulations and maximum chain size +sim_rep <- 1000 +cases_to_project <- 1000 + + +### Specifying the `serial` argument to `chain_sim()` +#' Assume serial interval follows log-normal distribution with mean, mu = 4.7, +#' and standard deviation, sigma = 2.9, then the desired standard deviation, si_sd, +#' and mean, si_mean, are +sigma = 2.9 +mu = 4.7 + +si_sd <- sqrt(log(1 + (sigma/mu)^2)) #log standard deviation +si_mean <- log((mu^2)/(sqrt(sigma^2 + mu^2))) #log mean + +#' serial interval function +serial_interval <- function(sample_size) { + si <- rlnorm(sample_size, meanlog = si_mean, sdlog = si_sd) + return(si) +} + +## ----simulations, message=FALSE----------------------------------------------- +## Chain log-likelihood simulation +sim_chain_sizes <- lapply(seq_len(sim_rep), + function(sim){chain_sim( + n = length(case_times), + offspring = "nbinom", + mu = 2.0, + size = 0.38, + stat = "size", + infinite = cases_to_project, + serial = serial_interval, + t0 = case_times, + tf = project_to_date, + tree = TRUE + ) |> + mutate(sim = sim)} + ) + +sim_output <- do.call(rbind, sim_chain_sizes) + +## ----post_processing---------------------------------------------------------- +ref_date <- min(cases_df$date) + +incidence_ts <- sim_output |> + mutate(day = floor(time)) |> + group_by(sim, day) |> + summarise(cases = n()) |> + ungroup() + + +## Median cases by date. +median_daily_cases <- incidence_ts |> + group_by(day)|> + summarise(median_cases = median(cases)) |> + ungroup()|> + arrange(day) |> + mutate(date = ymd(ref_date) + 0:(project_to_date - 1)) + + +## ----visualisation------------------------------------------------------------ +# Visualization +cases_plot <- ggplot(data = median_daily_cases) + + geom_col(aes(x = date, y = median_cases), + fill = "tomato3", + size = 1 + ) + + scale_y_continuous(breaks = seq(0, max(median_daily_cases$median_cases) + 20, 20), + labels = seq(0, max(median_daily_cases$median_cases) + 20, 20) + ) + + labs(x = 'Date', y = 'Daily cases (median)') + + theme_minimal(base_size = 14) + +print(cases_plot) + diff --git a/vignettes/references.bib b/vignettes/references.bib new file mode 100644 index 00000000..64afb555 --- /dev/null +++ b/vignettes/references.bib @@ -0,0 +1,23 @@ +@Article{Farrington2003, + author = {Farrington, CP and Kanaan, MN and Gay, NJ}, + journal = {Biostatistics}, + title = {Branching process models for surveillance of infectious diseases controlled by mass vaccination}, + year = {2003}, + number = {2}, + pages = {279--295}, + volume = {4}, + publisher = {Oxford University Press}, +} + +@Article{Blumberg2013, + author = {Blumberg, Seth and Lloyd-Smith, James O}, + journal = {Epidemics}, + title = {Comparing methods for estimating R0 from the size distribution of subcritical transmission chains}, + year = {2013}, + number = {3}, + pages = {131--145}, + volume = {5}, + publisher = {Elsevier}, +} + +@Comment{jabref-meta: databaseType:bibtex;} From 16b5de5ba7b7cc847f97be68d9a16b3fe508b709 Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 27 Jan 2023 11:15:20 +0000 Subject: [PATCH 060/828] Removed error when tree = FALSE and serial specified (#20) Previously, an error would be thrown if the user specified a serial interval but also set tree=FALSE. The allowed specification here is tree=TRUE. This update will remove the error and rather throw a warning message. --- R/simulate.r | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/simulate.r b/R/simulate.r index 24f66ba8..520a3baf 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -116,8 +116,9 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, stop("The `serial` argument must be a function (see details in ?chain_sim()).") } if (!missing(tree) && tree == FALSE) { - stop("The `serial` argument can't be used with `tree==FALSE`.") - } + warning("`serial` can't be used with `tree = FALSE`; Setting `tree = TRUE` internally.") + tree <- TRUE + } tree <- TRUE } else if (!missing(tf)) { stop("The `tf` argument needs a `serial` argument.") From aca877129aa216775d58034d588ab422187aa3cb Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 27 Jan 2023 11:36:45 +0000 Subject: [PATCH 061/828] Tests and updated error messaging (#19) * fixed partial argument in an old test * fixed a failing test * revised some error messages * added more unit tests to improve coverage --- R/simulate.r | 8 +++---- tests/testthat/tests-sim.r | 41 +++++++++++++++++++++++++++++++++--- vignettes/references.bib.sav | 23 ++++++++++++++++++++ 3 files changed, 64 insertions(+), 8 deletions(-) create mode 100644 vignettes/references.bib.sav diff --git a/R/simulate.r b/R/simulate.r index 520a3baf..f7df35a4 100644 --- a/R/simulate.r +++ b/R/simulate.r @@ -97,7 +97,6 @@ #' } chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, tree = FALSE, serial, t0 = 0, tf = Inf, ...) { - stat <- match.arg(stat) ## first, get random function as given by `offspring` @@ -116,12 +115,11 @@ chain_sim <- function(n, offspring, stat = c("size", "length"), infinite = Inf, stop("The `serial` argument must be a function (see details in ?chain_sim()).") } if (!missing(tree) && tree == FALSE) { - warning("`serial` can't be used with `tree = FALSE`; Setting `tree = TRUE` internally.") - tree <- TRUE - } + stop("If `serial` is specified, then `tree` cannot be set to `FALSE`.") + } tree <- TRUE } else if (!missing(tf)) { - stop("The `tf` argument needs a `serial` argument.") + stop("If `tf` is specified, `serial` must be specified too.") } stat_track <- rep(1, n) ## track length or size (depending on `stat`) diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index 348c4bf3..3c74acb4 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -8,12 +8,33 @@ test_that("Chains can be simulated", infinite=10))) expect_false(any(is.finite(chain_sim(n=2, "pois", "length", lambda=0.5, infinite=1)))) + expect_no_error(chain_sim(n = 2, offspring = 'pois', "size", lambda = 0.9, + tree = TRUE) + ) }) test_that("Errors are thrown", { expect_error(chain_sim(n=2, "dummy"), "does not exist") expect_error(chain_sim(n=2, "lnorm", meanlog=log(1.6)), "integer") + expect_error(chain_sim(n = 2, offspring = pois, "length", lambda = 0.9), + "not found" + ) + expect_error(chain_sim(n = 2, offspring = 'pois', "size", lambda = 0.9, + serial = c(1:2), "must be a function") + ) + expect_error(chain_sim(n = 2, offspring = c(1, 2), "length", lambda = 0.9), + "not a character string") + expect_error(chain_sim(n = 2, offspring = list(1, 2), "length", lambda = 0.9), + "not a character string") + expect_error(chain_sim(n = 2, offspring = 'pois', "size", lambda = 0.9, + serial = function(x) rpois(x, 0.9), tree = FALSE), + "If `serial` is specified, then `tree` cannot be set to `FALSE`." + ) + expect_error(chain_sim(n = 2, offspring = 'pois', "size", lambda = 0.9, + tf = 5, tree = FALSE), + "If `tf` is specified, `serial` must be specified too." + ) }) context("Simulating from a branching process model @@ -38,7 +59,7 @@ test_that("Chains can be simulated", chain_sim_susc( "nbinom", mn_offspring = 2, - disp = 1.5, + disp_offspring = 1.5, serial = function(x) 3, pop = 100 ) @@ -90,7 +111,7 @@ test_that("Errors are thrown", mn_offspring = 3, serial = function(x) 3, pop = 100), - "'arg' should be one of \"pois\", \"nbinom\"") + paste0("'arg' should be one of ", dQuote('pois'), ', ', dQuote('nbinom'))) expect_error( chain_sim_susc( "nbinom", @@ -110,4 +131,18 @@ test_that("Errors are thrown", ), "argument \"disp_offspring\" is missing, with no default") -}) \ No newline at end of file +}) + +test_that('warnings work as expected', { + expect_warning( + chain_sim_susc( + "pois", + mn_offspring = 3, + disp_offspring = 1, + serial = function(x) 3, + pop = 100 + ), + "argument disp_offspring not used for + poisson offspring distribution." + ) +}) diff --git a/vignettes/references.bib.sav b/vignettes/references.bib.sav new file mode 100644 index 00000000..f0c72507 --- /dev/null +++ b/vignettes/references.bib.sav @@ -0,0 +1,23 @@ +@Article{farrington2003branching, + author = {Farrington, CP and Kanaan, MN and Gay, NJ}, + journal = {Biostatistics}, + title = {Branching process models for surveillance of infectious diseases controlled by mass vaccination}, + year = {2003}, + number = {2}, + pages = {279--295}, + volume = {4}, + publisher = {Oxford University Press}, +} + +@Article{blumberg2013comparing, + author = {Blumberg, Seth and Lloyd-Smith, James O}, + journal = {Epidemics}, + title = {Comparing methods for estimating R0 from the size distribution of subcritical transmission chains}, + year = {2013}, + number = {3}, + pages = {131--145}, + volume = {5}, + publisher = {Elsevier}, +} + +@Comment{jabref-meta: databaseType:bibtex;} From 96583748cf1a4330c22889021f997ea88e3ea3f7 Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 27 Jan 2023 12:00:42 +0000 Subject: [PATCH 062/828] fixed covr badge pointing to master (#26) --- README.Rmd | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.Rmd b/README.Rmd index 38d313ed..5405893f 100644 --- a/README.Rmd +++ b/README.Rmd @@ -14,7 +14,7 @@ knitr::opts_chunk$set( ``` [![R-CMD-check](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml) -[![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/master/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) +[![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/main/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) diff --git a/README.md b/README.md index f64355ad..b1cbaa53 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![R-CMD-check](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml) -[![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/master/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) +[![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/main/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) `bpmodels` is an R package to simulate and analyse the size and length From 1d5a90861f151887770a87746e625601f260f0fa Mon Sep 17 00:00:00 2001 From: James Azam Date: Thu, 5 Jan 2023 17:24:36 +0000 Subject: [PATCH 063/828] gitignore --- .gitignore | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.gitignore b/.gitignore index 8b89fb81..2d674619 100644 --- a/.gitignore +++ b/.gitignore @@ -29,9 +29,3 @@ vignettes/*.pdf rsconnect/ /doc/ /Meta/ - -.Rbuildignore - -*.Rproj - -*.bib.sav From 9b28b1491a4b366aa32dcbead4e4e87a277a8485 Mon Sep 17 00:00:00 2001 From: jamesmbaazam Date: Fri, 13 Jan 2023 12:45:15 +0000 Subject: [PATCH 064/828] removed references.bib.sav --- vignettes/references.bib.sav | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 vignettes/references.bib.sav diff --git a/vignettes/references.bib.sav b/vignettes/references.bib.sav deleted file mode 100644 index f0c72507..00000000 --- a/vignettes/references.bib.sav +++ /dev/null @@ -1,23 +0,0 @@ -@Article{farrington2003branching, - author = {Farrington, CP and Kanaan, MN and Gay, NJ}, - journal = {Biostatistics}, - title = {Branching process models for surveillance of infectious diseases controlled by mass vaccination}, - year = {2003}, - number = {2}, - pages = {279--295}, - volume = {4}, - publisher = {Oxford University Press}, -} - -@Article{blumberg2013comparing, - author = {Blumberg, Seth and Lloyd-Smith, James O}, - journal = {Epidemics}, - title = {Comparing methods for estimating R0 from the size distribution of subcritical transmission chains}, - year = {2013}, - number = {3}, - pages = {131--145}, - volume = {5}, - publisher = {Elsevier}, -} - -@Comment{jabref-meta: databaseType:bibtex;} From bf1ddecd709f5e4a76ced68359eb3065f0dfbaf1 Mon Sep 17 00:00:00 2001 From: James Azam Date: Sat, 14 Jan 2023 21:13:59 +0000 Subject: [PATCH 065/828] skeletal version of vignette about projecting incidence --- vignettes/projecting_incidence.Rmd | 139 +++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 vignettes/projecting_incidence.Rmd diff --git a/vignettes/projecting_incidence.Rmd b/vignettes/projecting_incidence.Rmd new file mode 100644 index 00000000..08c1ea4c --- /dev/null +++ b/vignettes/projecting_incidence.Rmd @@ -0,0 +1,139 @@ +--- +title: "Projecting future disease incidence given early outbreak data" +author: "James Azam, Sebastian Funk" +date: '2023-01-13' +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE) + +library("bpmodels") +library("readr") +library("lubridate") +library('dplyr') +library('ggplot2') +``` + +## Description +Branching processes can be used to project future disease incidence given early +outbreak data. `bpmodels` can simulate branching processes using its `chain_sim()` function. + +## Disease data + +Let's create some early outbreak data, assuming the cases have a negative binomial \ +distribution + +```{r data_generation, message=FALSE} +set.seed(12) +cases_df <- data.frame(date = as.Date('2023-01-01') + seq_len(12), + cases = rnbinom(12, size = 7.5, mu = 5) + ) +head(cases_df) +ggplot(cases_df, aes(x = date, y = cases)) + geom_col() +``` + +## Preparing the inputs + +```{r input_prep, message=FALSE} +# We will create a vector of starting times for each case, using the time of the index cases as the reference point +cases_df$days_since_index <- as.integer(cases_df$date - min(cases_df$date)) + +#'Disaggregate the time series +case_times <- unlist(mapply(function(x, y) rep(x, times = ifelse(y == 0, 1, y)), + cases_df$days_since_index, + cases_df$cases + ) + ) + + + +#' Date to end simulation (14 day projection in this case) +projection_window <- 14 #2 week ahead projection +project_to_date <- max(cases_df$days_since_index) + projection_window + + +#' Number of simulations and maximum chain size +sim_rep <- 1000 +cases_to_project <- 1000 + + +### Specifying the `serial` argument to `chain_sim()` +#' Assume serial interval follows log-normal distribution with mean, mu = 4.7, +#' and standard deviation, sigma = 2.9, then the desired standard deviation, si_sd, +#' and mean, si_mean, are +sigma = 2.9 +mu = 4.7 + +si_sd <- sqrt(log(1 + (sigma/mu)^2)) +si_mean <- log((mu^2)/(sqrt(sigma^2 + mu^2))) #the desired mean + +#' serial interval function +serial_interval <- function(sample_size = 1) { + si <- rlnorm(sample_size, meanlog = si_mean, sdlog = si_sd) + return(si) +} +``` + +## Simulations +```{r simulations, message=FALSE} +## Chain log-likelihood simulation +sim_chain_sizes <- lapply(seq_len(sim_rep), + function(sim){chain_sim( + n = length(case_times), + offspring = "nbinom", + mu = 2.0, + size = 0.38, + stat = "size", + infinite = cases_to_project, + serial = serial_interval, + t0 = case_times, + tf = project_to_date, + tree = TRUE + ) |> + mutate(sim = sim)} + ) + +sim_output <- do.call(rbind, sim_chain_sizes) +``` + + +### Post-processing +```{r post_processing} +ref_date <- min(cases_df$date) + +incidence_ts <- sim_output |> + mutate(day = floor(time)) |> + group_by(sim, day) |> + summarise(cases = n()) |> + ungroup() + + +## Median cases by date. +median_daily_cases <- incidence_ts |> + group_by(day)|> + summarise(median_cases = median(cases)) |> + ungroup()|> + arrange(day) |> + mutate(date = ymd(ref_date) + 0:(project_to_date - 1)) + +``` + + +## Visualization +```{r visualisation} +# Visualization +cases_plot <- ggplot(data = median_daily_cases) + + geom_col(aes(x = date, y = median_cases), + fill = "tomato3", + size = 1 + ) + + scale_y_continuous(breaks = seq(0, max(median_daily_cases$median_cases) + 20, 20), + labels = seq(0, max(median_daily_cases$median_cases) + 20, 20) + ) + + labs(x = 'Date', y = 'Daily cases (median)') + + theme_minimal(base_size = 14) + +print(cases_plot) +``` + From 744fe768aa204862aeb2285ea619f38069fe345d Mon Sep 17 00:00:00 2001 From: James Azam Date: Tue, 17 Jan 2023 14:03:39 +0000 Subject: [PATCH 066/828] added a bib file for the references --- vignettes/references.bib | 240 +++++++++++++++++++++++++++++++++++---- 1 file changed, 217 insertions(+), 23 deletions(-) diff --git a/vignettes/references.bib b/vignettes/references.bib index 64afb555..c19f323b 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -1,23 +1,217 @@ -@Article{Farrington2003, - author = {Farrington, CP and Kanaan, MN and Gay, NJ}, - journal = {Biostatistics}, - title = {Branching process models for surveillance of infectious diseases controlled by mass vaccination}, - year = {2003}, - number = {2}, - pages = {279--295}, - volume = {4}, - publisher = {Oxford University Press}, -} - -@Article{Blumberg2013, - author = {Blumberg, Seth and Lloyd-Smith, James O}, - journal = {Epidemics}, - title = {Comparing methods for estimating R0 from the size distribution of subcritical transmission chains}, - year = {2013}, - number = {3}, - pages = {131--145}, - volume = {5}, - publisher = {Elsevier}, -} - -@Comment{jabref-meta: databaseType:bibtex;} +@article{Farrington2003, +abstract = {Mass vaccination programmes aim to maintain the effective reproduction number R of an infection below unity. We describe methods for monitoring the value of R using surveillance data. The models are based on branching processes in which R is identified with the offspring mean. We derive unconditional likelihoods for the offspring mean using data on outbreak size and outbreak duration. We also discuss Bayesian methods, implemented by Metropolis-Hastings sampling. We investigate by simulation the validity of the models with respect to depletion of susceptibles and under-ascertainment of cases. The methods are illustrated using surveillance data on measles in the USA.}, +author = {Farrington, C. P. and Kanaan, M. N. and Gay, N. J.}, +doi = {10.1093/biostatistics/4.2.279}, +issn = {14654644}, +journal = {Biostatistics (Oxford, England)}, +number = {2}, +pages = {279--295}, +title = {{Branching process models for surveillance of infectious diseases controlled by mass vaccination.}}, +volume = {4}, +year = {2003} +} +@article{Jacob2010, +abstract = {Branching processes are stochastic individual-based processes leading consequently to a bottom-up approach. In addition, since the state variables are random integer variables (representing population sizes), the extinction occurs at random finite time on the extinction set, thus leading to fine and realistic predictions. Starting from the simplest and well-known single-type Bienaym{\'{e}}-Galton-Watson branching process that was used by several authors for approximating the beginning of an epidemic, we then present a general branching model with age and population dependent individual transitions. However contrary to the classical Bienaym{\'{e}}-Galton-Watson or asymptotically Bienaym{\'{e}}-Galton-Watson setting, where the asymptotic behavior of the process, as time tends to infinity, is well understood, the asymptotic behavior of this general process is a new question. Here we give some solutions for dealing with this problem depending on whether the initial population size is large or small, and whether the disease is rare or non-rare when the initial population size is large.}, +author = {Jacob, Christine}, +doi = {10.3390/ijerph7031204}, +issn = {16604601}, +journal = {International Journal of Environmental Research and Public Health}, +keywords = {Age-dependence,Branching process,Epidemic size,Extinction time,Population-dependence}, +number = {3}, +pages = {1186--1204}, +title = {{Branching processes: Their role in epidemiology}}, +volume = {7}, +year = {2010} +} +@article{Blumberg2013, +abstract = {Many diseases exhibit subcritical transmission (i.e. 01. In stochastic epidemic theory, there are also thresholds that predict a major outbreak. In the case of a single infectious group, if ℛ(0)>1 and i infectious individuals are introduced into a susceptible population, then the probability of a major outbreak is approximately 1-(1/ℛ(0))( i ). With multiple infectious groups from which the disease could emerge, this result no longer holds. Stochastic thresholds for multiple groups depend on the number of individuals within each group, i ( j ), j=1, {\ldots}, n, and on the probability of disease extinction for each group, q ( j ). It follows from multitype branching processes that the probability of a major outbreak is approximately [Formula: see text]. In this investigation, we summarize some of the deterministic and stochastic threshold theory, illustrate how to calculate the stochastic thresholds, and derive some new relationships between the deterministic and stochastic thresholds.}, +author = {Allen, Linda J.S. and Lahodny, Glenn E.}, +doi = {10.1080/17513758.2012.665502}, +issn = {17513758}, +journal = {Journal of Biological Dynamics}, +keywords = {multitype branching processes,reproduction numbers}, +number = {2}, +pages = {590--611}, +title = {{Extinction thresholds in deterministic and stochastic epidemic models}}, +volume = {6}, +year = {2012} +} +@article{Blumberg2013a, +abstract = {For many infectious disease processes such as emerging zoonoses and vaccine-preventable diseases, 0 Date: Tue, 17 Jan 2023 15:59:43 +0000 Subject: [PATCH 067/828] updated vignette --- vignettes/projecting_incidence.Rmd | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/vignettes/projecting_incidence.Rmd b/vignettes/projecting_incidence.Rmd index 08c1ea4c..9af16902 100644 --- a/vignettes/projecting_incidence.Rmd +++ b/vignettes/projecting_incidence.Rmd @@ -3,6 +3,7 @@ title: "Projecting future disease incidence given early outbreak data" author: "James Azam, Sebastian Funk" date: '2023-01-13' output: html_document +bibliography: vignettes/references.bib --- ```{r setup, include=FALSE} @@ -16,13 +17,16 @@ library('ggplot2') ``` ## Description -Branching processes can be used to project future disease incidence given early -outbreak data. `bpmodels` can simulate branching processes using its `chain_sim()` function. +Branching processes can be used to project disease incidence provided we have some +information on the distribution of times between successive cases (serial interval), +and the distribution of secondary cases produced by a single individual (offspring +distribution). Such simulations can be achieved in `bpmodels` with the `chain_sim()` function. ## Disease data -Let's create some early outbreak data, assuming the cases have a negative binomial \ -distribution +Let's create an outbreak dataset, assuming the cases are sampled from a negative binomial +distribution with mean = 5 and dispersion = 7.5. These parameter values are arbitrarily +chosen for illustrative purposes. ```{r data_generation, message=FALSE} set.seed(12) @@ -65,11 +69,11 @@ cases_to_project <- 1000 sigma = 2.9 mu = 4.7 -si_sd <- sqrt(log(1 + (sigma/mu)^2)) -si_mean <- log((mu^2)/(sqrt(sigma^2 + mu^2))) #the desired mean +si_sd <- sqrt(log(1 + (sigma/mu)^2)) #log standard deviation +si_mean <- log((mu^2)/(sqrt(sigma^2 + mu^2))) #log mean #' serial interval function -serial_interval <- function(sample_size = 1) { +serial_interval <- function(sample_size) { si <- rlnorm(sample_size, meanlog = si_mean, sdlog = si_sd) return(si) } From ebcfb54c6573517c6730e71608fdf453fe91bc86 Mon Sep 17 00:00:00 2001 From: James Azam Date: Wed, 18 Jan 2023 10:29:08 +0000 Subject: [PATCH 068/828] setting up the new vignette --- vignettes/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 vignettes/.gitignore diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 00000000..097b2416 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R From 6381a9584d4d6eb0afdce8b2ba9eb391f9adfe1d Mon Sep 17 00:00:00 2001 From: James Azam Date: Wed, 18 Jan 2023 15:38:13 +0000 Subject: [PATCH 069/828] updated DESC packages to include vignette requirements --- DESCRIPTION | 5 +++- vignettes/projecting_incidence.Rmd | 44 +++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 86bd2ff4..7284e3f5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,7 +21,10 @@ Suggests: rmarkdown, bookdown, testthat, - truncdist + truncdist, + dplyr, + ggplot2, + lubridate VignetteBuilder: knitr Encoding: UTF-8 diff --git a/vignettes/projecting_incidence.Rmd b/vignettes/projecting_incidence.Rmd index 9af16902..9256f35f 100644 --- a/vignettes/projecting_incidence.Rmd +++ b/vignettes/projecting_incidence.Rmd @@ -1,19 +1,30 @@ --- title: "Projecting future disease incidence given early outbreak data" author: "James Azam, Sebastian Funk" -date: '2023-01-13' -output: html_document -bibliography: vignettes/references.bib +output: + bookdown::html_vignette2: + fig_caption: yes + code_folding: show +pkgdown: + as_is: true +bibliography: references.bib +link-citations: true +vignette: > + %\VignetteIndexEntry{Projecting future disease incidence given early outbreak data} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console --- ```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE) +knitr::opts_chunk$set(echo = TRUE, + message = FALSE, + warning = FALSE, + collapse = TRUE, + comment = "#>" + ) -library("bpmodels") -library("readr") -library("lubridate") -library('dplyr') -library('ggplot2') ``` ## Description @@ -22,6 +33,15 @@ information on the distribution of times between successive cases (serial interv and the distribution of secondary cases produced by a single individual (offspring distribution). Such simulations can be achieved in `bpmodels` with the `chain_sim()` function. +Let's load the required packages + +```{r loading_packages, include=TRUE} +library("bpmodels") +library('dplyr') +library('ggplot2') +library('lubridate') +``` + ## Disease data Let's create an outbreak dataset, assuming the cases are sampled from a negative binomial @@ -34,7 +54,11 @@ cases_df <- data.frame(date = as.Date('2023-01-01') + seq_len(12), cases = rnbinom(12, size = 7.5, mu = 5) ) head(cases_df) -ggplot(cases_df, aes(x = date, y = cases)) + geom_col() + +ggplot(cases_df, + aes(x = date, y = cases) + ) + + geom_col(fill = 'tomato3', size = 1) ``` ## Preparing the inputs From 6db065e5237f7d8b2119df9ad53faf2525324846 Mon Sep 17 00:00:00 2001 From: James Azam Date: Thu, 26 Jan 2023 16:35:43 +0000 Subject: [PATCH 070/828] added a bibliography of branching process applciations to outbreaks --- vignettes/articles/bp_literature.Rmd | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 vignettes/articles/bp_literature.Rmd diff --git a/vignettes/articles/bp_literature.Rmd b/vignettes/articles/bp_literature.Rmd new file mode 100644 index 00000000..780062f3 --- /dev/null +++ b/vignettes/articles/bp_literature.Rmd @@ -0,0 +1,24 @@ +--- +title: "Applications of branching process models to outbreak modelling" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +## Single-type models + +- Blumberg S, Lloyd-Smith J. Comparing methods for estimating R0 from the size distribution of sub- critical transmission chains. Epidemics. 2013; 5(3):131–45. doi: https://doi.org/10.1016/j.epidem.2013.05.002 PMID: 24021520 + +- Blumberg S, Lloyd-Smith JO. Inference of R0 and transmission heterogeneity from the size distribution of stuttering chains. PLoS Comput Biol. 2013; 9(5):e1002993. doi: https://doi.org/10.1371/journal.pcbi.1002993 PMID: 23658504 + +- Farrington C, Kanaan M, Gay N. Branching process models for surveillance of infectious diseases con- trolled by mass vaccination. Biostatistics. 2003; 4(2):279. doi: https://doi.org/10.1093/biostatistics/4.2.279 PMID: 12925522 + +- Nishiura H, Yan P, Sleeman CK, Mode CJ. Estimating the transmission potential of supercritical pro- cesses based on the final size distribution of minor outbreaks. J Theor Biol. 2012; 294:48–55. doi: https://doi.org/10.1016/j.jtbi.2011.10.039 PMID: 22079419 + +## Multi-type models + +- Kucharski, A. J., & Edmunds, W. J. (2015). Characterizing the Transmission Potential of Zoonotic Infections from Minor Outbreaks. PLoS Computational Biology, 11(4), 1–17. https://doi.org/10.1371/journal.pcbi.1004154 From 195c29f34431c7c7770f3f01e17ddc452532f292 Mon Sep 17 00:00:00 2001 From: James Azam Date: Thu, 26 Jan 2023 16:40:38 +0000 Subject: [PATCH 071/828] added a vignette on branching process theory --- vignettes/articles/bp_theory.Rmd | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 vignettes/articles/bp_theory.Rmd diff --git a/vignettes/articles/bp_theory.Rmd b/vignettes/articles/bp_theory.Rmd new file mode 100644 index 00000000..fc1903a8 --- /dev/null +++ b/vignettes/articles/bp_theory.Rmd @@ -0,0 +1,19 @@ +--- +title: "Model and chain likelihood definitions" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Branching process model definition + +This is a work in progress to document how the single and multi-type models used in this +package are defined. + +# Likelihoods +This is a work in progress to document the derivation of analytical solutions +to the likelihoods used here. \ No newline at end of file From 9dd7d0e9826a22c9ee45ddf36481b4fc1781b0ed Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 27 Jan 2023 22:13:36 +0000 Subject: [PATCH 072/828] removed CI files and code of conduct --- CODE_OF_CONDUCT.md | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 CODE_OF_CONDUCT.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 24aa0a3c..00000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,25 +0,0 @@ -# Contributor Code of Conduct - -As contributors and maintainers of this project, we pledge to respect all people who -contribute through reporting issues, posting feature requests, updating documentation, -submitting pull requests or patches, and other activities. - -We are committed to making participation in this project a harassment-free experience for -everyone, regardless of level of experience, gender, gender identity and expression, -sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. - -Examples of unacceptable behavior by participants include the use of sexual language or -imagery, derogatory comments or personal attacks, trolling, public or private harassment, -insults, or other unprofessional conduct. - -Project maintainers have the right and responsibility to remove, edit, or reject comments, -commits, code, wiki edits, issues, and other contributions that are not aligned to this -Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed -from the project team. - -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by -opening an issue or contacting one or more of the project maintainers. - -This Code of Conduct is adapted from the Contributor Covenant -(http://contributor-covenant.org), version 1.0.0, available at -http://contributor-covenant.org/version/1/0/0/ From 86b229ba21a93792f26a41fe2eb79dde30de0d7f Mon Sep 17 00:00:00 2001 From: James Azam Date: Mon, 30 Jan 2023 10:41:25 +0000 Subject: [PATCH 073/828] removed draft vignette on branching process theory --- vignettes/articles/bp_theory.Rmd | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 vignettes/articles/bp_theory.Rmd diff --git a/vignettes/articles/bp_theory.Rmd b/vignettes/articles/bp_theory.Rmd deleted file mode 100644 index fc1903a8..00000000 --- a/vignettes/articles/bp_theory.Rmd +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: "Model and chain likelihood definitions" ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -# Branching process model definition - -This is a work in progress to document how the single and multi-type models used in this -package are defined. - -# Likelihoods -This is a work in progress to document the derivation of analytical solutions -to the likelihoods used here. \ No newline at end of file From 3c33e3563af291a1a71df1a84f59588f3b09bd7e Mon Sep 17 00:00:00 2001 From: James Azam Date: Mon, 30 Jan 2023 10:48:25 +0000 Subject: [PATCH 074/828] moved introduction vignette to README quick start --- README.Rmd | 154 ++++++++++++++++++++++++++++++++++++++++- README.md | 196 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 343 insertions(+), 7 deletions(-) diff --git a/README.Rmd b/README.Rmd index 5405893f..2c055924 100644 --- a/README.Rmd +++ b/README.Rmd @@ -12,21 +12,169 @@ knitr::opts_chunk$set( out.width = "100%" ) ``` + +# _bpmodels_: Methods for analysing the size and length of chains from branching process models + [![R-CMD-check](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml) [![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/main/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) + ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` -`bpmodels` is an R package to simulate and analyse the size and length of branching processes with a given offspring distribution. +`bpmodels` is an R package to simulate and analyse the size and length of +branching processes with a given offspring distribution. # Installation The latest development version of the `bpmodels` package can be installed via ```{r eval=FALSE} -devtools::install_github('sbfnk/bpmodels') +devtools::install_github('epiverse-trace/bpmodels') +``` + +# Quick start + +To load the package, use + +```{r echo=FALSE} +suppressWarnings(library('bpmodels')) +``` + +At the heart of the package are the `chains_ll()` and `chains_sim()` functions. + +## Calculating log-likelihoods + +The `chains_ll()` function calculates the log-likelihood of a distribution of +chain sizes or lengths given an offspring distribution and its associated +parameters. + +If we have observed a distribution of chains of sizes $1, 1, 4, 7$, we can +calculate the log-likelihood of this observed chain by assuming the offspring +per generation is Poisson distributed with a mean number of $0.5$. + +To do this, we run + +```{r} +set.seed(13) +chain_sizes <- c(1, 1, 4, 7) # example of observed chain sizes +chain_ll(x = chain_sizes, offspring = "pois", stat = "size", lambda = 0.5) ``` -Please note that the 'bpmodels' project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. +The first argument of `chain_ll()` is the size (or length) distribution to +analyse. The second argument, `offspring`, specifies the offspring +distribution. This is given as a function used to generate random offspring. +It can be any probability distribution implemented in `R`, that is, one that +has a corresponding function for generating random numbers beginning with the +letter `r`. In the case of the example above, since random Poisson numbers are +generated in `R` using a function called `rpois()`, the string to pass to the +`offspring` argument is `"pois"`. + +The third argument, `stat`, determines whether to analyse chain sizes +(`"size"`, the default if this argument is not specified) or lengths +(`"length"`). Lastly, any named arguments not recognised by `chain_ll()` +are interpreted as parameters of the corresponding probability distribution, +here `lambda = 0.5` as the mean of the Poisson distribution (see the `R` help +page for the [Poisson distribution](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/Poisson.html) for more information). + +# Imperfect observations + +By default, `chain_ll` assumes perfect observation, where `obs_prob = 1` +(See `?chain_ll`). If observations are imperfect, the `chain_ll()` function has +an `obs_prob` argument that can be used to determine the likelihood. In that +case, true chain sizes or lengths are simulated repeatedly (the number of times +given by the `nsim_obs` argument), and the likelihood calculated for each of +these simulations. + +For example, if the probability of observing each case is $30%$, we use + +```{r} +chain_sizes <- c(1, 1, 4, 7) # example of observed chain sizes +ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda = 0.5, + nsim_obs = 10) +summary(ll) +``` + +This returns `10` likelihood values (because `nsim_obs = 10`), which can be +averaged to come up with an overall likelihood estimate. + +To find out about usage of the `chains_ll()` function, you can use the `R` help +file + +```{r eval=FALSE} +?chains_ll +``` + +## Simulating branching processes + +To simulate a branching process, we use the `chain_sim()` function. This function +follows the same syntax as `chain_ll()`. + +Below, we are simulating $5$ chains, assuming the offspring are generated using +a Poisson distribution with mean, `lambda = 5`. By default, `chain_sim()` returns +a vector of chain sizes/lengths. However, to override that so that a tree of +infectees and infectors is returned, we need to specify a function for the serial +interval and set `tree = TRUE` + +```{r} +chain_sim(n = 5, offspring = "pois", stat = "size", lambda = 0.5) +``` + +### Simulating trees +To simulate a tree of branching processes, we do specify the serial interval +generation function and set `tree = TRUE` as follows: + +```{r} +set.seed(13) + +serial_interval <- function(n){rlnorm(n, meanlog = 0.58, sdlog = 1.58)} + +chains_df <- chain_sim(n = 5, offspring = 'pois', lambda = 0.5, stat = 'length', + infinite = 100, serial = serial_interval) + +chains_df +``` + + +# Methodology + +If the probability distribution of chain sizes or lengths has an analytical +solution, this will be used (size distribution: Poisson and negative binomial; +length distribution: Poisson and geometric). + +If an analytical solution does not exist, simulations are used to approximate +this probability distributions (using a linear approximation to the cumulative +distribution for unobserved sizes/lengths). The argument `nsim_offspring` is +used to specify the number of simulations to be used for this approximation. + +For example, to get offspring drawn from a binomial distribution with +probability `prob = 0.5`, we run + +```{r} +chain_ll(chain_sizes, "binom", "size", size = 1, prob = 0.5, nsim_offspring = 100) +``` + +## Package vignettes + +Specific use cases of _bpmodels_ can be found in the [online documentation as package vignettes](https://epiverse-trace.github.io/bpmodels/), under "Articles". + +## Reporting bugs + +To report a bug please open an [issue](https://github.com/epiverse-trace/bpmodels/issues/new/choose). + +## Contribute + +We welcome contributions to enhance the package's functionalities. If you wish to +do so, please follow the [package contributing guide](https://github.com/epiverse-trace/.github/blob/main/CONTRIBUTING.md). + +## Code of conduct + +Please note that the _bpmodels_ project is released with a [Contributor Code of Conduct](https://github.com/epiverse-trace/.github/blob/main/CODE_OF_CONDUCT.md). +By contributing to this project, you agree to abide by its terms. + +## Citing this package + +```{r message=FALSE, warning=FALSE} +citation("bpmodels") +``` \ No newline at end of file diff --git a/README.md b/README.md index b1cbaa53..b2dccbf9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ +# *bpmodels*: Methods for analysing the size and length of chains from branching process models + [![R-CMD-check](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml) @@ -14,9 +16,195 @@ The latest development version of the `bpmodels` package can be installed via ``` r -devtools::install_github('sbfnk/bpmodels') +devtools::install_github('epiverse-trace/bpmodels') +``` + +# Quick start + +To load the package, use + +At the heart of the package are the `chains_ll()` and `chains_sim()` +functions. + +## Calculating log-likelihoods + +The `chains_ll()` function calculates the log-likelihood of a +distribution of chain sizes or lengths given an offspring distribution +and its associated parameters. + +If we have observed a distribution of chains of sizes $1, 1, 4, 7$, we +can calculate the log-likelihood of this observed chain by assuming the +offspring per generation is Poisson distributed with a mean number of +$0.5$. + +To do this, we run + +``` r +set.seed(13) +chain_sizes <- c(1, 1, 4, 7) # example of observed chain sizes +chain_ll(x = chain_sizes, offspring = "pois", stat = "size", lambda = 0.5) +#> [1] -8.607 +``` + +The first argument of `chain_ll()` is the size (or length) distribution +to analyse. The second argument, `offspring`, specifies the offspring +distribution. This is given as a function used to generate random +offspring. It can be any probability distribution implemented in `R`, +that is, one that has a corresponding function for generating random +numbers beginning with the letter `r`. In the case of the example above, +since random Poisson numbers are generated in `R` using a function +called `rpois()`, the string to pass to the `offspring` argument is +`"pois"`. + +The third argument, `stat`, determines whether to analyse chain sizes +(`"size"`, the default if this argument is not specified) or lengths +(`"length"`). Lastly, any named arguments not recognised by `chain_ll()` +are interpreted as parameters of the corresponding probability +distribution, here `lambda = 0.5` as the mean of the Poisson +distribution (see the `R` help page for the [Poisson +distribution](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/Poisson.html) +for more information). + +# Imperfect observations + +By default, `chain_ll` assumes perfect observation, where `obs_prob = 1` +(See `?chain_ll`). If observations are imperfect, the `chain_ll()` +function has an `obs_prob` argument that can be used to determine the +likelihood. In that case, true chain sizes or lengths are simulated +repeatedly (the number of times given by the `nsim_obs` argument), and +the likelihood calculated for each of these simulations. + +For example, if the probability of observing each case is $30%$, we use + +``` r +chain_sizes <- c(1, 1, 4, 7) # example of observed chain sizes +ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda = 0.5, + nsim_obs = 10) +summary(ll) +#> Min. 1st Qu. Median Mean 3rd Qu. Max. +#> -32.1 -26.5 -24.1 -24.9 -22.5 -19.1 +``` + +This returns `10` likelihood values (because `nsim_obs = 10`), which can +be averaged to come up with an overall likelihood estimate. + +To find out about usage of the `chains_ll()` function, you can use the +`R` help file + +``` r +?chains_ll +``` + +## Simulating branching processes + +To simulate a branching process, we use the `chain_sim()` function. This +function follows the same syntax as `chain_ll()`. + +Below, we are simulating $5$ chains, assuming the offspring are +generated using a Poisson distribution with mean, `lambda = 5`. By +default, `chain_sim()` returns a vector of chain sizes/lengths. However, +to override that so that a tree of infectees and infectors is returned, +we need to specify a function for the serial interval and set +`tree = TRUE` + +``` r +chain_sim(n = 5, offspring = "pois", stat = "size", lambda = 0.5) +#> [1] 5 1 1 1 1 +``` + +### Simulating trees + +To simulate a tree of branching processes, we do specify the serial +interval generation function and set `tree = TRUE` as follows: + +``` r +set.seed(13) + +serial_interval <- function(n){rlnorm(n, meanlog = 0.58, sdlog = 1.58)} + +chains_df <- chain_sim(n = 5, offspring = 'pois', lambda = 0.5, stat = 'length', + infinite = 100, serial = serial_interval) + +chains_df +#> n id ancestor generation time +#> 1 1 1 NA 1 0.00000 +#> 2 2 1 NA 1 0.00000 +#> 3 3 1 NA 1 0.00000 +#> 4 4 1 NA 1 0.00000 +#> 5 5 1 NA 1 0.00000 +#> 6 1 2 1 2 0.04772 +#> 7 5 2 1 2 5.57573 +#> 8 5 3 1 2 0.11454 +#> 9 1 3 2 3 2.64367 +#> 10 5 4 2 3 6.57843 +#> 11 1 4 3 4 2.96098 +#> 12 5 5 4 4 10.28370 +#> 13 5 6 5 5 10.37883 +``` + +# Methodology + +If the probability distribution of chain sizes or lengths has an +analytical solution, this will be used (size distribution: Poisson and +negative binomial; length distribution: Poisson and geometric). + +If an analytical solution does not exist, simulations are used to +approximate this probability distributions (using a linear approximation +to the cumulative distribution for unobserved sizes/lengths). The +argument `nsim_offspring` is used to specify the number of simulations +to be used for this approximation. + +For example, to get offspring drawn from a binomial distribution with +probability `prob = 0.5`, we run + +``` r +chain_ll(chain_sizes, "binom", "size", size = 1, prob = 0.5, nsim_offspring = 100) +#> [1] -8.761 ``` -Please note that the ‘bpmodels’ project is released with a [Contributor -Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, -you agree to abide by its terms. +## Package vignettes + +Specific use cases of *bpmodels* can be found in the [online +documentation as package +vignettes](https://epiverse-trace.github.io/bpmodels/), under +“Articles”. + +## Reporting bugs + +To report a bug please open an +[issue](https://github.com/epiverse-trace/bpmodels/issues/new/choose). + +## Contribute + +We welcome contributions to enhance the package’s functionalities. If +you wish to do so, please follow the [package contributing +guide](https://github.com/epiverse-trace/.github/blob/main/CONTRIBUTING.md). + +## Code of conduct + +Please note that the *bpmodels* project is released with a [Contributor +Code of +Conduct](https://github.com/epiverse-trace/.github/blob/main/CODE_OF_CONDUCT.md). +By contributing to this project, you agree to abide by its terms. + +## Citing this package + +``` r +citation("bpmodels") +#> +#> To cite package 'bpmodels' in publications use: +#> +#> Funk S, Finger F (2023). _bpmodels: Analysing chain statistics using +#> branching process models_. R package version 0.1.0, +#> . +#> +#> A BibTeX entry for LaTeX users is +#> +#> @Manual{, +#> title = {bpmodels: Analysing chain statistics using branching process models}, +#> author = {Sebastian Funk and Flavio Finger}, +#> year = {2023}, +#> note = {R package version 0.1.0}, +#> url = {https://github.com/sbfnk/bpmodels}, +#> } +``` From 67644e860cc4ead304ff486f0c9ba517e5323345 Mon Sep 17 00:00:00 2001 From: James Azam Date: Wed, 1 Feb 2023 12:29:42 +0000 Subject: [PATCH 075/828] updated README --- README.Rmd | 7 +++++++ README.md | 49 ++++++++++++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/README.Rmd b/README.Rmd index 2c055924..92e69135 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,8 +16,15 @@ knitr::opts_chunk$set( # _bpmodels_: Methods for analysing the size and length of chains from branching process models +![CRAN/METACRAN](https://img.shields.io/cran/v/bpmodels) +![GitHub R package version](https://img.shields.io/github/r-package/v/epiverse-trace/bpmodels) +![GitHub all releases](https://img.shields.io/github/downloads/epiverse-trace/bpmodels/total?style=flat) +![GitHub issues](https://img.shields.io/github/issues/epiverse-trace/bpmodels) [![R-CMD-check](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml) [![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/main/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) +![GitHub contributors](https://img.shields.io/github/contributors/epiverse-trace/bpmodels) +![GitHub commit activity](https://img.shields.io/github/commit-activity/m/epiverse-trace/bpmodels) +![GitHub](https://img.shields.io/github/license/epiverse-trace/bpmodels) ```{r setup, include=FALSE} diff --git a/README.md b/README.md index b2dccbf9..d0f94754 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,20 @@ +![CRAN/METACRAN](https://img.shields.io/cran/v/bpmodels) ![GitHub R +package +version](https://img.shields.io/github/r-package/v/epiverse-trace/bpmodels) +![GitHub all +releases](https://img.shields.io/github/downloads/epiverse-trace/bpmodels/total?style=flat) +![GitHub +issues](https://img.shields.io/github/issues/epiverse-trace/bpmodels) [![R-CMD-check](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/bpmodels/actions/workflows/R-CMD-check.yaml) [![codecov](https://codecov.io/github/epiverse-trace/bpmodels/branch/main/graphs/badge.svg)](https://codecov.io/github/epiverse-trace/bpmodels) +![GitHub +contributors](https://img.shields.io/github/contributors/epiverse-trace/bpmodels) +![GitHub commit +activity](https://img.shields.io/github/commit-activity/m/epiverse-trace/bpmodels) +![GitHub](https://img.shields.io/github/license/epiverse-trace/bpmodels) `bpmodels` is an R package to simulate and analyse the size and length @@ -43,7 +55,7 @@ To do this, we run set.seed(13) chain_sizes <- c(1, 1, 4, 7) # example of observed chain sizes chain_ll(x = chain_sizes, offspring = "pois", stat = "size", lambda = 0.5) -#> [1] -8.607 +#> [1] -8.607196 ``` The first argument of `chain_ll()` is the size (or length) distribution @@ -82,7 +94,7 @@ ll <- chain_ll(chain_sizes, "pois", "size", obs_prob = 0.3, lambda = 0.5, nsim_obs = 10) summary(ll) #> Min. 1st Qu. Median Mean 3rd Qu. Max. -#> -32.1 -26.5 -24.1 -24.9 -22.5 -19.1 +#> -32.09 -26.52 -24.06 -24.94 -22.49 -19.14 ``` This returns `10` likelihood values (because `nsim_obs = 10`), which can @@ -126,20 +138,20 @@ chains_df <- chain_sim(n = 5, offspring = 'pois', lambda = 0.5, stat = 'length', infinite = 100, serial = serial_interval) chains_df -#> n id ancestor generation time -#> 1 1 1 NA 1 0.00000 -#> 2 2 1 NA 1 0.00000 -#> 3 3 1 NA 1 0.00000 -#> 4 4 1 NA 1 0.00000 -#> 5 5 1 NA 1 0.00000 -#> 6 1 2 1 2 0.04772 -#> 7 5 2 1 2 5.57573 -#> 8 5 3 1 2 0.11454 -#> 9 1 3 2 3 2.64367 -#> 10 5 4 2 3 6.57843 -#> 11 1 4 3 4 2.96098 -#> 12 5 5 4 4 10.28370 -#> 13 5 6 5 5 10.37883 +#> n id ancestor generation time +#> 1 1 1 NA 1 0.00000000 +#> 2 2 1 NA 1 0.00000000 +#> 3 3 1 NA 1 0.00000000 +#> 4 4 1 NA 1 0.00000000 +#> 5 5 1 NA 1 0.00000000 +#> 6 1 2 1 2 0.04771887 +#> 7 5 2 1 2 5.57573333 +#> 8 5 3 1 2 0.11454421 +#> 9 1 3 2 3 2.64367236 +#> 10 5 4 2 3 6.57843219 +#> 11 1 4 3 4 2.96098160 +#> 12 5 5 4 4 10.28370183 +#> 13 5 6 5 5 10.37883069 ``` # Methodology @@ -159,7 +171,7 @@ probability `prob = 0.5`, we run ``` r chain_ll(chain_sizes, "binom", "size", size = 1, prob = 0.5, nsim_offspring = 100) -#> [1] -8.761 +#> [1] -8.760539 ``` ## Package vignettes @@ -194,7 +206,7 @@ citation("bpmodels") #> #> To cite package 'bpmodels' in publications use: #> -#> Funk S, Finger F (2023). _bpmodels: Analysing chain statistics using +#> Funk S, Finger F (????). _bpmodels: Analysing chain statistics using #> branching process models_. R package version 0.1.0, #> . #> @@ -203,7 +215,6 @@ citation("bpmodels") #> @Manual{, #> title = {bpmodels: Analysing chain statistics using branching process models}, #> author = {Sebastian Funk and Flavio Finger}, -#> year = {2023}, #> note = {R package version 0.1.0}, #> url = {https://github.com/sbfnk/bpmodels}, #> } From f923b96f8f7e8617d94932950444cb70bf3105ab Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 3 Feb 2023 21:37:31 +0000 Subject: [PATCH 076/828] added more dependencies --- DESCRIPTION | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7284e3f5..0c6510f2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,18 +15,22 @@ Description: Provides methods to analyse and simulate the size and length License: MIT + file LICENSE URL: https://github.com/sbfnk/bpmodels BugReports: https://github.com/sbfnk/bpmodels/issues +Depends: + R (>= 2.10) Suggests: + bookdown, covr, + dplyr, + ggplot2, knitr, + lubridate, rmarkdown, bookdown, testthat, - truncdist, - dplyr, - ggplot2, - lubridate + truncdist VignetteBuilder: knitr Encoding: UTF-8 +LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 From a5881a028410f75bda970abf7c67ff507cadc7fc Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 3 Feb 2023 21:38:09 +0000 Subject: [PATCH 077/828] added more references to bib lib --- vignettes/references.bib | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/vignettes/references.bib b/vignettes/references.bib index c19f323b..ae7dc255 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -1,3 +1,19 @@ +@article{abbott2020, + title={The transmissibility of novel Coronavirus in the early stages of the 2019-20 outbreak in Wuhan: Exploring initial point-source exposure sizes and durations using scenario analysis}, + author={Abbott, Sam and Hellewell, Joel and Munday, James and Funk, Sebastian and CMMID nCoV working group and others}, + journal={Wellcome open research}, + volume={5}, + year={2020}, + publisher={The Wellcome Trust} +} + +@article{marivate2020, + title={Use of available data to inform the COVID-19 outbreak in South Africa: a case study}, + author={Marivate, Vukosi and Combrink, Herkulaas MvE}, + journal={arXiv preprint arXiv:2004.04813}, + year={2020} +} + @article{Farrington2003, abstract = {Mass vaccination programmes aim to maintain the effective reproduction number R of an infection below unity. We describe methods for monitoring the value of R using surveillance data. The models are based on branching processes in which R is identified with the offspring mean. We derive unconditional likelihoods for the offspring mean using data on outbreak size and outbreak duration. We also discuss Bayesian methods, implemented by Metropolis-Hastings sampling. We investigate by simulation the validity of the models with respect to depletion of susceptibles and under-ascertainment of cases. The methods are illustrated using surveillance data on measles in the USA.}, author = {Farrington, C. P. and Kanaan, M. N. and Gay, N. J.}, From dae4dea54b2edbaa31536cba658b2484e64fdfb6 Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 3 Feb 2023 21:40:20 +0000 Subject: [PATCH 078/828] added the covid-19 SA data and associated doc --- R/data.R | 19 +++++++++++++++++++ data-raw/covid19_sa.R | 17 +++++++++++++++++ data/covid19_sa.rda | Bin 0 -> 227 bytes man/covid19_sa.Rd | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) create mode 100644 R/data.R create mode 100644 data-raw/covid19_sa.R create mode 100644 data/covid19_sa.rda create mode 100644 man/covid19_sa.Rd diff --git a/R/data.R b/R/data.R new file mode 100644 index 00000000..e134c1c3 --- /dev/null +++ b/R/data.R @@ -0,0 +1,19 @@ +#' COVID-19 Data Repository for South Africa +#' +#' An aggregated subset of the COVID-19 Data Repository for South Africa created, +#' maintained and hosted by Data Science for Social Impact research group, +#' led by Dr. Vukosi Marivate ... +#' +#' The data is originally provided as a linelist but has been subsetted and +#' cleaned in `data-raw/covid19_sa.R`. +#' +#' @format ## `covid19_sa` +#' A data frame with 19 rows and 2 columns: +#' \describe{ +#' \item{date}{Date case was reported} +#' \item{cases}{Number of cases} +#' ... +#' } +#' @source +#' Further details in `data-raw/covid19_sa.R`. +"covid19_sa" \ No newline at end of file diff --git a/data-raw/covid19_sa.R b/data-raw/covid19_sa.R new file mode 100644 index 00000000..40afe3be --- /dev/null +++ b/data-raw/covid19_sa.R @@ -0,0 +1,17 @@ +## code to prepare `covid_sa` dataset + +data_url <- 'https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_timeline_confirmed.csv' + +#Read the data in using the url +covid19_sa <- read.csv(data_url) + +#Clean and subset the data we need +covid19_sa <- covid19_sa %>% + dplyr::select(date) %>% + dplyr::mutate(date = lubridate::dmy(date)) %>% + dplyr::filter(date <= lubridate::dmy('20-03-2020')) %>% + dplyr::group_by(date) %>% + dplyr::summarise(cases = n()) %>% + dplyr::ungroup() + +usethis::use_data(covid19_sa, overwrite = TRUE) diff --git a/data/covid19_sa.rda b/data/covid19_sa.rda new file mode 100644 index 0000000000000000000000000000000000000000..43295c0cf928039ee39e9cd1d6c7adea1c3b70e1 GIT binary patch literal 227 zcmV<903829T4*^jL0KkKS$q4essI3sf5ZRtNB{rNIm`5RRBkATR{K- literal 0 HcmV?d00001 diff --git a/man/covid19_sa.Rd b/man/covid19_sa.Rd new file mode 100644 index 00000000..f772efb3 --- /dev/null +++ b/man/covid19_sa.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{covid19_sa} +\alias{covid19_sa} +\title{COVID-19 Data Repository for South Africa} +\format{ +\subsection{\code{covid19_sa}}{ + +A data frame with 19 rows and 2 columns: +\describe{ +\item{date}{Date case was reported} +\item{cases}{Number of cases} +... +} +} +} +\source{ +\url{https://github.com/dsfsi/covid19za} +Further details in \code{data-raw/covid19_sa.R}. +} +\usage{ +covid19_sa +} +\description{ +An aggregated subset of the COVID-19 Data Repository for South Africa created, +maintained and hosted by Data Science for Social Impact research group, +led by Dr. Vukosi Marivate ... +} +\details{ +The data is originally provided as a linelist but has been subsetted and +cleaned in \code{data-raw/covid19_sa.R}. +} +\keyword{datasets} From 5ab97bf7bbe02df900d50143191f4a50200073e9 Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 3 Feb 2023 21:42:44 +0000 Subject: [PATCH 079/828] changed the vignette title --- vignettes/projecting_incidence.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/projecting_incidence.Rmd b/vignettes/projecting_incidence.Rmd index 9256f35f..3cde3865 100644 --- a/vignettes/projecting_incidence.Rmd +++ b/vignettes/projecting_incidence.Rmd @@ -1,5 +1,5 @@ --- -title: "Projecting future disease incidence given early outbreak data" +title: "Projecting COVID-19 incidence using early outbreak data" author: "James Azam, Sebastian Funk" output: bookdown::html_vignette2: @@ -10,7 +10,7 @@ pkgdown: bibliography: references.bib link-citations: true vignette: > - %\VignetteIndexEntry{Projecting future disease incidence given early outbreak data} + %\VignetteIndexEntry{Projecting COVID-19 incidence using early outbreak data} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} editor_options: From 5c37fc92452140aa95526171bdea6b927154b73b Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 3 Feb 2023 21:44:00 +0000 Subject: [PATCH 080/828] updated section on specifying serial interval --- vignettes/projecting_incidence.Rmd | 109 +++++++++++++++++------------ 1 file changed, 65 insertions(+), 44 deletions(-) diff --git a/vignettes/projecting_incidence.Rmd b/vignettes/projecting_incidence.Rmd index 3cde3865..b25d09ca 100644 --- a/vignettes/projecting_incidence.Rmd +++ b/vignettes/projecting_incidence.Rmd @@ -22,79 +22,100 @@ knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, collapse = TRUE, - comment = "#>" + comment = "#>", + dpi = 300 ) ``` -## Description -Branching processes can be used to project disease incidence provided we have some -information on the distribution of times between successive cases (serial interval), -and the distribution of secondary cases produced by a single individual (offspring -distribution). Such simulations can be achieved in `bpmodels` with the `chain_sim()` function. +## Overview +Branching processes can be used to project infectious disease trends provided +we have some information on the distribution of times between +successive cases (serial interval), and the distribution of secondary cases +produced by a single individual (offspring distribution). Such simulations can be achieved in `bpmodels` with the `chain_sim()` function. @Pearson2020, and +@abbott2020 illustrate its application to COVID-19. + +The purpose of this vignette is to use early data on COVID-19 in South Africa [@marivate2020] to illustrate how `bpmodels` can be used to forecast +an outbreak. + Let's load the required packages -```{r loading_packages, include=TRUE} +```{r packages, include=TRUE} library("bpmodels") library('dplyr') library('ggplot2') library('lubridate') ``` -## Disease data +### The data + +We will get and clean the first $15$ days of the COVID-19 +outbreak in South Africa to seed the simulation for this example. -Let's create an outbreak dataset, assuming the cases are sampled from a negative binomial -distribution with mean = 5 and dispersion = 7.5. These parameter values are arbitrarily -chosen for illustrative purposes. +```{r data, message=FALSE} +data_url <- 'https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_timeline_confirmed.csv' -```{r data_generation, message=FALSE} -set.seed(12) -cases_df <- data.frame(date = as.Date('2023-01-01') + seq_len(12), - cases = rnbinom(12, size = 7.5, mu = 5) - ) -head(cases_df) +#Read the data in using the url +covid19_sa <- read.csv(data_url) -ggplot(cases_df, - aes(x = date, y = cases) - ) + - geom_col(fill = 'tomato3', size = 1) +# Subset the first 15 days and count the number of cases per date +covid19_sa <- covid19_sa %>% + dplyr::select(date) %>% + dplyr::mutate(date = lubridate::dmy(date)) %>% + dplyr::filter(date <= lubridate::dmy("20-03-2020")) %>% + dplyr::group_by(date) %>% + dplyr::summarise(cases = n()) %>% + dplyr::ungroup() ``` -## Preparing the inputs +### Preparing the inputs -```{r input_prep, message=FALSE} -# We will create a vector of starting times for each case, using the time of the index cases as the reference point -cases_df$days_since_index <- as.integer(cases_df$date - min(cases_df$date)) +```{r linelist_gen, message=FALSE} +days_since_index <- as.integer(covid19_sa$date - min(covid19_sa$date)) -#'Disaggregate the time series -case_times <- unlist(mapply(function(x, y) rep(x, times = ifelse(y == 0, 1, y)), - cases_df$days_since_index, - cases_df$cases - ) - ) +start_times <- unlist(mapply( + function(x, y) rep(x, times = ifelse(y == 0, 1, y)), + days_since_index, + covid19_sa$cases +)) +``` + +Additionally, `chain_sim()` requires other inputs, which we will specify below: +```{r input_prep2, message=FALSE} #' Date to end simulation (14 day projection in this case) -projection_window <- 14 #2 week ahead projection -project_to_date <- max(cases_df$days_since_index) + projection_window +projection_window <- 14 # 14 days/ 2-week ahead projection + +projection_end_day <- max(days_since_index) + projection_window + +#' Number of simulations +sim_rep <- 100 + +#' Maximum chain size allowed +chain_threshold <- 1000 + +``` + +#### Serial interval + +We also assume based on COVID-19 literature that the +serial interval, $si$, is lognormal distributed as follows: +$ E[\text{si}] = \ln \left( \dfrac{\mu^2}{(\sqrt{\mu^2 + \sigma^2}} \right)$ -#' Number of simulations and maximum chain size -sim_rep <- 1000 -cases_to_project <- 1000 +$\text{SD} [\text{si}] = \sqrt {\ln \left(1 + \dfrac{\sigma^2}{\mu^2} \right)}$ +with $\mu = 4.7$ and standard deviation $\sigma = 2.9$. -### Specifying the `serial` argument to `chain_sim()` -#' Assume serial interval follows log-normal distribution with mean, mu = 4.7, -#' and standard deviation, sigma = 2.9, then the desired standard deviation, si_sd, -#' and mean, si_mean, are -sigma = 2.9 -mu = 4.7 +```{r input_prep3, message=FALSE} +mu <- 4.7 +sigma <- 2.9 -si_sd <- sqrt(log(1 + (sigma/mu)^2)) #log standard deviation -si_mean <- log((mu^2)/(sqrt(sigma^2 + mu^2))) #log mean +si_sd <- sqrt(log(1 + (sigma / mu)^2)) # log standard deviation +si_mean <- log((mu^2) / (sqrt(sigma^2 + mu^2))) # log mean #' serial interval function serial_interval <- function(sample_size) { From 0bc64c48529d502c0e8cd192f9473a1798d930f3 Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 3 Feb 2023 21:45:39 +0000 Subject: [PATCH 081/828] changed x- & y-axis, theme label customizations --- vignettes/projecting_incidence.Rmd | 142 ++++++++++++++++++++--------- 1 file changed, 100 insertions(+), 42 deletions(-) diff --git a/vignettes/projecting_incidence.Rmd b/vignettes/projecting_incidence.Rmd index b25d09ca..e62f695d 100644 --- a/vignettes/projecting_incidence.Rmd +++ b/vignettes/projecting_incidence.Rmd @@ -124,65 +124,123 @@ serial_interval <- function(sample_size) { } ``` -## Simulations +#### Offspring distribution + +We assume an offspring distribution that is distributed as a negative binomial with $R = 2.5$ [@abbott2020] and $k = 0.58$. In this parameterization, R represents the $\mathcal{R_0}$, which is defined as the average number of cases produced by a single individual in an entirely susceptible population. The parameter $k$ represents superspreading, that is, the degree of heterogeneity in transmission by single individuals. + +### Simulations +To summarize the simulation set up, for each of the `r sim_rep` simulations, we want to project cases over a `r projection_window` day period since the last case, assuming that no chain would exceed `r chain_threshold`. + +#### Model assumptions + +`chain_sim()` makes the following simplifying assumptions: + +1. All cases are observed +1. There is no reporting delay +1. Reporting rate is constant through the course of the epidemic +1. No interventions have been implemented +1. Population is homogeneous and well-mixed + ```{r simulations, message=FALSE} -## Chain log-likelihood simulation -sim_chain_sizes <- lapply(seq_len(sim_rep), - function(sim){chain_sim( - n = length(case_times), - offspring = "nbinom", - mu = 2.0, - size = 0.38, - stat = "size", - infinite = cases_to_project, - serial = serial_interval, - t0 = case_times, - tf = project_to_date, - tree = TRUE - ) |> - mutate(sim = sim)} - ) - -sim_output <- do.call(rbind, sim_chain_sizes) +set.seed(1234) + + +sim_chain_sizes <- lapply( + seq_len(sim_rep), + function(sim) { + chain_sim( + n = length(start_times), + offspring = "nbinom", + mu = 2.5, + size = 0.58, + stat = "size", + infinite = chain_threshold, + serial = serial_interval, + t0 = start_times, + tf = projection_end_day, + tree = TRUE + ) %>% + mutate(sim = sim) + } +) + +sim_output <- do.call(rbind, sim_chain_sizes) + +head(sim_output) ``` +From the simulated data, we count the median daily cases across +all simulations and overlay that over a plot of all the projections through time. + +#### Post-processing -### Post-processing ```{r post_processing} -ref_date <- min(cases_df$date) +index_date <- min(covid19_sa$date) -incidence_ts <- sim_output |> - mutate(day = floor(time)) |> - group_by(sim, day) |> - summarise(cases = n()) |> +# Daily number of cases for each simulation +incidence_ts <- sim_output %>% + mutate(day = ceiling(time)) %>% + group_by(sim, day) %>% + summarise(cases = n()) %>% ungroup() +# Add dates +incidence_ts <- incidence_ts %>% + group_by(sim) %>% + mutate(date = index_date + (0:(n() - 1))) %>% + ungroup() + +## Median daily number of cases aggregated across all simulations +median_daily_cases <- incidence_ts %>% + group_by(day) %>% + summarise(median_cases = median(cases)) %>% + ungroup() %>% + arrange(day) -## Median cases by date. -median_daily_cases <- incidence_ts |> - group_by(day)|> - summarise(median_cases = median(cases)) |> - ungroup()|> - arrange(day) |> - mutate(date = ymd(ref_date) + 0:(project_to_date - 1)) +# Add dates +median_daily_cases <- median_daily_cases %>% + mutate(date = index_date + 0:projection_end_day) %>% + ungroup() ``` -## Visualization -```{r visualisation} +### Visualization + +```{r viz, fig.cap ="Projected COVID-19 epidemiological trend. Gray lines represent individual simulation results and red dots represent the median daily cases across all simulations.", fig.width=2.0, fig.height=1.8} # Visualization -cases_plot <- ggplot(data = median_daily_cases) + - geom_col(aes(x = date, y = median_cases), - fill = "tomato3", - size = 1 +cases_plot <- ggplot(data = incidence_ts) + + geom_line(aes( + x = date, + y = cases, + group = sim + ), + color = "grey", + linewidth = 1.2, + alpha = 0.25 + ) + + geom_point( + data = median_daily_cases, + aes( + x = date, + y = median_cases + ), + color = "tomato3", + size = 0.75 + ) + + scale_x_continuous( + breaks = seq(min(incidence_ts$date), max(incidence_ts$date), 10), + labels = seq(min(incidence_ts$date), max(incidence_ts$date), 10) ) + - scale_y_continuous(breaks = seq(0, max(median_daily_cases$median_cases) + 20, 20), - labels = seq(0, max(median_daily_cases$median_cases) + 20, 20) + scale_y_continuous( + breaks = seq(0, max(incidence_ts$cases) + 200, 100), + labels = seq(0, max(incidence_ts$cases) + 200, 100) ) + - labs(x = 'Date', y = 'Daily cases (median)') + - theme_minimal(base_size = 14) + labs(x = "Date", y = "Daily cases (median)") + + theme_minimal(base_size = 4) + + NULL print(cases_plot) ``` +### References From 3b16119f80faa6385df2ed9b9583a9ba41851f94 Mon Sep 17 00:00:00 2001 From: James Azam Date: Mon, 6 Feb 2023 11:50:25 +0000 Subject: [PATCH 082/828] generated basic package-level documentation. --- R/bpmodels-package.R | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 R/bpmodels-package.R diff --git a/R/bpmodels-package.R b/R/bpmodels-package.R new file mode 100644 index 00000000..a65cf643 --- /dev/null +++ b/R/bpmodels-package.R @@ -0,0 +1,6 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +## usethis namespace: end +NULL From ff087bfee4ff844c236630d4411e09921578b6f4 Mon Sep 17 00:00:00 2001 From: James Azam Date: Mon, 6 Feb 2023 15:09:16 +0000 Subject: [PATCH 083/828] updated bib lib --- vignettes/references.bib | 439 ++++++++++++++++++++------------------- 1 file changed, 227 insertions(+), 212 deletions(-) diff --git a/vignettes/references.bib b/vignettes/references.bib index ae7dc255..7cec9b45 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -1,233 +1,248 @@ @article{abbott2020, - title={The transmissibility of novel Coronavirus in the early stages of the 2019-20 outbreak in Wuhan: Exploring initial point-source exposure sizes and durations using scenario analysis}, - author={Abbott, Sam and Hellewell, Joel and Munday, James and Funk, Sebastian and CMMID nCoV working group and others}, - journal={Wellcome open research}, - volume={5}, - year={2020}, - publisher={The Wellcome Trust} + title = {The transmissibility of novel Coronavirus in the early stages of the 2019-20 outbreak in Wuhan: Exploring initial point-source exposure sizes and durations using scenario analysis}, + author = {Abbott, Sam and Hellewell, Joel and Munday, James and Funk, Sebastian and CMMID nCoV working group and others}, + journal = {Wellcome open research}, + volume = {5}, + year = {2020}, + publisher = {The Wellcome Trust} } - -@article{marivate2020, - title={Use of available data to inform the COVID-19 outbreak in South Africa: a case study}, - author={Marivate, Vukosi and Combrink, Herkulaas MvE}, - journal={arXiv preprint arXiv:2004.04813}, - year={2020} +@article{Alene2021, + abstract = {Background: Understanding the epidemiological parameters that determine the transmission dynamics of COVID-19 is essential for public health intervention. Globally, a number of studies were conducted to estimate the average serial interval and incubation period of COVID-19. Combining findings of existing studies that estimate the average serial interval and incubation period of COVID-19 significantly improves the quality of evidence. Hence, this study aimed to determine the overall average serial interval and incubation period of COVID-19. Methods: We followed the PRISMA checklist to present this study. A comprehensive search strategy was carried out from international electronic databases (Google Scholar, PubMed, Science Direct, Web of Science, CINAHL, and Cochrane Library) by two experienced reviewers (MAA and DBK) authors between the 1st of June and the 31st of July 2020. All observational studies either reporting the serial interval or incubation period in persons diagnosed with COVID-19 were included in this study. Heterogeneity across studies was assessed using the I2 and Higgins test. The NOS adapted for cross-sectional studies was used to evaluate the quality of studies. A random effect Meta-analysis was employed to determine the pooled estimate with 95% (CI). Microsoft Excel was used for data extraction and R software was used for analysis. Results: We combined a total of 23 studies to estimate the overall mean serial interval of COVID-19. The mean serial interval of COVID-19 ranged from 4. 2 to 7.5 days. Our meta-analysis showed that the weighted pooled mean serial interval of COVID-19 was 5.2 (95%CI: 4.9–5.5) days. Additionally, to pool the mean incubation period of COVID-19, we included 14 articles. The mean incubation period of COVID-19 also ranged from 4.8 to 9 days. Accordingly, the weighted pooled mean incubation period of COVID-19 was 6.5 (95%CI: 5.9–7.1) days. Conclusions: This systematic review and meta-analysis showed that the weighted pooled mean serial interval and incubation period of COVID-19 were 5.2, and 6.5 days, respectively. In this study, the average serial interval of COVID-19 is shorter than the average incubation period, which suggests that substantial numbers of COVID-19 cases will be attributed to presymptomatic transmission.}, + author = {Alene, Muluneh and Yismaw, Leltework and Assemie, Moges Agazhe and Ketema, Daniel Bekele and Gietaneh, Wodaje and Birhan, Tilahun Yemanu}, + doi = {10.1186/s12879-021-05950-x}, + issn = {14712334}, + journal = {BMC Infectious Diseases}, + keywords = {COVID-19,Incubation period,Meta-analysis,Serial interval}, + number = {1}, + pages = {1--9}, + pmid = {33706702}, + publisher = {BMC Infectious Diseases}, + title = {{Serial interval and incubation period of COVID-19: a systematic review and meta-analysis}}, + volume = {21}, + year = {2021} } -@article{Farrington2003, -abstract = {Mass vaccination programmes aim to maintain the effective reproduction number R of an infection below unity. We describe methods for monitoring the value of R using surveillance data. The models are based on branching processes in which R is identified with the offspring mean. We derive unconditional likelihoods for the offspring mean using data on outbreak size and outbreak duration. We also discuss Bayesian methods, implemented by Metropolis-Hastings sampling. We investigate by simulation the validity of the models with respect to depletion of susceptibles and under-ascertainment of cases. The methods are illustrated using surveillance data on measles in the USA.}, -author = {Farrington, C. P. and Kanaan, M. N. and Gay, N. J.}, -doi = {10.1093/biostatistics/4.2.279}, -issn = {14654644}, -journal = {Biostatistics (Oxford, England)}, -number = {2}, -pages = {279--295}, -title = {{Branching process models for surveillance of infectious diseases controlled by mass vaccination.}}, -volume = {4}, -year = {2003} -} -@article{Jacob2010, -abstract = {Branching processes are stochastic individual-based processes leading consequently to a bottom-up approach. In addition, since the state variables are random integer variables (representing population sizes), the extinction occurs at random finite time on the extinction set, thus leading to fine and realistic predictions. Starting from the simplest and well-known single-type Bienaym{\'{e}}-Galton-Watson branching process that was used by several authors for approximating the beginning of an epidemic, we then present a general branching model with age and population dependent individual transitions. However contrary to the classical Bienaym{\'{e}}-Galton-Watson or asymptotically Bienaym{\'{e}}-Galton-Watson setting, where the asymptotic behavior of the process, as time tends to infinity, is well understood, the asymptotic behavior of this general process is a new question. Here we give some solutions for dealing with this problem depending on whether the initial population size is large or small, and whether the disease is rare or non-rare when the initial population size is large.}, -author = {Jacob, Christine}, -doi = {10.3390/ijerph7031204}, -issn = {16604601}, -journal = {International Journal of Environmental Research and Public Health}, -keywords = {Age-dependence,Branching process,Epidemic size,Extinction time,Population-dependence}, -number = {3}, -pages = {1186--1204}, -title = {{Branching processes: Their role in epidemiology}}, -volume = {7}, -year = {2010} +@article{Allen2012, + abstract = {The basic reproduction number, ℛ(0), one of the most well-known thresholds in deterministic epidemic theory, predicts a disease outbreak if ℛ(0)>1. In stochastic epidemic theory, there are also thresholds that predict a major outbreak. In the case of a single infectious group, if ℛ(0)>1 and i infectious individuals are introduced into a susceptible population, then the probability of a major outbreak is approximately 1-(1/ℛ(0))( i ). With multiple infectious groups from which the disease could emerge, this result no longer holds. Stochastic thresholds for multiple groups depend on the number of individuals within each group, i ( j ), j=1, {\ldots}, n, and on the probability of disease extinction for each group, q ( j ). It follows from multitype branching processes that the probability of a major outbreak is approximately [Formula: see text]. In this investigation, we summarize some of the deterministic and stochastic threshold theory, illustrate how to calculate the stochastic thresholds, and derive some new relationships between the deterministic and stochastic thresholds.}, + author = {Allen, Linda J.S. and Lahodny, Glenn E.}, + doi = {10.1080/17513758.2012.665502}, + issn = {17513758}, + journal = {Journal of Biological Dynamics}, + keywords = {multitype branching processes,reproduction numbers}, + number = {2}, + pages = {590--611}, + title = {{Extinction thresholds in deterministic and stochastic epidemic models}}, + volume = {6}, + year = {2012} } + @article{Blumberg2013, -abstract = {Many diseases exhibit subcritical transmission (i.e. 01. In stochastic epidemic theory, there are also thresholds that predict a major outbreak. In the case of a single infectious group, if ℛ(0)>1 and i infectious individuals are introduced into a susceptible population, then the probability of a major outbreak is approximately 1-(1/ℛ(0))( i ). With multiple infectious groups from which the disease could emerge, this result no longer holds. Stochastic thresholds for multiple groups depend on the number of individuals within each group, i ( j ), j=1, {\ldots}, n, and on the probability of disease extinction for each group, q ( j ). It follows from multitype branching processes that the probability of a major outbreak is approximately [Formula: see text]. In this investigation, we summarize some of the deterministic and stochastic threshold theory, illustrate how to calculate the stochastic thresholds, and derive some new relationships between the deterministic and stochastic thresholds.}, -author = {Allen, Linda J.S. and Lahodny, Glenn E.}, -doi = {10.1080/17513758.2012.665502}, -issn = {17513758}, -journal = {Journal of Biological Dynamics}, -keywords = {multitype branching processes,reproduction numbers}, -number = {2}, -pages = {590--611}, -title = {{Extinction thresholds in deterministic and stochastic epidemic models}}, -volume = {6}, -year = {2012} +@article{Farrington1999, + abstract = {We consider the distribution of the number of generations to extinction in subcritical branching processes, with particular emphasis on applications to the spread of infectious diseases. We derive the generation distributions for processes with Bernoulli, geometric and Poisson offspring, and discuss some of their distributional and inferential properties. We present applications to the spread of infection in highly vaccinated populations, outbreaks of enteric fever, and person-to-person transmission of human monkeypox.}, + author = {Farrington, C. P. and Grant, A. D.}, + doi = {10.1239/jap/1032374633}, + issn = {00219002}, + journal = {Journal of Applied Probability}, + keywords = {Branching process,Epidemic model,Extinction,Generation distribution,Maximum likelihood estimation,Power series family}, + number = {3}, + pages = {771--779}, + title = {{The distribution of time to extinction in subcritical branching processes: Applications to outbreaks of infectious disease}}, + volume = {36}, + year = {1999} } -@article{Blumberg2013a, -abstract = {For many infectious disease processes such as emerging zoonoses and vaccine-preventable diseases, 0