-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Rankin_summer
authored and
Rankin_summer
committed
Jun 26, 2020
1 parent
35c2fff
commit 5c1fdb7
Showing
15 changed files
with
2,285 additions
and
283 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
{% set name = "bloatectomy" %} | ||
{% set version = "0.0.12" %} | ||
|
||
package: | ||
name: "{{ name|lower }}" | ||
version: "{{ version }}" | ||
|
||
source: | ||
url: "https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz" | ||
sha256: 4f84db2264a3e337b13bde94c1154e1e46f9efaa3c501b26b5ee67f095f73363 | ||
|
||
build: | ||
number: 0 | ||
script: "{{ PYTHON }} -m pip install . -vv" | ||
|
||
requirements: | ||
host: | ||
- pip | ||
- python | ||
run: | ||
- python | ||
|
||
test: | ||
imports: | ||
- bloatectomy | ||
|
||
about: | ||
home: "https://github.com/MIT-LCP/bloatectomy" | ||
license: GPL-3.0-or-later | ||
license_family: GPL3 | ||
license_file: LICENSE | ||
summary: "Bloatectomy: a method for the identification and removal of duplicate text in the bloated notes of electronic health records and other documents." | ||
doc_url: | ||
dev_url: | ||
|
||
extra: | ||
recipe-maintainers: | ||
- 1fmusic |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
\relax | ||
\providecommand\hyper@newdestlabel[2]{} | ||
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} | ||
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined | ||
\global\let\oldcontentsline\contentsline | ||
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} | ||
\global\let\oldnewlabel\newlabel | ||
\gdef\newlabel#1#2{\newlabelxx{#1}#2} | ||
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} | ||
\AtEndDocument{\ifx\hyper@anchor\@undefined | ||
\let\contentsline\oldcontentsline | ||
\let\newlabel\oldnewlabel | ||
\fi} | ||
\fi} | ||
\global\let\hyper@last\relax | ||
\gdef\HyperFirstAtBeginDocument#1{#1} | ||
\providecommand\HyField@AuxAddToFields[1]{} | ||
\providecommand\HyField@AuxAddToCoFields[2]{} | ||
\citation{mimiciii} | ||
\citation{mimiciiidata} | ||
\citation{physionet} | ||
\@writefile{toc}{\contentsline {chapter}{Bloatectomy}{0}{}} | ||
\@writefile{toc}{\contentsline {title}{Bloatectomy:}{1}{chapter.1}\protected@file@percent } | ||
\@writefile{toc}{\authcount {3}} | ||
\@writefile{toc}{\contentsline {author}{Summer K Rankin$^{*}$\unskip {} \and Roselie Bright\unskip {} \and Katherine Dowdy\unskip {}}{1}{chapter.1}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1.1}\protected@file@percent } | ||
\citation{Dean:2018} | ||
\citation{Cohen:2013} | ||
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces \textbf {Graphical Abstract}}}{2}{figure.1.1}\protected@file@percent } | ||
\newlabel{abstract}{{1}{2}{\textbf {Graphical Abstract}}{figure.1.1}{}} | ||
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces \textbf {Progressively longer nurse\IeC {\textquoteright }s notes over one shift.} In this figure, we used a manual method to highlight identical sentences with unique colors. The original sentences are in bold font as well. This and similar figures in this document are purposely small and low resolution to provide further patient and provider privacy protection without disturbing our point about duplicate text.}}{2}{figure.1.2}\protected@file@percent } | ||
\newlabel{notes1}{{2}{2}{\textbf {Progressively longer nurse’s notes over one shift.} In this figure, we used a manual method to highlight identical sentences with unique colors. The original sentences are in bold font as well. This and similar figures in this document are purposely small and low resolution to provide further patient and provider privacy protection without disturbing our point about duplicate text}{figure.1.2}{}} | ||
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces \textbf { Example of two physicians\IeC {\textquoteright } notes from the same time period.} In this figure, we used a manual method to highlight identical sentences with unique colors. The original sentences are in bold font as well.}}{3}{figure.1.3}\protected@file@percent } | ||
\newlabel{notes2}{{3}{3}{\textbf { Example of two physicians’ notes from the same time period.} In this figure, we used a manual method to highlight identical sentences with unique colors. The original sentences are in bold font as well}{figure.1.3}{}} | ||
\citation{Su:2008} | ||
\citation{copyfind} | ||
\citation{Ceglarek:2013} | ||
\citation{Thielke:2007} | ||
\citation{Wrenn:2010} | ||
\citation{Altschul:1990} | ||
\citation{Cohen:2013} | ||
\citation{Cohen:2013} | ||
\citation{Gabriel:2018} | ||
\citation{Lancichinetti:2015} | ||
\citation{Cohen:2014} | ||
\citation{Zhang:2011} | ||
\citation{Welch:1984} | ||
\citation{python3} | ||
\@writefile{toc}{\contentsline {section}{\numberline {2}Duplicate Detection}{5}{section.1.2}\protected@file@percent } | ||
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces \emph {High-level flowchart of the Bloatectomy method.}}}{5}{figure.1.4}\protected@file@percent } | ||
\newlabel{flowchart}{{4}{5}{\emph {High-level flowchart of the Bloatectomy method.}}{figure.1.4}{}} | ||
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Document Selection}{5}{subsection.1.2.1}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Code and Example}{5}{subsection.1.2.2}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {subsubsection}{Create Sentence Tokens}{6}{section*.3}\protected@file@percent } | ||
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces \emph {The regular expression used for the first tokenization.}}}{6}{figure.1.5}\protected@file@percent } | ||
\newlabel{regex1}{{5}{6}{\emph {The regular expression used for the first tokenization.}}{figure.1.5}{}} | ||
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Tokens after first tokenization}}{7}{table.1.1}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {subsubsection}{Create List Token}{7}{section*.4}\protected@file@percent } | ||
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces \emph {The regular expression used to calculate the second tokenization.}}}{7}{figure.1.6}\protected@file@percent } | ||
\newlabel{regex2}{{6}{7}{\emph {The regular expression used to calculate the second tokenization.}}{figure.1.6}{}} | ||
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Tokens after second tokenization}}{8}{table.1.2}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {subsubsection}{Assign Either New or Old Hash Number}{8}{section*.5}\protected@file@percent } | ||
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Hash Table of Tokens}}{9}{table.1.3}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {subsubsection}{Use the Hash Table of Tokens}{9}{section*.6}\protected@file@percent } | ||
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Marked Table of Tokens}}{10}{table.1.4}\protected@file@percent } | ||
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces \emph { Highlighted duplicates in the output from the example text.} }}{10}{figure.1.7}\protected@file@percent } | ||
\newlabel{output}{{7}{10}{\emph { Highlighted duplicates in the output from the example text.}}{figure.1.7}{}} | ||
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Parameter Adjustments}{10}{subsection.1.2.3}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {section}{\numberline {3}Results}{11}{section.1.3}\protected@file@percent } | ||
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces \emph { The same nurse\IeC {\textquoteright }s notes from \ref {notes1}, after Bloatectomy.} }}{11}{figure.1.8}\protected@file@percent } | ||
\newlabel{output_notes1}{{8}{11}{\emph { The same nurse’s notes from \ref {notes1}, after Bloatectomy.}}{figure.1.8}{}} | ||
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces \emph { The same physicians' notes from \ref {notes2}, after Bloatectomy.} }}{11}{figure.1.9}\protected@file@percent } | ||
\newlabel{output_notes2}{{9}{11}{\emph { The same physicians' notes from \ref {notes2}, after Bloatectomy.}}{figure.1.9}{}} | ||
\@writefile{toc}{\contentsline {section}{\numberline {4}Conclusions}{12}{section.1.4}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {section}{\numberline {5}Installation}{12}{section.1.5}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {section}{\numberline {6}Examples}{12}{section.1.6}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {section}{\numberline {7}Parameters}{14}{section.1.7}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {subsubsection}{Acknowledgements}{15}{section*.7}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {subsubsection}{Funding}{15}{section*.8}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {subsubsection}{Conflict of Interest}{15}{section*.9}\protected@file@percent } | ||
\bibcite{Dean:2018}{1} | ||
\bibcite{March:2016}{2} | ||
\bibcite{Tsou:2017}{3} | ||
\bibcite{Corwin:2004}{4} | ||
\bibcite{Cohen:2013}{5} | ||
\bibcite{Cohen:2014}{6} | ||
\bibcite{Ceglarek:2013}{7} | ||
\bibcite{Carson:2012}{8} | ||
\bibcite{copyfind}{9} | ||
\bibcite{aws}{10} | ||
\bibcite{Altschul:1990}{11} | ||
\bibcite{mimiciii}{12} | ||
\bibcite{mimiciiidata}{13} | ||
\@writefile{toc}{\contentsline {subsubsection}{Disclaimer}{16}{section*.10}\protected@file@percent } | ||
\@writefile{toc}{\contentsline {subsubsection}{Data Availability}{16}{section*.11}\protected@file@percent } | ||
\bibcite{physionet}{14} | ||
\bibcite{python3}{15} | ||
\bibcite{Lancichinetti:2015}{16} | ||
\bibcite{Mckinney:2010}{17} | ||
\bibcite{Meystre:2008}{18} | ||
\bibcite{Mikolov:2013}{19} | ||
\bibcite{Su:2008}{20} | ||
\bibcite{Thielke:2007}{21} | ||
\bibcite{Welch:1984}{22} | ||
\bibcite{Wrenn:2010}{23} | ||
\bibcite{Zhang:2011}{24} | ||
\bibcite{Gabriel:2018}{25} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
\begin{thebibliography}{} | ||
|
||
\end{thebibliography} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
This is BibTeX, Version 0.99d (TeX Live 2019) | ||
Capacity: max_strings=100000, hash_size=100000, hash_prime=85009 | ||
The top-level auxiliary file: bloatectomy_paper.aux | ||
The style file: plain.bst | ||
I couldn't open database file bloatecomy_paper.bib | ||
---line 63 of file bloatectomy_paper.aux | ||
: \bibdata{bloatecomy_paper | ||
: } | ||
I'm skipping whatever remains of this command | ||
I found no database files---while reading file bloatectomy_paper.aux | ||
Warning--I didn't find a database entry for "mimiciii" | ||
Warning--I didn't find a database entry for "mimiciiidata" | ||
Warning--I didn't find a database entry for "physionet" | ||
You've used 0 entries, | ||
2118 wiz_defined-function locations, | ||
500 strings with 4052 characters, | ||
and the built_in function-call counts, 18 in all, are: | ||
= -- 0 | ||
> -- 0 | ||
< -- 0 | ||
+ -- 0 | ||
- -- 0 | ||
* -- 2 | ||
:= -- 7 | ||
add.period$ -- 0 | ||
call.type$ -- 0 | ||
change.case$ -- 0 | ||
chr.to.int$ -- 0 | ||
cite$ -- 0 | ||
duplicate$ -- 0 | ||
empty$ -- 1 | ||
format.name$ -- 0 | ||
if$ -- 1 | ||
int.to.chr$ -- 0 | ||
int.to.str$ -- 0 | ||
missing$ -- 0 | ||
newline$ -- 3 | ||
num.names$ -- 0 | ||
pop$ -- 0 | ||
preamble$ -- 1 | ||
purify$ -- 0 | ||
quote$ -- 0 | ||
skip$ -- 1 | ||
stack$ -- 0 | ||
substring$ -- 0 | ||
swap$ -- 0 | ||
text.length$ -- 0 | ||
text.prefix$ -- 0 | ||
top$ -- 0 | ||
type$ -- 0 | ||
warning$ -- 0 | ||
while$ -- 0 | ||
width$ -- 0 | ||
write$ -- 2 | ||
(There were 2 error messages) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
\BOOKMARK [0][-]{chapter.1}{Bloatectomy:}{}% 1 | ||
\BOOKMARK [1][-]{section.1.1}{Introduction}{chapter.1}% 2 | ||
\BOOKMARK [1][-]{section.1.2}{Duplicate Detection}{chapter.1}% 3 | ||
\BOOKMARK [2][-]{subsection.1.2.1}{Document Selection}{section.1.2}% 4 | ||
\BOOKMARK [2][-]{subsection.1.2.2}{Code and Example}{section.1.2}% 5 | ||
\BOOKMARK [3][-]{section*.3}{Create Sentence Tokens}{subsection.1.2.2}% 6 | ||
\BOOKMARK [3][-]{section*.4}{Create List Token}{subsection.1.2.2}% 7 | ||
\BOOKMARK [3][-]{section*.5}{Assign Either New or Old Hash Number}{subsection.1.2.2}% 8 | ||
\BOOKMARK [3][-]{section*.6}{Use the Hash Table of Tokens}{subsection.1.2.2}% 9 | ||
\BOOKMARK [2][-]{subsection.1.2.3}{Parameter Adjustments}{section.1.2}% 10 | ||
\BOOKMARK [1][-]{section.1.3}{Results}{chapter.1}% 11 | ||
\BOOKMARK [1][-]{section.1.4}{Conclusions}{chapter.1}% 12 | ||
\BOOKMARK [1][-]{section.1.5}{Installation}{chapter.1}% 13 | ||
\BOOKMARK [1][-]{section.1.6}{Examples}{chapter.1}% 14 | ||
\BOOKMARK [1][-]{section.1.7}{Parameters}{chapter.1}% 15 | ||
\BOOKMARK [2][-]{section*.7}{Acknowledgements}{section.1.7}% 16 | ||
\BOOKMARK [3][-]{section*.8}{Funding}{section*.7}% 17 | ||
\BOOKMARK [3][-]{section*.9}{Conflict of Interest}{section*.7}% 18 | ||
\BOOKMARK [3][-]{section*.10}{Disclaimer}{section*.7}% 19 | ||
\BOOKMARK [3][-]{section*.11}{Data Availability}{section*.7}% 20 |
Binary file not shown.
Oops, something went wrong.