Merge pull request #14 from siemdejong/brain

Brain
siemdejong · May 26, 2023 · 82df5a8 · 82df5a8
2 parents 2585e26 + b454862
commit 82df5a8
Show file tree

Hide file tree

Showing 76 changed files with 902 additions and 163 deletions.
diff --git a/.vscode/ltex.dictionary.en-US.txt b/.vscode/ltex.dictionary.en-US.txt
@@ -0,0 +1,47 @@
+SimCLR
+SwAV
+PMC-HHG
+AUPRG
+ImageNet
+AUPR
+CCMIL
+VarMIL
+kurtosis
+pilocytic
+astrocytoma
+mpp
+Pilocytic
+astrocytomas
+astrocytes
+mpp
+Lanczos
+EntropyMasker
+IoU
+Máxima
+ependymoma
+SCLICOM
+informedness
+imgclsmob
+DeepMIL
+TinyClinicalBERT
+Huggingface
+Optuna
+Torchmetrics
+pyprg
+Dataverse
+pilocytic
+Craniopharyngioma
+Ganglioglioma
+Ependymoma
+Dysembryoplastic
+neuroepithelial
+Subependymal
+SimCLR
+iso-accuracy
+iso-accuracies
+SCLICOM
+intratumor
+parenchyma
+OOV
+ELMo
+Informedness
diff --git a/.vscode/ltex.hiddenFalsePositives.en-US.txt b/.vscode/ltex.hiddenFalsePositives.en-US.txt
@@ -0,0 +1,16 @@
+{"rule":"COMMA_PARENTHESIS_WHITESPACE","sentence":"^\\QThe errors on the test AUPRG are 122 106 , which is most probably the result of a small dataset.\\E$"}
+{"rule":"COMMA_PARENTHESIS_WHITESPACE","sentence":"^\\QThe tiles that are presented to the model are 44.8 \\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q.8 .\\E$"}
+{"rule":"MORFOLOGIK_RULE_EN_US","sentence":"^\\QFlowchart for training of Self-supervised pre-training and CLInical COntext-aware Multi-instance learning (SCLICOM).\\E$"}
+{"rule":"AN_AND","sentence":"^\\QOne was pre-trained on ImageNet and another feature extractor has an He initialized ShuffleNetV2 backbone and was trained using SimCLR.\\E$"}
+{"rule":"WERE_VBB","sentence":"^\\QAll parameters other than BERT's were He initialized.\\E$"}
+{"rule":"MORFOLOGIK_RULE_EN_US","sentence":"^\\QGitHub/Dataverse/Figshare/etc.\\E$"}
+{"rule":"MORFOLOGIK_RULE_EN_US","sentence":"^\\Q[width=]pediatric-brain-tumours/images/classifier.svg [Multi-instance learning classification] Extracted features (tile features in this work) are presented to a multi-layer perceptron (MLP) with learnable weights.\\E$"}
+{"rule":"MORFOLOGIK_RULE_EN_US","sentence":"^\\Q[width=]pediatric-brain-tumours/images/explainer.svg [Tile importances] Visualizing tile importances.\\E$"}
+{"rule":"MORFOLOGIK_RULE_EN_US","sentence":"^\\Q[width=]pediatric-brain-tumours/images/ccmil.svg [Clinical Context Multi-Instance Learning aggregator.]\\E$"}
+{"rule":"MORFOLOGIK_RULE_EN_US","sentence":"^\\QThe model is named SCLICOM (from Self-supervised pre-training and CLInical COntext-aware Multi-instance learning).\\E$"}
+{"rule":"HE_THE","sentence":"^\\QThree different masking algorithm designed for HE pathology\\E$"}
+{"rule":"COMMA_COMPOUND_SENTENCE_2","sentence":"^\\QFor every mask, the IoU is calculated and its mean is reported.\\E$"}
+{"rule":"A_NNS","sentence":"^\\QImproved FESI \\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q is an improvement of FESI where the input image is change to LAB color space and the L and A channels are changed to maximum intensity.\\E$"}
+{"rule":"MORFOLOGIK_RULE_EN_US","sentence":"^\\QFor EntropyMasker, the local entropy was calculated with a disk with a radius of 5 px as structure element.\\E$"}
+{"rule":"COMMA_PARENTHESIS_WHITESPACE","sentence":"^\\QThe loss function is typically the normalized-temperature cross-entropy loss (NT-Xent) and is defined as \\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q, where \\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q is the similarity , \\E(?:Dummy|Ina|Jimmy-)[0-9]+$"}
+{"rule":"COMMA_PARENTHESIS_WHITESPACE","sentence":"^\\QThe loss function is typically the normalized-temperature cross-entropy loss (NT-Xent) and is defined as \\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q, where \\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q is the similarity , \\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q \\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q the temperature to scale the similarity with, and \\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q the indicator function which maps all elements that satify it subscript to 1.\\E$"}
diff --git a/ANN/theory.tex b/ANN/theory.tex
@@ -34,7 +34,7 @@
 \section[CNN building blocks]{The building blocks of convolutional neural networks}
 
 \subsection{Artificial neural network}
-Neural network
+Neural network and backpropagation.
 
 \subsection{Convolutional layers}
 For a neural network to qualify as a convolutional neural network (CNN), at least one of its layers must be a convolution.

diff --git a/README.md b/README.md
@@ -169,15 +169,15 @@ Yet to be adapted to this study.
 
 - [ ] General introduction
     - [ ] Link skin and brain project
-    - [ ] Mention TRIPOD-AI
+    - [x] Mention TRIPOD-AI
 
 - [ ] Theoretical background of convolutional neural networks
 
 - [ ] Skinstression
     - [ ] Abstract
-    - [ ] Introduction
-        - [ ] Background (diagnostic + rationale for dev/val + purpose)
-        - [ ] Objectives (development + validation)
+    - [x] Introduction
+        - [x] Background (diagnostic + rationale for dev/val + purpose)
+        - [x] Objectives (development + validation)
     <!-- - [ ] Theory
         - [ ] Searching for a simple skin strain-stress model
             - [ ] Measurements
@@ -257,28 +257,26 @@ Yet to be adapted to this study.
     - [ ] Funding?
     - [ ] References
 
-- [ ] Pediatric brain tumours
+- [ ] Pediatric brain tumors
     - [ ] Abstract
     - [ ] Introduction
         - [ ] Background (diagnostic + rationale for dev/val + purpose)
         - [ ] Objectives (development + validation)
     - [ ] Theory
-      - [ ] Tumours
-      - [ ] Feature extraction
+      - [x] Feature extraction
       - [x] MIL
-        - [ ] Classical
+        - [x] Classical
         - [x] DeepMIL
         - [x] VarMIL
       - [x] Model performance
-        - [x] F1
-        - [x] PR-AUC
+        - [x] ROC Curve
+        - [x] PR Curve
+        - [x] PRG Curve
         - [x] IoU
-        - [ ] Informedness
-      - [ ] Sensitivity (recall)
     - [ ] Methods
-        - [ ] Sources of data
-            - [ ] source of data of training/val/test
-            - [ ] origin of data
+        - [x] Sources of data
+            - [x] source of data of training/val/test
+            - [x] origin of data
             - [x] dates of data collection
         - [x] Participants (study setting + eligibility + no specific treatment)
             - [x] study setting: tertiary care, Princess Máxima Center
@@ -294,14 +292,14 @@ Yet to be adapted to this study.
               - [ ] (optionally) denoising
               - [ ] ...
             - [ ] data augmentation
-        - [ ] Masking (mini study)
+        - [x] Masking (mini study)
         - [ ] Outcome of model
             - [x] What is predicted?
             - [x] How is prediction assessed?
             - [ ] (Why choosing this outcome measurement if alternatives exist?)
         - [ ] Predictors
             - [ ] Alternatives for predictors
-              - [ ] pathologist decision
+              - [x] pathologist decision
               - [ ] genetic marker
             - [ ] how does pathologist make decision?
             - [ ] source of predictors + known biases
@@ -315,18 +313,17 @@ Yet to be adapted to this study.
             - [ ] handling of predictors
             - [ ] Pre-selection of predictors prior to model building
             - [ ] rescaling/transformation on predictors
-            - [ ] type of model, building model + predictor selection + internal validation
-            - [ ] model ensembling techniques (if used)
-            - [ ] detailed model description
-            - [ ] initialization of model parameters
-              - [ ] simclr pretrain
-              - [ ] imagenet
+            - [x] type of model, building model + predictor selection + internal validation
+            - [x] detailed model description
+            - [x] initialization of model parameters
+              - [x] simclr pretrain
+              - [x] imagenet
             - [x] training approaches (hyperparameters, number of models trained, used datasets)
               - [x] hyperparameters trained on one split
               - [x] 5 splits, 5 models
             - [x] Measures to assess model performance + model comparison
-              - [x] PR-AUC
-              - [x] F1
+              - [x] AUPR
+              - [x] AUPRG
               - [x] simclr init vs imagenet init vs ...
             - [ ] model updating arising from validation
             - [x] how final model is selected
@@ -339,39 +336,32 @@ Yet to be adapted to this study.
               - [x] Pytorch (Lightning)
             - [x] setup used
     - [ ] Results
-        - [ ] Participants (flow, demographics, comparison train/val/test (predictor distributions and images))
-        - [ ] Model dev and per participant outcome in
-            - [ ] Hyperparameter tuning
-            - [ ] Training
-            - [ ] Testing
+        - [x] Participants (flow, demographics, comparison train/val/test (predictor distributions and images))
         - [ ] Model specification (present model + explain how it must be used)
         - [ ] Model performance
-            - [ ] F1/PR-AUC WITH confidence interval over splits
-            - [ ] results of analysis on performance errors
-        - [ ] Model updating (performance per update)
-        - [ ] Usability
+            - [x] AUPRG WITH confidence interval over splits
+            - [x] results of analysis on performance errors
+            - [x] Attention maps
+            - [x] Loss curves
+            - [x] nearest neighbours simclr
+            - [x] tsne simclr
+        - [x] Usability
             - [x] how and when in the clinical pathway to use the prediction AI
             - [x] how will the AI be integrated into the target setting + requirements (on-/offsite)
             - [x] how will poor data be assessed when implementing AI model
             - [x] any human interaction needed for data to be used with the model + expertise of users
-        - [ ] Sensitivity analysis
-          - [ ] Multiple splits
-          - [ ] identify input that increase output uncertainty
-          - [ ] Search for reasons behind misclassifications
-          - [ ] Model simplification (imagenet init enough?)
-          - [ ] Communicate with pathologist why model (doesn't) work(s) with explainability (attention of tiles)
-          - [ ] ...
-    - [ ] Discussion
-        - [ ] Limitations
-        - [ ] Interpretation (dev/val data performance + overall interpretation considering objectives/limitations/similar study results/other evidence)
-        - [ ] Implications
-            - [ ] potential use (also in a general way)
-            - [ ] how will clinical practice be different if using the AI and how will it be used
-    - [ ] Supplementary information
+    - [x] Discussion
+        - [x] Limitations
+          - [x] bad data? noise exclusion
+          - [x] overfitting fold 1
+        - [x] Interpretation (dev/val data performance + overall interpretation considering objectives/limitations/similar study results/other evidence)
+        - [x] Implications
+            - [x] potential use (also in a general way)
+            - [x] how will clinical practice be different if using the AI and how will it be used
+    - [x] Supplementary information
         - [ ] Data?
-        - [ ] Code
-    - [ ] Funding?
-    - [ ] References
+        - [x] Code
+    - [x] References
 
 - [ ] Discussion and conclusion
     - [ ] Discussion

diff --git a/frontbackmatter/titlepage.tex b/frontbackmatter/titlepage.tex
@@ -36,8 +36,7 @@
         Report Master Project Physics and Astronomy \\
         \emph{track Biophysics and Biophotonics} \\
         60 EC \\
-        % Conducted between 05--09--2022 and xx--xx--2023
-        Conducted between \DTMdisplaydate{2022}{09}{05}{-1} and DD-MM-YYYY%\DTMdisplaydate{2023}{07}{01}{-1}
+        Conducted between \DTMdisplaydate{2022}{9}{5}{-1} and \DTMdisplaydate{2023}{6}{30}{-1}
 
         \vfill
 
@@ -50,7 +49,7 @@
         \vfill
 
         % Change this date upon submission to fix the date.
-        \today
+        \DTMdisplaydate{2023}{6}{30}{-1}
 
         \vfill
 

diff --git a/library.bib b/library.bib
@@ -312,6 +312,8 @@ @article{Deng2022
   publisher  = {arXiv}
 }
 
+@article{Bankhead2017, title={{QuPath}: Open source software for digital pathology image analysis}, volume={7}, DOI={10.1038/s41598-017-17204-5}, number={1}, journal={Scientific Reports}, author={Bankhead, Peter and Loughrey, Maurice B. and Fernández, José A. and Dombrowski, Yvonne and McArt, Darragh G. and Dunne, Philip D. and McQuaid, Stephen and Gray, Ronan T. and Murray, Liam J. and Coleman, Helen G. and others}, year={2017}}
+
 @article{Wang2022,
   author     = {Wang, Zhikang and Bi, Yue and Pan, Tong and Wang, Xiaoyu and Bain, Chris and Bassed, Richard and Imoto, Seiya and Yao, Jianhua and Song, Jiangning},
   date       = {2022},
@@ -338,6 +340,8 @@ @article{Shao2021
   publisher  = {arXiv}
 }
 
+@article{Bug2015, title={Foreground extraction for histopathological whole slide imaging}, DOI={10.1007/978-3-662-46224-9_72}, journal={Informatik aktuell}, author={Bug, Daniel and Feuerhake, Friedrich and Merhof, Dorit}, year={2015}, pages={419--424}}
+@article{Song2023, author={Song, Yipei and Cisternino, Francesco and Mekke, Joost M. and de Borst, Gert J. and de Kleijn, Dominique P. and Pasterkamp, Gerard and Vink, Aryan and Glastonbury, Craig A. and van der Laan, Sander W. and Miller, Clint L.}, title={An automatic entropy method to efficiently mask histology whole-slide images}, journal={Scientific Reports}, volume={13}, number={1}, year={2023}, doi={10.1038/s41598-023-29638-1}}
 @article{Ba2016,
   author     = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E.},
   date       = {2016},
@@ -351,6 +355,15 @@ @article{Ba2016
   publisher  = {arXiv}
 }
 
+@misc{Riasatian2020,
+      title={A Comparative Study of {U-Net} Topologies for Background Removal in Histopathology Images},
+      author={Riasatian, Abtin and Rasoolijaberi, Maral and Babaei, Morteza and Tizhoosh, H. R.},
+      year={2020},
+      eprint={2006.06531},
+      archivePrefix={arXiv},
+      primaryClass={eess.IV}
+}
+
 @article{Niu2019,
   author     = {Niu, Sheng-Yong and Guo, Lun-Zhang and Li, Yue and Wang, Tzung-Dau and Tsao, Yu and Liu, Tzu-Ming},
   date       = {2019},
@@ -485,6 +498,25 @@ @article{He2019
   publisher  = {arXiv}
 }
 
+@article{Caron2020,
+  author       = {Caron, Mathilde and
+                  Misra, Ishan and
+                  Mairal, Julien and
+                  Goyal, Priya and
+                  Bojanowski, Piotr and
+                  Joulin, Armand},
+  title        = {Unsupervised Learning of Visual Features by Contrasting Cluster Assignments},
+  year         = {2020},
+  journal      = {CoRR},
+  volume       = {abs/2006.09882},
+  eprinttype   = {arXiv},
+  eprint       = {2006.09882},
+  url          = {https://arxiv.org/abs/2006.09882},
+  timestamp    = {Tue, 23 Jun 2020 17:57:22 +0200},
+  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-09882.bib},
+  bibsource    = {dblp computer science bibliography, https://dblp.org}
+}
+
 @article{Chen2020c,
   author     = {Chen, Xinlei and Fan, Haoqi and Girshick, Ross and He, Kaiming},
   date       = {2020},
@@ -556,7 +588,8 @@ @Article{Khattak2019
   file         = {:khattak_survey_2019 - A Survey of Word Embeddings for Clinical Text.html:URL},
   keywords     = {Word embeddings, Clinical data, Natural language processing},
 }
-
+@article{Orr2020, title={Pathology, diagnostics, and classification of Medulloblastoma}, volume={30}, DOI={10.1111/bpa.12837}, number={3}, journal={Brain Pathology}, author={Orr, Brent A.}, year={2020}, pages={664--678}}
+@article{Vasile2017, title={Human astrocytes: Structure and functions in the healthy brain}, volume={222}, DOI={10.1007/s00429-017-1383-5}, number={5}, journal={Brain Structure and Function}, author={Vasile, Flora and Dossi, Elena and Rouach, Nathalie}, year={2017}, pages={2017--2029}}
 
 @article{Schirris2022,
   author       = {Schirris, Yoni and Gavves, Efstratios and Nederlof, Iris and Horlings, Hugo Mark and Teuwen, Jonas},
@@ -860,6 +893,70 @@ @software{Detlefsen2022
 year = {2022}
 }
 
+@inproceedings{Wolf2020,
+author = {Wolf, Thomas and Debut, Lysandre and Sanh, Victor and Chaumond, Julien and Delangue, Clement and Moi, Anthony and Cistac, Perric and Ma, Clara and Jernite, Yacine and Plu, Julien and Xu, Canwen and Le Scao, Teven and Gugger, Sylvain and Drame, Mariama and Lhoest, Quentin and Rush, Alexander M.},
+month = oct,
+pages = {38--45},
+publisher = {Association for Computational Linguistics},
+title = {Transformers: State-of-the-Art Natural Language Processing},
+url = {https://www.aclweb.org/anthology/2020.emnlp-demos.6},
+year = {2020}
+}
+
+@misc{imgclsmob2023,
+  author = {{imgclsmob contributors}},
+  title = {imgclsmob},
+  year = {2023},
+  publisher = {GitHub},
+  howpublished = {GitHub repository},
+  url = {https://github.com/osmr/imgclsmob}
+}
+
+@misc{Rohanian2023,
+  doi = {10.48550/ARXIV.2302.04725},
+  url = {https://arxiv.org/abs/2302.04725},
+  author = {Rohanian, Omid and Nouriborji, Mohammadmahdi and Jauncey, Hannah and Kouchaki, Samaneh and {ISARIC Clinical Characterisation Group} and Clifton, Lei and Merson, Laura and Clifton, David A.},
+  keywords = {Computation and Language (cs.CL), Artificial Intelligence (cs.AI), Machine Learning (cs.LG), FOS: Computer and information sciences, I.2.7, 68T50},
+  title = {Lightweight Transformers for Clinical Natural Language Processing},
+  publisher = {arXiv},
+  year = {2023},
+  copyright = {arXiv.org perpetual, non-exclusive license}
+}
+
+@incollection{Flach2015,
+author    = {Flach, Peter and Kull, Meelis},
+title     = {Precision-Recall-Gain Curves: PR Analysis Done Right},
+editor    = {Cortes, C. and Lawrence, N. D. and Lee, D. D. and Sugiyama, M. and Garnett, R.},
+booktitle = {Advances in Neural Information Processing Systems 28},
+publisher = {Curran Associates, Inc.},
+year      = {2015},
+pages     = {838--846},
+url       = {http://papers.nips.cc/paper/5867-precision-recall-gain-curves-pr-analysis-done-right.pdf}
+}
+
+@software{Laan2022,
+  author       = {van der Laan, Sander W. and
+                  CG and
+                  Song, Yipei},
+  title        = {CirculatoryHealth/EntropyMasker: v1.0.1},
+  version      = {v1.0.1},
+  publisher    = {Zenodo},
+  month        = sep,
+  year         = {2022},
+  doi          = {10.5281/zenodo.7050041},
+  url          = {https://doi.org/10.5281/zenodo.7050041}
+}
+
+@software{Teuwen2023,
+author = {{Netherlands Cancer Institute} and contributors},
+% doi = {},
+license = {Apache-2.0},
+month = mar,
+title = {{Deep Learning Utilities for Pathology}},
+url = {https://github.com/NKI-AI/dlup},
+year = {2023}
+}
+
 @article{Akiba2019,
   author     = {Takuya Akiba and
                 Shotaro Sano and