diff --git a/G/GX11_Matrix_Inverse_for_Replication.ipynb b/G/GX11_Matrix_Inverse_for_Replication.ipynb new file mode 100644 index 0000000..28c9eab --- /dev/null +++ b/G/GX11_Matrix_Inverse_for_Replication.ipynb @@ -0,0 +1,2047 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 504, + "id": "20d8a525-8121-417b-8bd8-632e75beef12", + "metadata": {}, + "outputs": [], + "source": [ + "### Script to apply feature factor weights identified in Munich Data (Azimuth Annotation) on Groningen data for computation of factor values\n", + "### Replication Script" + ] + }, + { + "cell_type": "markdown", + "id": "528702ab-7be4-42ec-a5a5-8fa5647ddf6b", + "metadata": { + "tags": [] + }, + "source": [ + "# Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 505, + "id": "51def42a-4a18-4c27-bf37-ee082b4a1363", + "metadata": {}, + "outputs": [], + "source": [ + "library(dplyr)\n", + "library(stringr)\n", + "library(Seurat)" + ] + }, + { + "cell_type": "code", + "execution_count": 506, + "id": "6dc51e69-70e6-4bf2-a7d1-b5fad935cc12", + "metadata": {}, + "outputs": [], + "source": [ + "library(MOFA2)" + ] + }, + { + "cell_type": "code", + "execution_count": 507, + "id": "20ea0b90-9663-46a8-9bdf-d994aa28b25c", + "metadata": {}, + "outputs": [], + "source": [ + "library(ggplot2)\n", + "library(reticulate)\n", + "library(reshape2)\n", + "library(ggpubr)" + ] + }, + { + "cell_type": "markdown", + "id": "3d97489e-c741-4853-8df6-01a1b57959af", + "metadata": {}, + "source": [ + "# Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 508, + "id": "aca812f2-f0bf-43ba-ba91-3f012033d097", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = '../data/current'" + ] + }, + { + "cell_type": "code", + "execution_count": 509, + "id": "ac48c1d3-04a7-424f-bcb3-7120a31b7f51", + "metadata": {}, + "outputs": [], + "source": [ + "output_path = '../results/current'" + ] + }, + { + "cell_type": "markdown", + "id": "0d2a3050-f8f6-48b8-be0f-e2a92053947d", + 
"metadata": {}, + "source": [ + "# Load data" + ] + }, + { + "cell_type": "markdown", + "id": "aacd4ca7-cfaf-4205-8b1c-eb3e1f58d607", + "metadata": {}, + "source": [ + "## Munich factor data" + ] + }, + { + "cell_type": "code", + "execution_count": 510, + "id": "e222c0b3-6316-47ff-bf05-6fd2fd88bd20", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A data.frame: 6 × 21
Factor1Factor2Factor3Factor4Factor5Factor6Factor7Factor8Factor9Factor10Factor12Factor13Factor14Factor15Factor16Factor17Factor18Factor19Factor20sample_id
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr>
1 0.2822028-1.2559541-0.53390026 1.1204467-0.466692231.2484388 0.20759032-0.364617945-0.40242258 0.78832135 0.05325919 0.27738239 0.13303365-0.76325230-0.04660508 0.95971046 0.431779125-0.46741569-0.45888943k1
2 0.6928825-1.0380557 0.33933545 0.2294335 0.105209400.8011520 0.36775010 1.417721825-0.58040927 0.04100113 0.22233569 0.31122340 0.04224790 0.14198857-0.04503017-0.04460783-0.186394597-0.26701984-0.03163122k10
3-1.4468008-0.3123073 0.68857816-0.6941110 0.080076711.1839352-0.19331847 0.036830866 0.21506213 0.03104541-0.70126043 0.17834780-0.05727359 0.05895828 0.14746604-0.09273922 0.172580802 0.02947317-0.08184409k11
4-2.5657673-0.7162750 0.99399349-1.0877413-0.897433120.5699462-0.08501748-0.168697397 0.03162881-0.03488845-1.09175933 0.48647681-0.39887633-0.04494189 0.54622186 0.16467157-0.149100299 0.43129440-0.05209181k12
5-0.2657781-0.1293536 0.30498721-1.0304524 0.045345611.3639342 0.02831011-0.003340543 0.13941860 0.16450688-0.33919680-0.66542364 0.03761243 0.15980442 0.11184416 0.03747477 0.099951109 0.24887446 0.27424704k13
6 1.0376091-1.4776762-0.03257904-0.5101523 0.120843200.1672229 0.24861033 1.302995675 0.07186483 0.02188782 0.56497282-0.05101193-0.29314109 0.26959618-0.70380578 0.07279563 0.004502305 0.38742801 0.05504769k14
\n" + ], + "text/latex": [ + "A data.frame: 6 × 21\n", + "\\begin{tabular}{r|lllllllllllllllllllll}\n", + " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & ⋯ & Factor12 & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20 & sample\\_id\\\\\n", + " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t1 & 0.2822028 & -1.2559541 & -0.53390026 & 1.1204467 & -0.46669223 & 1.2484388 & 0.20759032 & -0.364617945 & -0.40242258 & 0.78832135 & ⋯ & 0.05325919 & 0.27738239 & 0.13303365 & -0.76325230 & -0.04660508 & 0.95971046 & 0.431779125 & -0.46741569 & -0.45888943 & k1 \\\\\n", + "\t2 & 0.6928825 & -1.0380557 & 0.33933545 & 0.2294335 & 0.10520940 & 0.8011520 & 0.36775010 & 1.417721825 & -0.58040927 & 0.04100113 & ⋯ & 0.22233569 & 0.31122340 & 0.04224790 & 0.14198857 & -0.04503017 & -0.04460783 & -0.186394597 & -0.26701984 & -0.03163122 & k10\\\\\n", + "\t3 & -1.4468008 & -0.3123073 & 0.68857816 & -0.6941110 & 0.08007671 & 1.1839352 & -0.19331847 & 0.036830866 & 0.21506213 & 0.03104541 & ⋯ & -0.70126043 & 0.17834780 & -0.05727359 & 0.05895828 & 0.14746604 & -0.09273922 & 0.172580802 & 0.02947317 & -0.08184409 & k11\\\\\n", + "\t4 & -2.5657673 & -0.7162750 & 0.99399349 & -1.0877413 & -0.89743312 & 0.5699462 & -0.08501748 & -0.168697397 & 0.03162881 & -0.03488845 & ⋯ & -1.09175933 & 0.48647681 & -0.39887633 & -0.04494189 & 0.54622186 & 0.16467157 & -0.149100299 & 0.43129440 & -0.05209181 & k12\\\\\n", + "\t5 & -0.2657781 & -0.1293536 & 0.30498721 & -1.0304524 & 0.04534561 & 1.3639342 & 0.02831011 & -0.003340543 & 0.13941860 & 0.16450688 & ⋯ & -0.33919680 & -0.66542364 & 0.03761243 & 0.15980442 & 0.11184416 & 0.03747477 & 0.099951109 & 0.24887446 & 0.27424704 & k13\\\\\n", + "\t6 & 1.0376091 & -1.4776762 & -0.03257904 & -0.5101523 & 0.12084320 & 0.1672229 & 0.24861033 & 1.302995675 & 0.07186483 & 0.02188782 & ⋯ & 0.56497282 & -0.05101193 & -0.29314109 & 0.26959618 & 
-0.70380578 & 0.07279563 & 0.004502305 & 0.38742801 & 0.05504769 & k14\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 6 × 21\n", + "\n", + "| | Factor1 <dbl> | Factor2 <dbl> | Factor3 <dbl> | Factor4 <dbl> | Factor5 <dbl> | Factor6 <dbl> | Factor7 <dbl> | Factor8 <dbl> | Factor9 <dbl> | Factor10 <dbl> | ⋯ ⋯ | Factor12 <dbl> | Factor13 <dbl> | Factor14 <dbl> | Factor15 <dbl> | Factor16 <dbl> | Factor17 <dbl> | Factor18 <dbl> | Factor19 <dbl> | Factor20 <dbl> | sample_id <chr> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| 1 | 0.2822028 | -1.2559541 | -0.53390026 | 1.1204467 | -0.46669223 | 1.2484388 | 0.20759032 | -0.364617945 | -0.40242258 | 0.78832135 | ⋯ | 0.05325919 | 0.27738239 | 0.13303365 | -0.76325230 | -0.04660508 | 0.95971046 | 0.431779125 | -0.46741569 | -0.45888943 | k1 |\n", + "| 2 | 0.6928825 | -1.0380557 | 0.33933545 | 0.2294335 | 0.10520940 | 0.8011520 | 0.36775010 | 1.417721825 | -0.58040927 | 0.04100113 | ⋯ | 0.22233569 | 0.31122340 | 0.04224790 | 0.14198857 | -0.04503017 | -0.04460783 | -0.186394597 | -0.26701984 | -0.03163122 | k10 |\n", + "| 3 | -1.4468008 | -0.3123073 | 0.68857816 | -0.6941110 | 0.08007671 | 1.1839352 | -0.19331847 | 0.036830866 | 0.21506213 | 0.03104541 | ⋯ | -0.70126043 | 0.17834780 | -0.05727359 | 0.05895828 | 0.14746604 | -0.09273922 | 0.172580802 | 0.02947317 | -0.08184409 | k11 |\n", + "| 4 | -2.5657673 | -0.7162750 | 0.99399349 | -1.0877413 | -0.89743312 | 0.5699462 | -0.08501748 | -0.168697397 | 0.03162881 | -0.03488845 | ⋯ | -1.09175933 | 0.48647681 | -0.39887633 | -0.04494189 | 0.54622186 | 0.16467157 | -0.149100299 | 0.43129440 | -0.05209181 | k12 |\n", + "| 5 | -0.2657781 | -0.1293536 | 0.30498721 | -1.0304524 | 0.04534561 | 1.3639342 | 0.02831011 | -0.003340543 | 0.13941860 | 0.16450688 | ⋯ | -0.33919680 | -0.66542364 | 0.03761243 | 0.15980442 | 0.11184416 | 0.03747477 | 0.099951109 | 0.24887446 | 0.27424704 | k13 |\n", + 
"| 6 | 1.0376091 | -1.4776762 | -0.03257904 | -0.5101523 | 0.12084320 | 0.1672229 | 0.24861033 | 1.302995675 | 0.07186483 | 0.02188782 | ⋯ | 0.56497282 | -0.05101193 | -0.29314109 | 0.26959618 | -0.70380578 | 0.07279563 | 0.004502305 | 0.38742801 | 0.05504769 | k14 |\n", + "\n" + ], + "text/plain": [ + " Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 \n", + "1 0.2822028 -1.2559541 -0.53390026 1.1204467 -0.46669223 1.2484388\n", + "2 0.6928825 -1.0380557 0.33933545 0.2294335 0.10520940 0.8011520\n", + "3 -1.4468008 -0.3123073 0.68857816 -0.6941110 0.08007671 1.1839352\n", + "4 -2.5657673 -0.7162750 0.99399349 -1.0877413 -0.89743312 0.5699462\n", + "5 -0.2657781 -0.1293536 0.30498721 -1.0304524 0.04534561 1.3639342\n", + "6 1.0376091 -1.4776762 -0.03257904 -0.5101523 0.12084320 0.1672229\n", + " Factor7 Factor8 Factor9 Factor10 ⋯ Factor12 Factor13 \n", + "1 0.20759032 -0.364617945 -0.40242258 0.78832135 ⋯ 0.05325919 0.27738239\n", + "2 0.36775010 1.417721825 -0.58040927 0.04100113 ⋯ 0.22233569 0.31122340\n", + "3 -0.19331847 0.036830866 0.21506213 0.03104541 ⋯ -0.70126043 0.17834780\n", + "4 -0.08501748 -0.168697397 0.03162881 -0.03488845 ⋯ -1.09175933 0.48647681\n", + "5 0.02831011 -0.003340543 0.13941860 0.16450688 ⋯ -0.33919680 -0.66542364\n", + "6 0.24861033 1.302995675 0.07186483 0.02188782 ⋯ 0.56497282 -0.05101193\n", + " Factor14 Factor15 Factor16 Factor17 Factor18 Factor19 \n", + "1 0.13303365 -0.76325230 -0.04660508 0.95971046 0.431779125 -0.46741569\n", + "2 0.04224790 0.14198857 -0.04503017 -0.04460783 -0.186394597 -0.26701984\n", + "3 -0.05727359 0.05895828 0.14746604 -0.09273922 0.172580802 0.02947317\n", + "4 -0.39887633 -0.04494189 0.54622186 0.16467157 -0.149100299 0.43129440\n", + "5 0.03761243 0.15980442 0.11184416 0.03747477 0.099951109 0.24887446\n", + "6 -0.29314109 0.26959618 -0.70380578 0.07279563 0.004502305 0.38742801\n", + " Factor20 sample_id\n", + "1 -0.45888943 k1 \n", + "2 -0.03163122 k10 \n", + "3 -0.08184409 k11 \n", + "4 -0.05209181 
k12 \n", + "5 0.27424704 k13 \n", + "6 0.05504769 k14 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "factor_data_mu = read.csv(paste0(data_path, \"/results/G-Analysis/Factor_Data_V_AZIMUTH_INTEGRATED_FALSE.csv\"))\n", + "head(factor_data_mu) # alternative: Azimuth" + ] + }, + { + "cell_type": "markdown", + "id": "e943149d-edbb-4728-ad84-5ce37af2f04d", + "metadata": {}, + "source": [ + "## Munich feature data" + ] + }, + { + "cell_type": "code", + "execution_count": 518, + "id": "5d76cc1d-c487-476e-8f07-a751703dc1ff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A data.frame: 6 × 22
Factor1Factor2Factor3Factor4Factor5Factor6Factor7Factor8Factor9Factor10Factor13Factor14Factor15Factor16Factor17Factor18Factor19Factor20typevariable_name
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr><chr>
1-0.020691979 0.429515363 0.027756312-0.02712267 0.010438305-0.41947963 0.01169812-0.11248843 0.28675204 0.0143456634 0.030966401 0.100109005 0.88420419-0.002553659-0.0002680875-0.009308180 0.003408993-0.002008598clinical_dataCK
2-0.001361949 0.379039595 0.067498675-0.06765357-0.004929694-0.35290690 0.01561328-0.01496201 0.37774439-0.0004131968 0.003047890 0.116174613 0.13072112-0.002865083-0.0003610348-0.008757576 0.004192790-0.010507527clinical_dataCK_MB
3-0.052856754 0.028641115-0.891960581 0.08775674-0.024844550-0.40121965-0.08041434-0.01238686-0.02412622-0.0140435079 0.008406062 0.176649898 0.49146629-0.007964040-0.0036451100 0.014286835-0.002485743 0.009543655clinical_dataCRP
4-0.026301185 0.327099511-0.314584680 0.08342660 0.014994718-0.54065063 0.01323074-0.31178849 0.18761051-0.0154090508 0.117386061 0.216599600 0.78670880-0.007199139 0.0443780006-0.003896878 0.004977378-0.003467445clinical_dataTroponin
5 0.015548781-0.004530304 0.082117144-0.02945236-0.213756108-0.10763285 0.07139866 0.03609601-0.03866970 0.0178796863 0.262153876 0.006848431-0.05089458-0.020458287-0.1043864240 0.075482525-0.037997785-0.029645511cytokine BCA1__CXCL13
6 0.015036488-0.190978857 0.001049874 0.01339136 0.028743482 0.01589764 0.07016133-0.02435615 0.01165956-0.1114046521-0.001984003-0.050574208-0.04242095-0.004868774-0.1907216033 0.364199683 0.085955948-0.028747833cytokine CTACK__CCL27
\n" + ], + "text/latex": [ + "A data.frame: 6 × 22\n", + "\\begin{tabular}{r|lllllllllllllllllllll}\n", + " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & ⋯ & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20 & type & variable\\_name\\\\\n", + " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t1 & -0.020691979 & 0.429515363 & 0.027756312 & -0.02712267 & 0.010438305 & -0.41947963 & 0.01169812 & -0.11248843 & 0.28675204 & 0.0143456634 & ⋯ & 0.030966401 & 0.100109005 & 0.88420419 & -0.002553659 & -0.0002680875 & -0.009308180 & 0.003408993 & -0.002008598 & clinical\\_data & CK \\\\\n", + "\t2 & -0.001361949 & 0.379039595 & 0.067498675 & -0.06765357 & -0.004929694 & -0.35290690 & 0.01561328 & -0.01496201 & 0.37774439 & -0.0004131968 & ⋯ & 0.003047890 & 0.116174613 & 0.13072112 & -0.002865083 & -0.0003610348 & -0.008757576 & 0.004192790 & -0.010507527 & clinical\\_data & CK\\_MB \\\\\n", + "\t3 & -0.052856754 & 0.028641115 & -0.891960581 & 0.08775674 & -0.024844550 & -0.40121965 & -0.08041434 & -0.01238686 & -0.02412622 & -0.0140435079 & ⋯ & 0.008406062 & 0.176649898 & 0.49146629 & -0.007964040 & -0.0036451100 & 0.014286835 & -0.002485743 & 0.009543655 & clinical\\_data & CRP \\\\\n", + "\t4 & -0.026301185 & 0.327099511 & -0.314584680 & 0.08342660 & 0.014994718 & -0.54065063 & 0.01323074 & -0.31178849 & 0.18761051 & -0.0154090508 & ⋯ & 0.117386061 & 0.216599600 & 0.78670880 & -0.007199139 & 0.0443780006 & -0.003896878 & 0.004977378 & -0.003467445 & clinical\\_data & Troponin \\\\\n", + "\t5 & 0.015548781 & -0.004530304 & 0.082117144 & -0.02945236 & -0.213756108 & -0.10763285 & 0.07139866 & 0.03609601 & -0.03866970 & 0.0178796863 & ⋯ & 0.262153876 & 0.006848431 & -0.05089458 & -0.020458287 & -0.1043864240 & 0.075482525 & -0.037997785 & -0.029645511 & cytokine & BCA1\\_\\_CXCL13\\\\\n", + "\t6 & 0.015036488 & -0.190978857 & 0.001049874 & 0.01339136 & 
0.028743482 & 0.01589764 & 0.07016133 & -0.02435615 & 0.01165956 & -0.1114046521 & ⋯ & -0.001984003 & -0.050574208 & -0.04242095 & -0.004868774 & -0.1907216033 & 0.364199683 & 0.085955948 & -0.028747833 & cytokine & CTACK\\_\\_CCL27\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 6 × 22\n", + "\n", + "| | Factor1 <dbl> | Factor2 <dbl> | Factor3 <dbl> | Factor4 <dbl> | Factor5 <dbl> | Factor6 <dbl> | Factor7 <dbl> | Factor8 <dbl> | Factor9 <dbl> | Factor10 <dbl> | ⋯ ⋯ | Factor13 <dbl> | Factor14 <dbl> | Factor15 <dbl> | Factor16 <dbl> | Factor17 <dbl> | Factor18 <dbl> | Factor19 <dbl> | Factor20 <dbl> | type <chr> | variable_name <chr> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| 1 | -0.020691979 | 0.429515363 | 0.027756312 | -0.02712267 | 0.010438305 | -0.41947963 | 0.01169812 | -0.11248843 | 0.28675204 | 0.0143456634 | ⋯ | 0.030966401 | 0.100109005 | 0.88420419 | -0.002553659 | -0.0002680875 | -0.009308180 | 0.003408993 | -0.002008598 | clinical_data | CK |\n", + "| 2 | -0.001361949 | 0.379039595 | 0.067498675 | -0.06765357 | -0.004929694 | -0.35290690 | 0.01561328 | -0.01496201 | 0.37774439 | -0.0004131968 | ⋯ | 0.003047890 | 0.116174613 | 0.13072112 | -0.002865083 | -0.0003610348 | -0.008757576 | 0.004192790 | -0.010507527 | clinical_data | CK_MB |\n", + "| 3 | -0.052856754 | 0.028641115 | -0.891960581 | 0.08775674 | -0.024844550 | -0.40121965 | -0.08041434 | -0.01238686 | -0.02412622 | -0.0140435079 | ⋯ | 0.008406062 | 0.176649898 | 0.49146629 | -0.007964040 | -0.0036451100 | 0.014286835 | -0.002485743 | 0.009543655 | clinical_data | CRP |\n", + "| 4 | -0.026301185 | 0.327099511 | -0.314584680 | 0.08342660 | 0.014994718 | -0.54065063 | 0.01323074 | -0.31178849 | 0.18761051 | -0.0154090508 | ⋯ | 0.117386061 | 0.216599600 | 0.78670880 | -0.007199139 | 0.0443780006 | -0.003896878 | 0.004977378 | -0.003467445 | clinical_data | Troponin |\n", + "| 5 | 0.015548781 | 
-0.004530304 | 0.082117144 | -0.02945236 | -0.213756108 | -0.10763285 | 0.07139866 | 0.03609601 | -0.03866970 | 0.0178796863 | ⋯ | 0.262153876 | 0.006848431 | -0.05089458 | -0.020458287 | -0.1043864240 | 0.075482525 | -0.037997785 | -0.029645511 | cytokine | BCA1__CXCL13 |\n", + "| 6 | 0.015036488 | -0.190978857 | 0.001049874 | 0.01339136 | 0.028743482 | 0.01589764 | 0.07016133 | -0.02435615 | 0.01165956 | -0.1114046521 | ⋯ | -0.001984003 | -0.050574208 | -0.04242095 | -0.004868774 | -0.1907216033 | 0.364199683 | 0.085955948 | -0.028747833 | cytokine | CTACK__CCL27 |\n", + "\n" + ], + "text/plain": [ + " Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 \n", + "1 -0.020691979 0.429515363 0.027756312 -0.02712267 0.010438305 -0.41947963\n", + "2 -0.001361949 0.379039595 0.067498675 -0.06765357 -0.004929694 -0.35290690\n", + "3 -0.052856754 0.028641115 -0.891960581 0.08775674 -0.024844550 -0.40121965\n", + "4 -0.026301185 0.327099511 -0.314584680 0.08342660 0.014994718 -0.54065063\n", + "5 0.015548781 -0.004530304 0.082117144 -0.02945236 -0.213756108 -0.10763285\n", + "6 0.015036488 -0.190978857 0.001049874 0.01339136 0.028743482 0.01589764\n", + " Factor7 Factor8 Factor9 Factor10 ⋯ Factor13 Factor14 \n", + "1 0.01169812 -0.11248843 0.28675204 0.0143456634 ⋯ 0.030966401 0.100109005\n", + "2 0.01561328 -0.01496201 0.37774439 -0.0004131968 ⋯ 0.003047890 0.116174613\n", + "3 -0.08041434 -0.01238686 -0.02412622 -0.0140435079 ⋯ 0.008406062 0.176649898\n", + "4 0.01323074 -0.31178849 0.18761051 -0.0154090508 ⋯ 0.117386061 0.216599600\n", + "5 0.07139866 0.03609601 -0.03866970 0.0178796863 ⋯ 0.262153876 0.006848431\n", + "6 0.07016133 -0.02435615 0.01165956 -0.1114046521 ⋯ -0.001984003 -0.050574208\n", + " Factor15 Factor16 Factor17 Factor18 Factor19 Factor20 \n", + "1 0.88420419 -0.002553659 -0.0002680875 -0.009308180 0.003408993 -0.002008598\n", + "2 0.13072112 -0.002865083 -0.0003610348 -0.008757576 0.004192790 -0.010507527\n", + "3 0.49146629 -0.007964040 -0.0036451100 
0.014286835 -0.002485743 0.009543655\n", + "4 0.78670880 -0.007199139 0.0443780006 -0.003896878 0.004977378 -0.003467445\n", + "5 -0.05089458 -0.020458287 -0.1043864240 0.075482525 -0.037997785 -0.029645511\n", + "6 -0.04242095 -0.004868774 -0.1907216033 0.364199683 0.085955948 -0.028747833\n", + " type variable_name\n", + "1 clinical_data CK \n", + "2 clinical_data CK_MB \n", + "3 clinical_data CRP \n", + "4 clinical_data Troponin \n", + "5 cytokine BCA1__CXCL13 \n", + "6 cytokine CTACK__CCL27 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "feature_data_mu = read.csv(paste0(data_path, \"/results/G-Analysis/Weight_Data_V_AZIMUTH_INTEGRATED_FALSE.csv\"))\n", + "head(feature_data_mu) # alternative: Munich" + ] + }, + { + "cell_type": "markdown", + "id": "9b591949-a52d-4494-b5ef-a8dab5c856bc", + "metadata": { + "tags": [] + }, + "source": [ + "## Pseudobulk and Normalized Input Data Groningen" + ] + }, + { + "cell_type": "code", + "execution_count": 519, + "id": "1f8da877-10dc-45b9-9b73-0808553504e6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A data.frame: 3 × 5
Xsample_idvariablevaluetype
<int><chr><chr><dbl><chr>
11Baseline.TEST_1 B.cell__ACTB-0.1451209single_cell
22Baseline.TEST_12B.cell__ACTB 0.2310092single_cell
33Baseline.TEST_14B.cell__ACTB-0.2933812single_cell
\n" + ], + "text/latex": [ + "A data.frame: 3 × 5\n", + "\\begin{tabular}{r|lllll}\n", + " & X & sample\\_id & variable & value & type\\\\\n", + " & & & & & \\\\\n", + "\\hline\n", + "\t1 & 1 & Baseline.TEST\\_1 & B.cell\\_\\_ACTB & -0.1451209 & single\\_cell\\\\\n", + "\t2 & 2 & Baseline.TEST\\_12 & B.cell\\_\\_ACTB & 0.2310092 & single\\_cell\\\\\n", + "\t3 & 3 & Baseline.TEST\\_14 & B.cell\\_\\_ACTB & -0.2933812 & single\\_cell\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 3 × 5\n", + "\n", + "| | X <int> | sample_id <chr> | variable <chr> | value <dbl> | type <chr> |\n", + "|---|---|---|---|---|---|\n", + "| 1 | 1 | Baseline.TEST_1 | B.cell__ACTB | -0.1451209 | single_cell |\n", + "| 2 | 2 | Baseline.TEST_12 | B.cell__ACTB | 0.2310092 | single_cell |\n", + "| 3 | 3 | Baseline.TEST_14 | B.cell__ACTB | -0.2933812 | single_cell |\n", + "\n" + ], + "text/plain": [ + " X sample_id variable value type \n", + "1 1 Baseline.TEST_1 B.cell__ACTB -0.1451209 single_cell\n", + "2 2 Baseline.TEST_12 B.cell__ACTB 0.2310092 single_cell\n", + "3 3 Baseline.TEST_14 B.cell__ACTB -0.2933812 single_cell" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1] \"2023-04-25 16:06:43 UTC\"\n" + ] + } + ], + "source": [ + "path = paste0(output_path, '/Reproduction_GR/Combined_Data_V_AZIMUTH_REPRODUCTION_v2','.csv')\n", + "input_gr = read.csv(path)\n", + "head(input_gr, n=3)\n", + "print(file.info(path)$mtime)" + ] + }, + { + "cell_type": "code", + "execution_count": 520, + "id": "a0d58dc9-25ed-4e81-ac9c-6e667faeabfb", + "metadata": {}, + "outputs": [], + "source": [ + "input_gr$feature = input_gr$variable" + ] + }, + { + "cell_type": "markdown", + "id": "4514aed7-d20f-4751-9b56-bd0348eaa8be", + "metadata": {}, + "source": [ + "## Outcome classification Groningen" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0dfe35b4", + "metadata": {}, + "outputs": 
[], + "source": [ + "classification_gr = read.csv(paste0(data_path, \"/results/Reproduction_GR/groningen_output_class.csv\"))" + ] + }, + { + "cell_type": "markdown", + "id": "e21f7bb2-a81a-435a-9fb9-5134fb28e839", + "metadata": {}, + "source": [ + "# Analyze" + ] + }, + { + "cell_type": "markdown", + "id": "0e13ebb1-928a-4f76-850e-7fe3062d67fe", + "metadata": {}, + "source": [ + "## Match Features" + ] + }, + { + "cell_type": "code", + "execution_count": 522, + "id": "332502ac-dec8-4dce-9385-d5730a7e48de", + "metadata": {}, + "outputs": [], + "source": [ + "### Ensure that feature annotations (including the cell type) have the same format in both datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 523, + "id": "5ec775f1-b707-43bc-9e83-dab914842c14", + "metadata": {}, + "outputs": [], + "source": [ + "input_gr$cell_type = str_replace(input_gr$variable,\n", + " '__.*', '')" + ] + }, + { + "cell_type": "code", + "execution_count": 524, + "id": "df6575ed-a213-483a-8b5e-68db56940787", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
  1. 'B.cell'
  2. 'CD4_CTL'
  3. 'CD4_Naive'
  4. 'CD4_TCM'
  5. 'CD4_TEM'
  6. 'CD8_Naive'
  7. 'CD8_TEM'
  8. 'cDC2'
  9. 'cMono'
  10. 'MAIT'
  11. 'ncMono'
  12. 'NKdim'
  13. 'Treg'
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 'B.cell'\n", + "\\item 'CD4\\_CTL'\n", + "\\item 'CD4\\_Naive'\n", + "\\item 'CD4\\_TCM'\n", + "\\item 'CD4\\_TEM'\n", + "\\item 'CD8\\_Naive'\n", + "\\item 'CD8\\_TEM'\n", + "\\item 'cDC2'\n", + "\\item 'cMono'\n", + "\\item 'MAIT'\n", + "\\item 'ncMono'\n", + "\\item 'NKdim'\n", + "\\item 'Treg'\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 'B.cell'\n", + "2. 'CD4_CTL'\n", + "3. 'CD4_Naive'\n", + "4. 'CD4_TCM'\n", + "5. 'CD4_TEM'\n", + "6. 'CD8_Naive'\n", + "7. 'CD8_TEM'\n", + "8. 'cDC2'\n", + "9. 'cMono'\n", + "10. 'MAIT'\n", + "11. 'ncMono'\n", + "12. 'NKdim'\n", + "13. 'Treg'\n", + "\n", + "\n" + ], + "text/plain": [ + " [1] \"B.cell\" \"CD4_CTL\" \"CD4_Naive\" \"CD4_TCM\" \"CD4_TEM\" \"CD8_Naive\"\n", + " [7] \"CD8_TEM\" \"cDC2\" \"cMono\" \"MAIT\" \"ncMono\" \"NKdim\" \n", + "[13] \"Treg\" " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "unique(input_gr$cell_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 525, + "id": "13a36331-18d3-4146-b31d-633122f61d96", + "metadata": {}, + "outputs": [], + "source": [ + "## remove non-sc features from Munich data" + ] + }, + { + "cell_type": "code", + "execution_count": 526, + "id": "ed1a479e-c9c4-48a4-92dc-539e2f50c788", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "
A data.frame: 2 × 22
Factor1Factor2Factor3Factor4Factor5Factor6Factor7Factor8Factor9Factor10Factor13Factor14Factor15Factor16Factor17Factor18Factor19Factor20typevariable_name
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr><chr>
1-0.0206919790.42951540.02775631-0.02712267 0.010438305-0.41947960.01169812-0.112488430.2867520 0.01434566340.030966400.10010900.8842042-0.002553659-0.0002680875-0.0093081800.003408993-0.002008598clinical_dataCK
2-0.0013619490.37903960.06749868-0.06765357-0.004929694-0.35290690.01561328-0.014962010.3777444-0.00041319680.003047890.11617460.1307211-0.002865083-0.0003610348-0.0087575760.004192790-0.010507527clinical_dataCK_MB
\n" + ], + "text/latex": [ + "A data.frame: 2 × 22\n", + "\\begin{tabular}{r|lllllllllllllllllllll}\n", + " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & ⋯ & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20 & type & variable\\_name\\\\\n", + " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t1 & -0.020691979 & 0.4295154 & 0.02775631 & -0.02712267 & 0.010438305 & -0.4194796 & 0.01169812 & -0.11248843 & 0.2867520 & 0.0143456634 & ⋯ & 0.03096640 & 0.1001090 & 0.8842042 & -0.002553659 & -0.0002680875 & -0.009308180 & 0.003408993 & -0.002008598 & clinical\\_data & CK \\\\\n", + "\t2 & -0.001361949 & 0.3790396 & 0.06749868 & -0.06765357 & -0.004929694 & -0.3529069 & 0.01561328 & -0.01496201 & 0.3777444 & -0.0004131968 & ⋯ & 0.00304789 & 0.1161746 & 0.1307211 & -0.002865083 & -0.0003610348 & -0.008757576 & 0.004192790 & -0.010507527 & clinical\\_data & CK\\_MB\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 2 × 22\n", + "\n", + "| | Factor1 <dbl> | Factor2 <dbl> | Factor3 <dbl> | Factor4 <dbl> | Factor5 <dbl> | Factor6 <dbl> | Factor7 <dbl> | Factor8 <dbl> | Factor9 <dbl> | Factor10 <dbl> | ⋯ ⋯ | Factor13 <dbl> | Factor14 <dbl> | Factor15 <dbl> | Factor16 <dbl> | Factor17 <dbl> | Factor18 <dbl> | Factor19 <dbl> | Factor20 <dbl> | type <chr> | variable_name <chr> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| 1 | -0.020691979 | 0.4295154 | 0.02775631 | -0.02712267 | 0.010438305 | -0.4194796 | 0.01169812 | -0.11248843 | 0.2867520 | 0.0143456634 | ⋯ | 0.03096640 | 0.1001090 | 0.8842042 | -0.002553659 | -0.0002680875 | -0.009308180 | 0.003408993 | -0.002008598 | clinical_data | CK |\n", + "| 2 | -0.001361949 | 0.3790396 | 0.06749868 | -0.06765357 | -0.004929694 | -0.3529069 | 0.01561328 | -0.01496201 | 0.3777444 | -0.0004131968 | ⋯ | 0.00304789 | 0.1161746 | 0.1307211 
| -0.002865083 | -0.0003610348 | -0.008757576 | 0.004192790 | -0.010507527 | clinical_data | CK_MB |\n", + "\n" + ], + "text/plain": [ + " Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 \n", + "1 -0.020691979 0.4295154 0.02775631 -0.02712267 0.010438305 -0.4194796\n", + "2 -0.001361949 0.3790396 0.06749868 -0.06765357 -0.004929694 -0.3529069\n", + " Factor7 Factor8 Factor9 Factor10 ⋯ Factor13 Factor14 \n", + "1 0.01169812 -0.11248843 0.2867520 0.0143456634 ⋯ 0.03096640 0.1001090\n", + "2 0.01561328 -0.01496201 0.3777444 -0.0004131968 ⋯ 0.00304789 0.1161746\n", + " Factor15 Factor16 Factor17 Factor18 Factor19 Factor20 \n", + "1 0.8842042 -0.002553659 -0.0002680875 -0.009308180 0.003408993 -0.002008598\n", + "2 0.1307211 -0.002865083 -0.0003610348 -0.008757576 0.004192790 -0.010507527\n", + " type variable_name\n", + "1 clinical_data CK \n", + "2 clinical_data CK_MB " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
  1. 'clinical_data'
  2. 'cytokine'
  3. 'neutrophil'
  4. 'proteomics'
  5. 'Bcell'
  6. 'CD14Mono'
  7. 'CD16Mono'
  8. 'CD4CTL'
  9. 'CD4Naive'
  10. 'CD4TCM'
  11. 'CD4TEM'
  12. 'CD8Naive'
  13. 'CD8TEM'
  14. 'cDC2'
  15. 'gdT'
  16. 'NK'
  17. 'Treg'
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 'clinical\\_data'\n", + "\\item 'cytokine'\n", + "\\item 'neutrophil'\n", + "\\item 'proteomics'\n", + "\\item 'Bcell'\n", + "\\item 'CD14Mono'\n", + "\\item 'CD16Mono'\n", + "\\item 'CD4CTL'\n", + "\\item 'CD4Naive'\n", + "\\item 'CD4TCM'\n", + "\\item 'CD4TEM'\n", + "\\item 'CD8Naive'\n", + "\\item 'CD8TEM'\n", + "\\item 'cDC2'\n", + "\\item 'gdT'\n", + "\\item 'NK'\n", + "\\item 'Treg'\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 'clinical_data'\n", + "2. 'cytokine'\n", + "3. 'neutrophil'\n", + "4. 'proteomics'\n", + "5. 'Bcell'\n", + "6. 'CD14Mono'\n", + "7. 'CD16Mono'\n", + "8. 'CD4CTL'\n", + "9. 'CD4Naive'\n", + "10. 'CD4TCM'\n", + "11. 'CD4TEM'\n", + "12. 'CD8Naive'\n", + "13. 'CD8TEM'\n", + "14. 'cDC2'\n", + "15. 'gdT'\n", + "16. 'NK'\n", + "17. 'Treg'\n", + "\n", + "\n" + ], + "text/plain": [ + " [1] \"clinical_data\" \"cytokine\" \"neutrophil\" \"proteomics\" \n", + " [5] \"Bcell\" \"CD14Mono\" \"CD16Mono\" \"CD4CTL\" \n", + " [9] \"CD4Naive\" \"CD4TCM\" \"CD4TEM\" \"CD8Naive\" \n", + "[13] \"CD8TEM\" \"cDC2\" \"gdT\" \"NK\" \n", + "[17] \"Treg\" " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "head(feature_data_mu,2)\n", + "unique(feature_data_mu$type)\n", + "feature_data_mu = feature_data_mu[\n", + " !feature_data_mu$type %in% c('clinical_data', 'cytokine', 'neutrophil', 'proteomics'),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 527, + "id": "696f5865-ddc3-4f05-a9fe-d055f981300b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "7694" + ], + "text/latex": [ + "7694" + ], + "text/markdown": [ + "7694" + ], + "text/plain": [ + "[1] 7694" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "length(unique(feature_data_mu$variable_name))" + ] + }, + { + "cell_type": "code", + "execution_count": 528, + "id": "72283372-7aa9-48f3-9335-192a6d508af7", + "metadata": {}, + 
"outputs": [], + "source": [ + "## Adjust feature names to align with Groningen Mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 529, + "id": "b621bd4a-a38d-4fc1-a6a9-0fd505111f39", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
  1. 'B.cell'
  2. 'CD14.Mono'
  3. 'CD16.Mono'
  4. 'CD4.CTL'
  5. 'CD4.Naive'
  6. 'CD4.TCM'
  7. 'CD4.TEM'
  8. 'CD8.Naive'
  9. 'CD8.TEM'
  10. 'cDC2'
  11. 'gdT'
  12. 'NK'
  13. 'Treg'
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 'B.cell'\n", + "\\item 'CD14.Mono'\n", + "\\item 'CD16.Mono'\n", + "\\item 'CD4.CTL'\n", + "\\item 'CD4.Naive'\n", + "\\item 'CD4.TCM'\n", + "\\item 'CD4.TEM'\n", + "\\item 'CD8.Naive'\n", + "\\item 'CD8.TEM'\n", + "\\item 'cDC2'\n", + "\\item 'gdT'\n", + "\\item 'NK'\n", + "\\item 'Treg'\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 'B.cell'\n", + "2. 'CD14.Mono'\n", + "3. 'CD16.Mono'\n", + "4. 'CD4.CTL'\n", + "5. 'CD4.Naive'\n", + "6. 'CD4.TCM'\n", + "7. 'CD4.TEM'\n", + "8. 'CD8.Naive'\n", + "9. 'CD8.TEM'\n", + "10. 'cDC2'\n", + "11. 'gdT'\n", + "12. 'NK'\n", + "13. 'Treg'\n", + "\n", + "\n" + ], + "text/plain": [ + " [1] \"B.cell\" \"CD14.Mono\" \"CD16.Mono\" \"CD4.CTL\" \"CD4.Naive\" \"CD4.TCM\" \n", + " [7] \"CD4.TEM\" \"CD8.Naive\" \"CD8.TEM\" \"cDC2\" \"gdT\" \"NK\" \n", + "[13] \"Treg\" " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "
A data.frame: 2 × 24
Factor1Factor2Factor3Factor4Factor5Factor6Factor7Factor8Factor9Factor10Factor15Factor16Factor17Factor18Factor19Factor20typevariable_namecell_typefeature
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr><chr><chr><chr>
1452-0.17926582 0.239352130.1632959-0.047585950.13904403-0.069057551.10853020.0121312190.3152998 0.04606748-0.01115103 0.054059598-0.2188138-0.1313958-0.03007437 0.1566817BcellB.cell__ACTB B.cellB.cell__ACTB
1453-0.04518202-0.019961260.1115015-0.012374200.07304104-0.071862890.66820530.0029942290.1844658-0.03832849-0.02316688-0.006085256-0.5234638-0.3893344-0.17256498-0.4521462BcellB.cell__ACTG1B.cellB.cell__ACTG1
\n" + ], + "text/latex": [ + "A data.frame: 2 × 24\n", + "\\begin{tabular}{r|lllllllllllllllllllll}\n", + " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & ⋯ & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20 & type & variable\\_name & cell\\_type & feature\\\\\n", + " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t1452 & -0.17926582 & 0.23935213 & 0.1632959 & -0.04758595 & 0.13904403 & -0.06905755 & 1.1085302 & 0.012131219 & 0.3152998 & 0.04606748 & ⋯ & -0.01115103 & 0.054059598 & -0.2188138 & -0.1313958 & -0.03007437 & 0.1566817 & Bcell & B.cell\\_\\_ACTB & B.cell & B.cell\\_\\_ACTB \\\\\n", + "\t1453 & -0.04518202 & -0.01996126 & 0.1115015 & -0.01237420 & 0.07304104 & -0.07186289 & 0.6682053 & 0.002994229 & 0.1844658 & -0.03832849 & ⋯ & -0.02316688 & -0.006085256 & -0.5234638 & -0.3893344 & -0.17256498 & -0.4521462 & Bcell & B.cell\\_\\_ACTG1 & B.cell & B.cell\\_\\_ACTG1\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 2 × 24\n", + "\n", + "| | Factor1 <dbl> | Factor2 <dbl> | Factor3 <dbl> | Factor4 <dbl> | Factor5 <dbl> | Factor6 <dbl> | Factor7 <dbl> | Factor8 <dbl> | Factor9 <dbl> | Factor10 <dbl> | ⋯ ⋯ | Factor15 <dbl> | Factor16 <dbl> | Factor17 <dbl> | Factor18 <dbl> | Factor19 <dbl> | Factor20 <dbl> | type <chr> | variable_name <chr> | cell_type <chr> | feature <chr> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| 1452 | -0.17926582 | 0.23935213 | 0.1632959 | -0.04758595 | 0.13904403 | -0.06905755 | 1.1085302 | 0.012131219 | 0.3152998 | 0.04606748 | ⋯ | -0.01115103 | 0.054059598 | -0.2188138 | -0.1313958 | -0.03007437 | 0.1566817 | Bcell | B.cell__ACTB | B.cell | B.cell__ACTB |\n", + "| 1453 | -0.04518202 | -0.01996126 | 0.1115015 | -0.01237420 | 0.07304104 | -0.07186289 | 0.6682053 | 0.002994229 | 0.1844658 | -0.03832849 | ⋯ | -0.02316688 | -0.006085256 | -0.5234638 | 
-0.3893344 | -0.17256498 | -0.4521462 | Bcell | B.cell__ACTG1 | B.cell | B.cell__ACTG1 |\n", + "\n" + ], + "text/plain": [ + " Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 \n", + "1452 -0.17926582 0.23935213 0.1632959 -0.04758595 0.13904403 -0.06905755\n", + "1453 -0.04518202 -0.01996126 0.1115015 -0.01237420 0.07304104 -0.07186289\n", + " Factor7 Factor8 Factor9 Factor10 ⋯ Factor15 Factor16 \n", + "1452 1.1085302 0.012131219 0.3152998 0.04606748 ⋯ -0.01115103 0.054059598\n", + "1453 0.6682053 0.002994229 0.1844658 -0.03832849 ⋯ -0.02316688 -0.006085256\n", + " Factor17 Factor18 Factor19 Factor20 type variable_name cell_type\n", + "1452 -0.2188138 -0.1313958 -0.03007437 0.1566817 Bcell B.cell__ACTB B.cell \n", + "1453 -0.5234638 -0.3893344 -0.17256498 -0.4521462 Bcell B.cell__ACTG1 B.cell \n", + " feature \n", + "1452 B.cell__ACTB \n", + "1453 B.cell__ACTG1" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "7694" + ], + "text/latex": [ + "7694" + ], + "text/markdown": [ + "7694" + ], + "text/plain": [ + "[1] 7694" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#### Azimuth\n", + "feature_data_mu$cell_type = str_replace(feature_data_mu$variable_name,\n", + " '__.*', '')\n", + "\n", + "unique(feature_data_mu$cell_type)\n", + "\n", + "feature_data_mu$feature = feature_data_mu$variable_name\n", + "\n", + "feature_data_mu$feature = str_replace(\n", + " feature_data_mu$feature, \"CD14.Mono\", 'cMono')\n", + "\n", + "\n", + "feature_data_mu$feature = str_replace(\n", + " feature_data_mu$feature, \"Bcell\", 'B.cell')\n", + "\n", + "feature_data_mu$feature = str_replace(\n", + " feature_data_mu$feature, \"CD16.Mono\", 'ncMono')\n", + "\n", + "feature_data_mu$feature = str_replace(\n", + " feature_data_mu$feature, \"CD4.TCM\", 'CD4_TCM')\n", + "\n", + "feature_data_mu$feature = str_replace(\n", + " feature_data_mu$feature, \"cDC2\", 'cDC2')\n", + "\n", + "feature_data_mu$feature = 
str_replace(\n", + " feature_data_mu$feature, \"^NK__\", 'NKdim__') # anchored on the cell-type prefix so gene symbols that contain NK (such as NKG7) in other cell types are left untouched\n", + "\n", + "\n", + "\n", + "\n", + "head(feature_data_mu,2)\n", + "length(unique(feature_data_mu$feature))" + ] + }, + { + "cell_type": "code", + "execution_count": 530, + "id": "2f48f0a6-4b43-4016-b6f2-8e1278403689", + "metadata": {}, + "outputs": [], + "source": [ + "### Compare" + ] + }, + { + "cell_type": "code", + "execution_count": 531, + "id": "da55979d-3dea-4570-a5fe-b6110fb34a64", + "metadata": {}, + "outputs": [], + "source": [ + "#sort(unique(str_replace(feature_data_mu$feature, '__.*', ''))) # Munich\n", + "#sort(unique(input_gr$cell_type)) # Groningen" + ] + }, + { + "cell_type": "code", + "execution_count": 532, + "id": "494c2d51-eb8d-436c-9847-628b31128b14", + "metadata": {}, + "outputs": [], + "source": [ + "### Long format of mu feature data" + ] + }, + { + "cell_type": "code", + "execution_count": 533, + "id": "abffbf24-58f8-4232-8f3b-21fdf05be942", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using type, variable_name, cell_type, feature as id variables\n", + "\n" + ] + } + ], + "source": [ + "feature_data_mu = melt(feature_data_mu)" + ] + }, + { + "cell_type": "code", + "execution_count": 534, + "id": "ec80b0bc-a85b-4482-a5b8-bab7a3e7f58e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "
A data.frame: 2 × 6
typevariable_namecell_typefeaturevariablevalue
<chr><chr><chr><chr><fct><dbl>
1BcellB.cell__ACTB B.cellB.cell__ACTB Factor1-0.17926582
2BcellB.cell__ACTG1B.cellB.cell__ACTG1Factor1-0.04518202
\n" + ], + "text/latex": [ + "A data.frame: 2 × 6\n", + "\\begin{tabular}{r|llllll}\n", + " & type & variable\\_name & cell\\_type & feature & variable & value\\\\\n", + " & & & & & & \\\\\n", + "\\hline\n", + "\t1 & Bcell & B.cell\\_\\_ACTB & B.cell & B.cell\\_\\_ACTB & Factor1 & -0.17926582\\\\\n", + "\t2 & Bcell & B.cell\\_\\_ACTG1 & B.cell & B.cell\\_\\_ACTG1 & Factor1 & -0.04518202\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 2 × 6\n", + "\n", + "| | type <chr> | variable_name <chr> | cell_type <chr> | feature <chr> | variable <fct> | value <dbl> |\n", + "|---|---|---|---|---|---|---|\n", + "| 1 | Bcell | B.cell__ACTB | B.cell | B.cell__ACTB | Factor1 | -0.17926582 |\n", + "| 2 | Bcell | B.cell__ACTG1 | B.cell | B.cell__ACTG1 | Factor1 | -0.04518202 |\n", + "\n" + ], + "text/plain": [ + " type variable_name cell_type feature variable value \n", + "1 Bcell B.cell__ACTB B.cell B.cell__ACTB Factor1 -0.17926582\n", + "2 Bcell B.cell__ACTG1 B.cell B.cell__ACTG1 Factor1 -0.04518202" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "head(feature_data_mu,2)" + ] + }, + { + "cell_type": "code", + "execution_count": 535, + "id": "4bfcb098-18c1-453d-a0c4-51e86e323905", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter MU feature weights for features also in GR data" + ] + }, + { + "cell_type": "code", + "execution_count": 536, + "id": "32765cc2-8611-4817-bdb3-68ce5b961ba4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "
A data.frame: 2 × 6
typevariable_namecell_typefeaturevariablevalue
<chr><chr><chr><chr><fct><dbl>
1BcellB.cell__ACTB B.cellB.cell__ACTB Factor1-0.1792658
24BcellB.cell__ARHGDIBB.cellB.cell__ARHGDIBFactor1 0.3273940
\n" + ], + "text/latex": [ + "A data.frame: 2 × 6\n", + "\\begin{tabular}{r|llllll}\n", + " & type & variable\\_name & cell\\_type & feature & variable & value\\\\\n", + " & & & & & & \\\\\n", + "\\hline\n", + "\t1 & Bcell & B.cell\\_\\_ACTB & B.cell & B.cell\\_\\_ACTB & Factor1 & -0.1792658\\\\\n", + "\t24 & Bcell & B.cell\\_\\_ARHGDIB & B.cell & B.cell\\_\\_ARHGDIB & Factor1 & 0.3273940\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 2 × 6\n", + "\n", + "| | type <chr> | variable_name <chr> | cell_type <chr> | feature <chr> | variable <fct> | value <dbl> |\n", + "|---|---|---|---|---|---|---|\n", + "| 1 | Bcell | B.cell__ACTB | B.cell | B.cell__ACTB | Factor1 | -0.1792658 |\n", + "| 24 | Bcell | B.cell__ARHGDIB | B.cell | B.cell__ARHGDIB | Factor1 | 0.3273940 |\n", + "\n" + ], + "text/plain": [ + " type variable_name cell_type feature variable value \n", + "1 Bcell B.cell__ACTB B.cell B.cell__ACTB Factor1 -0.1792658\n", + "24 Bcell B.cell__ARHGDIB B.cell B.cell__ARHGDIB Factor1 0.3273940" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "3230" + ], + "text/latex": [ + "3230" + ], + "text/markdown": [ + "3230" + ], + "text/plain": [ + "[1] 3230" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "feature_data_mu = feature_data_mu[feature_data_mu$feature %in% unique(input_gr$feature),]\n", + "head(feature_data_mu,2)\n", + "length(unique(feature_data_mu$feature))" + ] + }, + { + "cell_type": "code", + "execution_count": 537, + "id": "d6c05068-71a9-4abf-9918-52aa21030551", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
  1. 'B.cell'
  2. 'CD4_TCM'
  3. 'cDC2'
  4. 'cMono'
  5. 'ncMono'
  6. 'NKdim'
  7. 'Treg'
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 'B.cell'\n", + "\\item 'CD4\\_TCM'\n", + "\\item 'cDC2'\n", + "\\item 'cMono'\n", + "\\item 'ncMono'\n", + "\\item 'NKdim'\n", + "\\item 'Treg'\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 'B.cell'\n", + "2. 'CD4_TCM'\n", + "3. 'cDC2'\n", + "4. 'cMono'\n", + "5. 'ncMono'\n", + "6. 'NKdim'\n", + "7. 'Treg'\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] \"B.cell\" \"CD4_TCM\" \"cDC2\" \"cMono\" \"ncMono\" \"NKdim\" \"Treg\" " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sort(unique(str_replace(feature_data_mu$feature, '__.*', ''))) # check mapped cell-types" + ] + }, + { + "cell_type": "code", + "execution_count": 538, + "id": "e51e4854-d4ec-4589-91d2-1db3e80fa546", + "metadata": {}, + "outputs": [], + "source": [ + "feature_data_mu = feature_data_mu[feature_data_mu$cell_type != 'Treg',] # exclude not so well mapped cell-type" + ] + }, + { + "cell_type": "code", + "execution_count": 539, + "id": "33a617a4-c3a1-4798-bae5-79924cb40fec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "3148" + ], + "text/latex": [ + "3148" + ], + "text/markdown": [ + "3148" + ], + "text/plain": [ + "[1] 3148" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "length(unique(feature_data_mu$feature))" + ] + }, + { + "cell_type": "code", + "execution_count": 540, + "id": "a755b487-d640-46b3-9c69-64034ffd33d6", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter GR input features for features also in MU data" + ] + }, + { + "cell_type": "code", + "execution_count": 541, + "id": "92e5d5d6-4daa-4525-907e-bb0a328fecc4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "6353" + ], + "text/latex": [ + "6353" + ], + "text/markdown": [ + "6353" + ], + "text/plain": [ + "[1] 6353" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#head(input_gr)\n", + 
"length(unique(input_gr$feature))\n", + "input_gr = input_gr[input_gr$feature %in% unique(feature_data_mu$feature), ]\n", + "input_vis = input_gr\n", + "input_gr = input_gr %>% dcast(sample_id ~ feature, value.var = 'value')" + ] + }, + { + "cell_type": "code", + "execution_count": 542, + "id": "28410730-af80-43e5-ae10-41f92ee080b2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A data.frame: 6 × 3149
sample_idB.cell__ACTBB.cell__ARHGDIBB.cell__ARPC2B.cell__ARPC3B.cell__B2MB.cell__BIRC3B.cell__BTF3B.cell__BTG1B.cell__BTG2NKdim__YPEL5NKdim__YWHABNKdim__YWHAQNKdim__YWHAZNKdim__ZC3HAV1NKdim__ZEB2NKdim__ZFAS1NKdim__ZFP36NKdim__ZFP36L1NKdim__ZFP36L2
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
1Baseline.TEST_1 -0.1451209-0.2808230 0.2933812-1.3037827 0.35691713-0.29338123-0.5720211 1.6637928-1.3037827 1.6637928 0.70506589 0.4752408 0.9234567 0.6744898-0.02410453-1.0405662-0.94207577-0.80109453-0.7050659
2Baseline.TEST_12 0.2310092-0.0602920 0.3697907-0.3697907-0.29338123-0.08445798 0.4752408 0.1451209 0.1573107-0.6744898-0.09655862 0.8178173-0.3827258-0.7363159-0.44842548-1.8980287-0.07236971 1.36238339 0.8178173
3Baseline.TEST_14-0.2933812 0.1329527 1.6637928-0.6151411 0.96102726 1.53412054 1.3935235 1.0405662-1.0829150 1.7688250 0.34410246 0.7363159-0.1817624-1.5341205-0.61514110-0.1451209-1.06150263-1.76882504 0.5299395
4Baseline.TEST_15-1.0615026-0.4087920-0.7206029-0.2683089-0.08445798-0.38272581-1.2493462-1.0405662 1.1740260 0.6744898-0.58627265 0.6445316 0.6297684-0.3313441 0.60064430 0.2683089-1.15034938-0.04822307-0.8694238
5Baseline.TEST_17-1.1503494-1.4602158-0.5299395-0.9420758 0.78459288 0.52993955 1.5744450-0.4219289 1.0829150 0.7363159-1.19837970-0.1817624-1.1503494 0.1086734-0.39572530-1.3037827 0.20632319-1.66379279 1.4602158
6Baseline.TEST_18 1.1273007 0.6297684 2.3410271 0.2063232 1.61750540 0.68969722-1.5341205 0.4617916-1.5744450-0.3827258-0.73631592-0.3186394 1.3037827 0.9234567 1.49614688-1.2493462-0.73631592-1.30378267 1.4602158
\n" + ], + "text/latex": [ + "A data.frame: 6 × 3149\n", + "\\begin{tabular}{r|lllllllllllllllllllll}\n", + " & sample\\_id & B.cell\\_\\_ACTB & B.cell\\_\\_ARHGDIB & B.cell\\_\\_ARPC2 & B.cell\\_\\_ARPC3 & B.cell\\_\\_B2M & B.cell\\_\\_BIRC3 & B.cell\\_\\_BTF3 & B.cell\\_\\_BTG1 & B.cell\\_\\_BTG2 & ⋯ & NKdim\\_\\_YPEL5 & NKdim\\_\\_YWHAB & NKdim\\_\\_YWHAQ & NKdim\\_\\_YWHAZ & NKdim\\_\\_ZC3HAV1 & NKdim\\_\\_ZEB2 & NKdim\\_\\_ZFAS1 & NKdim\\_\\_ZFP36 & NKdim\\_\\_ZFP36L1 & NKdim\\_\\_ZFP36L2\\\\\n", + " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n", + "\\hline\n", + "\t1 & Baseline.TEST\\_1 & -0.1451209 & -0.2808230 & 0.2933812 & -1.3037827 & 0.35691713 & -0.29338123 & -0.5720211 & 1.6637928 & -1.3037827 & ⋯ & 1.6637928 & 0.70506589 & 0.4752408 & 0.9234567 & 0.6744898 & -0.02410453 & -1.0405662 & -0.94207577 & -0.80109453 & -0.7050659\\\\\n", + "\t2 & Baseline.TEST\\_12 & 0.2310092 & -0.0602920 & 0.3697907 & -0.3697907 & -0.29338123 & -0.08445798 & 0.4752408 & 0.1451209 & 0.1573107 & ⋯ & -0.6744898 & -0.09655862 & 0.8178173 & -0.3827258 & -0.7363159 & -0.44842548 & -1.8980287 & -0.07236971 & 1.36238339 & 0.8178173\\\\\n", + "\t3 & Baseline.TEST\\_14 & -0.2933812 & 0.1329527 & 1.6637928 & -0.6151411 & 0.96102726 & 1.53412054 & 1.3935235 & 1.0405662 & -1.0829150 & ⋯ & 1.7688250 & 0.34410246 & 0.7363159 & -0.1817624 & -1.5341205 & -0.61514110 & -0.1451209 & -1.06150263 & -1.76882504 & 0.5299395\\\\\n", + "\t4 & Baseline.TEST\\_15 & -1.0615026 & -0.4087920 & -0.7206029 & -0.2683089 & -0.08445798 & -0.38272581 & -1.2493462 & -1.0405662 & 1.1740260 & ⋯ & 0.6744898 & -0.58627265 & 0.6445316 & 0.6297684 & -0.3313441 & 0.60064430 & 0.2683089 & -1.15034938 & -0.04822307 & -0.8694238\\\\\n", + "\t5 & Baseline.TEST\\_17 & -1.1503494 & -1.4602158 & -0.5299395 & -0.9420758 & 0.78459288 & 0.52993955 & 1.5744450 & -0.4219289 & 1.0829150 & ⋯ & 0.7363159 & -1.19837970 & -0.1817624 & -1.1503494 & 0.1086734 & -0.39572530 & -1.3037827 & 0.20632319 & -1.66379279 & 
1.4602158\\\\\n", + "\t6 & Baseline.TEST\\_18 & 1.1273007 & 0.6297684 & 2.3410271 & 0.2063232 & 1.61750540 & 0.68969722 & -1.5341205 & 0.4617916 & -1.5744450 & ⋯ & -0.3827258 & -0.73631592 & -0.3186394 & 1.3037827 & 0.9234567 & 1.49614688 & -1.2493462 & -0.73631592 & -1.30378267 & 1.4602158\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 6 × 3149\n", + "\n", + "| | sample_id <chr> | B.cell__ACTB <dbl> | B.cell__ARHGDIB <dbl> | B.cell__ARPC2 <dbl> | B.cell__ARPC3 <dbl> | B.cell__B2M <dbl> | B.cell__BIRC3 <dbl> | B.cell__BTF3 <dbl> | B.cell__BTG1 <dbl> | B.cell__BTG2 <dbl> | ⋯ ⋯ | NKdim__YPEL5 <dbl> | NKdim__YWHAB <dbl> | NKdim__YWHAQ <dbl> | NKdim__YWHAZ <dbl> | NKdim__ZC3HAV1 <dbl> | NKdim__ZEB2 <dbl> | NKdim__ZFAS1 <dbl> | NKdim__ZFP36 <dbl> | NKdim__ZFP36L1 <dbl> | NKdim__ZFP36L2 <dbl> |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| 1 | Baseline.TEST_1 | -0.1451209 | -0.2808230 | 0.2933812 | -1.3037827 | 0.35691713 | -0.29338123 | -0.5720211 | 1.6637928 | -1.3037827 | ⋯ | 1.6637928 | 0.70506589 | 0.4752408 | 0.9234567 | 0.6744898 | -0.02410453 | -1.0405662 | -0.94207577 | -0.80109453 | -0.7050659 |\n", + "| 2 | Baseline.TEST_12 | 0.2310092 | -0.0602920 | 0.3697907 | -0.3697907 | -0.29338123 | -0.08445798 | 0.4752408 | 0.1451209 | 0.1573107 | ⋯ | -0.6744898 | -0.09655862 | 0.8178173 | -0.3827258 | -0.7363159 | -0.44842548 | -1.8980287 | -0.07236971 | 1.36238339 | 0.8178173 |\n", + "| 3 | Baseline.TEST_14 | -0.2933812 | 0.1329527 | 1.6637928 | -0.6151411 | 0.96102726 | 1.53412054 | 1.3935235 | 1.0405662 | -1.0829150 | ⋯ | 1.7688250 | 0.34410246 | 0.7363159 | -0.1817624 | -1.5341205 | -0.61514110 | -0.1451209 | -1.06150263 | -1.76882504 | 0.5299395 |\n", + "| 4 | Baseline.TEST_15 | -1.0615026 | -0.4087920 | -0.7206029 | -0.2683089 | -0.08445798 | -0.38272581 | -1.2493462 | -1.0405662 | 1.1740260 | ⋯ | 0.6744898 | -0.58627265 | 0.6445316 | 0.6297684 | -0.3313441 | 0.60064430 | 
0.2683089 | -1.15034938 | -0.04822307 | -0.8694238 |\n", + "| 5 | Baseline.TEST_17 | -1.1503494 | -1.4602158 | -0.5299395 | -0.9420758 | 0.78459288 | 0.52993955 | 1.5744450 | -0.4219289 | 1.0829150 | ⋯ | 0.7363159 | -1.19837970 | -0.1817624 | -1.1503494 | 0.1086734 | -0.39572530 | -1.3037827 | 0.20632319 | -1.66379279 | 1.4602158 |\n", + "| 6 | Baseline.TEST_18 | 1.1273007 | 0.6297684 | 2.3410271 | 0.2063232 | 1.61750540 | 0.68969722 | -1.5341205 | 0.4617916 | -1.5744450 | ⋯ | -0.3827258 | -0.73631592 | -0.3186394 | 1.3037827 | 0.9234567 | 1.49614688 | -1.2493462 | -0.73631592 | -1.30378267 | 1.4602158 |\n", + "\n" + ], + "text/plain": [ + " sample_id B.cell__ACTB B.cell__ARHGDIB B.cell__ARPC2 B.cell__ARPC3\n", + "1 Baseline.TEST_1 -0.1451209 -0.2808230 0.2933812 -1.3037827 \n", + "2 Baseline.TEST_12 0.2310092 -0.0602920 0.3697907 -0.3697907 \n", + "3 Baseline.TEST_14 -0.2933812 0.1329527 1.6637928 -0.6151411 \n", + "4 Baseline.TEST_15 -1.0615026 -0.4087920 -0.7206029 -0.2683089 \n", + "5 Baseline.TEST_17 -1.1503494 -1.4602158 -0.5299395 -0.9420758 \n", + "6 Baseline.TEST_18 1.1273007 0.6297684 2.3410271 0.2063232 \n", + " B.cell__B2M B.cell__BIRC3 B.cell__BTF3 B.cell__BTG1 B.cell__BTG2 ⋯\n", + "1 0.35691713 -0.29338123 -0.5720211 1.6637928 -1.3037827 ⋯\n", + "2 -0.29338123 -0.08445798 0.4752408 0.1451209 0.1573107 ⋯\n", + "3 0.96102726 1.53412054 1.3935235 1.0405662 -1.0829150 ⋯\n", + "4 -0.08445798 -0.38272581 -1.2493462 -1.0405662 1.1740260 ⋯\n", + "5 0.78459288 0.52993955 1.5744450 -0.4219289 1.0829150 ⋯\n", + "6 1.61750540 0.68969722 -1.5341205 0.4617916 -1.5744450 ⋯\n", + " NKdim__YPEL5 NKdim__YWHAB NKdim__YWHAQ NKdim__YWHAZ NKdim__ZC3HAV1\n", + "1 1.6637928 0.70506589 0.4752408 0.9234567 0.6744898 \n", + "2 -0.6744898 -0.09655862 0.8178173 -0.3827258 -0.7363159 \n", + "3 1.7688250 0.34410246 0.7363159 -0.1817624 -1.5341205 \n", + "4 0.6744898 -0.58627265 0.6445316 0.6297684 -0.3313441 \n", + "5 0.7363159 -1.19837970 -0.1817624 -1.1503494 0.1086734 \n", + "6 
-0.3827258 -0.73631592 -0.3186394 1.3037827 0.9234567 \n", + " NKdim__ZEB2 NKdim__ZFAS1 NKdim__ZFP36 NKdim__ZFP36L1 NKdim__ZFP36L2\n", + "1 -0.02410453 -1.0405662 -0.94207577 -0.80109453 -0.7050659 \n", + "2 -0.44842548 -1.8980287 -0.07236971 1.36238339 0.8178173 \n", + "3 -0.61514110 -0.1451209 -1.06150263 -1.76882504 0.5299395 \n", + "4 0.60064430 0.2683089 -1.15034938 -0.04822307 -0.8694238 \n", + "5 -0.39572530 -1.3037827 0.20632319 -1.66379279 1.4602158 \n", + "6 1.49614688 -1.2493462 -0.73631592 -1.30378267 1.4602158 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
  1. 103
  2. 3149
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 103\n", + "\\item 3149\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 103\n", + "2. 3149\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 103 3149" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "head(input_gr)\n", + "dim(input_gr)" + ] + }, + { + "cell_type": "markdown", + "id": "d217dad6-5687-4d43-bb37-593c9d62fded", + "metadata": {}, + "source": [ + "## Prepare matrices" + ] + }, + { + "cell_type": "code", + "execution_count": 543, + "id": "a8702239-2b63-45ac-8962-d5b853544d13", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
  1. 103
  2. 3148
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 103\n", + "\\item 3148\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 103\n", + "2. 3148\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 103 3148" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "### Get matrices for matrix factorization approach\n", + "\n", + "## Groningen Matrix\n", + "#head(input_gr)\n", + "gr_ma = input_gr\n", + "rownames(gr_ma) = input_gr$sample_id\n", + "gr_ma$sample_id = NULL\n", + "gr_ma = as.matrix(gr_ma)\n", + "dim(gr_ma)\n", + "\n", + "## Munich Matrix\n", + "#head(feature_data_mu)\n", + "mu_ma = feature_data_mu %>% dcast(feature ~variable, value.var = 'value')\n", + "rownames(mu_ma) = mu_ma$feature\n", + "mu_ma$feature = NULL\n", + "mu_ma = as.matrix(mu_ma)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 544, + "id": "ff3bbc8b-c6ae-4f28-8607-594741a1eec4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
  1. 103
  2. 3148
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 103\n", + "\\item 3148\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 103\n", + "2. 3148\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 103 3148" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
  1. 3148
  2. 20
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 3148\n", + "\\item 20\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 3148\n", + "2. 20\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 3148 20" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "dim(gr_ma)\n", + "dim(mu_ma)" + ] + }, + { + "cell_type": "code", + "execution_count": 545, + "id": "704706c0-e484-41c6-99dc-5125e0130c35", + "metadata": {}, + "outputs": [], + "source": [ + "### Reorder to have same ordering of features" + ] + }, + { + "cell_type": "code", + "execution_count": 546, + "id": "02cdfc6e-ee58-4add-9212-eb6ef66731cd", + "metadata": {}, + "outputs": [], + "source": [ + "gr_ma = gr_ma[, order(colnames(gr_ma))]\n", + "mu_ma = mu_ma[order(rownames(mu_ma)),]\n", + "# fail early if the two matrices do not list identical features in identical order\n", + "stopifnot(identical(colnames(gr_ma), rownames(mu_ma)))" + ] + }, + { + "cell_type": "markdown", + "id": "07330aac-0598-4c8c-b92d-1db68a28f8ed", + "metadata": {}, + "source": [ + "## Calculate right inverse of MU feature matrix and apply" + ] + }, + { + "cell_type": "code", + "execution_count": 547, + "id": "94497574-ce99-4a04-8b2b-4a81d60fb860", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
  1. 3148
  2. 20
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 3148\n", + "\\item 20\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 3148\n", + "2. 20\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 3148 20" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
  1. 20
  2. 3148
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 20\n", + "\\item 3148\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 20\n", + "2. 3148\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 20 3148" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A matrix: 6 × 20 of type dbl
Factor1Factor2Factor3Factor4Factor5Factor6Factor7Factor8Factor9Factor10Factor11Factor12Factor13Factor14Factor15Factor16Factor17Factor18Factor19Factor20
B.cell__ACTB-0.002412865 1.100829e-030.0018519132-9.387865e-05 0.0197072832-1.171368e-03 0.004082177 3.549377e-04 0.0012770596-0.0004148242 0.017505496 8.850074e-04-5.435629e-04 9.115598e-04-1.148509e-05 2.181989e-04-1.067792e-03-0.0003216798-0.0005795758 0.0007411074
B.cell__ARHGDIB 0.002896903-3.862585e-040.0011406868 1.345452e-03 0.0149805072 1.937177e-04 0.001627769-2.946817e-04 0.0001059237 0.0006040866-0.014150740 2.117682e-04-2.951694e-04-5.412122e-04-1.305719e-04-5.945591e-05-2.291039e-03 0.0001882649-0.0002861554 0.0002756893
B.cell__ARPC2-0.001114492-6.099780e-050.0003215208 4.180816e-04 0.0209911475-3.725421e-04 0.003521452 2.003061e-04 0.0013498674-0.0020104846 0.018957908-6.100954e-04-6.614388e-05 2.495120e-04-2.905480e-04 1.102433e-03-2.069296e-03-0.0005396076 0.0004881568-0.0004772543
B.cell__ARPC3 0.001612616-1.623649e-050.0008018460 3.144724e-04 0.0432317206-2.770284e-03 0.002947859-2.522687e-04-0.0009838727-0.0028467784-0.022270218 2.108312e-05 7.876929e-04 2.812663e-03-3.912311e-04 2.077665e-03 7.316946e-05 0.0002948308 0.0005419943-0.0002239274
B.cell__B2M-0.001969751 9.628387e-040.0012608342 5.635957e-05-0.0039711670-4.673417e-04 0.003398598 5.528947e-05 0.0030406779-0.0009352855 0.006424656 1.930557e-03-1.285816e-04 6.238179e-05 1.034847e-04 9.638252e-05-2.213990e-03-0.0004096560 0.0010927847 0.0014692351
B.cell__BIRC3 0.001618585-5.811380e-050.0007165601-1.982736e-03 0.0008224573-9.228915e-05-0.002526311-1.446628e-04-0.0032944132 0.0005858396-0.042494057 1.831863e-03 3.520557e-04-9.515494e-04 1.258441e-03-4.166334e-04-4.329759e-03 0.0009212626 0.0013008816 0.0012679802
\n" + ], + "text/latex": [ + "A matrix: 6 × 20 of type dbl\n", + "\\begin{tabular}{r|llllllllllllllllllll}\n", + " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & Factor11 & Factor12 & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20\\\\\n", + "\\hline\n", + "\tB.cell\\_\\_ACTB & -0.002412865 & 1.100829e-03 & 0.0018519132 & -9.387865e-05 & 0.0197072832 & -1.171368e-03 & 0.004082177 & 3.549377e-04 & 0.0012770596 & -0.0004148242 & 0.017505496 & 8.850074e-04 & -5.435629e-04 & 9.115598e-04 & -1.148509e-05 & 2.181989e-04 & -1.067792e-03 & -0.0003216798 & -0.0005795758 & 0.0007411074\\\\\n", + "\tB.cell\\_\\_ARHGDIB & 0.002896903 & -3.862585e-04 & 0.0011406868 & 1.345452e-03 & 0.0149805072 & 1.937177e-04 & 0.001627769 & -2.946817e-04 & 0.0001059237 & 0.0006040866 & -0.014150740 & 2.117682e-04 & -2.951694e-04 & -5.412122e-04 & -1.305719e-04 & -5.945591e-05 & -2.291039e-03 & 0.0001882649 & -0.0002861554 & 0.0002756893\\\\\n", + "\tB.cell\\_\\_ARPC2 & -0.001114492 & -6.099780e-05 & 0.0003215208 & 4.180816e-04 & 0.0209911475 & -3.725421e-04 & 0.003521452 & 2.003061e-04 & 0.0013498674 & -0.0020104846 & 0.018957908 & -6.100954e-04 & -6.614388e-05 & 2.495120e-04 & -2.905480e-04 & 1.102433e-03 & -2.069296e-03 & -0.0005396076 & 0.0004881568 & -0.0004772543\\\\\n", + "\tB.cell\\_\\_ARPC3 & 0.001612616 & -1.623649e-05 & 0.0008018460 & 3.144724e-04 & 0.0432317206 & -2.770284e-03 & 0.002947859 & -2.522687e-04 & -0.0009838727 & -0.0028467784 & -0.022270218 & 2.108312e-05 & 7.876929e-04 & 2.812663e-03 & -3.912311e-04 & 2.077665e-03 & 7.316946e-05 & 0.0002948308 & 0.0005419943 & -0.0002239274\\\\\n", + "\tB.cell\\_\\_B2M & -0.001969751 & 9.628387e-04 & 0.0012608342 & 5.635957e-05 & -0.0039711670 & -4.673417e-04 & 0.003398598 & 5.528947e-05 & 0.0030406779 & -0.0009352855 & 0.006424656 & 1.930557e-03 & -1.285816e-04 & 6.238179e-05 & 1.034847e-04 & 9.638252e-05 & -2.213990e-03 & -0.0004096560 & 
0.0010927847 & 0.0014692351\\\\\n", + "\tB.cell\\_\\_BIRC3 & 0.001618585 & -5.811380e-05 & 0.0007165601 & -1.982736e-03 & 0.0008224573 & -9.228915e-05 & -0.002526311 & -1.446628e-04 & -0.0032944132 & 0.0005858396 & -0.042494057 & 1.831863e-03 & 3.520557e-04 & -9.515494e-04 & 1.258441e-03 & -4.166334e-04 & -4.329759e-03 & 0.0009212626 & 0.0013008816 & 0.0012679802\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A matrix: 6 × 20 of type dbl\n", + "\n", + "| | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | Factor11 | Factor12 | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| B.cell__ACTB | -0.002412865 | 1.100829e-03 | 0.0018519132 | -9.387865e-05 | 0.0197072832 | -1.171368e-03 | 0.004082177 | 3.549377e-04 | 0.0012770596 | -0.0004148242 | 0.017505496 | 8.850074e-04 | -5.435629e-04 | 9.115598e-04 | -1.148509e-05 | 2.181989e-04 | -1.067792e-03 | -0.0003216798 | -0.0005795758 | 0.0007411074 |\n", + "| B.cell__ARHGDIB | 0.002896903 | -3.862585e-04 | 0.0011406868 | 1.345452e-03 | 0.0149805072 | 1.937177e-04 | 0.001627769 | -2.946817e-04 | 0.0001059237 | 0.0006040866 | -0.014150740 | 2.117682e-04 | -2.951694e-04 | -5.412122e-04 | -1.305719e-04 | -5.945591e-05 | -2.291039e-03 | 0.0001882649 | -0.0002861554 | 0.0002756893 |\n", + "| B.cell__ARPC2 | -0.001114492 | -6.099780e-05 | 0.0003215208 | 4.180816e-04 | 0.0209911475 | -3.725421e-04 | 0.003521452 | 2.003061e-04 | 0.0013498674 | -0.0020104846 | 0.018957908 | -6.100954e-04 | -6.614388e-05 | 2.495120e-04 | -2.905480e-04 | 1.102433e-03 | -2.069296e-03 | -0.0005396076 | 0.0004881568 | -0.0004772543 |\n", + "| B.cell__ARPC3 | 0.001612616 | -1.623649e-05 | 0.0008018460 | 3.144724e-04 | 0.0432317206 | -2.770284e-03 | 0.002947859 | -2.522687e-04 | -0.0009838727 | -0.0028467784 | -0.022270218 | 2.108312e-05 | 7.876929e-04 
| 2.812663e-03 | -3.912311e-04 | 2.077665e-03 | 7.316946e-05 | 0.0002948308 | 0.0005419943 | -0.0002239274 |\n", + "| B.cell__B2M | -0.001969751 | 9.628387e-04 | 0.0012608342 | 5.635957e-05 | -0.0039711670 | -4.673417e-04 | 0.003398598 | 5.528947e-05 | 0.0030406779 | -0.0009352855 | 0.006424656 | 1.930557e-03 | -1.285816e-04 | 6.238179e-05 | 1.034847e-04 | 9.638252e-05 | -2.213990e-03 | -0.0004096560 | 0.0010927847 | 0.0014692351 |\n", + "| B.cell__BIRC3 | 0.001618585 | -5.811380e-05 | 0.0007165601 | -1.982736e-03 | 0.0008224573 | -9.228915e-05 | -0.002526311 | -1.446628e-04 | -0.0032944132 | 0.0005858396 | -0.042494057 | 1.831863e-03 | 3.520557e-04 | -9.515494e-04 | 1.258441e-03 | -4.166334e-04 | -4.329759e-03 | 0.0009212626 | 0.0013008816 | 0.0012679802 |\n", + "\n" + ], + "text/plain": [ + " Factor1 Factor2 Factor3 Factor4 \n", + "B.cell__ACTB -0.002412865 1.100829e-03 0.0018519132 -9.387865e-05\n", + "B.cell__ARHGDIB 0.002896903 -3.862585e-04 0.0011406868 1.345452e-03\n", + "B.cell__ARPC2 -0.001114492 -6.099780e-05 0.0003215208 4.180816e-04\n", + "B.cell__ARPC3 0.001612616 -1.623649e-05 0.0008018460 3.144724e-04\n", + "B.cell__B2M -0.001969751 9.628387e-04 0.0012608342 5.635957e-05\n", + "B.cell__BIRC3 0.001618585 -5.811380e-05 0.0007165601 -1.982736e-03\n", + " Factor5 Factor6 Factor7 Factor8 \n", + "B.cell__ACTB 0.0197072832 -1.171368e-03 0.004082177 3.549377e-04\n", + "B.cell__ARHGDIB 0.0149805072 1.937177e-04 0.001627769 -2.946817e-04\n", + "B.cell__ARPC2 0.0209911475 -3.725421e-04 0.003521452 2.003061e-04\n", + "B.cell__ARPC3 0.0432317206 -2.770284e-03 0.002947859 -2.522687e-04\n", + "B.cell__B2M -0.0039711670 -4.673417e-04 0.003398598 5.528947e-05\n", + "B.cell__BIRC3 0.0008224573 -9.228915e-05 -0.002526311 -1.446628e-04\n", + " Factor9 Factor10 Factor11 Factor12 \n", + "B.cell__ACTB 0.0012770596 -0.0004148242 0.017505496 8.850074e-04\n", + "B.cell__ARHGDIB 0.0001059237 0.0006040866 -0.014150740 2.117682e-04\n", + "B.cell__ARPC2 0.0013498674 -0.0020104846 
0.018957908 -6.100954e-04\n", + "B.cell__ARPC3 -0.0009838727 -0.0028467784 -0.022270218 2.108312e-05\n", + "B.cell__B2M 0.0030406779 -0.0009352855 0.006424656 1.930557e-03\n", + "B.cell__BIRC3 -0.0032944132 0.0005858396 -0.042494057 1.831863e-03\n", + " Factor13 Factor14 Factor15 Factor16 \n", + "B.cell__ACTB -5.435629e-04 9.115598e-04 -1.148509e-05 2.181989e-04\n", + "B.cell__ARHGDIB -2.951694e-04 -5.412122e-04 -1.305719e-04 -5.945591e-05\n", + "B.cell__ARPC2 -6.614388e-05 2.495120e-04 -2.905480e-04 1.102433e-03\n", + "B.cell__ARPC3 7.876929e-04 2.812663e-03 -3.912311e-04 2.077665e-03\n", + "B.cell__B2M -1.285816e-04 6.238179e-05 1.034847e-04 9.638252e-05\n", + "B.cell__BIRC3 3.520557e-04 -9.515494e-04 1.258441e-03 -4.166334e-04\n", + " Factor17 Factor18 Factor19 Factor20 \n", + "B.cell__ACTB -1.067792e-03 -0.0003216798 -0.0005795758 0.0007411074\n", + "B.cell__ARHGDIB -2.291039e-03 0.0001882649 -0.0002861554 0.0002756893\n", + "B.cell__ARPC2 -2.069296e-03 -0.0005396076 0.0004881568 -0.0004772543\n", + "B.cell__ARPC3 7.316946e-05 0.0002948308 0.0005419943 -0.0002239274\n", + "B.cell__B2M -2.213990e-03 -0.0004096560 0.0010927847 0.0014692351\n", + "B.cell__BIRC3 -4.329759e-03 0.0009212626 0.0013008816 0.0012679802" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A matrix: 6 × 20 of type dbl
Factor1Factor2Factor3Factor4Factor5Factor6Factor7Factor8Factor9Factor10Factor11Factor12Factor13Factor14Factor15Factor16Factor17Factor18Factor19Factor20
NKdim__ZC3HAV1-4.191599e-03 4.440209e-04 0.0002153844 0.0004540283 0.0002949058-0.00070857760.0004619467 0.0009136763 0.0017003505 0.0002068845 0.029082471-0.0013899221 0.0005879015-0.0017232555 0.0002550067 0.0020538919-0.0001903588-0.001723843-0.0001421164-1.611028e-04
NKdim__ZEB2-4.169701e-06 2.449899e-03 0.0024518547-0.0006276671-0.0006177782 0.00036622610.0002654389-0.0001024299-0.0011893979 0.0029621473 0.008816921 0.0004274647 0.0017044722-0.0002884657-0.0002698381-0.0009618766-0.0013183011 0.001507303 0.0001103782 2.288160e-04
NKdim__ZFAS1-1.824321e-03-2.196386e-05-0.0001309291 0.0002609197 0.0031960898-0.00098088650.0003104403-0.0003755947-0.0001760204-0.0030818508-0.113659437-0.0030505201-0.0005130803-0.0014203910 0.0004811148 0.0003157216-0.0009303368 0.001418756 0.0007723607 2.154406e-04
NKdim__ZFP36-1.884174e-03-1.013808e-04-0.0009070395 0.0004174070 0.0002005844 0.00199328580.0003703909 0.0008108770-0.0016578828-0.0039247834 0.008802220 0.0007937521 0.0021959706-0.0009571731-0.0004312455 0.0007124782 0.0005021595-0.004344592 0.0007678170-2.155689e-04
NKdim__ZFP36L1-2.860396e-03 1.044763e-04 0.0009260348-0.0028899986 0.0022447461 0.00098517760.0015616902 0.0010531281-0.0021344237-0.0053043853-0.004016054 0.0004029870 0.0009022350-0.0020380839 0.0004893038 0.0004537000 0.0002285394-0.000127352-0.0014641171 1.223379e-05
NKdim__ZFP36L2-1.056855e-03-6.163430e-04 0.0018555297 0.0006076941 0.0017680046 0.00078745960.0007943166 0.0003794940 0.0004912557-0.0051923213 0.080864937-0.0008642006 0.0014579452-0.0028402238 0.0004304173 0.0003052706-0.0003380432-0.002692252-0.0017124035-6.965453e-05
\n" + ], + "text/latex": [ + "A matrix: 6 × 20 of type dbl\n", + "\\begin{tabular}{r|llllllllllllllllllll}\n", + " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & Factor11 & Factor12 & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20\\\\\n", + "\\hline\n", + "\tNKdim\\_\\_ZC3HAV1 & -4.191599e-03 & 4.440209e-04 & 0.0002153844 & 0.0004540283 & 0.0002949058 & -0.0007085776 & 0.0004619467 & 0.0009136763 & 0.0017003505 & 0.0002068845 & 0.029082471 & -0.0013899221 & 0.0005879015 & -0.0017232555 & 0.0002550067 & 0.0020538919 & -0.0001903588 & -0.001723843 & -0.0001421164 & -1.611028e-04\\\\\n", + "\tNKdim\\_\\_ZEB2 & -4.169701e-06 & 2.449899e-03 & 0.0024518547 & -0.0006276671 & -0.0006177782 & 0.0003662261 & 0.0002654389 & -0.0001024299 & -0.0011893979 & 0.0029621473 & 0.008816921 & 0.0004274647 & 0.0017044722 & -0.0002884657 & -0.0002698381 & -0.0009618766 & -0.0013183011 & 0.001507303 & 0.0001103782 & 2.288160e-04\\\\\n", + "\tNKdim\\_\\_ZFAS1 & -1.824321e-03 & -2.196386e-05 & -0.0001309291 & 0.0002609197 & 0.0031960898 & -0.0009808865 & 0.0003104403 & -0.0003755947 & -0.0001760204 & -0.0030818508 & -0.113659437 & -0.0030505201 & -0.0005130803 & -0.0014203910 & 0.0004811148 & 0.0003157216 & -0.0009303368 & 0.001418756 & 0.0007723607 & 2.154406e-04\\\\\n", + "\tNKdim\\_\\_ZFP36 & -1.884174e-03 & -1.013808e-04 & -0.0009070395 & 0.0004174070 & 0.0002005844 & 0.0019932858 & 0.0003703909 & 0.0008108770 & -0.0016578828 & -0.0039247834 & 0.008802220 & 0.0007937521 & 0.0021959706 & -0.0009571731 & -0.0004312455 & 0.0007124782 & 0.0005021595 & -0.004344592 & 0.0007678170 & -2.155689e-04\\\\\n", + "\tNKdim\\_\\_ZFP36L1 & -2.860396e-03 & 1.044763e-04 & 0.0009260348 & -0.0028899986 & 0.0022447461 & 0.0009851776 & 0.0015616902 & 0.0010531281 & -0.0021344237 & -0.0053043853 & -0.004016054 & 0.0004029870 & 0.0009022350 & -0.0020380839 & 0.0004893038 & 0.0004537000 & 0.0002285394 & -0.000127352 
& -0.0014641171 & 1.223379e-05\\\\\n", + "\tNKdim\\_\\_ZFP36L2 & -1.056855e-03 & -6.163430e-04 & 0.0018555297 & 0.0006076941 & 0.0017680046 & 0.0007874596 & 0.0007943166 & 0.0003794940 & 0.0004912557 & -0.0051923213 & 0.080864937 & -0.0008642006 & 0.0014579452 & -0.0028402238 & 0.0004304173 & 0.0003052706 & -0.0003380432 & -0.002692252 & -0.0017124035 & -6.965453e-05\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A matrix: 6 × 20 of type dbl\n", + "\n", + "| | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | Factor11 | Factor12 | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| NKdim__ZC3HAV1 | -4.191599e-03 | 4.440209e-04 | 0.0002153844 | 0.0004540283 | 0.0002949058 | -0.0007085776 | 0.0004619467 | 0.0009136763 | 0.0017003505 | 0.0002068845 | 0.029082471 | -0.0013899221 | 0.0005879015 | -0.0017232555 | 0.0002550067 | 0.0020538919 | -0.0001903588 | -0.001723843 | -0.0001421164 | -1.611028e-04 |\n", + "| NKdim__ZEB2 | -4.169701e-06 | 2.449899e-03 | 0.0024518547 | -0.0006276671 | -0.0006177782 | 0.0003662261 | 0.0002654389 | -0.0001024299 | -0.0011893979 | 0.0029621473 | 0.008816921 | 0.0004274647 | 0.0017044722 | -0.0002884657 | -0.0002698381 | -0.0009618766 | -0.0013183011 | 0.001507303 | 0.0001103782 | 2.288160e-04 |\n", + "| NKdim__ZFAS1 | -1.824321e-03 | -2.196386e-05 | -0.0001309291 | 0.0002609197 | 0.0031960898 | -0.0009808865 | 0.0003104403 | -0.0003755947 | -0.0001760204 | -0.0030818508 | -0.113659437 | -0.0030505201 | -0.0005130803 | -0.0014203910 | 0.0004811148 | 0.0003157216 | -0.0009303368 | 0.001418756 | 0.0007723607 | 2.154406e-04 |\n", + "| NKdim__ZFP36 | -1.884174e-03 | -1.013808e-04 | -0.0009070395 | 0.0004174070 | 0.0002005844 | 0.0019932858 | 0.0003703909 | 0.0008108770 | -0.0016578828 | -0.0039247834 | 0.008802220 | 0.0007937521 | 
0.0021959706 | -0.0009571731 | -0.0004312455 | 0.0007124782 | 0.0005021595 | -0.004344592 | 0.0007678170 | -2.155689e-04 |\n", + "| NKdim__ZFP36L1 | -2.860396e-03 | 1.044763e-04 | 0.0009260348 | -0.0028899986 | 0.0022447461 | 0.0009851776 | 0.0015616902 | 0.0010531281 | -0.0021344237 | -0.0053043853 | -0.004016054 | 0.0004029870 | 0.0009022350 | -0.0020380839 | 0.0004893038 | 0.0004537000 | 0.0002285394 | -0.000127352 | -0.0014641171 | 1.223379e-05 |\n", + "| NKdim__ZFP36L2 | -1.056855e-03 | -6.163430e-04 | 0.0018555297 | 0.0006076941 | 0.0017680046 | 0.0007874596 | 0.0007943166 | 0.0003794940 | 0.0004912557 | -0.0051923213 | 0.080864937 | -0.0008642006 | 0.0014579452 | -0.0028402238 | 0.0004304173 | 0.0003052706 | -0.0003380432 | -0.002692252 | -0.0017124035 | -6.965453e-05 |\n", + "\n" + ], + "text/plain": [ + " Factor1 Factor2 Factor3 Factor4 \n", + "NKdim__ZC3HAV1 -4.191599e-03 4.440209e-04 0.0002153844 0.0004540283\n", + "NKdim__ZEB2 -4.169701e-06 2.449899e-03 0.0024518547 -0.0006276671\n", + "NKdim__ZFAS1 -1.824321e-03 -2.196386e-05 -0.0001309291 0.0002609197\n", + "NKdim__ZFP36 -1.884174e-03 -1.013808e-04 -0.0009070395 0.0004174070\n", + "NKdim__ZFP36L1 -2.860396e-03 1.044763e-04 0.0009260348 -0.0028899986\n", + "NKdim__ZFP36L2 -1.056855e-03 -6.163430e-04 0.0018555297 0.0006076941\n", + " Factor5 Factor6 Factor7 Factor8 \n", + "NKdim__ZC3HAV1 0.0002949058 -0.0007085776 0.0004619467 0.0009136763\n", + "NKdim__ZEB2 -0.0006177782 0.0003662261 0.0002654389 -0.0001024299\n", + "NKdim__ZFAS1 0.0031960898 -0.0009808865 0.0003104403 -0.0003755947\n", + "NKdim__ZFP36 0.0002005844 0.0019932858 0.0003703909 0.0008108770\n", + "NKdim__ZFP36L1 0.0022447461 0.0009851776 0.0015616902 0.0010531281\n", + "NKdim__ZFP36L2 0.0017680046 0.0007874596 0.0007943166 0.0003794940\n", + " Factor9 Factor10 Factor11 Factor12 \n", + "NKdim__ZC3HAV1 0.0017003505 0.0002068845 0.029082471 -0.0013899221\n", + "NKdim__ZEB2 -0.0011893979 0.0029621473 0.008816921 0.0004274647\n", + 
"NKdim__ZFAS1 -0.0001760204 -0.0030818508 -0.113659437 -0.0030505201\n", + "NKdim__ZFP36 -0.0016578828 -0.0039247834 0.008802220 0.0007937521\n", + "NKdim__ZFP36L1 -0.0021344237 -0.0053043853 -0.004016054 0.0004029870\n", + "NKdim__ZFP36L2 0.0004912557 -0.0051923213 0.080864937 -0.0008642006\n", + " Factor13 Factor14 Factor15 Factor16 \n", + "NKdim__ZC3HAV1 0.0005879015 -0.0017232555 0.0002550067 0.0020538919\n", + "NKdim__ZEB2 0.0017044722 -0.0002884657 -0.0002698381 -0.0009618766\n", + "NKdim__ZFAS1 -0.0005130803 -0.0014203910 0.0004811148 0.0003157216\n", + "NKdim__ZFP36 0.0021959706 -0.0009571731 -0.0004312455 0.0007124782\n", + "NKdim__ZFP36L1 0.0009022350 -0.0020380839 0.0004893038 0.0004537000\n", + "NKdim__ZFP36L2 0.0014579452 -0.0028402238 0.0004304173 0.0003052706\n", + " Factor17 Factor18 Factor19 Factor20 \n", + "NKdim__ZC3HAV1 -0.0001903588 -0.001723843 -0.0001421164 -1.611028e-04\n", + "NKdim__ZEB2 -0.0013183011 0.001507303 0.0001103782 2.288160e-04\n", + "NKdim__ZFAS1 -0.0009303368 0.001418756 0.0007723607 2.154406e-04\n", + "NKdim__ZFP36 0.0005021595 -0.004344592 0.0007678170 -2.155689e-04\n", + "NKdim__ZFP36L1 0.0002285394 -0.000127352 -0.0014641171 1.223379e-05\n", + "NKdim__ZFP36L2 -0.0003380432 -0.002692252 -0.0017124035 -6.965453e-05" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
  1. 3148
  2. 20
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 3148\n", + "\\item 20\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 3148\n", + "2. 20\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 3148 20" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "### Calculate right inverse of weight matrix (mu_ma)\n", + "\n", + "dim(mu_ma) ## corresponds to A^T\n", + "mu_ma_t = t(mu_ma) \n", + "dim(mu_ma_t) ## corresponds to # A\n", + "\n", + "mu_mat_right_inv = mu_ma %*% (solve(mu_ma_t %*% mu_ma))\n", + "head(mu_mat_right_inv)\n", + "tail(mu_mat_right_inv)\n", + "dim(mu_mat_right_inv)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 548, + "id": "769bb480-9aaf-46cf-a75c-eaf317a4585b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "
A matrix: 2 × 3148 of type dbl
B.cell__ACTBB.cell__ARHGDIBB.cell__ARPC2B.cell__ARPC3B.cell__B2MB.cell__BIRC3B.cell__BTF3B.cell__BTG1B.cell__BTG2B.cell__CALM1NKdim__YPEL5NKdim__YWHABNKdim__YWHAQNKdim__YWHAZNKdim__ZC3HAV1NKdim__ZEB2NKdim__ZFAS1NKdim__ZFP36NKdim__ZFP36L1NKdim__ZFP36L2
Baseline.TEST_1-0.1451209-0.2808230.2933812-1.3037827 0.3569171-0.29338123-0.57202111.6637928-1.3037827-1.1048357 1.6637928 0.705065890.4752408 0.9234567 0.6744898-0.02410453-1.040566-0.94207577-0.8010945-0.7050659
Baseline.TEST_12 0.2310092-0.0602920.3697907-0.3697907-0.2933812-0.08445798 0.47524080.1451209 0.1573107 0.3827258-0.6744898-0.096558620.8178173-0.3827258-0.7363159-0.44842548-1.898029-0.07236971 1.3623834 0.8178173
\n" + ], + "text/latex": [ + "A matrix: 2 × 3148 of type dbl\n", + "\\begin{tabular}{r|lllllllllllllllllllll}\n", + " & B.cell\\_\\_ACTB & B.cell\\_\\_ARHGDIB & B.cell\\_\\_ARPC2 & B.cell\\_\\_ARPC3 & B.cell\\_\\_B2M & B.cell\\_\\_BIRC3 & B.cell\\_\\_BTF3 & B.cell\\_\\_BTG1 & B.cell\\_\\_BTG2 & B.cell\\_\\_CALM1 & ⋯ & NKdim\\_\\_YPEL5 & NKdim\\_\\_YWHAB & NKdim\\_\\_YWHAQ & NKdim\\_\\_YWHAZ & NKdim\\_\\_ZC3HAV1 & NKdim\\_\\_ZEB2 & NKdim\\_\\_ZFAS1 & NKdim\\_\\_ZFP36 & NKdim\\_\\_ZFP36L1 & NKdim\\_\\_ZFP36L2\\\\\n", + "\\hline\n", + "\tBaseline.TEST\\_1 & -0.1451209 & -0.280823 & 0.2933812 & -1.3037827 & 0.3569171 & -0.29338123 & -0.5720211 & 1.6637928 & -1.3037827 & -1.1048357 & ⋯ & 1.6637928 & 0.70506589 & 0.4752408 & 0.9234567 & 0.6744898 & -0.02410453 & -1.040566 & -0.94207577 & -0.8010945 & -0.7050659\\\\\n", + "\tBaseline.TEST\\_12 & 0.2310092 & -0.060292 & 0.3697907 & -0.3697907 & -0.2933812 & -0.08445798 & 0.4752408 & 0.1451209 & 0.1573107 & 0.3827258 & ⋯ & -0.6744898 & -0.09655862 & 0.8178173 & -0.3827258 & -0.7363159 & -0.44842548 & -1.898029 & -0.07236971 & 1.3623834 & 0.8178173\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A matrix: 2 × 3148 of type dbl\n", + "\n", + "| | B.cell__ACTB | B.cell__ARHGDIB | B.cell__ARPC2 | B.cell__ARPC3 | B.cell__B2M | B.cell__BIRC3 | B.cell__BTF3 | B.cell__BTG1 | B.cell__BTG2 | B.cell__CALM1 | ⋯ | NKdim__YPEL5 | NKdim__YWHAB | NKdim__YWHAQ | NKdim__YWHAZ | NKdim__ZC3HAV1 | NKdim__ZEB2 | NKdim__ZFAS1 | NKdim__ZFP36 | NKdim__ZFP36L1 | NKdim__ZFP36L2 |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| Baseline.TEST_1 | -0.1451209 | -0.280823 | 0.2933812 | -1.3037827 | 0.3569171 | -0.29338123 | -0.5720211 | 1.6637928 | -1.3037827 | -1.1048357 | ⋯ | 1.6637928 | 0.70506589 | 0.4752408 | 0.9234567 | 0.6744898 | -0.02410453 | -1.040566 | -0.94207577 | -0.8010945 | -0.7050659 |\n", + "| Baseline.TEST_12 | 0.2310092 | -0.060292 | 0.3697907 | -0.3697907 | 
-0.2933812 | -0.08445798 | 0.4752408 | 0.1451209 | 0.1573107 | 0.3827258 | ⋯ | -0.6744898 | -0.09655862 | 0.8178173 | -0.3827258 | -0.7363159 | -0.44842548 | -1.898029 | -0.07236971 | 1.3623834 | 0.8178173 |\n", + "\n" + ], + "text/plain": [ + " B.cell__ACTB B.cell__ARHGDIB B.cell__ARPC2 B.cell__ARPC3\n", + "Baseline.TEST_1 -0.1451209 -0.280823 0.2933812 -1.3037827 \n", + "Baseline.TEST_12 0.2310092 -0.060292 0.3697907 -0.3697907 \n", + " B.cell__B2M B.cell__BIRC3 B.cell__BTF3 B.cell__BTG1\n", + "Baseline.TEST_1 0.3569171 -0.29338123 -0.5720211 1.6637928 \n", + "Baseline.TEST_12 -0.2933812 -0.08445798 0.4752408 0.1451209 \n", + " B.cell__BTG2 B.cell__CALM1 ⋯ NKdim__YPEL5 NKdim__YWHAB\n", + "Baseline.TEST_1 -1.3037827 -1.1048357 ⋯ 1.6637928 0.70506589 \n", + "Baseline.TEST_12 0.1573107 0.3827258 ⋯ -0.6744898 -0.09655862 \n", + " NKdim__YWHAQ NKdim__YWHAZ NKdim__ZC3HAV1 NKdim__ZEB2\n", + "Baseline.TEST_1 0.4752408 0.9234567 0.6744898 -0.02410453\n", + "Baseline.TEST_12 0.8178173 -0.3827258 -0.7363159 -0.44842548\n", + " NKdim__ZFAS1 NKdim__ZFP36 NKdim__ZFP36L1 NKdim__ZFP36L2\n", + "Baseline.TEST_1 -1.040566 -0.94207577 -0.8010945 -0.7050659 \n", + "Baseline.TEST_12 -1.898029 -0.07236971 1.3623834 0.8178173 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "head(gr_ma,2)" + ] + }, + { + "cell_type": "code", + "execution_count": 549, + "id": "3217a141-505d-43c1-8e2e-041dd7af37e8", + "metadata": {}, + "outputs": [], + "source": [ + "gr_ma[is.na(gr_ma)]= 0" + ] + }, + { + "cell_type": "code", + "execution_count": 550, + "id": "a7a1fec5-2861-4917-8678-e2cbd5f301ad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
  1. 103
  2. 20
\n" + ], + "text/latex": [ + "\\begin{enumerate*}\n", + "\\item 103\n", + "\\item 20\n", + "\\end{enumerate*}\n" + ], + "text/markdown": [ + "1. 103\n", + "2. 20\n", + "\n", + "\n" + ], + "text/plain": [ + "[1] 103 20" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "
A matrix: 2 × 20 of type dbl
Factor1Factor2Factor3Factor4Factor5Factor6Factor7Factor8Factor9Factor10Factor11Factor12Factor13Factor14Factor15Factor16Factor17Factor18Factor19Factor20
Baseline.TEST_1-0.120630791.43053620.14446840.5318507-0.12354370.12083592 0.20167710.2368509-0.214168260.211821320.3961745-0.32766720.41714980.3391938-0.012485970.08640766-0.02221648-0.41247990.195775378-0.1918527
Baseline.TEST_12-0.098823740.40139370.32064660.5860041-0.21556690.08823914-0.13225150.2131773 0.049167820.073662543.4977509-0.21866510.44844450.2666195-0.135612540.53762915 0.02581057-0.25009090.008599943-0.1842714
\n" + ], + "text/latex": [ + "A matrix: 2 × 20 of type dbl\n", + "\\begin{tabular}{r|llllllllllllllllllll}\n", + " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & Factor11 & Factor12 & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20\\\\\n", + "\\hline\n", + "\tBaseline.TEST\\_1 & -0.12063079 & 1.4305362 & 0.1444684 & 0.5318507 & -0.1235437 & 0.12083592 & 0.2016771 & 0.2368509 & -0.21416826 & 0.21182132 & 0.3961745 & -0.3276672 & 0.4171498 & 0.3391938 & -0.01248597 & 0.08640766 & -0.02221648 & -0.4124799 & 0.195775378 & -0.1918527\\\\\n", + "\tBaseline.TEST\\_12 & -0.09882374 & 0.4013937 & 0.3206466 & 0.5860041 & -0.2155669 & 0.08823914 & -0.1322515 & 0.2131773 & 0.04916782 & 0.07366254 & 3.4977509 & -0.2186651 & 0.4484445 & 0.2666195 & -0.13561254 & 0.53762915 & 0.02581057 & -0.2500909 & 0.008599943 & -0.1842714\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A matrix: 2 × 20 of type dbl\n", + "\n", + "| | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | Factor11 | Factor12 | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 |\n", + "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", + "| Baseline.TEST_1 | -0.12063079 | 1.4305362 | 0.1444684 | 0.5318507 | -0.1235437 | 0.12083592 | 0.2016771 | 0.2368509 | -0.21416826 | 0.21182132 | 0.3961745 | -0.3276672 | 0.4171498 | 0.3391938 | -0.01248597 | 0.08640766 | -0.02221648 | -0.4124799 | 0.195775378 | -0.1918527 |\n", + "| Baseline.TEST_12 | -0.09882374 | 0.4013937 | 0.3206466 | 0.5860041 | -0.2155669 | 0.08823914 | -0.1322515 | 0.2131773 | 0.04916782 | 0.07366254 | 3.4977509 | -0.2186651 | 0.4484445 | 0.2666195 | -0.13561254 | 0.53762915 | 0.02581057 | -0.2500909 | 0.008599943 | -0.1842714 |\n", + "\n" + ], + "text/plain": [ + " Factor1 Factor2 Factor3 Factor4 Factor5 \n", + 
"Baseline.TEST_1 -0.12063079 1.4305362 0.1444684 0.5318507 -0.1235437\n", + "Baseline.TEST_12 -0.09882374 0.4013937 0.3206466 0.5860041 -0.2155669\n", + " Factor6 Factor7 Factor8 Factor9 Factor10 \n", + "Baseline.TEST_1 0.12083592 0.2016771 0.2368509 -0.21416826 0.21182132\n", + "Baseline.TEST_12 0.08823914 -0.1322515 0.2131773 0.04916782 0.07366254\n", + " Factor11 Factor12 Factor13 Factor14 Factor15 \n", + "Baseline.TEST_1 0.3961745 -0.3276672 0.4171498 0.3391938 -0.01248597\n", + "Baseline.TEST_12 3.4977509 -0.2186651 0.4484445 0.2666195 -0.13561254\n", + " Factor16 Factor17 Factor18 Factor19 Factor20 \n", + "Baseline.TEST_1 0.08640766 -0.02221648 -0.4124799 0.195775378 -0.1918527\n", + "Baseline.TEST_12 0.53762915 0.02581057 -0.2500909 0.008599943 -0.1842714" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#### Apply right inverse to Y from GR \n", + "\n", + "result = gr_ma %*% mu_mat_right_inv\n", + "dim(result)\n", + "\n", + "\n", + "head(result,2)" + ] + }, + { + "cell_type": "code", + "execution_count": 551, + "id": "356ff5d9-a4a7-4ca1-bb89-09c63c460d35", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "103" + ], + "text/latex": [ + "103" + ], + "text/markdown": [ + "103" + ], + "text/plain": [ + "[1] 103" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "nrow(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 552, + "id": "a25d43b4-221b-4096-a2ea-0c9590c4eabb", + "metadata": {}, + "outputs": [], + "source": [ + "result = as.data.frame(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 553, + "id": "fc0822ca-40d8-4d50-b0ab-2e40a91fd481", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "103" + ], + "text/latex": [ + "103" + ], + "text/markdown": [ + "103" + ], + "text/plain": [ + "[1] 103" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "nrow(result[!is.na(result$Factor1),])" + ] + 
}, + { + "cell_type": "code", + "execution_count": 554, + "id": "6f47cb1c-6d40-473d-9e72-741fef9fedef", + "metadata": {}, + "outputs": [], + "source": [ + "result = result[!is.na(result$Factor1),]" + ] + }, + { + "cell_type": "code", + "execution_count": 555, + "id": "04f65759-f99e-4258-8d8c-87cc2fa4e834", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "103" + ], + "text/latex": [ + "103" + ], + "text/markdown": [ + "103" + ], + "text/plain": [ + "[1] 103" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "nrow(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 561, + "id": "943868fa-6979-479f-b05c-669b45839ae4", + "metadata": {}, + "outputs": [], + "source": [ + "### Save Factor Pattern to Share\n", + "\n", + "write.csv(result, paste0(output_path, 'Factor_Data_Groningen.csv'))\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.1.1" + }, + "toc-autonumbering": true + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/G/GX_MOFA_Reproduction_Preprocessing.r b/G/GX_MOFA_Reproduction_Preprocessing.r new file mode 100644 index 0000000..42f4bf3 --- /dev/null +++ b/G/GX_MOFA_Reproduction_Preprocessing.r @@ -0,0 +1,553 @@ +# Load Seurat object and create pseudobulk for MOFA analysis + +############################################# +# Prerequisites - Load Libraries + + +library(dplyr) + +library(Seurat, quietly = TRUE, verbose = FALSE) +library(SeuratDisk, quietly = TRUE, verbose = FALSE) + +library(muscat) + +library(reshape2) + +library(SummarizedExperiment) + +library(stringr) + +############################################### +# Preqrequisites Configurations & Parameters + +result_path = '../results/current/Reproduction' + +seurat_input_data_name = 
'G4_Seurat_Input_Replication.h5seurat'

# Suffix used in the name of the exported CSV; the string 'scano' inside it
# switches off library-size scaling and log-transformation further below.
name_save = 'V_AZIMUTH_REPRODUCTION'


## Define columns in seurat object containing sample-id and cluster annotations

sample_column = 'sample_id' # to be sample-id

cluster_column = 'cluster_id' # to be cluster_id; cell-type annotation


##################################################

### Processing switches

# quantile-normalize the samples against each other within every cell type
quantile_normalization_single_cell = TRUE

# z-score every feature across samples
standardize = FALSE

# treat exact zeros in the wide feature table as missing values
set_zero_na = FALSE

# rank-based inverse normal transform of every feature across samples
quantile_norm_feat = TRUE

# Functions

### Function for quantile normalization
#
# Quantile-normalizes the columns of X against each other: every entry is
# replaced by the mean, taken across columns, of the values that hold the same
# within-column rank. Tied entries share the minimum rank (ties.method = 'min').
#
# X        numeric matrix, or an object as.matrix() can convert.
# returns  numeric matrix with the dimensions and dimnames of X.
#
# The computation is deterministic, so the global RNG is not reseeded here.
quantile_normalization = function(X){
  X = as.matrix(X)
  if (nrow(X) == 0L || ncol(X) == 0L) {
    return(X)
  }

  # rank of each entry within its column (NA entries keep rank NA)
  ranks = apply(X, 2, rank, ties.method = 'min')
  if (!is.matrix(ranks)) {
    # apply() collapses to a plain vector when X has a single row
    ranks = matrix(ranks, nrow = nrow(X))
  }

  # column-wise sorted values; sort() drops NA, so the columns only line up
  # when they all contain the same number of missing values
  sorted = apply(X, 2, sort)
  if (is.list(sorted)) {
    stop('quantile_normalization: columns contain differing numbers of NA values',
         call. = FALSE)
  }
  if (!is.matrix(sorted)) {
    sorted = matrix(sorted, ncol = ncol(X))
  }

  # mean of the k-th smallest value across columns; names are removed because
  # they would only reflect the sort order of the first column
  means = unname(apply(sorted, 1, mean))

  # substitute the rank means back into the positions given by the ranks
  normalized_data = apply(ranks, 2, function(x){ means[x] })
  if (!is.matrix(normalized_data)) {
    normalized_data = matrix(normalized_data, nrow = nrow(X))
  }
  dimnames(normalized_data) = dimnames(X)

  normalized_data
}


### Gene wise quantile normalization
#
# Rank-based inverse normal transform of one feature: the non-missing values
# are ranked (ties get the average rank) and mapped to standard normal
# quantiles via qnorm(rank / (n + 1)), n being the number of non-missing
# values. Missing values stay missing.
#
# x        numeric vector, may contain NA.
# returns  vector of the same length with the transformed values.
stdnorm <- function(x) {
  observed = !is.na(x)
  r = rank(x[observed], ties.method = "average")
  x[observed] = qnorm(r / (sum(observed) + 1))
  x
}



# Load Data

## Load Seurat object

###### Load the generated seurat objects

source_text = paste( result_path, '/', seurat_input_data_name , sep = '')
print(source_text)

# fail with a readable message instead of an error from inside the HDF5 reader
if (!file.exists(source_text)) {
  stop('Seurat input file not found: ', source_text, call. = FALSE)
}

# modification time of the input, printed to document which version was used
print(file.info(source_text)$mtime)
rna_seurat_data = LoadH5Seurat(source_text, assays = "RNA", quietly = TRUE )




## Should contain raw counts
## After QC and Pre-processing
## annotations:
#### 'sample_id' - identification of sample/ patient incl.
####   timepoint
### 'cluster_id' - cell-type annotation

colnames(rna_seurat_data[[]])


rna_seurat_data

rna_seurat_data_subset = rna_seurat_data # rename

# Data Processing (Pseudobulk)

# cell-level metadata; edited here and written back to the Seurat object below
obs = rna_seurat_data_subset@meta.data

obs$cell = rownames(obs)

### add a dummy group column
obs$group_id = '1' ## group-id is needed for DE analysis but not here --> DUMMY variable

### assign sample-id: TBD remove library part for script sharing
obs$sample_id = obs[[sample_column]]

### add cell-type assignment
obs$cluster_id = as.character(obs[[cluster_column]])

rownames(obs) = obs$cell

nrow(obs)

head(obs,2)

### Adjust B-cell mapping/ aggregate Azimuth cell-types
obs$cluster_id = str_replace(obs$cluster_id, 'B_intermediate|B_memory|B_naive', 'B cell')

sort(unique(obs$cluster_id))



## Add to seurat dataset (group-id, cluster-id, sample-id)

for (meta_column in c('group_id', 'cluster_id', 'sample_id')) {
  rna_seurat_data_subset = AddMetaData(object = rna_seurat_data_subset,
                                       metadata = obs[, meta_column, drop = FALSE],
                                       col.name = meta_column)
}

head(rna_seurat_data_subset@meta.data,2)

## Convert to SCE

rna_sce = as.SingleCellExperiment(rna_seurat_data_subset)

rna_sce # rows = genes; columns = cells



### Check amount of cells per sample and cluster

# NOTE(review): this cross-tabulates cluster_id with itself, so the column sums
# are the number of cells per cluster; the per-sample counts follow below.
colSums(table(rna_sce$cluster_id,rna_sce$cluster_id ))

# long table with one row per sample/cluster combination
cells_per_sample_cluster = t(table(rna_sce$cluster_id, rna_sce$sample_id))

cells_per_sample_cluster = data.frame(cells_per_sample_cluster)

colnames(cells_per_sample_cluster) = c('Sample', 'Cluster_Cell_Type', 'amount_cells')

head(cells_per_sample_cluster,2)

## Analyze and calculate gene expression percentages per cluster

gene_list = list()

clusters = unique(rna_sce$cluster_id)
#clusters = unique(rna_sce$cell_type_Scanorama)

clusters

# For every cluster: one row per gene with the percentage and the absolute
# number of cells of that cluster in which the gene has a value > 0.
gene_cell_expr = lapply(setNames(nm = as.character(clusters)), function(cluster_name){
  # subset data on cluster
  rna_sce_subset = rna_sce[, rna_sce$cluster_id == cluster_name]

  amount_cells = ncol(rna_sce_subset)

  # Calculate percentage of cells expressing gene
  amount_cells_expressing_gene = rowSums(assay(rna_sce_subset) > 0)
  perc_cells_expressing_gene = (amount_cells_expressing_gene / amount_cells) * 100

  data.frame(perc_cells_expressing_gene = perc_cells_expressing_gene,
             total_amount_cells_expressing_gene = amount_cells_expressing_gene)
})

### Resulting amount of genes per cluster

# add gene and cluster as explicit columns, then stack all clusters in a single
# rbind() call instead of growing the table inside a loop
gene_cell_expr_long = lapply(names(gene_cell_expr), function(cluster_name){
  cluster_table = gene_cell_expr[[cluster_name]]
  cluster_table$gene = rownames(cluster_table)
  cluster_table$cluster = cluster_name
  cluster_table
})

# the leading empty data frame keeps the result a data frame when there is no cluster
gene_cell_expr_data = do.call(rbind, c(list(data.frame()), gene_cell_expr_long))

head(gene_cell_expr_data,2)

## Add cluster, group and sample columns for aggregation

#### Add cluster_id, sample_id and group_id columns
# NOTE(review): drop = FALSE presumably keeps the remaining colData columns
# (the earlier note here described drop = TRUE) - confirm against the muscat docs.
(rna_sce <- prepSCE(rna_sce,
    kid = 'cluster_id', # subpopulation assignments
    gid = 'group_id',   # group IDs; here the dummy group column
    sid = 'sample_id',  # sample IDs
    drop = FALSE))

nk <- length(kids <- levels(rna_sce$cluster_id))
ns <- length(sids <- levels(rna_sce$sample_id))
names(kids) <- kids; names(sids) <- sids

nk # amount of clusters

ns # amount of samples

kids # cluster ids

length(kids) # amount of cluster ids

## Aggregate single cell to pseudo-bulk data

# mean of the counts over all cells of one cluster in one sample
pb <- aggregateData(rna_sce,
    assay = "counts", fun = "mean",
    by = c("cluster_id", "sample_id"))
# one sheet per subpopulation

pb

#sum(colSums(assay(pb)))

### Save aggregated data

#save( pb , file = paste0(result_path, '/G0_aggregated_RNA_input_correlations_all.RDS'))

# Normalization

## RNA-Single-Seq

### Cell / gene
### expression data filtering

cell_perc_cluster = gene_cell_expr_data

head(cell_perc_cluster,2)

nrow(cell_perc_cluster)

length(unique(cell_perc_cluster$cluster))

##### Condition for filtering genes
# A gene is kept for a cluster when it is expressed in > 50 % of the cluster's
# cells and in > 1200 cells, or in > 40 % of the cells and in > 3000 cells.
# The columns are addressed by their full names; the abbreviated `$perc_cells`
# only worked through partial matching.
perc_expressing = cell_perc_cluster$perc_cells_expressing_gene
cells_expressing = cell_perc_cluster$total_amount_cells_expressing_gene

keep_gene = ((perc_expressing > 50) & (cells_expressing > 1200)) |
  ((perc_expressing > 40) & (cells_expressing > 3000))

cell_perc_cluster = cell_perc_cluster[keep_gene, ]


nrow(cell_perc_cluster)

### Pseudobulk

pb

all_genes = rownames(pb)

head(all_genes)

length(all_genes)

### Pre-Process

#### Remove Clusters (TBD)

names(assays(pb))

# NOTE(review): 'Nkbright' differs in case from the 'NK...' labels used
# elsewhere - confirm that it matches the annotation.
clusters_to_remove = c('platelet', 'plasmablast', 'pDC', 'Nkbright', 'NK_Proliferating',
                       'ILC', 'HSPC', 'Eryth', 'Doublet', 'doublet', 'dnT', 'cDC1',
                       'CD8_TCM', 'CD8_Proliferating', 'CD4_Proliferating', 'ASDC')

# only clusters that are actually present need to be removed
for (cluster_name in intersect(clusters_to_remove, names(assays(pb)))) {
  assay(pb, cluster_name) = NULL
}

length(names(assays(pb)))

names(assays(pb))

#### Prepare gene-cluster dataframe + normalize

# final_data:     samples x features (one column per cluster__gene)
# final_data_vis: samples x clusters (mean expression over the kept genes)
final_data = data.frame(samples = colnames(pb))

rownames(final_data) = final_data$samples

final_data_vis = data.frame(samples = colnames(pb))

rownames(final_data_vis) = final_data_vis$samples

genes_subset = cell_perc_cluster

name_save

# library-size scaling and log-transformation are skipped for scanorama input
is_scanorama = str_detect(name_save, 'scano')

# Clusters removed above no longer have an assay in pb; requesting them would
# stop the script, so only the clusters still present are processed.
clusters_to_process = intersect(unique(genes_subset$cluster), names(assays(pb)))

skipped_clusters = setdiff(unique(genes_subset$cluster), clusters_to_process)
if (length(skipped_clusters) > 0) {
  message('Skipping clusters without pseudobulk assay: ',
          paste(skipped_clusters, collapse = ', '))
}

for(i in clusters_to_process){
  data = assay(pb, i)


  ##### Normalize counts per sample (library size) - currently only for non-scanorama functions

  if(!is_scanorama){
    # per-sample total relative to the average total over all samples
    scaling_factor = colSums(data) / mean(colSums(data))

    for (j in seq_len(ncol(data))){
      # samples without any signal are left untouched (no division by zero)
      if(scaling_factor[j] != 0){
        data[,j] = data[,j] / scaling_factor[j]
      }
    }
  }

  ### Subset data on genes with minimum expression in cluster
  # drop = FALSE keeps a matrix when only a single gene remains
  data = data[rownames(data) %in% genes_subset$gene[genes_subset$cluster == i], , drop = FALSE]

  ### Alternative - cluster independent subsetting
  #data = data[rownames(data) %in% genes_subset,]

  ##### TBD pre-processing steps

  if(!is_scanorama){
    data = log2(data+1) # logarithmize count values (optional!)
  }

  #### Quantile normalization (TBD maybe also on complete dataset?)

  if(quantile_normalization_single_cell == TRUE){
    # gene names are stored and restored around the normalization
    data_rows = rownames(data)
    data = quantile_normalization(data )
    rownames(data) = data_rows
  }

  # feature name = <cluster>__<gene>
  rownames(data) = paste0(i, '__' ,rownames(data))

  # samples in rows; data.frame() makes the feature names syntactically valid
  data = data.frame(t(data))

  expr_mean = data.frame( mean_expr = rowMeans(data))
  colnames(expr_mean) = i
  rownames(expr_mean) = rownames(data)

  # join on the row names (sample ids) and restore them afterwards
  final_data = merge(final_data, data, by = 0)
  final_data_vis = merge(final_data_vis, expr_mean, by = 0)

  rownames(final_data) = final_data$Row.names
  rownames(final_data_vis) = final_data_vis$Row.names
  final_data$Row.names = NULL
  final_data_vis$Row.names = NULL
}



head(final_data,2)

ncol(final_data)

nrow(genes_subset)

dim(final_data)

# strip everything from the first '-' of the sample name onwards
final_data$samples = str_replace(final_data$samples, '-.*', '')

head(final_data,2)



#### Filter genes

### Remove mitochondrial & ribosomal genes

head(final_data,2)

ncol(final_data)

# NOTE(review): '__MT.*' matches every gene symbol starting with 'MT', not only
# 'MT-' genes - kept unchanged so the feature set stays the same.
is_mito_or_ribo = str_detect(colnames(final_data), '__MT.*|__RPL.*|__RPS.*')

final_data = final_data[, !is_mito_or_ribo, drop = FALSE]

ncol(final_data) # minus sample + sample_id column --> 11.831

head(final_data,2)

## Genes with high variance
# NOTE(review): no variance filter is applied in this section; only the sample
# column is rebuilt from the row names.

head(final_data,2)

final_data$samples = NULL

final_data$sample_id = NULL

ncol(final_data)

final_data$samples = rownames(final_data)

head(final_data,2)



#### Prepare long format

# one row per sample/feature; the sample column is named explicitly as id
final_data_long = melt(final_data, id.vars = 'samples')

### Decide what to do with duplicates

head(final_data_long,2)

final_data_long$type = 'single_cell'

final_data_long$samples = str_replace(final_data_long$samples, '-.*', '')
# Take the average in case the same sample was measured multiple times.
# ungroup() is added so the result does not stay grouped by samples/type.
final_data_long = final_data_long %>%
  group_by(samples, type, variable) %>%
  summarise(value = mean(value)) %>%
  ungroup()

length(unique(final_data_long$variable))

# Rename `samples` to `sample_id`.
final_data_long$sample_id = final_data_long$samples
final_data_long$samples = NULL

# Quantile Normalization

head(final_data_long, 2)

data_long = final_data_long

head(data_long, 2)

nrow(data_long)

length(unique(data_long$sample_id))

### Normalization & wide format

### Standardize values

standardize

if (standardize == TRUE) {
  # Per-feature mean and sd, computed before zeros are turned into NA.
  feature_stats = data_long %>%
    group_by(variable) %>%
    summarise(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE))

  data_long = merge(data_long, feature_stats)

  # Zero observations are treated as missing. Only `value` is touched; a
  # blanket data_long[data_long == 0] = NA would also blank id columns and
  # the merged mean/sd columns whenever they happen to equal 0.
  data_long$value[which(data_long$value == 0)] = NA

  # Features without variation (or without a defined sd) cannot be scaled.
  data_long = data_long[(data_long$sd != 0) & (!is.na(data_long$sd)), ]

  data_nas = data.frame(is.na(data_long))
  data_long$value = (data_long$value - data_long$mean) / data_long$sd

  #data_long = data.frame(data_long)
  data_long$mean = NULL
  data_long$sd = NULL
  data_long$value[data_nas$value] = NA
}

unique(data_long$type)

## Prepare wide format for correlations

# Column identifier in the wide table: '<type>_0_<variable>'.
data_long$ident = paste0(data_long$type, '_0_', data_long$variable)

nrow(unique(data_long[, c('sample_id', 'ident')]))

nrow(data_long)

### Transform to wide

# ! with this merging there might be NA values for some samples on some data types
final_data = dcast(data_long, sample_id ~ ident, value.var = "value")

head(final_data, 2)

rownames(final_data) = final_data$sample_id

final_data$sample_id = NULL

ncol(final_data)

nrow(final_data)

### Deal with NA - Set NA for 0 observation + remove samples with only NA

head(final_data, 2)

set_zero_na

if (set_zero_na == TRUE) {
  final_data[final_data == 0] = NA
}

### Remember NA's

data_nas = is.na(final_data)

ncol(final_data)

# Keep samples that have at least one non-missing feature.
na_per_sample = rowSums(data_nas)
keep_samples = names(na_per_sample)[na_per_sample != ncol(final_data)]

# drop = FALSE keeps the data frame / matrix shape when only one sample or
# one feature remains, which the mask assignment below relies on.
final_data = final_data[keep_samples, , drop = FALSE]

data_nas = data_nas[keep_samples, , drop = FALSE]

### Apply feature wise quantile normalization

quantile_norm_feat

# NOTE(review): data_long is only rebuilt from the wide table inside this
# branch. When quantile_norm_feat is not TRUE, the table written below is the
# long table from before the wide-format steps, so the zero -> NA replacement
# and the sample filter above are not reflected in it - confirm this is intended.
if (quantile_norm_feat == TRUE) {
  final_data = apply(final_data, 2, stdnorm)
  final_data = data.frame(final_data)
  # Restore the missing-value pattern recorded before the transformation.
  final_data[data_nas] = NA
  final_data$sample_id = rownames(final_data)
  data_long = melt(final_data)
  # Split '<type>_0_<variable>' back into its two parts.
  data_long$type = str_extract(data_long$variable, '.*_0_')
  data_long$type = str_replace(data_long$type, '_0_', '')
  data_long$variable = str_replace(data_long$variable, '.*_0_', '')
}

# Save Prepared Data

name_save

# NOTE(review): `result_path` is not defined in the visible part of the file
# (the parameters shown define `output_path`) - confirm it is set earlier.
write.csv(data_long, paste0(result_path, '/Combined_Data_', name_save, '.csv'))

length(unique(data_long$variable))