diff --git a/G/GX11_Matrix_Inverse_for_Replication.ipynb b/G/GX11_Matrix_Inverse_for_Replication.ipynb
new file mode 100644
index 0000000..28c9eab
--- /dev/null
+++ b/G/GX11_Matrix_Inverse_for_Replication.ipynb
@@ -0,0 +1,2047 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 504,
+ "id": "20d8a525-8121-417b-8bd8-632e75beef12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Script to apply the feature weights of the factors identified in the Munich data (Azimuth annotation) to the Groningen data in order to compute factor values\n",
+ "### Replication Script"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "528702ab-7be4-42ec-a5a5-8fa5647ddf6b",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# Libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 505,
+ "id": "51def42a-4a18-4c27-bf37-ee082b4a1363",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "library(dplyr)\n",
+ "library(stringr)\n",
+ "library(Seurat)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 506,
+ "id": "6dc51e69-70e6-4bf2-a7d1-b5fad935cc12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "library(MOFA2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 507,
+ "id": "20ea0b90-9663-46a8-9bdf-d994aa28b25c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "library(ggplot2)\n",
+ "library(reticulate)\n",
+ "library(reshape2)\n",
+ "library(ggpubr)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3d97489e-c741-4853-8df6-01a1b57959af",
+ "metadata": {},
+ "source": [
+ "# Parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 508,
+ "id": "aca812f2-f0bf-43ba-ba91-3f012033d097",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_path = '../data/current'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 509,
+ "id": "ac48c1d3-04a7-424f-bcb3-7120a31b7f51",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "output_path = '../results/current'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0d2a3050-f8f6-48b8-be0f-e2a92053947d",
+ "metadata": {},
+ "source": [
+ "# Load data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aacd4ca7-cfaf-4205-8b1c-eb3e1f58d607",
+ "metadata": {},
+ "source": [
+ "## Munich factor data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 510,
+ "id": "e222c0b3-6316-47ff-bf05-6fd2fd88bd20",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "A data.frame: 6 × 21\n",
+ "\n",
+ "\t | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | ⋯ | Factor12 | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 | sample_id |
\n",
+ "\t | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\t1 | 0.2822028 | -1.2559541 | -0.53390026 | 1.1204467 | -0.46669223 | 1.2484388 | 0.20759032 | -0.364617945 | -0.40242258 | 0.78832135 | ⋯ | 0.05325919 | 0.27738239 | 0.13303365 | -0.76325230 | -0.04660508 | 0.95971046 | 0.431779125 | -0.46741569 | -0.45888943 | k1 |
\n",
+ "\t2 | 0.6928825 | -1.0380557 | 0.33933545 | 0.2294335 | 0.10520940 | 0.8011520 | 0.36775010 | 1.417721825 | -0.58040927 | 0.04100113 | ⋯ | 0.22233569 | 0.31122340 | 0.04224790 | 0.14198857 | -0.04503017 | -0.04460783 | -0.186394597 | -0.26701984 | -0.03163122 | k10 |
\n",
+ "\t3 | -1.4468008 | -0.3123073 | 0.68857816 | -0.6941110 | 0.08007671 | 1.1839352 | -0.19331847 | 0.036830866 | 0.21506213 | 0.03104541 | ⋯ | -0.70126043 | 0.17834780 | -0.05727359 | 0.05895828 | 0.14746604 | -0.09273922 | 0.172580802 | 0.02947317 | -0.08184409 | k11 |
\n",
+ "\t4 | -2.5657673 | -0.7162750 | 0.99399349 | -1.0877413 | -0.89743312 | 0.5699462 | -0.08501748 | -0.168697397 | 0.03162881 | -0.03488845 | ⋯ | -1.09175933 | 0.48647681 | -0.39887633 | -0.04494189 | 0.54622186 | 0.16467157 | -0.149100299 | 0.43129440 | -0.05209181 | k12 |
\n",
+ "\t5 | -0.2657781 | -0.1293536 | 0.30498721 | -1.0304524 | 0.04534561 | 1.3639342 | 0.02831011 | -0.003340543 | 0.13941860 | 0.16450688 | ⋯ | -0.33919680 | -0.66542364 | 0.03761243 | 0.15980442 | 0.11184416 | 0.03747477 | 0.099951109 | 0.24887446 | 0.27424704 | k13 |
\n",
+ "\t6 | 1.0376091 | -1.4776762 | -0.03257904 | -0.5101523 | 0.12084320 | 0.1672229 | 0.24861033 | 1.302995675 | 0.07186483 | 0.02188782 | ⋯ | 0.56497282 | -0.05101193 | -0.29314109 | 0.26959618 | -0.70380578 | 0.07279563 | 0.004502305 | 0.38742801 | 0.05504769 | k14 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A data.frame: 6 × 21\n",
+ "\\begin{tabular}{r|lllllllllllllllllllll}\n",
+ " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & ⋯ & Factor12 & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20 & sample\\_id\\\\\n",
+ " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n",
+ "\\hline\n",
+ "\t1 & 0.2822028 & -1.2559541 & -0.53390026 & 1.1204467 & -0.46669223 & 1.2484388 & 0.20759032 & -0.364617945 & -0.40242258 & 0.78832135 & ⋯ & 0.05325919 & 0.27738239 & 0.13303365 & -0.76325230 & -0.04660508 & 0.95971046 & 0.431779125 & -0.46741569 & -0.45888943 & k1 \\\\\n",
+ "\t2 & 0.6928825 & -1.0380557 & 0.33933545 & 0.2294335 & 0.10520940 & 0.8011520 & 0.36775010 & 1.417721825 & -0.58040927 & 0.04100113 & ⋯ & 0.22233569 & 0.31122340 & 0.04224790 & 0.14198857 & -0.04503017 & -0.04460783 & -0.186394597 & -0.26701984 & -0.03163122 & k10\\\\\n",
+ "\t3 & -1.4468008 & -0.3123073 & 0.68857816 & -0.6941110 & 0.08007671 & 1.1839352 & -0.19331847 & 0.036830866 & 0.21506213 & 0.03104541 & ⋯ & -0.70126043 & 0.17834780 & -0.05727359 & 0.05895828 & 0.14746604 & -0.09273922 & 0.172580802 & 0.02947317 & -0.08184409 & k11\\\\\n",
+ "\t4 & -2.5657673 & -0.7162750 & 0.99399349 & -1.0877413 & -0.89743312 & 0.5699462 & -0.08501748 & -0.168697397 & 0.03162881 & -0.03488845 & ⋯ & -1.09175933 & 0.48647681 & -0.39887633 & -0.04494189 & 0.54622186 & 0.16467157 & -0.149100299 & 0.43129440 & -0.05209181 & k12\\\\\n",
+ "\t5 & -0.2657781 & -0.1293536 & 0.30498721 & -1.0304524 & 0.04534561 & 1.3639342 & 0.02831011 & -0.003340543 & 0.13941860 & 0.16450688 & ⋯ & -0.33919680 & -0.66542364 & 0.03761243 & 0.15980442 & 0.11184416 & 0.03747477 & 0.099951109 & 0.24887446 & 0.27424704 & k13\\\\\n",
+ "\t6 & 1.0376091 & -1.4776762 & -0.03257904 & -0.5101523 & 0.12084320 & 0.1672229 & 0.24861033 & 1.302995675 & 0.07186483 & 0.02188782 & ⋯ & 0.56497282 & -0.05101193 & -0.29314109 & 0.26959618 & -0.70380578 & 0.07279563 & 0.004502305 & 0.38742801 & 0.05504769 & k14\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A data.frame: 6 × 21\n",
+ "\n",
+ "| | Factor1 <dbl> | Factor2 <dbl> | Factor3 <dbl> | Factor4 <dbl> | Factor5 <dbl> | Factor6 <dbl> | Factor7 <dbl> | Factor8 <dbl> | Factor9 <dbl> | Factor10 <dbl> | ⋯ ⋯ | Factor12 <dbl> | Factor13 <dbl> | Factor14 <dbl> | Factor15 <dbl> | Factor16 <dbl> | Factor17 <dbl> | Factor18 <dbl> | Factor19 <dbl> | Factor20 <dbl> | sample_id <chr> |\n",
+ "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
+ "| 1 | 0.2822028 | -1.2559541 | -0.53390026 | 1.1204467 | -0.46669223 | 1.2484388 | 0.20759032 | -0.364617945 | -0.40242258 | 0.78832135 | ⋯ | 0.05325919 | 0.27738239 | 0.13303365 | -0.76325230 | -0.04660508 | 0.95971046 | 0.431779125 | -0.46741569 | -0.45888943 | k1 |\n",
+ "| 2 | 0.6928825 | -1.0380557 | 0.33933545 | 0.2294335 | 0.10520940 | 0.8011520 | 0.36775010 | 1.417721825 | -0.58040927 | 0.04100113 | ⋯ | 0.22233569 | 0.31122340 | 0.04224790 | 0.14198857 | -0.04503017 | -0.04460783 | -0.186394597 | -0.26701984 | -0.03163122 | k10 |\n",
+ "| 3 | -1.4468008 | -0.3123073 | 0.68857816 | -0.6941110 | 0.08007671 | 1.1839352 | -0.19331847 | 0.036830866 | 0.21506213 | 0.03104541 | ⋯ | -0.70126043 | 0.17834780 | -0.05727359 | 0.05895828 | 0.14746604 | -0.09273922 | 0.172580802 | 0.02947317 | -0.08184409 | k11 |\n",
+ "| 4 | -2.5657673 | -0.7162750 | 0.99399349 | -1.0877413 | -0.89743312 | 0.5699462 | -0.08501748 | -0.168697397 | 0.03162881 | -0.03488845 | ⋯ | -1.09175933 | 0.48647681 | -0.39887633 | -0.04494189 | 0.54622186 | 0.16467157 | -0.149100299 | 0.43129440 | -0.05209181 | k12 |\n",
+ "| 5 | -0.2657781 | -0.1293536 | 0.30498721 | -1.0304524 | 0.04534561 | 1.3639342 | 0.02831011 | -0.003340543 | 0.13941860 | 0.16450688 | ⋯ | -0.33919680 | -0.66542364 | 0.03761243 | 0.15980442 | 0.11184416 | 0.03747477 | 0.099951109 | 0.24887446 | 0.27424704 | k13 |\n",
+ "| 6 | 1.0376091 | -1.4776762 | -0.03257904 | -0.5101523 | 0.12084320 | 0.1672229 | 0.24861033 | 1.302995675 | 0.07186483 | 0.02188782 | ⋯ | 0.56497282 | -0.05101193 | -0.29314109 | 0.26959618 | -0.70380578 | 0.07279563 | 0.004502305 | 0.38742801 | 0.05504769 | k14 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 \n",
+ "1 0.2822028 -1.2559541 -0.53390026 1.1204467 -0.46669223 1.2484388\n",
+ "2 0.6928825 -1.0380557 0.33933545 0.2294335 0.10520940 0.8011520\n",
+ "3 -1.4468008 -0.3123073 0.68857816 -0.6941110 0.08007671 1.1839352\n",
+ "4 -2.5657673 -0.7162750 0.99399349 -1.0877413 -0.89743312 0.5699462\n",
+ "5 -0.2657781 -0.1293536 0.30498721 -1.0304524 0.04534561 1.3639342\n",
+ "6 1.0376091 -1.4776762 -0.03257904 -0.5101523 0.12084320 0.1672229\n",
+ " Factor7 Factor8 Factor9 Factor10 ⋯ Factor12 Factor13 \n",
+ "1 0.20759032 -0.364617945 -0.40242258 0.78832135 ⋯ 0.05325919 0.27738239\n",
+ "2 0.36775010 1.417721825 -0.58040927 0.04100113 ⋯ 0.22233569 0.31122340\n",
+ "3 -0.19331847 0.036830866 0.21506213 0.03104541 ⋯ -0.70126043 0.17834780\n",
+ "4 -0.08501748 -0.168697397 0.03162881 -0.03488845 ⋯ -1.09175933 0.48647681\n",
+ "5 0.02831011 -0.003340543 0.13941860 0.16450688 ⋯ -0.33919680 -0.66542364\n",
+ "6 0.24861033 1.302995675 0.07186483 0.02188782 ⋯ 0.56497282 -0.05101193\n",
+ " Factor14 Factor15 Factor16 Factor17 Factor18 Factor19 \n",
+ "1 0.13303365 -0.76325230 -0.04660508 0.95971046 0.431779125 -0.46741569\n",
+ "2 0.04224790 0.14198857 -0.04503017 -0.04460783 -0.186394597 -0.26701984\n",
+ "3 -0.05727359 0.05895828 0.14746604 -0.09273922 0.172580802 0.02947317\n",
+ "4 -0.39887633 -0.04494189 0.54622186 0.16467157 -0.149100299 0.43129440\n",
+ "5 0.03761243 0.15980442 0.11184416 0.03747477 0.099951109 0.24887446\n",
+ "6 -0.29314109 0.26959618 -0.70380578 0.07279563 0.004502305 0.38742801\n",
+ " Factor20 sample_id\n",
+ "1 -0.45888943 k1 \n",
+ "2 -0.03163122 k10 \n",
+ "3 -0.08184409 k11 \n",
+ "4 -0.05209181 k12 \n",
+ "5 0.27424704 k13 \n",
+ "6 0.05504769 k14 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "factor_data_mu = read.csv(paste0(data_path, \"/results/G-Analysis/Factor_Data_V_AZIMUTH_INTEGRATED_FALSE.csv\"))\n",
+ "head(factor_data_mu) # alternative: Azimuth"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e943149d-edbb-4728-ad84-5ce37af2f04d",
+ "metadata": {},
+ "source": [
+ "## Munich feature data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 518,
+ "id": "5d76cc1d-c487-476e-8f07-a751703dc1ff",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A data.frame: 6 × 22\n",
+ "\n",
+ "\t | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | ⋯ | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 | type | variable_name |
\n",
+ "\t | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\t1 | -0.020691979 | 0.429515363 | 0.027756312 | -0.02712267 | 0.010438305 | -0.41947963 | 0.01169812 | -0.11248843 | 0.28675204 | 0.0143456634 | ⋯ | 0.030966401 | 0.100109005 | 0.88420419 | -0.002553659 | -0.0002680875 | -0.009308180 | 0.003408993 | -0.002008598 | clinical_data | CK |
\n",
+ "\t2 | -0.001361949 | 0.379039595 | 0.067498675 | -0.06765357 | -0.004929694 | -0.35290690 | 0.01561328 | -0.01496201 | 0.37774439 | -0.0004131968 | ⋯ | 0.003047890 | 0.116174613 | 0.13072112 | -0.002865083 | -0.0003610348 | -0.008757576 | 0.004192790 | -0.010507527 | clinical_data | CK_MB |
\n",
+ "\t3 | -0.052856754 | 0.028641115 | -0.891960581 | 0.08775674 | -0.024844550 | -0.40121965 | -0.08041434 | -0.01238686 | -0.02412622 | -0.0140435079 | ⋯ | 0.008406062 | 0.176649898 | 0.49146629 | -0.007964040 | -0.0036451100 | 0.014286835 | -0.002485743 | 0.009543655 | clinical_data | CRP |
\n",
+ "\t4 | -0.026301185 | 0.327099511 | -0.314584680 | 0.08342660 | 0.014994718 | -0.54065063 | 0.01323074 | -0.31178849 | 0.18761051 | -0.0154090508 | ⋯ | 0.117386061 | 0.216599600 | 0.78670880 | -0.007199139 | 0.0443780006 | -0.003896878 | 0.004977378 | -0.003467445 | clinical_data | Troponin |
\n",
+ "\t5 | 0.015548781 | -0.004530304 | 0.082117144 | -0.02945236 | -0.213756108 | -0.10763285 | 0.07139866 | 0.03609601 | -0.03866970 | 0.0178796863 | ⋯ | 0.262153876 | 0.006848431 | -0.05089458 | -0.020458287 | -0.1043864240 | 0.075482525 | -0.037997785 | -0.029645511 | cytokine | BCA1__CXCL13 |
\n",
+ "\t6 | 0.015036488 | -0.190978857 | 0.001049874 | 0.01339136 | 0.028743482 | 0.01589764 | 0.07016133 | -0.02435615 | 0.01165956 | -0.1114046521 | ⋯ | -0.001984003 | -0.050574208 | -0.04242095 | -0.004868774 | -0.1907216033 | 0.364199683 | 0.085955948 | -0.028747833 | cytokine | CTACK__CCL27 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A data.frame: 6 × 22\n",
+ "\\begin{tabular}{r|lllllllllllllllllllll}\n",
+ " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & ⋯ & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20 & type & variable\\_name\\\\\n",
+ " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n",
+ "\\hline\n",
+ "\t1 & -0.020691979 & 0.429515363 & 0.027756312 & -0.02712267 & 0.010438305 & -0.41947963 & 0.01169812 & -0.11248843 & 0.28675204 & 0.0143456634 & ⋯ & 0.030966401 & 0.100109005 & 0.88420419 & -0.002553659 & -0.0002680875 & -0.009308180 & 0.003408993 & -0.002008598 & clinical\\_data & CK \\\\\n",
+ "\t2 & -0.001361949 & 0.379039595 & 0.067498675 & -0.06765357 & -0.004929694 & -0.35290690 & 0.01561328 & -0.01496201 & 0.37774439 & -0.0004131968 & ⋯ & 0.003047890 & 0.116174613 & 0.13072112 & -0.002865083 & -0.0003610348 & -0.008757576 & 0.004192790 & -0.010507527 & clinical\\_data & CK\\_MB \\\\\n",
+ "\t3 & -0.052856754 & 0.028641115 & -0.891960581 & 0.08775674 & -0.024844550 & -0.40121965 & -0.08041434 & -0.01238686 & -0.02412622 & -0.0140435079 & ⋯ & 0.008406062 & 0.176649898 & 0.49146629 & -0.007964040 & -0.0036451100 & 0.014286835 & -0.002485743 & 0.009543655 & clinical\\_data & CRP \\\\\n",
+ "\t4 & -0.026301185 & 0.327099511 & -0.314584680 & 0.08342660 & 0.014994718 & -0.54065063 & 0.01323074 & -0.31178849 & 0.18761051 & -0.0154090508 & ⋯ & 0.117386061 & 0.216599600 & 0.78670880 & -0.007199139 & 0.0443780006 & -0.003896878 & 0.004977378 & -0.003467445 & clinical\\_data & Troponin \\\\\n",
+ "\t5 & 0.015548781 & -0.004530304 & 0.082117144 & -0.02945236 & -0.213756108 & -0.10763285 & 0.07139866 & 0.03609601 & -0.03866970 & 0.0178796863 & ⋯ & 0.262153876 & 0.006848431 & -0.05089458 & -0.020458287 & -0.1043864240 & 0.075482525 & -0.037997785 & -0.029645511 & cytokine & BCA1\\_\\_CXCL13\\\\\n",
+ "\t6 & 0.015036488 & -0.190978857 & 0.001049874 & 0.01339136 & 0.028743482 & 0.01589764 & 0.07016133 & -0.02435615 & 0.01165956 & -0.1114046521 & ⋯ & -0.001984003 & -0.050574208 & -0.04242095 & -0.004868774 & -0.1907216033 & 0.364199683 & 0.085955948 & -0.028747833 & cytokine & CTACK\\_\\_CCL27\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A data.frame: 6 × 22\n",
+ "\n",
+ "| | Factor1 <dbl> | Factor2 <dbl> | Factor3 <dbl> | Factor4 <dbl> | Factor5 <dbl> | Factor6 <dbl> | Factor7 <dbl> | Factor8 <dbl> | Factor9 <dbl> | Factor10 <dbl> | ⋯ ⋯ | Factor13 <dbl> | Factor14 <dbl> | Factor15 <dbl> | Factor16 <dbl> | Factor17 <dbl> | Factor18 <dbl> | Factor19 <dbl> | Factor20 <dbl> | type <chr> | variable_name <chr> |\n",
+ "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
+ "| 1 | -0.020691979 | 0.429515363 | 0.027756312 | -0.02712267 | 0.010438305 | -0.41947963 | 0.01169812 | -0.11248843 | 0.28675204 | 0.0143456634 | ⋯ | 0.030966401 | 0.100109005 | 0.88420419 | -0.002553659 | -0.0002680875 | -0.009308180 | 0.003408993 | -0.002008598 | clinical_data | CK |\n",
+ "| 2 | -0.001361949 | 0.379039595 | 0.067498675 | -0.06765357 | -0.004929694 | -0.35290690 | 0.01561328 | -0.01496201 | 0.37774439 | -0.0004131968 | ⋯ | 0.003047890 | 0.116174613 | 0.13072112 | -0.002865083 | -0.0003610348 | -0.008757576 | 0.004192790 | -0.010507527 | clinical_data | CK_MB |\n",
+ "| 3 | -0.052856754 | 0.028641115 | -0.891960581 | 0.08775674 | -0.024844550 | -0.40121965 | -0.08041434 | -0.01238686 | -0.02412622 | -0.0140435079 | ⋯ | 0.008406062 | 0.176649898 | 0.49146629 | -0.007964040 | -0.0036451100 | 0.014286835 | -0.002485743 | 0.009543655 | clinical_data | CRP |\n",
+ "| 4 | -0.026301185 | 0.327099511 | -0.314584680 | 0.08342660 | 0.014994718 | -0.54065063 | 0.01323074 | -0.31178849 | 0.18761051 | -0.0154090508 | ⋯ | 0.117386061 | 0.216599600 | 0.78670880 | -0.007199139 | 0.0443780006 | -0.003896878 | 0.004977378 | -0.003467445 | clinical_data | Troponin |\n",
+ "| 5 | 0.015548781 | -0.004530304 | 0.082117144 | -0.02945236 | -0.213756108 | -0.10763285 | 0.07139866 | 0.03609601 | -0.03866970 | 0.0178796863 | ⋯ | 0.262153876 | 0.006848431 | -0.05089458 | -0.020458287 | -0.1043864240 | 0.075482525 | -0.037997785 | -0.029645511 | cytokine | BCA1__CXCL13 |\n",
+ "| 6 | 0.015036488 | -0.190978857 | 0.001049874 | 0.01339136 | 0.028743482 | 0.01589764 | 0.07016133 | -0.02435615 | 0.01165956 | -0.1114046521 | ⋯ | -0.001984003 | -0.050574208 | -0.04242095 | -0.004868774 | -0.1907216033 | 0.364199683 | 0.085955948 | -0.028747833 | cytokine | CTACK__CCL27 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 \n",
+ "1 -0.020691979 0.429515363 0.027756312 -0.02712267 0.010438305 -0.41947963\n",
+ "2 -0.001361949 0.379039595 0.067498675 -0.06765357 -0.004929694 -0.35290690\n",
+ "3 -0.052856754 0.028641115 -0.891960581 0.08775674 -0.024844550 -0.40121965\n",
+ "4 -0.026301185 0.327099511 -0.314584680 0.08342660 0.014994718 -0.54065063\n",
+ "5 0.015548781 -0.004530304 0.082117144 -0.02945236 -0.213756108 -0.10763285\n",
+ "6 0.015036488 -0.190978857 0.001049874 0.01339136 0.028743482 0.01589764\n",
+ " Factor7 Factor8 Factor9 Factor10 ⋯ Factor13 Factor14 \n",
+ "1 0.01169812 -0.11248843 0.28675204 0.0143456634 ⋯ 0.030966401 0.100109005\n",
+ "2 0.01561328 -0.01496201 0.37774439 -0.0004131968 ⋯ 0.003047890 0.116174613\n",
+ "3 -0.08041434 -0.01238686 -0.02412622 -0.0140435079 ⋯ 0.008406062 0.176649898\n",
+ "4 0.01323074 -0.31178849 0.18761051 -0.0154090508 ⋯ 0.117386061 0.216599600\n",
+ "5 0.07139866 0.03609601 -0.03866970 0.0178796863 ⋯ 0.262153876 0.006848431\n",
+ "6 0.07016133 -0.02435615 0.01165956 -0.1114046521 ⋯ -0.001984003 -0.050574208\n",
+ " Factor15 Factor16 Factor17 Factor18 Factor19 Factor20 \n",
+ "1 0.88420419 -0.002553659 -0.0002680875 -0.009308180 0.003408993 -0.002008598\n",
+ "2 0.13072112 -0.002865083 -0.0003610348 -0.008757576 0.004192790 -0.010507527\n",
+ "3 0.49146629 -0.007964040 -0.0036451100 0.014286835 -0.002485743 0.009543655\n",
+ "4 0.78670880 -0.007199139 0.0443780006 -0.003896878 0.004977378 -0.003467445\n",
+ "5 -0.05089458 -0.020458287 -0.1043864240 0.075482525 -0.037997785 -0.029645511\n",
+ "6 -0.04242095 -0.004868774 -0.1907216033 0.364199683 0.085955948 -0.028747833\n",
+ " type variable_name\n",
+ "1 clinical_data CK \n",
+ "2 clinical_data CK_MB \n",
+ "3 clinical_data CRP \n",
+ "4 clinical_data Troponin \n",
+ "5 cytokine BCA1__CXCL13 \n",
+ "6 cytokine CTACK__CCL27 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "feature_data_mu = read.csv(paste0(data_path, \"/results/G-Analysis/Weight_Data_V_AZIMUTH_INTEGRATED_FALSE.csv\"))\n",
+ "head(feature_data_mu) # alternative: Munich"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9b591949-a52d-4494-b5ef-a8dab5c856bc",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Pseudobulk and Normalized Input Data Groningen"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 519,
+ "id": "1f8da877-10dc-45b9-9b73-0808553504e6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A data.frame: 3 × 5\n",
+ "\n",
+ "\t | X | sample_id | variable | value | type |
\n",
+ "\t | <int> | <chr> | <chr> | <dbl> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\t1 | 1 | Baseline.TEST_1 | B.cell__ACTB | -0.1451209 | single_cell |
\n",
+ "\t2 | 2 | Baseline.TEST_12 | B.cell__ACTB | 0.2310092 | single_cell |
\n",
+ "\t3 | 3 | Baseline.TEST_14 | B.cell__ACTB | -0.2933812 | single_cell |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A data.frame: 3 × 5\n",
+ "\\begin{tabular}{r|lllll}\n",
+ " & X & sample\\_id & variable & value & type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t1 & 1 & Baseline.TEST\\_1 & B.cell\\_\\_ACTB & -0.1451209 & single\\_cell\\\\\n",
+ "\t2 & 2 & Baseline.TEST\\_12 & B.cell\\_\\_ACTB & 0.2310092 & single\\_cell\\\\\n",
+ "\t3 & 3 & Baseline.TEST\\_14 & B.cell\\_\\_ACTB & -0.2933812 & single\\_cell\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A data.frame: 3 × 5\n",
+ "\n",
+ "| | X <int> | sample_id <chr> | variable <chr> | value <dbl> | type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| 1 | 1 | Baseline.TEST_1 | B.cell__ACTB | -0.1451209 | single_cell |\n",
+ "| 2 | 2 | Baseline.TEST_12 | B.cell__ACTB | 0.2310092 | single_cell |\n",
+ "| 3 | 3 | Baseline.TEST_14 | B.cell__ACTB | -0.2933812 | single_cell |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " X sample_id variable value type \n",
+ "1 1 Baseline.TEST_1 B.cell__ACTB -0.1451209 single_cell\n",
+ "2 2 Baseline.TEST_12 B.cell__ACTB 0.2310092 single_cell\n",
+ "3 3 Baseline.TEST_14 B.cell__ACTB -0.2933812 single_cell"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1] \"2023-04-25 16:06:43 UTC\"\n"
+ ]
+ }
+ ],
+ "source": [
+ "path = paste0(output_path, '/Reproduction_GR/Combined_Data_V_AZIMUTH_REPRODUCTION_v2','.csv')\n",
+ "input_gr = read.csv(path)\n",
+ "head(input_gr, n=3)\n",
+ "print(file.info(path)$mtime)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 520,
+ "id": "a0d58dc9-25ed-4e81-ac9c-6e667faeabfb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "input_gr$feature = input_gr$variable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4514aed7-d20f-4751-9b56-bd0348eaa8be",
+ "metadata": {},
+ "source": [
+ "## Outcome classification Groningen"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0dfe35b4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "classification_gr = read.csv(paste0(data_path, \"/results/Reproduction_GR/groningen_output_class.csv\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e21f7bb2-a81a-435a-9fb9-5134fb28e839",
+ "metadata": {},
+ "source": [
+ "# Analyze"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0e13ebb1-928a-4f76-850e-7fe3062d67fe",
+ "metadata": {},
+ "source": [
+ "## Match Features"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 522,
+ "id": "332502ac-dec8-4dce-9385-d5730a7e48de",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Ensure that feature annotations (including the cell type) have the same format in both datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 523,
+ "id": "5ec775f1-b707-43bc-9e83-dab914842c14",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "input_gr$cell_type = str_replace(input_gr$variable,\n",
+ " '__.*', '')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 524,
+ "id": "df6575ed-a213-483a-8b5e-68db56940787",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 'B.cell'
- 'CD4_CTL'
- 'CD4_Naive'
- 'CD4_TCM'
- 'CD4_TEM'
- 'CD8_Naive'
- 'CD8_TEM'
- 'cDC2'
- 'cMono'
- 'MAIT'
- 'ncMono'
- 'NKdim'
- 'Treg'
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 'B.cell'\n",
+ "\\item 'CD4\\_CTL'\n",
+ "\\item 'CD4\\_Naive'\n",
+ "\\item 'CD4\\_TCM'\n",
+ "\\item 'CD4\\_TEM'\n",
+ "\\item 'CD8\\_Naive'\n",
+ "\\item 'CD8\\_TEM'\n",
+ "\\item 'cDC2'\n",
+ "\\item 'cMono'\n",
+ "\\item 'MAIT'\n",
+ "\\item 'ncMono'\n",
+ "\\item 'NKdim'\n",
+ "\\item 'Treg'\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 'B.cell'\n",
+ "2. 'CD4_CTL'\n",
+ "3. 'CD4_Naive'\n",
+ "4. 'CD4_TCM'\n",
+ "5. 'CD4_TEM'\n",
+ "6. 'CD8_Naive'\n",
+ "7. 'CD8_TEM'\n",
+ "8. 'cDC2'\n",
+ "9. 'cMono'\n",
+ "10. 'MAIT'\n",
+ "11. 'ncMono'\n",
+ "12. 'NKdim'\n",
+ "13. 'Treg'\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ " [1] \"B.cell\" \"CD4_CTL\" \"CD4_Naive\" \"CD4_TCM\" \"CD4_TEM\" \"CD8_Naive\"\n",
+ " [7] \"CD8_TEM\" \"cDC2\" \"cMono\" \"MAIT\" \"ncMono\" \"NKdim\" \n",
+ "[13] \"Treg\" "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "unique(input_gr$cell_type)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 525,
+ "id": "13a36331-18d3-4146-b31d-633122f61d96",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## Remove non-single-cell features (clinical_data, cytokine, neutrophil, proteomics) from the Munich data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 526,
+ "id": "ed1a479e-c9c4-48a4-92dc-539e2f50c788",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A data.frame: 2 × 22\n",
+ "\n",
+ "\t | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | ⋯ | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 | type | variable_name |
\n",
+ "\t | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\t1 | -0.020691979 | 0.4295154 | 0.02775631 | -0.02712267 | 0.010438305 | -0.4194796 | 0.01169812 | -0.11248843 | 0.2867520 | 0.0143456634 | ⋯ | 0.03096640 | 0.1001090 | 0.8842042 | -0.002553659 | -0.0002680875 | -0.009308180 | 0.003408993 | -0.002008598 | clinical_data | CK |
\n",
+ "\t2 | -0.001361949 | 0.3790396 | 0.06749868 | -0.06765357 | -0.004929694 | -0.3529069 | 0.01561328 | -0.01496201 | 0.3777444 | -0.0004131968 | ⋯ | 0.00304789 | 0.1161746 | 0.1307211 | -0.002865083 | -0.0003610348 | -0.008757576 | 0.004192790 | -0.010507527 | clinical_data | CK_MB |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A data.frame: 2 × 22\n",
+ "\\begin{tabular}{r|lllllllllllllllllllll}\n",
+ " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & ⋯ & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20 & type & variable\\_name\\\\\n",
+ " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n",
+ "\\hline\n",
+ "\t1 & -0.020691979 & 0.4295154 & 0.02775631 & -0.02712267 & 0.010438305 & -0.4194796 & 0.01169812 & -0.11248843 & 0.2867520 & 0.0143456634 & ⋯ & 0.03096640 & 0.1001090 & 0.8842042 & -0.002553659 & -0.0002680875 & -0.009308180 & 0.003408993 & -0.002008598 & clinical\\_data & CK \\\\\n",
+ "\t2 & -0.001361949 & 0.3790396 & 0.06749868 & -0.06765357 & -0.004929694 & -0.3529069 & 0.01561328 & -0.01496201 & 0.3777444 & -0.0004131968 & ⋯ & 0.00304789 & 0.1161746 & 0.1307211 & -0.002865083 & -0.0003610348 & -0.008757576 & 0.004192790 & -0.010507527 & clinical\\_data & CK\\_MB\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A data.frame: 2 × 22\n",
+ "\n",
+ "| | Factor1 <dbl> | Factor2 <dbl> | Factor3 <dbl> | Factor4 <dbl> | Factor5 <dbl> | Factor6 <dbl> | Factor7 <dbl> | Factor8 <dbl> | Factor9 <dbl> | Factor10 <dbl> | ⋯ ⋯ | Factor13 <dbl> | Factor14 <dbl> | Factor15 <dbl> | Factor16 <dbl> | Factor17 <dbl> | Factor18 <dbl> | Factor19 <dbl> | Factor20 <dbl> | type <chr> | variable_name <chr> |\n",
+ "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
+ "| 1 | -0.020691979 | 0.4295154 | 0.02775631 | -0.02712267 | 0.010438305 | -0.4194796 | 0.01169812 | -0.11248843 | 0.2867520 | 0.0143456634 | ⋯ | 0.03096640 | 0.1001090 | 0.8842042 | -0.002553659 | -0.0002680875 | -0.009308180 | 0.003408993 | -0.002008598 | clinical_data | CK |\n",
+ "| 2 | -0.001361949 | 0.3790396 | 0.06749868 | -0.06765357 | -0.004929694 | -0.3529069 | 0.01561328 | -0.01496201 | 0.3777444 | -0.0004131968 | ⋯ | 0.00304789 | 0.1161746 | 0.1307211 | -0.002865083 | -0.0003610348 | -0.008757576 | 0.004192790 | -0.010507527 | clinical_data | CK_MB |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 \n",
+ "1 -0.020691979 0.4295154 0.02775631 -0.02712267 0.010438305 -0.4194796\n",
+ "2 -0.001361949 0.3790396 0.06749868 -0.06765357 -0.004929694 -0.3529069\n",
+ " Factor7 Factor8 Factor9 Factor10 ⋯ Factor13 Factor14 \n",
+ "1 0.01169812 -0.11248843 0.2867520 0.0143456634 ⋯ 0.03096640 0.1001090\n",
+ "2 0.01561328 -0.01496201 0.3777444 -0.0004131968 ⋯ 0.00304789 0.1161746\n",
+ " Factor15 Factor16 Factor17 Factor18 Factor19 Factor20 \n",
+ "1 0.8842042 -0.002553659 -0.0002680875 -0.009308180 0.003408993 -0.002008598\n",
+ "2 0.1307211 -0.002865083 -0.0003610348 -0.008757576 0.004192790 -0.010507527\n",
+ " type variable_name\n",
+ "1 clinical_data CK \n",
+ "2 clinical_data CK_MB "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 'clinical_data'
- 'cytokine'
- 'neutrophil'
- 'proteomics'
- 'Bcell'
- 'CD14Mono'
- 'CD16Mono'
- 'CD4CTL'
- 'CD4Naive'
- 'CD4TCM'
- 'CD4TEM'
- 'CD8Naive'
- 'CD8TEM'
- 'cDC2'
- 'gdT'
- 'NK'
- 'Treg'
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 'clinical\\_data'\n",
+ "\\item 'cytokine'\n",
+ "\\item 'neutrophil'\n",
+ "\\item 'proteomics'\n",
+ "\\item 'Bcell'\n",
+ "\\item 'CD14Mono'\n",
+ "\\item 'CD16Mono'\n",
+ "\\item 'CD4CTL'\n",
+ "\\item 'CD4Naive'\n",
+ "\\item 'CD4TCM'\n",
+ "\\item 'CD4TEM'\n",
+ "\\item 'CD8Naive'\n",
+ "\\item 'CD8TEM'\n",
+ "\\item 'cDC2'\n",
+ "\\item 'gdT'\n",
+ "\\item 'NK'\n",
+ "\\item 'Treg'\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 'clinical_data'\n",
+ "2. 'cytokine'\n",
+ "3. 'neutrophil'\n",
+ "4. 'proteomics'\n",
+ "5. 'Bcell'\n",
+ "6. 'CD14Mono'\n",
+ "7. 'CD16Mono'\n",
+ "8. 'CD4CTL'\n",
+ "9. 'CD4Naive'\n",
+ "10. 'CD4TCM'\n",
+ "11. 'CD4TEM'\n",
+ "12. 'CD8Naive'\n",
+ "13. 'CD8TEM'\n",
+ "14. 'cDC2'\n",
+ "15. 'gdT'\n",
+ "16. 'NK'\n",
+ "17. 'Treg'\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ " [1] \"clinical_data\" \"cytokine\" \"neutrophil\" \"proteomics\" \n",
+ " [5] \"Bcell\" \"CD14Mono\" \"CD16Mono\" \"CD4CTL\" \n",
+ " [9] \"CD4Naive\" \"CD4TCM\" \"CD4TEM\" \"CD8Naive\" \n",
+ "[13] \"CD8TEM\" \"cDC2\" \"gdT\" \"NK\" \n",
+ "[17] \"Treg\" "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "head(feature_data_mu,2)\n",
+ "unique(feature_data_mu$type)\n",
+ "feature_data_mu = feature_data_mu[\n",
+ " !feature_data_mu$type %in% c('clinical_data', 'cytokine', 'neutrophil', 'proteomics'),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 527,
+ "id": "696f5865-ddc3-4f05-a9fe-d055f981300b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "7694"
+ ],
+ "text/latex": [
+ "7694"
+ ],
+ "text/markdown": [
+ "7694"
+ ],
+ "text/plain": [
+ "[1] 7694"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "length(unique(feature_data_mu$variable_name))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 528,
+ "id": "72283372-7aa9-48f3-9335-192a6d508af7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## Adjust feature names to align with Groningen Mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 529,
+ "id": "b621bd4a-a38d-4fc1-a6a9-0fd505111f39",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 'B.cell'
- 'CD14.Mono'
- 'CD16.Mono'
- 'CD4.CTL'
- 'CD4.Naive'
- 'CD4.TCM'
- 'CD4.TEM'
- 'CD8.Naive'
- 'CD8.TEM'
- 'cDC2'
- 'gdT'
- 'NK'
- 'Treg'
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 'B.cell'\n",
+ "\\item 'CD14.Mono'\n",
+ "\\item 'CD16.Mono'\n",
+ "\\item 'CD4.CTL'\n",
+ "\\item 'CD4.Naive'\n",
+ "\\item 'CD4.TCM'\n",
+ "\\item 'CD4.TEM'\n",
+ "\\item 'CD8.Naive'\n",
+ "\\item 'CD8.TEM'\n",
+ "\\item 'cDC2'\n",
+ "\\item 'gdT'\n",
+ "\\item 'NK'\n",
+ "\\item 'Treg'\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 'B.cell'\n",
+ "2. 'CD14.Mono'\n",
+ "3. 'CD16.Mono'\n",
+ "4. 'CD4.CTL'\n",
+ "5. 'CD4.Naive'\n",
+ "6. 'CD4.TCM'\n",
+ "7. 'CD4.TEM'\n",
+ "8. 'CD8.Naive'\n",
+ "9. 'CD8.TEM'\n",
+ "10. 'cDC2'\n",
+ "11. 'gdT'\n",
+ "12. 'NK'\n",
+ "13. 'Treg'\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ " [1] \"B.cell\" \"CD14.Mono\" \"CD16.Mono\" \"CD4.CTL\" \"CD4.Naive\" \"CD4.TCM\" \n",
+ " [7] \"CD4.TEM\" \"CD8.Naive\" \"CD8.TEM\" \"cDC2\" \"gdT\" \"NK\" \n",
+ "[13] \"Treg\" "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A data.frame: 2 × 24\n",
+ "\n",
+ "\t | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | ⋯ | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 | type | variable_name | cell_type | feature |
\n",
+ "\t | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\t1452 | -0.17926582 | 0.23935213 | 0.1632959 | -0.04758595 | 0.13904403 | -0.06905755 | 1.1085302 | 0.012131219 | 0.3152998 | 0.04606748 | ⋯ | -0.01115103 | 0.054059598 | -0.2188138 | -0.1313958 | -0.03007437 | 0.1566817 | Bcell | B.cell__ACTB | B.cell | B.cell__ACTB |
\n",
+ "\t1453 | -0.04518202 | -0.01996126 | 0.1115015 | -0.01237420 | 0.07304104 | -0.07186289 | 0.6682053 | 0.002994229 | 0.1844658 | -0.03832849 | ⋯ | -0.02316688 | -0.006085256 | -0.5234638 | -0.3893344 | -0.17256498 | -0.4521462 | Bcell | B.cell__ACTG1 | B.cell | B.cell__ACTG1 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A data.frame: 2 × 24\n",
+ "\\begin{tabular}{r|lllllllllllllllllllll}\n",
+ " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & ⋯ & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20 & type & variable\\_name & cell\\_type & feature\\\\\n",
+ " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n",
+ "\\hline\n",
+ "\t1452 & -0.17926582 & 0.23935213 & 0.1632959 & -0.04758595 & 0.13904403 & -0.06905755 & 1.1085302 & 0.012131219 & 0.3152998 & 0.04606748 & ⋯ & -0.01115103 & 0.054059598 & -0.2188138 & -0.1313958 & -0.03007437 & 0.1566817 & Bcell & B.cell\\_\\_ACTB & B.cell & B.cell\\_\\_ACTB \\\\\n",
+ "\t1453 & -0.04518202 & -0.01996126 & 0.1115015 & -0.01237420 & 0.07304104 & -0.07186289 & 0.6682053 & 0.002994229 & 0.1844658 & -0.03832849 & ⋯ & -0.02316688 & -0.006085256 & -0.5234638 & -0.3893344 & -0.17256498 & -0.4521462 & Bcell & B.cell\\_\\_ACTG1 & B.cell & B.cell\\_\\_ACTG1\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A data.frame: 2 × 24\n",
+ "\n",
+ "| | Factor1 <dbl> | Factor2 <dbl> | Factor3 <dbl> | Factor4 <dbl> | Factor5 <dbl> | Factor6 <dbl> | Factor7 <dbl> | Factor8 <dbl> | Factor9 <dbl> | Factor10 <dbl> | ⋯ ⋯ | Factor15 <dbl> | Factor16 <dbl> | Factor17 <dbl> | Factor18 <dbl> | Factor19 <dbl> | Factor20 <dbl> | type <chr> | variable_name <chr> | cell_type <chr> | feature <chr> |\n",
+ "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
+ "| 1452 | -0.17926582 | 0.23935213 | 0.1632959 | -0.04758595 | 0.13904403 | -0.06905755 | 1.1085302 | 0.012131219 | 0.3152998 | 0.04606748 | ⋯ | -0.01115103 | 0.054059598 | -0.2188138 | -0.1313958 | -0.03007437 | 0.1566817 | Bcell | B.cell__ACTB | B.cell | B.cell__ACTB |\n",
+ "| 1453 | -0.04518202 | -0.01996126 | 0.1115015 | -0.01237420 | 0.07304104 | -0.07186289 | 0.6682053 | 0.002994229 | 0.1844658 | -0.03832849 | ⋯ | -0.02316688 | -0.006085256 | -0.5234638 | -0.3893344 | -0.17256498 | -0.4521462 | Bcell | B.cell__ACTG1 | B.cell | B.cell__ACTG1 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 \n",
+ "1452 -0.17926582 0.23935213 0.1632959 -0.04758595 0.13904403 -0.06905755\n",
+ "1453 -0.04518202 -0.01996126 0.1115015 -0.01237420 0.07304104 -0.07186289\n",
+ " Factor7 Factor8 Factor9 Factor10 ⋯ Factor15 Factor16 \n",
+ "1452 1.1085302 0.012131219 0.3152998 0.04606748 ⋯ -0.01115103 0.054059598\n",
+ "1453 0.6682053 0.002994229 0.1844658 -0.03832849 ⋯ -0.02316688 -0.006085256\n",
+ " Factor17 Factor18 Factor19 Factor20 type variable_name cell_type\n",
+ "1452 -0.2188138 -0.1313958 -0.03007437 0.1566817 Bcell B.cell__ACTB B.cell \n",
+ "1453 -0.5234638 -0.3893344 -0.17256498 -0.4521462 Bcell B.cell__ACTG1 B.cell \n",
+ " feature \n",
+ "1452 B.cell__ACTB \n",
+ "1453 B.cell__ACTG1"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "7694"
+ ],
+ "text/latex": [
+ "7694"
+ ],
+ "text/markdown": [
+ "7694"
+ ],
+ "text/plain": [
+ "[1] 7694"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "#### Azimuth\n",
+ "# Cell-type prefix: everything in front of the '__' separator of variable_name\n",
+ "feature_data_mu$cell_type = str_replace(feature_data_mu$variable_name,\n",
+ "                                        '__.*', '')\n",
+ "\n",
+ "unique(feature_data_mu$cell_type)\n",
+ "\n",
+ "# Translate the Munich cell-type prefixes into the Groningen names.\n",
+ "# Every pattern is anchored to the start of the string and includes the '__'\n",
+ "# separator, so only the cell-type prefix is rewritten. Unanchored patterns\n",
+ "# replace the first match anywhere in the string, e.g. 'NK' inside a gene\n",
+ "# symbol ('cMono__NKG7' would become 'cMono__NKdimG7'), and such features then\n",
+ "# silently fail to match the Groningen feature names.\n",
+ "# '[.]' matches a literal dot. Prefixes without a rule (e.g. cDC2) keep their name.\n",
+ "feature_data_mu$feature = feature_data_mu$variable_name\n",
+ "\n",
+ "feature_data_mu$feature = str_replace(\n",
+ "    feature_data_mu$feature, '^CD14[.]Mono__', 'cMono__')\n",
+ "\n",
+ "feature_data_mu$feature = str_replace(\n",
+ "    feature_data_mu$feature, '^Bcell__', 'B.cell__')\n",
+ "\n",
+ "feature_data_mu$feature = str_replace(\n",
+ "    feature_data_mu$feature, '^CD16[.]Mono__', 'ncMono__')\n",
+ "\n",
+ "feature_data_mu$feature = str_replace(\n",
+ "    feature_data_mu$feature, '^CD4[.]TCM__', 'CD4_TCM__')\n",
+ "\n",
+ "feature_data_mu$feature = str_replace(\n",
+ "    feature_data_mu$feature, '^NK__', 'NKdim__')\n",
+ "\n",
+ "head(feature_data_mu, 2)\n",
+ "length(unique(feature_data_mu$feature))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 530,
+ "id": "2f48f0a6-4b43-4016-b6f2-8e1278403689",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Compare"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 531,
+ "id": "da55979d-3dea-4570-a5fe-b6110fb34a64",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#sort(unique(str_replace(feature_data_mu$feature, '__.*', ''))) # Munich\n",
+ "#sort(unique(input_gr$cell_type)) # Groningen"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 532,
+ "id": "494c2d51-eb8d-436c-9847-628b31128b14",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Long format of mu feature data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 533,
+ "id": "abffbf24-58f8-4232-8f3b-21fdf05be942",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using type, variable_name, cell_type, feature as id variables\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Long format: one row per (feature, factor) pair. The four annotation columns\n",
+ "# are named explicitly so melt() does not have to guess the id variables; all\n",
+ "# remaining columns are melted into 'variable' / 'value'.\n",
+ "feature_data_mu = melt(\n",
+ "    feature_data_mu,\n",
+ "    id.vars = c('type', 'variable_name', 'cell_type', 'feature')\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 534,
+ "id": "ec80b0bc-a85b-4482-a5b8-bab7a3e7f58e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A data.frame: 2 × 6\n",
+ "\n",
+ "\t | type | variable_name | cell_type | feature | variable | value |
\n",
+ "\t | <chr> | <chr> | <chr> | <chr> | <fct> | <dbl> |
\n",
+ "\n",
+ "\n",
+ "\t1 | Bcell | B.cell__ACTB | B.cell | B.cell__ACTB | Factor1 | -0.17926582 |
\n",
+ "\t2 | Bcell | B.cell__ACTG1 | B.cell | B.cell__ACTG1 | Factor1 | -0.04518202 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A data.frame: 2 × 6\n",
+ "\\begin{tabular}{r|llllll}\n",
+ " & type & variable\\_name & cell\\_type & feature & variable & value\\\\\n",
+ " & & & & & & \\\\\n",
+ "\\hline\n",
+ "\t1 & Bcell & B.cell\\_\\_ACTB & B.cell & B.cell\\_\\_ACTB & Factor1 & -0.17926582\\\\\n",
+ "\t2 & Bcell & B.cell\\_\\_ACTG1 & B.cell & B.cell\\_\\_ACTG1 & Factor1 & -0.04518202\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A data.frame: 2 × 6\n",
+ "\n",
+ "| | type <chr> | variable_name <chr> | cell_type <chr> | feature <chr> | variable <fct> | value <dbl> |\n",
+ "|---|---|---|---|---|---|---|\n",
+ "| 1 | Bcell | B.cell__ACTB | B.cell | B.cell__ACTB | Factor1 | -0.17926582 |\n",
+ "| 2 | Bcell | B.cell__ACTG1 | B.cell | B.cell__ACTG1 | Factor1 | -0.04518202 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " type variable_name cell_type feature variable value \n",
+ "1 Bcell B.cell__ACTB B.cell B.cell__ACTB Factor1 -0.17926582\n",
+ "2 Bcell B.cell__ACTG1 B.cell B.cell__ACTG1 Factor1 -0.04518202"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "head(feature_data_mu,2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 535,
+ "id": "4bfcb098-18c1-453d-a0c4-51e86e323905",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Filter MU feature weights for features also in GR data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 536,
+ "id": "32765cc2-8611-4817-bdb3-68ce5b961ba4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A data.frame: 2 × 6\n",
+ "\n",
+ "\t | type | variable_name | cell_type | feature | variable | value |
\n",
+ "\t | <chr> | <chr> | <chr> | <chr> | <fct> | <dbl> |
\n",
+ "\n",
+ "\n",
+ "\t1 | Bcell | B.cell__ACTB | B.cell | B.cell__ACTB | Factor1 | -0.1792658 |
\n",
+ "\t24 | Bcell | B.cell__ARHGDIB | B.cell | B.cell__ARHGDIB | Factor1 | 0.3273940 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A data.frame: 2 × 6\n",
+ "\\begin{tabular}{r|llllll}\n",
+ " & type & variable\\_name & cell\\_type & feature & variable & value\\\\\n",
+ " & & & & & & \\\\\n",
+ "\\hline\n",
+ "\t1 & Bcell & B.cell\\_\\_ACTB & B.cell & B.cell\\_\\_ACTB & Factor1 & -0.1792658\\\\\n",
+ "\t24 & Bcell & B.cell\\_\\_ARHGDIB & B.cell & B.cell\\_\\_ARHGDIB & Factor1 & 0.3273940\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A data.frame: 2 × 6\n",
+ "\n",
+ "| | type <chr> | variable_name <chr> | cell_type <chr> | feature <chr> | variable <fct> | value <dbl> |\n",
+ "|---|---|---|---|---|---|---|\n",
+ "| 1 | Bcell | B.cell__ACTB | B.cell | B.cell__ACTB | Factor1 | -0.1792658 |\n",
+ "| 24 | Bcell | B.cell__ARHGDIB | B.cell | B.cell__ARHGDIB | Factor1 | 0.3273940 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " type variable_name cell_type feature variable value \n",
+ "1 Bcell B.cell__ACTB B.cell B.cell__ACTB Factor1 -0.1792658\n",
+ "24 Bcell B.cell__ARHGDIB B.cell B.cell__ARHGDIB Factor1 0.3273940"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "3230"
+ ],
+ "text/latex": [
+ "3230"
+ ],
+ "text/markdown": [
+ "3230"
+ ],
+ "text/plain": [
+ "[1] 3230"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Restrict the Munich weights to features that also occur in the Groningen\n",
+ "# input; subset() keeps the original row names, like the bracket filter\n",
+ "feature_data_mu = subset(feature_data_mu, feature %in% input_gr$feature)\n",
+ "head(feature_data_mu, n = 2)\n",
+ "length(unique(feature_data_mu$feature))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 537,
+ "id": "d6c05068-71a9-4abf-9918-52aa21030551",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 'B.cell'
- 'CD4_TCM'
- 'cDC2'
- 'cMono'
- 'ncMono'
- 'NKdim'
- 'Treg'
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 'B.cell'\n",
+ "\\item 'CD4\\_TCM'\n",
+ "\\item 'cDC2'\n",
+ "\\item 'cMono'\n",
+ "\\item 'ncMono'\n",
+ "\\item 'NKdim'\n",
+ "\\item 'Treg'\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 'B.cell'\n",
+ "2. 'CD4_TCM'\n",
+ "3. 'cDC2'\n",
+ "4. 'cMono'\n",
+ "5. 'ncMono'\n",
+ "6. 'NKdim'\n",
+ "7. 'Treg'\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] \"B.cell\" \"CD4_TCM\" \"cDC2\" \"cMono\" \"ncMono\" \"NKdim\" \"Treg\" "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sort(unique(str_replace(feature_data_mu$feature, '__.*', ''))) # check mapped cell-types"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 538,
+ "id": "e51e4854-d4ec-4589-91d2-1db3e80fa546",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Drop every row whose cell_type (the prefix of variable_name before '__') is Treg\n",
+ "feature_data_mu = feature_data_mu[feature_data_mu$cell_type != 'Treg',] # exclude not so well mapped cell-type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 539,
+ "id": "33a617a4-c3a1-4798-bae5-79924cb40fec",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "3148"
+ ],
+ "text/latex": [
+ "3148"
+ ],
+ "text/markdown": [
+ "3148"
+ ],
+ "text/plain": [
+ "[1] 3148"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "length(unique(feature_data_mu$feature))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 540,
+ "id": "a755b487-d640-46b3-9c69-64034ffd33d6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Filter GR input features for features also in MU data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 541,
+ "id": "92e5d5d6-4daa-4525-907e-bb0a328fecc4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "6353"
+ ],
+ "text/latex": [
+ "6353"
+ ],
+ "text/markdown": [
+ "6353"
+ ],
+ "text/plain": [
+ "[1] 6353"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Distinct Groningen features before restricting to the shared set\n",
+ "length(unique(input_gr$feature))\n",
+ "# Long-format Groningen data limited to features that still have Munich\n",
+ "# weights; kept under its own name (input_vis) next to the wide table below\n",
+ "input_vis = input_gr[input_gr$feature %in% feature_data_mu$feature, ]\n",
+ "# Wide table: one row per sample_id, one column per feature\n",
+ "input_gr = dcast(input_vis, sample_id ~ feature, value.var = 'value')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 542,
+ "id": "28410730-af80-43e5-ae10-41f92ee080b2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A data.frame: 6 × 3149\n",
+ "\n",
+ "\t | sample_id | B.cell__ACTB | B.cell__ARHGDIB | B.cell__ARPC2 | B.cell__ARPC3 | B.cell__B2M | B.cell__BIRC3 | B.cell__BTF3 | B.cell__BTG1 | B.cell__BTG2 | ⋯ | NKdim__YPEL5 | NKdim__YWHAB | NKdim__YWHAQ | NKdim__YWHAZ | NKdim__ZC3HAV1 | NKdim__ZEB2 | NKdim__ZFAS1 | NKdim__ZFP36 | NKdim__ZFP36L1 | NKdim__ZFP36L2 |
\n",
+ "\t | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> |
\n",
+ "\n",
+ "\n",
+ "\t1 | Baseline.TEST_1 | -0.1451209 | -0.2808230 | 0.2933812 | -1.3037827 | 0.35691713 | -0.29338123 | -0.5720211 | 1.6637928 | -1.3037827 | ⋯ | 1.6637928 | 0.70506589 | 0.4752408 | 0.9234567 | 0.6744898 | -0.02410453 | -1.0405662 | -0.94207577 | -0.80109453 | -0.7050659 |
\n",
+ "\t2 | Baseline.TEST_12 | 0.2310092 | -0.0602920 | 0.3697907 | -0.3697907 | -0.29338123 | -0.08445798 | 0.4752408 | 0.1451209 | 0.1573107 | ⋯ | -0.6744898 | -0.09655862 | 0.8178173 | -0.3827258 | -0.7363159 | -0.44842548 | -1.8980287 | -0.07236971 | 1.36238339 | 0.8178173 |
\n",
+ "\t3 | Baseline.TEST_14 | -0.2933812 | 0.1329527 | 1.6637928 | -0.6151411 | 0.96102726 | 1.53412054 | 1.3935235 | 1.0405662 | -1.0829150 | ⋯ | 1.7688250 | 0.34410246 | 0.7363159 | -0.1817624 | -1.5341205 | -0.61514110 | -0.1451209 | -1.06150263 | -1.76882504 | 0.5299395 |
\n",
+ "\t4 | Baseline.TEST_15 | -1.0615026 | -0.4087920 | -0.7206029 | -0.2683089 | -0.08445798 | -0.38272581 | -1.2493462 | -1.0405662 | 1.1740260 | ⋯ | 0.6744898 | -0.58627265 | 0.6445316 | 0.6297684 | -0.3313441 | 0.60064430 | 0.2683089 | -1.15034938 | -0.04822307 | -0.8694238 |
\n",
+ "\t5 | Baseline.TEST_17 | -1.1503494 | -1.4602158 | -0.5299395 | -0.9420758 | 0.78459288 | 0.52993955 | 1.5744450 | -0.4219289 | 1.0829150 | ⋯ | 0.7363159 | -1.19837970 | -0.1817624 | -1.1503494 | 0.1086734 | -0.39572530 | -1.3037827 | 0.20632319 | -1.66379279 | 1.4602158 |
\n",
+ "\t6 | Baseline.TEST_18 | 1.1273007 | 0.6297684 | 2.3410271 | 0.2063232 | 1.61750540 | 0.68969722 | -1.5341205 | 0.4617916 | -1.5744450 | ⋯ | -0.3827258 | -0.73631592 | -0.3186394 | 1.3037827 | 0.9234567 | 1.49614688 | -1.2493462 | -0.73631592 | -1.30378267 | 1.4602158 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A data.frame: 6 × 3149\n",
+ "\\begin{tabular}{r|lllllllllllllllllllll}\n",
+ " & sample\\_id & B.cell\\_\\_ACTB & B.cell\\_\\_ARHGDIB & B.cell\\_\\_ARPC2 & B.cell\\_\\_ARPC3 & B.cell\\_\\_B2M & B.cell\\_\\_BIRC3 & B.cell\\_\\_BTF3 & B.cell\\_\\_BTG1 & B.cell\\_\\_BTG2 & ⋯ & NKdim\\_\\_YPEL5 & NKdim\\_\\_YWHAB & NKdim\\_\\_YWHAQ & NKdim\\_\\_YWHAZ & NKdim\\_\\_ZC3HAV1 & NKdim\\_\\_ZEB2 & NKdim\\_\\_ZFAS1 & NKdim\\_\\_ZFP36 & NKdim\\_\\_ZFP36L1 & NKdim\\_\\_ZFP36L2\\\\\n",
+ " & & & & & & & & & & & ⋯ & & & & & & & & & & \\\\\n",
+ "\\hline\n",
+ "\t1 & Baseline.TEST\\_1 & -0.1451209 & -0.2808230 & 0.2933812 & -1.3037827 & 0.35691713 & -0.29338123 & -0.5720211 & 1.6637928 & -1.3037827 & ⋯ & 1.6637928 & 0.70506589 & 0.4752408 & 0.9234567 & 0.6744898 & -0.02410453 & -1.0405662 & -0.94207577 & -0.80109453 & -0.7050659\\\\\n",
+ "\t2 & Baseline.TEST\\_12 & 0.2310092 & -0.0602920 & 0.3697907 & -0.3697907 & -0.29338123 & -0.08445798 & 0.4752408 & 0.1451209 & 0.1573107 & ⋯ & -0.6744898 & -0.09655862 & 0.8178173 & -0.3827258 & -0.7363159 & -0.44842548 & -1.8980287 & -0.07236971 & 1.36238339 & 0.8178173\\\\\n",
+ "\t3 & Baseline.TEST\\_14 & -0.2933812 & 0.1329527 & 1.6637928 & -0.6151411 & 0.96102726 & 1.53412054 & 1.3935235 & 1.0405662 & -1.0829150 & ⋯ & 1.7688250 & 0.34410246 & 0.7363159 & -0.1817624 & -1.5341205 & -0.61514110 & -0.1451209 & -1.06150263 & -1.76882504 & 0.5299395\\\\\n",
+ "\t4 & Baseline.TEST\\_15 & -1.0615026 & -0.4087920 & -0.7206029 & -0.2683089 & -0.08445798 & -0.38272581 & -1.2493462 & -1.0405662 & 1.1740260 & ⋯ & 0.6744898 & -0.58627265 & 0.6445316 & 0.6297684 & -0.3313441 & 0.60064430 & 0.2683089 & -1.15034938 & -0.04822307 & -0.8694238\\\\\n",
+ "\t5 & Baseline.TEST\\_17 & -1.1503494 & -1.4602158 & -0.5299395 & -0.9420758 & 0.78459288 & 0.52993955 & 1.5744450 & -0.4219289 & 1.0829150 & ⋯ & 0.7363159 & -1.19837970 & -0.1817624 & -1.1503494 & 0.1086734 & -0.39572530 & -1.3037827 & 0.20632319 & -1.66379279 & 1.4602158\\\\\n",
+ "\t6 & Baseline.TEST\\_18 & 1.1273007 & 0.6297684 & 2.3410271 & 0.2063232 & 1.61750540 & 0.68969722 & -1.5341205 & 0.4617916 & -1.5744450 & ⋯ & -0.3827258 & -0.73631592 & -0.3186394 & 1.3037827 & 0.9234567 & 1.49614688 & -1.2493462 & -0.73631592 & -1.30378267 & 1.4602158\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A data.frame: 6 × 3149\n",
+ "\n",
+ "| | sample_id <chr> | B.cell__ACTB <dbl> | B.cell__ARHGDIB <dbl> | B.cell__ARPC2 <dbl> | B.cell__ARPC3 <dbl> | B.cell__B2M <dbl> | B.cell__BIRC3 <dbl> | B.cell__BTF3 <dbl> | B.cell__BTG1 <dbl> | B.cell__BTG2 <dbl> | ⋯ ⋯ | NKdim__YPEL5 <dbl> | NKdim__YWHAB <dbl> | NKdim__YWHAQ <dbl> | NKdim__YWHAZ <dbl> | NKdim__ZC3HAV1 <dbl> | NKdim__ZEB2 <dbl> | NKdim__ZFAS1 <dbl> | NKdim__ZFP36 <dbl> | NKdim__ZFP36L1 <dbl> | NKdim__ZFP36L2 <dbl> |\n",
+ "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
+ "| 1 | Baseline.TEST_1 | -0.1451209 | -0.2808230 | 0.2933812 | -1.3037827 | 0.35691713 | -0.29338123 | -0.5720211 | 1.6637928 | -1.3037827 | ⋯ | 1.6637928 | 0.70506589 | 0.4752408 | 0.9234567 | 0.6744898 | -0.02410453 | -1.0405662 | -0.94207577 | -0.80109453 | -0.7050659 |\n",
+ "| 2 | Baseline.TEST_12 | 0.2310092 | -0.0602920 | 0.3697907 | -0.3697907 | -0.29338123 | -0.08445798 | 0.4752408 | 0.1451209 | 0.1573107 | ⋯ | -0.6744898 | -0.09655862 | 0.8178173 | -0.3827258 | -0.7363159 | -0.44842548 | -1.8980287 | -0.07236971 | 1.36238339 | 0.8178173 |\n",
+ "| 3 | Baseline.TEST_14 | -0.2933812 | 0.1329527 | 1.6637928 | -0.6151411 | 0.96102726 | 1.53412054 | 1.3935235 | 1.0405662 | -1.0829150 | ⋯ | 1.7688250 | 0.34410246 | 0.7363159 | -0.1817624 | -1.5341205 | -0.61514110 | -0.1451209 | -1.06150263 | -1.76882504 | 0.5299395 |\n",
+ "| 4 | Baseline.TEST_15 | -1.0615026 | -0.4087920 | -0.7206029 | -0.2683089 | -0.08445798 | -0.38272581 | -1.2493462 | -1.0405662 | 1.1740260 | ⋯ | 0.6744898 | -0.58627265 | 0.6445316 | 0.6297684 | -0.3313441 | 0.60064430 | 0.2683089 | -1.15034938 | -0.04822307 | -0.8694238 |\n",
+ "| 5 | Baseline.TEST_17 | -1.1503494 | -1.4602158 | -0.5299395 | -0.9420758 | 0.78459288 | 0.52993955 | 1.5744450 | -0.4219289 | 1.0829150 | ⋯ | 0.7363159 | -1.19837970 | -0.1817624 | -1.1503494 | 0.1086734 | -0.39572530 | -1.3037827 | 0.20632319 | -1.66379279 | 1.4602158 |\n",
+ "| 6 | Baseline.TEST_18 | 1.1273007 | 0.6297684 | 2.3410271 | 0.2063232 | 1.61750540 | 0.68969722 | -1.5341205 | 0.4617916 | -1.5744450 | ⋯ | -0.3827258 | -0.73631592 | -0.3186394 | 1.3037827 | 0.9234567 | 1.49614688 | -1.2493462 | -0.73631592 | -1.30378267 | 1.4602158 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_id B.cell__ACTB B.cell__ARHGDIB B.cell__ARPC2 B.cell__ARPC3\n",
+ "1 Baseline.TEST_1 -0.1451209 -0.2808230 0.2933812 -1.3037827 \n",
+ "2 Baseline.TEST_12 0.2310092 -0.0602920 0.3697907 -0.3697907 \n",
+ "3 Baseline.TEST_14 -0.2933812 0.1329527 1.6637928 -0.6151411 \n",
+ "4 Baseline.TEST_15 -1.0615026 -0.4087920 -0.7206029 -0.2683089 \n",
+ "5 Baseline.TEST_17 -1.1503494 -1.4602158 -0.5299395 -0.9420758 \n",
+ "6 Baseline.TEST_18 1.1273007 0.6297684 2.3410271 0.2063232 \n",
+ " B.cell__B2M B.cell__BIRC3 B.cell__BTF3 B.cell__BTG1 B.cell__BTG2 ⋯\n",
+ "1 0.35691713 -0.29338123 -0.5720211 1.6637928 -1.3037827 ⋯\n",
+ "2 -0.29338123 -0.08445798 0.4752408 0.1451209 0.1573107 ⋯\n",
+ "3 0.96102726 1.53412054 1.3935235 1.0405662 -1.0829150 ⋯\n",
+ "4 -0.08445798 -0.38272581 -1.2493462 -1.0405662 1.1740260 ⋯\n",
+ "5 0.78459288 0.52993955 1.5744450 -0.4219289 1.0829150 ⋯\n",
+ "6 1.61750540 0.68969722 -1.5341205 0.4617916 -1.5744450 ⋯\n",
+ " NKdim__YPEL5 NKdim__YWHAB NKdim__YWHAQ NKdim__YWHAZ NKdim__ZC3HAV1\n",
+ "1 1.6637928 0.70506589 0.4752408 0.9234567 0.6744898 \n",
+ "2 -0.6744898 -0.09655862 0.8178173 -0.3827258 -0.7363159 \n",
+ "3 1.7688250 0.34410246 0.7363159 -0.1817624 -1.5341205 \n",
+ "4 0.6744898 -0.58627265 0.6445316 0.6297684 -0.3313441 \n",
+ "5 0.7363159 -1.19837970 -0.1817624 -1.1503494 0.1086734 \n",
+ "6 -0.3827258 -0.73631592 -0.3186394 1.3037827 0.9234567 \n",
+ " NKdim__ZEB2 NKdim__ZFAS1 NKdim__ZFP36 NKdim__ZFP36L1 NKdim__ZFP36L2\n",
+ "1 -0.02410453 -1.0405662 -0.94207577 -0.80109453 -0.7050659 \n",
+ "2 -0.44842548 -1.8980287 -0.07236971 1.36238339 0.8178173 \n",
+ "3 -0.61514110 -0.1451209 -1.06150263 -1.76882504 0.5299395 \n",
+ "4 0.60064430 0.2683089 -1.15034938 -0.04822307 -0.8694238 \n",
+ "5 -0.39572530 -1.3037827 0.20632319 -1.66379279 1.4602158 \n",
+ "6 1.49614688 -1.2493462 -0.73631592 -1.30378267 1.4602158 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 103
- 3149
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 103\n",
+ "\\item 3149\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 103\n",
+ "2. 3149\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 103 3149"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "head(input_gr)\n",
+ "dim(input_gr)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d217dad6-5687-4d43-bb37-593c9d62fded",
+ "metadata": {},
+ "source": [
+ "## Prepare matrices"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 543,
+ "id": "a8702239-2b63-45ac-8962-d5b853544d13",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 103
- 3148
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 103\n",
+ "\\item 3148\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 103\n",
+ "2. 3148\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 103 3148"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "### Build the two matrices used in the matrix factorization approach\n",
+ "\n",
+ "## Groningen matrix: samples in rows, features in columns\n",
+ "gr_ma = input_gr\n",
+ "rownames(gr_ma) = gr_ma$sample_id\n",
+ "# Drop the id column; the row names set above carry over into the matrix\n",
+ "gr_ma = as.matrix(gr_ma[, colnames(gr_ma) != 'sample_id', drop = FALSE])\n",
+ "dim(gr_ma)\n",
+ "\n",
+ "## Munich matrix: features in rows, one column per level of 'variable'\n",
+ "mu_ma = dcast(feature_data_mu, feature ~ variable, value.var = 'value')\n",
+ "rownames(mu_ma) = mu_ma$feature\n",
+ "mu_ma = as.matrix(mu_ma[, colnames(mu_ma) != 'feature', drop = FALSE])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 544,
+ "id": "ff3bbc8b-c6ae-4f28-8607-594741a1eec4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 103
- 3148
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 103\n",
+ "\\item 3148\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 103\n",
+ "2. 3148\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 103 3148"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 3148
- 20
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 3148\n",
+ "\\item 20\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 3148\n",
+ "2. 20\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 3148 20"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "dim(gr_ma)\n",
+ "dim(mu_ma)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 545,
+ "id": "704706c0-e484-41c6-99dc-5125e0130c35",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Reorder to have same ordering of features"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 546,
+ "id": "02cdfc6e-ee58-4add-9212-eb6ef66731cd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sort both matrices by feature name so that column j of gr_ma and row j of\n",
+ "# mu_ma refer to the same feature; drop = FALSE keeps the matrix shape even\n",
+ "# if only a single row or column is left\n",
+ "gr_ma = gr_ma[, order(colnames(gr_ma)), drop = FALSE]\n",
+ "mu_ma = mu_ma[order(rownames(mu_ma)), , drop = FALSE]\n",
+ "# Stop here if the two feature sets are not identical after sorting, instead\n",
+ "# of continuing with misaligned matrices\n",
+ "stopifnot(identical(colnames(gr_ma), rownames(mu_ma)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "07330aac-0598-4c8c-b92d-1db68a28f8ed",
+ "metadata": {},
+ "source": [
+ "## Calculate right inverse of MU feature matrix and apply"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 547,
+ "id": "94497574-ce99-4a04-8b2b-4a81d60fb860",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 3148
- 20
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 3148\n",
+ "\\item 20\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 3148\n",
+ "2. 20\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 3148 20"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 20
- 3148
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 20\n",
+ "\\item 3148\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 20\n",
+ "2. 3148\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 20 3148"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A matrix: 6 × 20 of type dbl\n",
+ "\n",
+ "\t | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | Factor11 | Factor12 | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 |
\n",
+ "\n",
+ "\n",
+ "\tB.cell__ACTB | -0.002412865 | 1.100829e-03 | 0.0018519132 | -9.387865e-05 | 0.0197072832 | -1.171368e-03 | 0.004082177 | 3.549377e-04 | 0.0012770596 | -0.0004148242 | 0.017505496 | 8.850074e-04 | -5.435629e-04 | 9.115598e-04 | -1.148509e-05 | 2.181989e-04 | -1.067792e-03 | -0.0003216798 | -0.0005795758 | 0.0007411074 |
\n",
+ "\tB.cell__ARHGDIB | 0.002896903 | -3.862585e-04 | 0.0011406868 | 1.345452e-03 | 0.0149805072 | 1.937177e-04 | 0.001627769 | -2.946817e-04 | 0.0001059237 | 0.0006040866 | -0.014150740 | 2.117682e-04 | -2.951694e-04 | -5.412122e-04 | -1.305719e-04 | -5.945591e-05 | -2.291039e-03 | 0.0001882649 | -0.0002861554 | 0.0002756893 |
\n",
+ "\tB.cell__ARPC2 | -0.001114492 | -6.099780e-05 | 0.0003215208 | 4.180816e-04 | 0.0209911475 | -3.725421e-04 | 0.003521452 | 2.003061e-04 | 0.0013498674 | -0.0020104846 | 0.018957908 | -6.100954e-04 | -6.614388e-05 | 2.495120e-04 | -2.905480e-04 | 1.102433e-03 | -2.069296e-03 | -0.0005396076 | 0.0004881568 | -0.0004772543 |
\n",
+ "\tB.cell__ARPC3 | 0.001612616 | -1.623649e-05 | 0.0008018460 | 3.144724e-04 | 0.0432317206 | -2.770284e-03 | 0.002947859 | -2.522687e-04 | -0.0009838727 | -0.0028467784 | -0.022270218 | 2.108312e-05 | 7.876929e-04 | 2.812663e-03 | -3.912311e-04 | 2.077665e-03 | 7.316946e-05 | 0.0002948308 | 0.0005419943 | -0.0002239274 |
\n",
+ "\tB.cell__B2M | -0.001969751 | 9.628387e-04 | 0.0012608342 | 5.635957e-05 | -0.0039711670 | -4.673417e-04 | 0.003398598 | 5.528947e-05 | 0.0030406779 | -0.0009352855 | 0.006424656 | 1.930557e-03 | -1.285816e-04 | 6.238179e-05 | 1.034847e-04 | 9.638252e-05 | -2.213990e-03 | -0.0004096560 | 0.0010927847 | 0.0014692351 |
\n",
+ "\tB.cell__BIRC3 | 0.001618585 | -5.811380e-05 | 0.0007165601 | -1.982736e-03 | 0.0008224573 | -9.228915e-05 | -0.002526311 | -1.446628e-04 | -0.0032944132 | 0.0005858396 | -0.042494057 | 1.831863e-03 | 3.520557e-04 | -9.515494e-04 | 1.258441e-03 | -4.166334e-04 | -4.329759e-03 | 0.0009212626 | 0.0013008816 | 0.0012679802 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A matrix: 6 × 20 of type dbl\n",
+ "\\begin{tabular}{r|llllllllllllllllllll}\n",
+ " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & Factor11 & Factor12 & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20\\\\\n",
+ "\\hline\n",
+ "\tB.cell\\_\\_ACTB & -0.002412865 & 1.100829e-03 & 0.0018519132 & -9.387865e-05 & 0.0197072832 & -1.171368e-03 & 0.004082177 & 3.549377e-04 & 0.0012770596 & -0.0004148242 & 0.017505496 & 8.850074e-04 & -5.435629e-04 & 9.115598e-04 & -1.148509e-05 & 2.181989e-04 & -1.067792e-03 & -0.0003216798 & -0.0005795758 & 0.0007411074\\\\\n",
+ "\tB.cell\\_\\_ARHGDIB & 0.002896903 & -3.862585e-04 & 0.0011406868 & 1.345452e-03 & 0.0149805072 & 1.937177e-04 & 0.001627769 & -2.946817e-04 & 0.0001059237 & 0.0006040866 & -0.014150740 & 2.117682e-04 & -2.951694e-04 & -5.412122e-04 & -1.305719e-04 & -5.945591e-05 & -2.291039e-03 & 0.0001882649 & -0.0002861554 & 0.0002756893\\\\\n",
+ "\tB.cell\\_\\_ARPC2 & -0.001114492 & -6.099780e-05 & 0.0003215208 & 4.180816e-04 & 0.0209911475 & -3.725421e-04 & 0.003521452 & 2.003061e-04 & 0.0013498674 & -0.0020104846 & 0.018957908 & -6.100954e-04 & -6.614388e-05 & 2.495120e-04 & -2.905480e-04 & 1.102433e-03 & -2.069296e-03 & -0.0005396076 & 0.0004881568 & -0.0004772543\\\\\n",
+ "\tB.cell\\_\\_ARPC3 & 0.001612616 & -1.623649e-05 & 0.0008018460 & 3.144724e-04 & 0.0432317206 & -2.770284e-03 & 0.002947859 & -2.522687e-04 & -0.0009838727 & -0.0028467784 & -0.022270218 & 2.108312e-05 & 7.876929e-04 & 2.812663e-03 & -3.912311e-04 & 2.077665e-03 & 7.316946e-05 & 0.0002948308 & 0.0005419943 & -0.0002239274\\\\\n",
+ "\tB.cell\\_\\_B2M & -0.001969751 & 9.628387e-04 & 0.0012608342 & 5.635957e-05 & -0.0039711670 & -4.673417e-04 & 0.003398598 & 5.528947e-05 & 0.0030406779 & -0.0009352855 & 0.006424656 & 1.930557e-03 & -1.285816e-04 & 6.238179e-05 & 1.034847e-04 & 9.638252e-05 & -2.213990e-03 & -0.0004096560 & 0.0010927847 & 0.0014692351\\\\\n",
+ "\tB.cell\\_\\_BIRC3 & 0.001618585 & -5.811380e-05 & 0.0007165601 & -1.982736e-03 & 0.0008224573 & -9.228915e-05 & -0.002526311 & -1.446628e-04 & -0.0032944132 & 0.0005858396 & -0.042494057 & 1.831863e-03 & 3.520557e-04 & -9.515494e-04 & 1.258441e-03 & -4.166334e-04 & -4.329759e-03 & 0.0009212626 & 0.0013008816 & 0.0012679802\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A matrix: 6 × 20 of type dbl\n",
+ "\n",
+ "| | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | Factor11 | Factor12 | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 |\n",
+ "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
+ "| B.cell__ACTB | -0.002412865 | 1.100829e-03 | 0.0018519132 | -9.387865e-05 | 0.0197072832 | -1.171368e-03 | 0.004082177 | 3.549377e-04 | 0.0012770596 | -0.0004148242 | 0.017505496 | 8.850074e-04 | -5.435629e-04 | 9.115598e-04 | -1.148509e-05 | 2.181989e-04 | -1.067792e-03 | -0.0003216798 | -0.0005795758 | 0.0007411074 |\n",
+ "| B.cell__ARHGDIB | 0.002896903 | -3.862585e-04 | 0.0011406868 | 1.345452e-03 | 0.0149805072 | 1.937177e-04 | 0.001627769 | -2.946817e-04 | 0.0001059237 | 0.0006040866 | -0.014150740 | 2.117682e-04 | -2.951694e-04 | -5.412122e-04 | -1.305719e-04 | -5.945591e-05 | -2.291039e-03 | 0.0001882649 | -0.0002861554 | 0.0002756893 |\n",
+ "| B.cell__ARPC2 | -0.001114492 | -6.099780e-05 | 0.0003215208 | 4.180816e-04 | 0.0209911475 | -3.725421e-04 | 0.003521452 | 2.003061e-04 | 0.0013498674 | -0.0020104846 | 0.018957908 | -6.100954e-04 | -6.614388e-05 | 2.495120e-04 | -2.905480e-04 | 1.102433e-03 | -2.069296e-03 | -0.0005396076 | 0.0004881568 | -0.0004772543 |\n",
+ "| B.cell__ARPC3 | 0.001612616 | -1.623649e-05 | 0.0008018460 | 3.144724e-04 | 0.0432317206 | -2.770284e-03 | 0.002947859 | -2.522687e-04 | -0.0009838727 | -0.0028467784 | -0.022270218 | 2.108312e-05 | 7.876929e-04 | 2.812663e-03 | -3.912311e-04 | 2.077665e-03 | 7.316946e-05 | 0.0002948308 | 0.0005419943 | -0.0002239274 |\n",
+ "| B.cell__B2M | -0.001969751 | 9.628387e-04 | 0.0012608342 | 5.635957e-05 | -0.0039711670 | -4.673417e-04 | 0.003398598 | 5.528947e-05 | 0.0030406779 | -0.0009352855 | 0.006424656 | 1.930557e-03 | -1.285816e-04 | 6.238179e-05 | 1.034847e-04 | 9.638252e-05 | -2.213990e-03 | -0.0004096560 | 0.0010927847 | 0.0014692351 |\n",
+ "| B.cell__BIRC3 | 0.001618585 | -5.811380e-05 | 0.0007165601 | -1.982736e-03 | 0.0008224573 | -9.228915e-05 | -0.002526311 | -1.446628e-04 | -0.0032944132 | 0.0005858396 | -0.042494057 | 1.831863e-03 | 3.520557e-04 | -9.515494e-04 | 1.258441e-03 | -4.166334e-04 | -4.329759e-03 | 0.0009212626 | 0.0013008816 | 0.0012679802 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Factor1 Factor2 Factor3 Factor4 \n",
+ "B.cell__ACTB -0.002412865 1.100829e-03 0.0018519132 -9.387865e-05\n",
+ "B.cell__ARHGDIB 0.002896903 -3.862585e-04 0.0011406868 1.345452e-03\n",
+ "B.cell__ARPC2 -0.001114492 -6.099780e-05 0.0003215208 4.180816e-04\n",
+ "B.cell__ARPC3 0.001612616 -1.623649e-05 0.0008018460 3.144724e-04\n",
+ "B.cell__B2M -0.001969751 9.628387e-04 0.0012608342 5.635957e-05\n",
+ "B.cell__BIRC3 0.001618585 -5.811380e-05 0.0007165601 -1.982736e-03\n",
+ " Factor5 Factor6 Factor7 Factor8 \n",
+ "B.cell__ACTB 0.0197072832 -1.171368e-03 0.004082177 3.549377e-04\n",
+ "B.cell__ARHGDIB 0.0149805072 1.937177e-04 0.001627769 -2.946817e-04\n",
+ "B.cell__ARPC2 0.0209911475 -3.725421e-04 0.003521452 2.003061e-04\n",
+ "B.cell__ARPC3 0.0432317206 -2.770284e-03 0.002947859 -2.522687e-04\n",
+ "B.cell__B2M -0.0039711670 -4.673417e-04 0.003398598 5.528947e-05\n",
+ "B.cell__BIRC3 0.0008224573 -9.228915e-05 -0.002526311 -1.446628e-04\n",
+ " Factor9 Factor10 Factor11 Factor12 \n",
+ "B.cell__ACTB 0.0012770596 -0.0004148242 0.017505496 8.850074e-04\n",
+ "B.cell__ARHGDIB 0.0001059237 0.0006040866 -0.014150740 2.117682e-04\n",
+ "B.cell__ARPC2 0.0013498674 -0.0020104846 0.018957908 -6.100954e-04\n",
+ "B.cell__ARPC3 -0.0009838727 -0.0028467784 -0.022270218 2.108312e-05\n",
+ "B.cell__B2M 0.0030406779 -0.0009352855 0.006424656 1.930557e-03\n",
+ "B.cell__BIRC3 -0.0032944132 0.0005858396 -0.042494057 1.831863e-03\n",
+ " Factor13 Factor14 Factor15 Factor16 \n",
+ "B.cell__ACTB -5.435629e-04 9.115598e-04 -1.148509e-05 2.181989e-04\n",
+ "B.cell__ARHGDIB -2.951694e-04 -5.412122e-04 -1.305719e-04 -5.945591e-05\n",
+ "B.cell__ARPC2 -6.614388e-05 2.495120e-04 -2.905480e-04 1.102433e-03\n",
+ "B.cell__ARPC3 7.876929e-04 2.812663e-03 -3.912311e-04 2.077665e-03\n",
+ "B.cell__B2M -1.285816e-04 6.238179e-05 1.034847e-04 9.638252e-05\n",
+ "B.cell__BIRC3 3.520557e-04 -9.515494e-04 1.258441e-03 -4.166334e-04\n",
+ " Factor17 Factor18 Factor19 Factor20 \n",
+ "B.cell__ACTB -1.067792e-03 -0.0003216798 -0.0005795758 0.0007411074\n",
+ "B.cell__ARHGDIB -2.291039e-03 0.0001882649 -0.0002861554 0.0002756893\n",
+ "B.cell__ARPC2 -2.069296e-03 -0.0005396076 0.0004881568 -0.0004772543\n",
+ "B.cell__ARPC3 7.316946e-05 0.0002948308 0.0005419943 -0.0002239274\n",
+ "B.cell__B2M -2.213990e-03 -0.0004096560 0.0010927847 0.0014692351\n",
+ "B.cell__BIRC3 -4.329759e-03 0.0009212626 0.0013008816 0.0012679802"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A matrix: 6 × 20 of type dbl\n",
+ "\n",
+ "\t | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | Factor11 | Factor12 | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 |
\n",
+ "\n",
+ "\n",
+ "\tNKdim__ZC3HAV1 | -4.191599e-03 | 4.440209e-04 | 0.0002153844 | 0.0004540283 | 0.0002949058 | -0.0007085776 | 0.0004619467 | 0.0009136763 | 0.0017003505 | 0.0002068845 | 0.029082471 | -0.0013899221 | 0.0005879015 | -0.0017232555 | 0.0002550067 | 0.0020538919 | -0.0001903588 | -0.001723843 | -0.0001421164 | -1.611028e-04 |
\n",
+ "\tNKdim__ZEB2 | -4.169701e-06 | 2.449899e-03 | 0.0024518547 | -0.0006276671 | -0.0006177782 | 0.0003662261 | 0.0002654389 | -0.0001024299 | -0.0011893979 | 0.0029621473 | 0.008816921 | 0.0004274647 | 0.0017044722 | -0.0002884657 | -0.0002698381 | -0.0009618766 | -0.0013183011 | 0.001507303 | 0.0001103782 | 2.288160e-04 |
\n",
+ "\tNKdim__ZFAS1 | -1.824321e-03 | -2.196386e-05 | -0.0001309291 | 0.0002609197 | 0.0031960898 | -0.0009808865 | 0.0003104403 | -0.0003755947 | -0.0001760204 | -0.0030818508 | -0.113659437 | -0.0030505201 | -0.0005130803 | -0.0014203910 | 0.0004811148 | 0.0003157216 | -0.0009303368 | 0.001418756 | 0.0007723607 | 2.154406e-04 |
\n",
+ "\tNKdim__ZFP36 | -1.884174e-03 | -1.013808e-04 | -0.0009070395 | 0.0004174070 | 0.0002005844 | 0.0019932858 | 0.0003703909 | 0.0008108770 | -0.0016578828 | -0.0039247834 | 0.008802220 | 0.0007937521 | 0.0021959706 | -0.0009571731 | -0.0004312455 | 0.0007124782 | 0.0005021595 | -0.004344592 | 0.0007678170 | -2.155689e-04 |
\n",
+ "\tNKdim__ZFP36L1 | -2.860396e-03 | 1.044763e-04 | 0.0009260348 | -0.0028899986 | 0.0022447461 | 0.0009851776 | 0.0015616902 | 0.0010531281 | -0.0021344237 | -0.0053043853 | -0.004016054 | 0.0004029870 | 0.0009022350 | -0.0020380839 | 0.0004893038 | 0.0004537000 | 0.0002285394 | -0.000127352 | -0.0014641171 | 1.223379e-05 |
\n",
+ "\tNKdim__ZFP36L2 | -1.056855e-03 | -6.163430e-04 | 0.0018555297 | 0.0006076941 | 0.0017680046 | 0.0007874596 | 0.0007943166 | 0.0003794940 | 0.0004912557 | -0.0051923213 | 0.080864937 | -0.0008642006 | 0.0014579452 | -0.0028402238 | 0.0004304173 | 0.0003052706 | -0.0003380432 | -0.002692252 | -0.0017124035 | -6.965453e-05 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A matrix: 6 × 20 of type dbl\n",
+ "\\begin{tabular}{r|llllllllllllllllllll}\n",
+ " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & Factor11 & Factor12 & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20\\\\\n",
+ "\\hline\n",
+ "\tNKdim\\_\\_ZC3HAV1 & -4.191599e-03 & 4.440209e-04 & 0.0002153844 & 0.0004540283 & 0.0002949058 & -0.0007085776 & 0.0004619467 & 0.0009136763 & 0.0017003505 & 0.0002068845 & 0.029082471 & -0.0013899221 & 0.0005879015 & -0.0017232555 & 0.0002550067 & 0.0020538919 & -0.0001903588 & -0.001723843 & -0.0001421164 & -1.611028e-04\\\\\n",
+ "\tNKdim\\_\\_ZEB2 & -4.169701e-06 & 2.449899e-03 & 0.0024518547 & -0.0006276671 & -0.0006177782 & 0.0003662261 & 0.0002654389 & -0.0001024299 & -0.0011893979 & 0.0029621473 & 0.008816921 & 0.0004274647 & 0.0017044722 & -0.0002884657 & -0.0002698381 & -0.0009618766 & -0.0013183011 & 0.001507303 & 0.0001103782 & 2.288160e-04\\\\\n",
+ "\tNKdim\\_\\_ZFAS1 & -1.824321e-03 & -2.196386e-05 & -0.0001309291 & 0.0002609197 & 0.0031960898 & -0.0009808865 & 0.0003104403 & -0.0003755947 & -0.0001760204 & -0.0030818508 & -0.113659437 & -0.0030505201 & -0.0005130803 & -0.0014203910 & 0.0004811148 & 0.0003157216 & -0.0009303368 & 0.001418756 & 0.0007723607 & 2.154406e-04\\\\\n",
+ "\tNKdim\\_\\_ZFP36 & -1.884174e-03 & -1.013808e-04 & -0.0009070395 & 0.0004174070 & 0.0002005844 & 0.0019932858 & 0.0003703909 & 0.0008108770 & -0.0016578828 & -0.0039247834 & 0.008802220 & 0.0007937521 & 0.0021959706 & -0.0009571731 & -0.0004312455 & 0.0007124782 & 0.0005021595 & -0.004344592 & 0.0007678170 & -2.155689e-04\\\\\n",
+ "\tNKdim\\_\\_ZFP36L1 & -2.860396e-03 & 1.044763e-04 & 0.0009260348 & -0.0028899986 & 0.0022447461 & 0.0009851776 & 0.0015616902 & 0.0010531281 & -0.0021344237 & -0.0053043853 & -0.004016054 & 0.0004029870 & 0.0009022350 & -0.0020380839 & 0.0004893038 & 0.0004537000 & 0.0002285394 & -0.000127352 & -0.0014641171 & 1.223379e-05\\\\\n",
+ "\tNKdim\\_\\_ZFP36L2 & -1.056855e-03 & -6.163430e-04 & 0.0018555297 & 0.0006076941 & 0.0017680046 & 0.0007874596 & 0.0007943166 & 0.0003794940 & 0.0004912557 & -0.0051923213 & 0.080864937 & -0.0008642006 & 0.0014579452 & -0.0028402238 & 0.0004304173 & 0.0003052706 & -0.0003380432 & -0.002692252 & -0.0017124035 & -6.965453e-05\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A matrix: 6 × 20 of type dbl\n",
+ "\n",
+ "| | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | Factor11 | Factor12 | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 |\n",
+ "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
+ "| NKdim__ZC3HAV1 | -4.191599e-03 | 4.440209e-04 | 0.0002153844 | 0.0004540283 | 0.0002949058 | -0.0007085776 | 0.0004619467 | 0.0009136763 | 0.0017003505 | 0.0002068845 | 0.029082471 | -0.0013899221 | 0.0005879015 | -0.0017232555 | 0.0002550067 | 0.0020538919 | -0.0001903588 | -0.001723843 | -0.0001421164 | -1.611028e-04 |\n",
+ "| NKdim__ZEB2 | -4.169701e-06 | 2.449899e-03 | 0.0024518547 | -0.0006276671 | -0.0006177782 | 0.0003662261 | 0.0002654389 | -0.0001024299 | -0.0011893979 | 0.0029621473 | 0.008816921 | 0.0004274647 | 0.0017044722 | -0.0002884657 | -0.0002698381 | -0.0009618766 | -0.0013183011 | 0.001507303 | 0.0001103782 | 2.288160e-04 |\n",
+ "| NKdim__ZFAS1 | -1.824321e-03 | -2.196386e-05 | -0.0001309291 | 0.0002609197 | 0.0031960898 | -0.0009808865 | 0.0003104403 | -0.0003755947 | -0.0001760204 | -0.0030818508 | -0.113659437 | -0.0030505201 | -0.0005130803 | -0.0014203910 | 0.0004811148 | 0.0003157216 | -0.0009303368 | 0.001418756 | 0.0007723607 | 2.154406e-04 |\n",
+ "| NKdim__ZFP36 | -1.884174e-03 | -1.013808e-04 | -0.0009070395 | 0.0004174070 | 0.0002005844 | 0.0019932858 | 0.0003703909 | 0.0008108770 | -0.0016578828 | -0.0039247834 | 0.008802220 | 0.0007937521 | 0.0021959706 | -0.0009571731 | -0.0004312455 | 0.0007124782 | 0.0005021595 | -0.004344592 | 0.0007678170 | -2.155689e-04 |\n",
+ "| NKdim__ZFP36L1 | -2.860396e-03 | 1.044763e-04 | 0.0009260348 | -0.0028899986 | 0.0022447461 | 0.0009851776 | 0.0015616902 | 0.0010531281 | -0.0021344237 | -0.0053043853 | -0.004016054 | 0.0004029870 | 0.0009022350 | -0.0020380839 | 0.0004893038 | 0.0004537000 | 0.0002285394 | -0.000127352 | -0.0014641171 | 1.223379e-05 |\n",
+ "| NKdim__ZFP36L2 | -1.056855e-03 | -6.163430e-04 | 0.0018555297 | 0.0006076941 | 0.0017680046 | 0.0007874596 | 0.0007943166 | 0.0003794940 | 0.0004912557 | -0.0051923213 | 0.080864937 | -0.0008642006 | 0.0014579452 | -0.0028402238 | 0.0004304173 | 0.0003052706 | -0.0003380432 | -0.002692252 | -0.0017124035 | -6.965453e-05 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Factor1 Factor2 Factor3 Factor4 \n",
+ "NKdim__ZC3HAV1 -4.191599e-03 4.440209e-04 0.0002153844 0.0004540283\n",
+ "NKdim__ZEB2 -4.169701e-06 2.449899e-03 0.0024518547 -0.0006276671\n",
+ "NKdim__ZFAS1 -1.824321e-03 -2.196386e-05 -0.0001309291 0.0002609197\n",
+ "NKdim__ZFP36 -1.884174e-03 -1.013808e-04 -0.0009070395 0.0004174070\n",
+ "NKdim__ZFP36L1 -2.860396e-03 1.044763e-04 0.0009260348 -0.0028899986\n",
+ "NKdim__ZFP36L2 -1.056855e-03 -6.163430e-04 0.0018555297 0.0006076941\n",
+ " Factor5 Factor6 Factor7 Factor8 \n",
+ "NKdim__ZC3HAV1 0.0002949058 -0.0007085776 0.0004619467 0.0009136763\n",
+ "NKdim__ZEB2 -0.0006177782 0.0003662261 0.0002654389 -0.0001024299\n",
+ "NKdim__ZFAS1 0.0031960898 -0.0009808865 0.0003104403 -0.0003755947\n",
+ "NKdim__ZFP36 0.0002005844 0.0019932858 0.0003703909 0.0008108770\n",
+ "NKdim__ZFP36L1 0.0022447461 0.0009851776 0.0015616902 0.0010531281\n",
+ "NKdim__ZFP36L2 0.0017680046 0.0007874596 0.0007943166 0.0003794940\n",
+ " Factor9 Factor10 Factor11 Factor12 \n",
+ "NKdim__ZC3HAV1 0.0017003505 0.0002068845 0.029082471 -0.0013899221\n",
+ "NKdim__ZEB2 -0.0011893979 0.0029621473 0.008816921 0.0004274647\n",
+ "NKdim__ZFAS1 -0.0001760204 -0.0030818508 -0.113659437 -0.0030505201\n",
+ "NKdim__ZFP36 -0.0016578828 -0.0039247834 0.008802220 0.0007937521\n",
+ "NKdim__ZFP36L1 -0.0021344237 -0.0053043853 -0.004016054 0.0004029870\n",
+ "NKdim__ZFP36L2 0.0004912557 -0.0051923213 0.080864937 -0.0008642006\n",
+ " Factor13 Factor14 Factor15 Factor16 \n",
+ "NKdim__ZC3HAV1 0.0005879015 -0.0017232555 0.0002550067 0.0020538919\n",
+ "NKdim__ZEB2 0.0017044722 -0.0002884657 -0.0002698381 -0.0009618766\n",
+ "NKdim__ZFAS1 -0.0005130803 -0.0014203910 0.0004811148 0.0003157216\n",
+ "NKdim__ZFP36 0.0021959706 -0.0009571731 -0.0004312455 0.0007124782\n",
+ "NKdim__ZFP36L1 0.0009022350 -0.0020380839 0.0004893038 0.0004537000\n",
+ "NKdim__ZFP36L2 0.0014579452 -0.0028402238 0.0004304173 0.0003052706\n",
+ " Factor17 Factor18 Factor19 Factor20 \n",
+ "NKdim__ZC3HAV1 -0.0001903588 -0.001723843 -0.0001421164 -1.611028e-04\n",
+ "NKdim__ZEB2 -0.0013183011 0.001507303 0.0001103782 2.288160e-04\n",
+ "NKdim__ZFAS1 -0.0009303368 0.001418756 0.0007723607 2.154406e-04\n",
+ "NKdim__ZFP36 0.0005021595 -0.004344592 0.0007678170 -2.155689e-04\n",
+ "NKdim__ZFP36L1 0.0002285394 -0.000127352 -0.0014641171 1.223379e-05\n",
+ "NKdim__ZFP36L2 -0.0003380432 -0.002692252 -0.0017124035 -6.965453e-05"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 3148
- 20
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 3148\n",
+ "\\item 20\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 3148\n",
+ "2. 20\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 3148 20"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "### Calculate right inverse of weight matrix (mu_ma)\n",
+ "\n",
+ "dim(mu_ma) ## corresponds to A^T\n",
+ "mu_ma_t = t(mu_ma) \n",
+ "dim(mu_ma_t) ## corresponds to # A\n",
+ "\n",
+ "mu_mat_right_inv = mu_ma %*% (solve(mu_ma_t %*% mu_ma))\n",
+ "head(mu_mat_right_inv)\n",
+ "tail(mu_mat_right_inv)\n",
+ "dim(mu_mat_right_inv)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 548,
+ "id": "769bb480-9aaf-46cf-a75c-eaf317a4585b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A matrix: 2 × 3148 of type dbl\n",
+ "\n",
+ "\t | B.cell__ACTB | B.cell__ARHGDIB | B.cell__ARPC2 | B.cell__ARPC3 | B.cell__B2M | B.cell__BIRC3 | B.cell__BTF3 | B.cell__BTG1 | B.cell__BTG2 | B.cell__CALM1 | ⋯ | NKdim__YPEL5 | NKdim__YWHAB | NKdim__YWHAQ | NKdim__YWHAZ | NKdim__ZC3HAV1 | NKdim__ZEB2 | NKdim__ZFAS1 | NKdim__ZFP36 | NKdim__ZFP36L1 | NKdim__ZFP36L2 |
\n",
+ "\n",
+ "\n",
+ "\tBaseline.TEST_1 | -0.1451209 | -0.280823 | 0.2933812 | -1.3037827 | 0.3569171 | -0.29338123 | -0.5720211 | 1.6637928 | -1.3037827 | -1.1048357 | ⋯ | 1.6637928 | 0.70506589 | 0.4752408 | 0.9234567 | 0.6744898 | -0.02410453 | -1.040566 | -0.94207577 | -0.8010945 | -0.7050659 |
\n",
+ "\tBaseline.TEST_12 | 0.2310092 | -0.060292 | 0.3697907 | -0.3697907 | -0.2933812 | -0.08445798 | 0.4752408 | 0.1451209 | 0.1573107 | 0.3827258 | ⋯ | -0.6744898 | -0.09655862 | 0.8178173 | -0.3827258 | -0.7363159 | -0.44842548 | -1.898029 | -0.07236971 | 1.3623834 | 0.8178173 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A matrix: 2 × 3148 of type dbl\n",
+ "\\begin{tabular}{r|lllllllllllllllllllll}\n",
+ " & B.cell\\_\\_ACTB & B.cell\\_\\_ARHGDIB & B.cell\\_\\_ARPC2 & B.cell\\_\\_ARPC3 & B.cell\\_\\_B2M & B.cell\\_\\_BIRC3 & B.cell\\_\\_BTF3 & B.cell\\_\\_BTG1 & B.cell\\_\\_BTG2 & B.cell\\_\\_CALM1 & ⋯ & NKdim\\_\\_YPEL5 & NKdim\\_\\_YWHAB & NKdim\\_\\_YWHAQ & NKdim\\_\\_YWHAZ & NKdim\\_\\_ZC3HAV1 & NKdim\\_\\_ZEB2 & NKdim\\_\\_ZFAS1 & NKdim\\_\\_ZFP36 & NKdim\\_\\_ZFP36L1 & NKdim\\_\\_ZFP36L2\\\\\n",
+ "\\hline\n",
+ "\tBaseline.TEST\\_1 & -0.1451209 & -0.280823 & 0.2933812 & -1.3037827 & 0.3569171 & -0.29338123 & -0.5720211 & 1.6637928 & -1.3037827 & -1.1048357 & ⋯ & 1.6637928 & 0.70506589 & 0.4752408 & 0.9234567 & 0.6744898 & -0.02410453 & -1.040566 & -0.94207577 & -0.8010945 & -0.7050659\\\\\n",
+ "\tBaseline.TEST\\_12 & 0.2310092 & -0.060292 & 0.3697907 & -0.3697907 & -0.2933812 & -0.08445798 & 0.4752408 & 0.1451209 & 0.1573107 & 0.3827258 & ⋯ & -0.6744898 & -0.09655862 & 0.8178173 & -0.3827258 & -0.7363159 & -0.44842548 & -1.898029 & -0.07236971 & 1.3623834 & 0.8178173\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A matrix: 2 × 3148 of type dbl\n",
+ "\n",
+ "| | B.cell__ACTB | B.cell__ARHGDIB | B.cell__ARPC2 | B.cell__ARPC3 | B.cell__B2M | B.cell__BIRC3 | B.cell__BTF3 | B.cell__BTG1 | B.cell__BTG2 | B.cell__CALM1 | ⋯ | NKdim__YPEL5 | NKdim__YWHAB | NKdim__YWHAQ | NKdim__YWHAZ | NKdim__ZC3HAV1 | NKdim__ZEB2 | NKdim__ZFAS1 | NKdim__ZFP36 | NKdim__ZFP36L1 | NKdim__ZFP36L2 |\n",
+ "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
+ "| Baseline.TEST_1 | -0.1451209 | -0.280823 | 0.2933812 | -1.3037827 | 0.3569171 | -0.29338123 | -0.5720211 | 1.6637928 | -1.3037827 | -1.1048357 | ⋯ | 1.6637928 | 0.70506589 | 0.4752408 | 0.9234567 | 0.6744898 | -0.02410453 | -1.040566 | -0.94207577 | -0.8010945 | -0.7050659 |\n",
+ "| Baseline.TEST_12 | 0.2310092 | -0.060292 | 0.3697907 | -0.3697907 | -0.2933812 | -0.08445798 | 0.4752408 | 0.1451209 | 0.1573107 | 0.3827258 | ⋯ | -0.6744898 | -0.09655862 | 0.8178173 | -0.3827258 | -0.7363159 | -0.44842548 | -1.898029 | -0.07236971 | 1.3623834 | 0.8178173 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " B.cell__ACTB B.cell__ARHGDIB B.cell__ARPC2 B.cell__ARPC3\n",
+ "Baseline.TEST_1 -0.1451209 -0.280823 0.2933812 -1.3037827 \n",
+ "Baseline.TEST_12 0.2310092 -0.060292 0.3697907 -0.3697907 \n",
+ " B.cell__B2M B.cell__BIRC3 B.cell__BTF3 B.cell__BTG1\n",
+ "Baseline.TEST_1 0.3569171 -0.29338123 -0.5720211 1.6637928 \n",
+ "Baseline.TEST_12 -0.2933812 -0.08445798 0.4752408 0.1451209 \n",
+ " B.cell__BTG2 B.cell__CALM1 ⋯ NKdim__YPEL5 NKdim__YWHAB\n",
+ "Baseline.TEST_1 -1.3037827 -1.1048357 ⋯ 1.6637928 0.70506589 \n",
+ "Baseline.TEST_12 0.1573107 0.3827258 ⋯ -0.6744898 -0.09655862 \n",
+ " NKdim__YWHAQ NKdim__YWHAZ NKdim__ZC3HAV1 NKdim__ZEB2\n",
+ "Baseline.TEST_1 0.4752408 0.9234567 0.6744898 -0.02410453\n",
+ "Baseline.TEST_12 0.8178173 -0.3827258 -0.7363159 -0.44842548\n",
+ " NKdim__ZFAS1 NKdim__ZFP36 NKdim__ZFP36L1 NKdim__ZFP36L2\n",
+ "Baseline.TEST_1 -1.040566 -0.94207577 -0.8010945 -0.7050659 \n",
+ "Baseline.TEST_12 -1.898029 -0.07236971 1.3623834 0.8178173 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "head(gr_ma,2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 549,
+ "id": "3217a141-505d-43c1-8e2e-041dd7af37e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gr_ma[is.na(gr_ma)]= 0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 550,
+ "id": "a7a1fec5-2861-4917-8678-e2cbd5f301ad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 103
- 20
\n"
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 103\n",
+ "\\item 20\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 103\n",
+ "2. 20\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 103 20"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A matrix: 2 × 20 of type dbl\n",
+ "\n",
+ "\t | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | Factor11 | Factor12 | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 |
\n",
+ "\n",
+ "\n",
+ "\tBaseline.TEST_1 | -0.12063079 | 1.4305362 | 0.1444684 | 0.5318507 | -0.1235437 | 0.12083592 | 0.2016771 | 0.2368509 | -0.21416826 | 0.21182132 | 0.3961745 | -0.3276672 | 0.4171498 | 0.3391938 | -0.01248597 | 0.08640766 | -0.02221648 | -0.4124799 | 0.195775378 | -0.1918527 |
\n",
+ "\tBaseline.TEST_12 | -0.09882374 | 0.4013937 | 0.3206466 | 0.5860041 | -0.2155669 | 0.08823914 | -0.1322515 | 0.2131773 | 0.04916782 | 0.07366254 | 3.4977509 | -0.2186651 | 0.4484445 | 0.2666195 | -0.13561254 | 0.53762915 | 0.02581057 | -0.2500909 | 0.008599943 | -0.1842714 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A matrix: 2 × 20 of type dbl\n",
+ "\\begin{tabular}{r|llllllllllllllllllll}\n",
+ " & Factor1 & Factor2 & Factor3 & Factor4 & Factor5 & Factor6 & Factor7 & Factor8 & Factor9 & Factor10 & Factor11 & Factor12 & Factor13 & Factor14 & Factor15 & Factor16 & Factor17 & Factor18 & Factor19 & Factor20\\\\\n",
+ "\\hline\n",
+ "\tBaseline.TEST\\_1 & -0.12063079 & 1.4305362 & 0.1444684 & 0.5318507 & -0.1235437 & 0.12083592 & 0.2016771 & 0.2368509 & -0.21416826 & 0.21182132 & 0.3961745 & -0.3276672 & 0.4171498 & 0.3391938 & -0.01248597 & 0.08640766 & -0.02221648 & -0.4124799 & 0.195775378 & -0.1918527\\\\\n",
+ "\tBaseline.TEST\\_12 & -0.09882374 & 0.4013937 & 0.3206466 & 0.5860041 & -0.2155669 & 0.08823914 & -0.1322515 & 0.2131773 & 0.04916782 & 0.07366254 & 3.4977509 & -0.2186651 & 0.4484445 & 0.2666195 & -0.13561254 & 0.53762915 & 0.02581057 & -0.2500909 & 0.008599943 & -0.1842714\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A matrix: 2 × 20 of type dbl\n",
+ "\n",
+ "| | Factor1 | Factor2 | Factor3 | Factor4 | Factor5 | Factor6 | Factor7 | Factor8 | Factor9 | Factor10 | Factor11 | Factor12 | Factor13 | Factor14 | Factor15 | Factor16 | Factor17 | Factor18 | Factor19 | Factor20 |\n",
+ "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
+ "| Baseline.TEST_1 | -0.12063079 | 1.4305362 | 0.1444684 | 0.5318507 | -0.1235437 | 0.12083592 | 0.2016771 | 0.2368509 | -0.21416826 | 0.21182132 | 0.3961745 | -0.3276672 | 0.4171498 | 0.3391938 | -0.01248597 | 0.08640766 | -0.02221648 | -0.4124799 | 0.195775378 | -0.1918527 |\n",
+ "| Baseline.TEST_12 | -0.09882374 | 0.4013937 | 0.3206466 | 0.5860041 | -0.2155669 | 0.08823914 | -0.1322515 | 0.2131773 | 0.04916782 | 0.07366254 | 3.4977509 | -0.2186651 | 0.4484445 | 0.2666195 | -0.13561254 | 0.53762915 | 0.02581057 | -0.2500909 | 0.008599943 | -0.1842714 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Factor1 Factor2 Factor3 Factor4 Factor5 \n",
+ "Baseline.TEST_1 -0.12063079 1.4305362 0.1444684 0.5318507 -0.1235437\n",
+ "Baseline.TEST_12 -0.09882374 0.4013937 0.3206466 0.5860041 -0.2155669\n",
+ " Factor6 Factor7 Factor8 Factor9 Factor10 \n",
+ "Baseline.TEST_1 0.12083592 0.2016771 0.2368509 -0.21416826 0.21182132\n",
+ "Baseline.TEST_12 0.08823914 -0.1322515 0.2131773 0.04916782 0.07366254\n",
+ " Factor11 Factor12 Factor13 Factor14 Factor15 \n",
+ "Baseline.TEST_1 0.3961745 -0.3276672 0.4171498 0.3391938 -0.01248597\n",
+ "Baseline.TEST_12 3.4977509 -0.2186651 0.4484445 0.2666195 -0.13561254\n",
+ " Factor16 Factor17 Factor18 Factor19 Factor20 \n",
+ "Baseline.TEST_1 0.08640766 -0.02221648 -0.4124799 0.195775378 -0.1918527\n",
+ "Baseline.TEST_12 0.53762915 0.02581057 -0.2500909 0.008599943 -0.1842714"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "#### Apply right inverse to Y from GR \n",
+ "\n",
+ "result = gr_ma %*% mu_mat_right_inv\n",
+ "dim(result)\n",
+ "\n",
+ "\n",
+ "head(result,2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 551,
+ "id": "356ff5d9-a4a7-4ca1-bb89-09c63c460d35",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "103"
+ ],
+ "text/latex": [
+ "103"
+ ],
+ "text/markdown": [
+ "103"
+ ],
+ "text/plain": [
+ "[1] 103"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "nrow(result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 552,
+ "id": "a25d43b4-221b-4096-a2ea-0c9590c4eabb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result = as.data.frame(result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 553,
+ "id": "fc0822ca-40d8-4d50-b0ab-2e40a91fd481",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "103"
+ ],
+ "text/latex": [
+ "103"
+ ],
+ "text/markdown": [
+ "103"
+ ],
+ "text/plain": [
+ "[1] 103"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "nrow(result[!is.na(result$Factor1),])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 554,
+ "id": "6f47cb1c-6d40-473d-9e72-741fef9fedef",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result = result[!is.na(result$Factor1),]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 555,
+ "id": "04f65759-f99e-4258-8d8c-87cc2fa4e834",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "103"
+ ],
+ "text/latex": [
+ "103"
+ ],
+ "text/markdown": [
+ "103"
+ ],
+ "text/plain": [
+ "[1] 103"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "nrow(result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 561,
+ "id": "943868fa-6979-479f-b05c-669b45839ae4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Save factor values to share; file.path() inserts the separator between output directory and file name\n",
+ "\n",
+ "write.csv(result, file.path(output_path, 'Factor_Data_Groningen.csv'))\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "R",
+ "language": "R",
+ "name": "ir"
+ },
+ "language_info": {
+ "codemirror_mode": "r",
+ "file_extension": ".r",
+ "mimetype": "text/x-r-source",
+ "name": "R",
+ "pygments_lexer": "r",
+ "version": "4.1.1"
+ },
+ "toc-autonumbering": true
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/G/GX_MOFA_Reproduction_Preprocessing.r b/G/GX_MOFA_Reproduction_Preprocessing.r
new file mode 100644
index 0000000..42f4bf3
--- /dev/null
+++ b/G/GX_MOFA_Reproduction_Preprocessing.r
@@ -0,0 +1,553 @@
+# Load Seurat object and create pseudobulk for MOFA analysis
+
+#############################################
+# Prerequisites - Load Libraries
+
+
+library(dplyr)
+
+library(Seurat, quietly = TRUE, verbose = FALSE)
+library(SeuratDisk, quietly = TRUE, verbose = FALSE)
+
+library(muscat)
+
+library(reshape2)
+
+library(SummarizedExperiment)
+
+library(stringr)
+
+###############################################
+# Prerequisites: Configurations & Parameters
+
+result_path = '../results/current/Reproduction'
+
+seurat_input_data_name = 'G4_Seurat_Input_Replication.h5seurat'
+
+name_save = 'V_AZIMUTH_REPRODUCTION'
+
+
+## Define columns in seurat object containing sample-id and cluster annotations
+
+sample_column = 'sample_id' # to be sample-id
+
+cluster_column = 'cluster_id' # to be cluster_id; cell-type annotation
+
+
+##################################################
+
+### Should quantile normalization be applied?
+
+quantile_normalization_single_cell = TRUE
+
+standardize = FALSE
+
+set_zero_na = FALSE
+
+quantile_norm_feat = TRUE
+
+# Functions
+
+### Quantile normalization: align the columns of X to a shared reference distribution.
+### Returns (invisibly) a matrix in which each entry is replaced by the reference value of its within-column rank.
+quantile_normalization = function(X){
+    set.seed(42)
+    # reference distribution: mean across columns at each sorted position
+    sorted_columns = data.frame(apply(X, 2, sort))
+    reference_values = apply(sorted_columns, 1, mean)
+    # within-column rank of every entry; tied entries all receive the lowest rank
+    column_ranks = apply(X, 2, rank, ties.method = 'min')
+    invisible(apply(column_ranks, 2, function(rank_idx){ reference_values[rank_idx] }))
+}
+
+
+### Gene-wise normalization: replace the non-missing values of x by standard-normal quantiles of their ranks.
+stdnorm <- function(x) {
+    set.seed(42)
+    observed = !is.na(x)   # NA entries are left untouched
+    value_ranks = rank(x[observed], ties.method = "average")   # ties share the average rank
+    x[observed] = qnorm(value_ranks / (sum(observed) + 1))   # r / (n + 1) keeps probabilities inside (0, 1)
+    x
+}
+
+
+
+# Load Data
+
+## Load Seurat object
+
+###### Load the RNA assay from the h5Seurat input file
+# The resolved path and the file's modification time are printed to record which input was read.
+source_text = file.path(result_path, seurat_input_data_name)
+print(source_text)
+print(file.mtime(source_text))
+rna_seurat_data = LoadH5Seurat(source_text, assays = "RNA", quietly = TRUE)
+
+
+
+
+## Should contain raw counts
+## After QC and Pre-processing
+## annotations:
+#### 'sample_id' - identification of sample/ patient incl. timepoint
+### 'cluster_id' - cell-type annotation
+
+colnames(rna_seurat_data[[]])
+
+
+rna_seurat_data
+
+rna_seurat_data_subset = rna_seurat_data # rename
+
+# Data Processing (Pseudobulk)
+# Per-cell metadata of the Seurat object; the columns added here are written back to the object further below.
+obs = rna_seurat_data_subset@meta.data
+# keep the cell identifiers (row names) as an explicit column
+obs$cell = rownames(obs)
+
+### add a dummy group column
+obs$group_id = '1' ## group-id is needed for DE analysis but not here --> constant dummy variable
+
+### assign sample-id (taken from the column configured in sample_column): TBD remove library part for script sharing
+obs$sample_id = obs[[sample_column]]
+
+### add cell-type assignment (taken from the column configured in cluster_column)
+obs$cluster_id = as.character(obs[[cluster_column]])
+# restore the cell identifiers as row names
+rownames(obs) = obs$cell
+# number of cells and a preview of the metadata
+nrow(obs)
+
+head(obs,2)
+
+### Adjust B-cell mapping/ aggregate Azimuth cell-types: the three B-cell subtypes are merged into one 'B cell' label
+obs$cluster_id = str_replace(obs$cluster_id, 'B_intermediate|B_memory|B_naive', 'B cell')
+# list the resulting cluster labels
+sort(unique(obs$cluster_id))
+
+
+
+## Add to seurat dataset
+
+## group-id
+
+rna_seurat_data_subset = AddMetaData(object = rna_seurat_data_subset, metadata = obs[,'group_id', drop = FALSE], col.name = 'group_id')
+
+## cluster-id
+
+rna_seurat_data_subset = AddMetaData(object = rna_seurat_data_subset, metadata = obs[,'cluster_id', drop = FALSE], col.name = 'cluster_id')
+
+## sample-id
+
+rna_seurat_data_subset = AddMetaData(object = rna_seurat_data_subset, metadata = obs[,'sample_id', drop = FALSE], col.name = 'sample_id')
+
+head(rna_seurat_data_subset@meta.data,2)
+
+## Convert to SCE
+
+rna_sce = as.SingleCellExperiment(rna_seurat_data_subset)
+
+rna_sce # rows = genes; columns = cells
+
+
+
+### Check amount of cells per sample and cluster
+
+colSums(table(rna_sce$cluster_id,rna_sce$cluster_id ))
+
+cells_per_sample_cluster = t(table(rna_sce$cluster_id, rna_sce$sample_id))
+
+cells_per_sample_cluster = data.frame(cells_per_sample_cluster)
+
+colnames(cells_per_sample_cluster) = c('Sample', 'Cluster_Cell_Type', 'amount_cells')
+
+head(cells_per_sample_cluster,2)
+
+## Analyze and calculate gene expression percentages per cluster
+
+gene_list = list()
+
+gene_cell_expr = list()
+
+clusters = unique(rna_sce$cluster_id)
+#clusters = unique(rna_sce$cell_type_Scanorama)
+
+clusters
+
+ for(i in clusters){
+# print(i)
+
+ # subset data on cluster (columns = cells, so the column index selects the cluster's cells)
+ rna_sce_subset = rna_sce[,rna_sce$cluster_id== i] # cluster
+ # number of cells in this cluster
+ amount_cells = dim(rna_sce_subset)[2]
+
+ # Calculate percentage of cells expressing gene; a gene counts as expressed in a cell when its assay value is > 0
+ amount_cells_expressing_gene = rowSums(assay(rna_sce_subset) > 0 )
+ perc_cells_expressing_gene = (amount_cells_expressing_gene/ amount_cells) * 100
+
+ # one data frame per cluster: rows = genes, columns = percentage and absolute number of expressing cells
+ gene_cell_expr[[i]] = data.frame(perc_cells_expressing_gene = perc_cells_expressing_gene, total_amount_cells_expressing_gene = amount_cells_expressing_gene)
+
+ }
+
+ ### Stack the per-cluster tables into one long table with explicit gene and cluster columns
+
+gene_cell_expr_data = data.frame()
+
+ for(i in names(gene_cell_expr)){
+ data = gene_cell_expr[[i]]
+ data$gene = rownames(gene_cell_expr[[i]])
+ data$cluster = i
+ gene_cell_expr_data = rbind( gene_cell_expr_data, data)
+ }
+
+head(gene_cell_expr_data,2)
+
+## Add cluster, group and sample columns for aggregation
+
+#### Add cluster_id, sample_id and group_id columns (the outer parentheses also print the assigned result)
+(rna_sce <- prepSCE(rna_sce,
+ kid = 'cluster_id', # subpopulation assignments
+ gid = 'group_id', # group IDs; here the constant dummy group_id assigned above
+ sid = 'sample_id', # sample IDs
+ drop = FALSE)) # FALSE: other colData columns are presumably kept rather than dropped -- verify against the muscat prepSCE docs
+# levels of the cluster and sample ids, each stored as a vector named by its own values
+nk <- length(kids <- levels(rna_sce$cluster_id))
+ns <- length(sids <- levels(rna_sce$sample_id))
+names(kids) <- kids; names(sids) <- sids
+
+nk # amount of cluster
+
+ns # amount of samples
+
+kids # cluster ids
+
+length(kids) # amount cluster-id
+
+## Aggregate single cell to pseudo-bulk data
+
+pb <- aggregateData(rna_sce,
+ assay = "counts", fun = "mean",
+ by = c("cluster_id", "sample_id"))
+# one sheet per subpopulation
+
+pb
+
+#sum(colSums(assay(pb)))
+
+### Save aggregated data
+
+#save( pb , file = paste0(result_path, '/G0_aggregated_RNA_input_correlations_all.RDS'))
+
+# Normalization
+
+## RNA-Single-Seq
+
+### Cell / gene expression data filtering
+
+cell_perc_cluster = gene_cell_expr_data
+
+head(cell_perc_cluster,2)
+
+nrow(cell_perc_cluster)
+
+length(unique(cell_perc_cluster$cluster))
+
+##### Condition for filtering genes
+# A (gene, cluster) row is kept when the gene is detected in
+#   - more than 50% of the cluster's cells AND more than 1200 cells, or
+#   - more than 40% of the cluster's cells AND more than 3000 cells.
+# Columns are referenced by their full names: the previous '$perc_cells' only
+# worked through partial matching of '$' on data.frames, which silently
+# returns NULL on tibbles or as soon as a second 'perc_cells*' column exists.
+expressed_in_majority = (cell_perc_cluster$perc_cells_expressing_gene > 50) &
+    (cell_perc_cluster$total_amount_cells_expressing_gene > 1200)
+expressed_in_many_cells = (cell_perc_cluster$perc_cells_expressing_gene > 40) &
+    (cell_perc_cluster$total_amount_cells_expressing_gene > 3000)
+
+cell_perc_cluster = cell_perc_cluster[expressed_in_majority | expressed_in_many_cells, ]
+
+nrow(cell_perc_cluster)
+
+### Pseudobulk
+
+pb
+
+all_genes = rownames(pb)
+
+head(all_genes)
+
+length(all_genes)
+
+### Pre-Process
+
+#### Remove Clusters (TBD)
+
+names(assays(pb))
+
+# Drop the per-cluster assays listed below from the pseudo-bulk object,
+# one at a time and in this order.
+for(cluster_to_drop in c(
+    'platelet',
+    'plasmablast',
+    'pDC',
+    'Nkbright',
+    'NK_Proliferating',
+    'ILC',
+    'HSPC',
+    'Eryth',
+    'Doublet',
+    'doublet',
+    'dnT',
+    'cDC1',
+    'CD8_TCM',
+    'CD8_Proliferating',
+    'CD4_Proliferating',
+    'ASDC'
+)){
+    assay(pb, cluster_to_drop) = NULL
+}
+
+length(names(assays(pb)))
+
+names(assays(pb))
+
+#### Prepare gene-cluster dataframe + normalize
+
+# final_data:     one row per pseudo-bulk sample; gains one '<cluster>__<gene>' column per kept gene
+# final_data_vis: one row per pseudo-bulk sample; gains one column per cluster (mean over that cluster's kept genes)
+final_data = data.frame(samples = colnames(pb))
+
+rownames(final_data) = final_data$samples
+
+final_data_vis = data.frame(samples = colnames(pb))
+
+rownames(final_data_vis) = final_data_vis$samples
+
+genes_subset = cell_perc_cluster
+
+name_save
+
+# Library-size scaling and the log2 transform are only applied when name_save
+# does not contain 'scano' (checked once instead of twice per iteration).
+apply_count_normalization = is.na(str_extract(name_save, 'scano'))
+
+# Only iterate over clusters that still have an assay in pb. Clusters that
+# passed the gene filter but whose assay was removed from pb would otherwise
+# make assay(pb, i) fail.
+clusters_to_process = intersect(unique(genes_subset$cluster), names(assays(pb)))
+
+for(i in clusters_to_process){
+    data = assay(pb, i)
+
+    ##### Normalize counts per sample (library size) - currently only for non-scanorama functions
+
+    if(apply_count_normalization == TRUE){
+        # per-sample column total relative to the mean column total
+        scaling_factor = colSums(data) / mean(colSums(data))
+
+        # seq_len() instead of 1:ncol(data) so an assay without columns is skipped cleanly
+        for (j in seq_len(ncol(data))){
+            if(scaling_factor[j] != 0){
+                data[,j] = data[,j] / scaling_factor[j]
+            }
+        }
+    }
+
+    ### Subset data on genes with minimum expression in cluster
+    # drop = FALSE keeps a matrix even if only a single gene passes the filter;
+    # otherwise the result collapses to a vector and the rownames() calls below fail
+    data = data[rownames(data) %in% genes_subset$gene[genes_subset$cluster == i], , drop = FALSE]
+
+    ### Alternative - cluster independent subsetting
+    #data = data[rownames(data) %in% genes_subset,]
+
+    ##### TBD pre-processing steps
+
+    if(apply_count_normalization == TRUE){
+        data = log2(data+1) # logarithmize count values (optional!)
+    }
+
+    #### Quantile normalization (TBD maybe also on complete dataset?)
+
+    if(quantile_normalization_single_cell == TRUE){
+        # row names are saved and restored around the quantile_normalization() call
+        data_rows = rownames(data)
+        data = quantile_normalization(data )
+        rownames(data) = data_rows
+    }
+
+    # prefix gene names with the cluster so features stay unique across clusters
+    rownames(data) = paste0(i, '__' ,rownames(data))
+
+    # transpose to samples x features
+    data = data.frame(t(data))
+
+    # per-sample mean over the kept genes of this cluster; column named after the cluster
+    expr_mean = data.frame( mean_expr = rowMeans(data))
+    colnames(expr_mean) = i
+    rownames(expr_mean) = rownames(data)
+
+    # by = 0 merges on row names (sample ids); merge() returns them in a 'Row.names' column
+    final_data = merge(final_data, data, by = 0)
+    final_data_vis = merge(final_data_vis, expr_mean, by = 0)
+
+    # restore the sample ids as row names for the next iteration and drop the helper column
+    rownames(final_data) = final_data$Row.names
+    rownames(final_data_vis) = final_data_vis$Row.names
+    final_data$Row.names = NULL
+    final_data_vis$Row.names = NULL
+}
+
+
+
+head(final_data,2)
+
+ncol(final_data)
+
+nrow(genes_subset)
+
+dim(final_data)
+
+# Cut each sample label at its first '-' (the '-' and everything after it is removed)
+final_data$samples = str_replace(final_data$samples, '-.*', '')
+
+head(final_data,2)
+
+
+
+#### Filter genes
+
+### Remove mitochondrial & ribosomal genes
+
+head(final_data,2)
+
+ncol(final_data)
+
+# Keep only the columns whose name contains none of '__MT', '__RPL', '__RPS'.
+# NOTE(review): these patterns match every gene symbol that merely starts with
+# MT / RPL / RPS after the cluster prefix, which looks broader than
+# "mitochondrial & ribosomal" - confirm this is intended.
+final_data = final_data[, is.na(str_extract(colnames(final_data), '__MT.*|__RPL.*|__RPS.*'))]
+
+ncol(final_data) # minus sample + sample_id column --> 11.831
+
+head(final_data,2)
+
+## Genes with high variance
+
+head(final_data,2)
+
+# Remove the label columns so that only feature columns are counted ...
+final_data$samples = NULL
+
+final_data$sample_id = NULL
+
+ncol(final_data)
+
+# ... then re-create 'samples' from the row names
+final_data$samples = rownames(final_data)
+
+head(final_data,2)
+
+
+
+#### Prepare long format
+
+# One row per (samples, variable) pair; melt() picks the id columns itself
+final_data_long = melt(final_data)
+
+### Decide what to do with duplicates
+
+head(final_data_long,2)
+
+final_data_long$type = 'single_cell'
+
+# Cut each sample label at its first '-' so repeated measurements share one label
+final_data_long$samples = str_replace(final_data_long$samples, '-.*', '')
+
+# take average in case same samples measured multiple times
+# ungroup(): summarise() would otherwise return a tibble still grouped by
+# samples/type, and the grouping column 'samples' is deleted further below -
+# removing a grouping column from a grouped tibble is fragile.
+final_data_long = final_data_long %>%
+    group_by(samples, type, variable) %>%
+    summarise(value = mean(value)) %>%
+    ungroup()
+
+length(unique(final_data_long$variable))
+
+# Rename 'samples' to 'sample_id'
+final_data_long$sample_id = final_data_long$samples
+final_data_long$samples = NULL
+
+# Quantile Normalization
+
+head(final_data_long,2)
+
+data_long = final_data_long
+
+head(data_long,2)
+
+nrow(data_long)
+
+length(unique(data_long$sample_id))
+
+### Normalization & wide format
+
+### Standardize values
+
+standardize
+
+if(standardize == TRUE){
+    # Per-feature mean and sd, computed on all non-NA values (zeros included)
+    feature_stats = data_long %>%
+        group_by(variable) %>%
+        summarise(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE))
+
+    data_long = merge(data_long, feature_stats, by = 'variable')
+
+    # Treat zero measurements as missing. Only the 'value' column is touched:
+    # the previous 'data_long[data_long == 0] = NA' also blanked identifier
+    # cells equal to "0" and a feature mean of exactly 0 (which turned every
+    # value of that feature into NA).
+    data_long$value[which(data_long$value == 0)] = NA
+
+    # Features with zero or undefined sd cannot be standardized
+    data_long = data_long[(data_long$sd != 0) & (!is.na(data_long$sd)),]
+
+    # z-score per feature; NA values stay NA, so no separate NA mask is needed
+    data_long$value = (data_long$value - data_long$mean)/data_long$sd
+
+    #data_long = data.frame(data_long)
+    data_long$mean = NULL
+    data_long$sd = NULL
+}
+
+unique(data_long$type)
+
+## Prepare wide format for correlations
+
+# Feature identifier of the form '<type>_0_<variable>'
+data_long[['ident']] = paste0(data_long[['type']], '_0_', data_long[['variable']])
+
+# Number of distinct (sample, feature) pairs vs. number of rows (printed for comparison)
+nrow(unique(data_long[,c('sample_id', 'ident')]))
+
+nrow(data_long)
+
+### Transform to wide
+
+# One row per sample_id, one column per ident
+# ! with this merging there might be NA values for some samples on some data types
+final_data = dcast(
+    data_long,
+    sample_id ~ ident,
+    value.var = "value"
+)
+
+head(final_data,2)
+
+# Move the sample ids from a column into the row names
+row.names(final_data) = final_data[['sample_id']]
+
+final_data[['sample_id']] = NULL
+
+ncol(final_data)
+
+nrow(final_data)
+
+### Deal with NA - Set NA for 0 observation + remove samples with only NA
+
+head(final_data,2)
+
+set_zero_na
+
+if(set_zero_na == TRUE){
+    final_data[final_data == 0] = NA
+}
+
+
+
+### Remember NA's
+
+# Logical matrix (samples x features) marking the missing cells
+data_nas = is.na(final_data)
+
+ncol(final_data)
+
+# Keep every sample that has at least one non-NA feature
+na_per_sample = rowSums(data_nas)
+keep_samples = names(na_per_sample)[na_per_sample != ncol(final_data)]
+
+# drop = FALSE: keep the table / matrix shape even when only one sample or
+# one feature is left, so the NA mask can still be applied position by position
+final_data = final_data[keep_samples, , drop = FALSE]
+
+data_nas = data_nas[keep_samples, , drop = FALSE]
+
+### Apply feature wise quantile normalization
+
+quantile_norm_feat
+
+if(quantile_norm_feat == TRUE){
+    # Apply stdnorm to every feature column, then convert back to a data.frame
+    final_data = apply(final_data, 2, stdnorm)
+    final_data = data.frame(final_data)
+
+    # Re-impose the missing cells recorded in data_nas
+    final_data[data_nas] = NA
+
+    # Back to long format with the row names as 'sample_id'
+    final_data$sample_id = rownames(final_data)
+    data_long = melt(final_data)
+
+    # Split the column name '<type>_0_<variable>' into 'type' and 'variable'
+    data_long$type = str_replace(str_extract(data_long$variable, '.*_0_'), '_0_', '')
+    data_long$variable = str_replace(data_long$variable, '.*_0_', '')
+}
+
+# Save Prepared Data
+
+name_save
+
+# Write the long-format table to '<result_path>/Combined_Data_<name_save>.csv'
+write.csv(
+    data_long,
+    file = paste0(result_path, '/Combined_Data_', name_save, '.csv')
+)
+
+length(unique(data_long$variable))
+
+