diff --git a/clocks/notebooks/epitoc1.ipynb b/clocks/notebooks/epitoc1.ipynb index e3fd7b4..047b76e 100644 --- a/clocks/notebooks/epitoc1.ipynb +++ b/clocks/notebooks/epitoc1.ipynb @@ -128,7 +128,7 @@ "model.metadata[\"data_type\"] = 'methylation'\n", "model.metadata[\"species\"] = 'Homo sapiens'\n", "model.metadata[\"year\"] = 2016\n", - "model.metadata[\"approved_by_author\"] = '⌛'\n", + "model.metadata[\"approved_by_author\"] = '✅'\n", "model.metadata[\"citation\"] = \"Yang, Zhen, et al. \\\"Correlation of an epigenetic mitotic clock with cancer risk.\\\" Genome biology 17 (2016): 1-18.\"\n", "model.metadata[\"doi\"] = \"https://doi.org/10.1186/s13059-016-1064-3\"\n", "model.metadata[\"research_only\"] = None\n", diff --git a/clocks/notebooks/intrinclock.ipynb b/clocks/notebooks/intrinclock.ipynb new file mode 100644 index 0000000..1ad650d --- /dev/null +++ b/clocks/notebooks/intrinclock.ipynb @@ -0,0 +1,849 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2f04eee0-5928-4e74-a754-6dc2e528810c", + "metadata": {}, + "source": [ + "# IntrinClock" + ] + }, + { + "cell_type": "markdown", + "id": "a3f514a3-772c-4a14-afdf-5a8376851ff4", + "metadata": {}, + "source": [ + "## Index\n", + "1. [Instantiate model class](#Instantiate-model-class)\n", + "2. [Define clock metadata](#Define-clock-metadata)\n", + "3. [Download clock dependencies](#Download-clock-dependencies)\n", + "5. [Load features](#Load-features)\n", + "6. [Load weights into base model](#Load-weights-into-base-model)\n", + "7. [Load reference values](#Load-reference-values)\n", + "8. [Load preprocess and postprocess objects](#Load-preprocess-and-postprocess-objects)\n", + "10. [Check all clock parameters](#Check-all-clock-parameters)\n", + "10. [Basic test](#Basic-test)\n", + "11. [Save torch model](#Save-torch-model)\n", + "12. [Clear directory](#Clear-directory)" + ] + }, + { + "cell_type": "markdown", + "id": "d95fafdc-643a-40ea-a689-200bd132e90c", + "metadata": {}, + "source": [ + "Let's first import some packages:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4adfb4de-cd79-4913-a1af-9e23e9e236c9", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import inspect\n", + "import shutil\n", + "import json\n", + "import torch\n", + "import pandas as pd\n", + "import pyaging as pya" + ] + }, + { + "cell_type": "markdown", + "id": "145082e5-ced4-47ae-88c0-cb69773e3c5a", + "metadata": {}, + "source": [ + "## Instantiate model class" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8aa77372-7ed3-4da7-abc9-d30372106139", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "class IntrinClock(pyagingModel):\n", + " def __init__(self):\n", + " super().__init__()\n", + "\n", + " def preprocess(self, x):\n", + " return x\n", + "\n", + " def postprocess(self, x):\n", + " \"\"\"\n", + " Applies an anti-logarithmic linear transformation to a PyTorch tensor.\n", + " \"\"\"\n", + " adult_age = 20\n", + "\n", + " # Create a mask for negative and non-negative values\n", + " mask_negative = x < 0\n", + " mask_non_negative = ~mask_negative\n", + "\n", + " # Initialize the result tensor\n", + " age_tensor = torch.empty_like(x)\n", + "\n", + " # Exponential transformation for negative values\n", + " age_tensor[mask_negative] = (1 + adult_age) * torch.exp(x[mask_negative]) - 1\n", + "\n", + " # Linear transformation for non-negative values\n", + " age_tensor[mask_non_negative] = (1 + adult_age) * x[mask_non_negative] + adult_age\n", + "\n", + " return age_tensor\n", + "\n" + ] + } + ], + "source": [ + "def print_entire_class(cls):\n", + " source = inspect.getsource(cls)\n", + " print(source)\n", + "\n", + "print_entire_class(pya.models.IntrinClock)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "914a94cf-bf6c-4b9d-862a-a2787842e07e", + "metadata": {}, + "outputs": [], + "source": [ + "model = pya.models.IntrinClock()" + ] + }, + { + "cell_type": "markdown", + "id": "51f8615e-01fa-4aa5-b196-3ee2b35d261c", + "metadata": {}, + "source": [ + "## Define clock metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9a662703-0ea4-4b9b-98cc-310e4e604d65", + "metadata": {}, + "outputs": [], + "source": [ + "model.metadata[\"clock_name\"] = 'intrinclock'\n", + "model.metadata[\"data_type\"] = 'methylation'\n", + "model.metadata[\"species\"] = 'Homo sapiens'\n", + "model.metadata[\"year\"] = 2024\n", + "model.metadata[\"approved_by_author\"] = '⌛'\n", + "model.metadata[\"citation\"] = \"Tomusiak, Alan, et al. \\\"Development of an epigenetic clock resistant to changes in immune cell composition.\\\" Communications Biology 7.1 (2024): 934.\"\n", + "model.metadata[\"doi\"] = 'https://doi.org/10.1038/s42003-024-06609-4'\n", + "model.metadata[\"research_only\"] = None\n", + "model.metadata[\"notes\"] = None" + ] + }, + { + "cell_type": "markdown", + "id": "74492239-5aae-4026-9d90-6bc9c574c110", + "metadata": {}, + "source": [ + "## Download clock dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f1f9bbe4-cfc8-494c-b910-c96da88afb2b", + "metadata": {}, + "outputs": [], + "source": [ + "coefficient_dict = {\n", + " 'cg00054496': 0.629765,\n", + " 'cg00055555': 0.000478,\n", + " 'cg00088042': 0.542504,\n", + " 'cg00107606': -0.602358,\n", + " 'cg00359604': -0.207408,\n", + " 'cg00448707': 0.101321,\n", + " 'cg00454305': 0.214177,\n", + " 'cg00480331': 0.279553,\n", + " 'cg00491255': 0.235923,\n", + " 'cg00579423': -0.023261,\n", + " 'cg00583733': -0.187858,\n", + " 'cg00588393': 0.306040,\n", + " 'cg00590036': 0.946985,\n", + " 'cg00593462': 0.339070,\n", + " 'cg00716277': -0.117843,\n", + " 'cg00734800': -0.282424,\n", + " 'cg00751156': 0.895214,\n", + " 'cg00753885': -0.463940,\n", + " 'cg00790928': -0.333871,\n", + " 'cg00792123': -0.455670,\n", + " 'cg00796360': -0.040682,\n", + " 'cg01028142': 0.248537,\n", + " 'cg01080924': -0.063070,\n", + " 'cg01131866': -0.143417,\n", + " 'cg01153342': -0.434237,\n", + " 'cg01176694': -0.164361,\n", + " 'cg01260146': -0.685730,\n", + " 'cg01410876': 0.000990,\n", + " 'cg01426818': 0.090433,\n", + " 'cg01447660': -0.377915,\n", + " 'cg01483824': 0.001925,\n", + " 'cg01632288': 0.020347,\n", + " 'cg01676322': -0.008812,\n", + " 'cg01723606': 0.150160,\n", + " 'cg01747664': -0.195955,\n", + " 'cg01748892': 0.291528,\n", + " 'cg01773685': 0.108342,\n", + " 'cg01945641': 0.077529,\n", + " 'cg01948062': -0.243899,\n", + " 'cg02018277': -0.101781,\n", + " 'cg02071825': 0.663272,\n", + " 'cg02118627': -0.015532,\n", + " 'cg02273647': 0.098363,\n", + " 'cg02315513': 0.163021,\n", + " 'cg02372745': 0.844674,\n", + " 'cg02605776': 0.102264,\n", + " 'cg02632185': 0.219644,\n", + " 'cg02677192': 0.075650,\n", + " 'cg02695349': 0.014691,\n", + " 'cg02741548': -0.230018,\n", + " 'cg02773698': 0.154705,\n", + " 'cg02798181': -0.012452,\n", + " 'cg02813298': -0.021371,\n", + " 'cg02821342': -0.835542,\n", + " 'cg02869559': -0.178805,\n", + " 'cg02905830': -0.065400,\n", + " 'cg03039990': -0.112718,\n", + " 'cg03065311': 0.144953,\n", + " 'cg03080043': 0.027251,\n", + " 'cg03260211': 0.219926,\n", + " 'cg03292213': -0.095841,\n", + " 'cg03331387': 0.009966,\n", + " 'cg03404662': 0.240773,\n", + " 'cg04035728': -0.020864,\n", + " 'cg04193160': 0.189600,\n", + " 'cg04232649': -0.303726,\n", + " 'cg04425110': 0.039951,\n", + " 'cg04435719': -0.424840,\n", + " 'cg04461859': 0.035512,\n", + " 'cg04508804': 0.017993,\n", + " 'cg04568797': -0.162114,\n", + " 'cg04596060': -0.480476,\n", + " 'cg04768459': -0.195784,\n", + " 'cg04847146': -0.088714,\n", + " 'cg04875128': 0.333810,\n", + " 'cg04897804': 0.095390,\n", + " 'cg04946721': 0.132790,\n", + " 'cg04982874': 0.030834,\n", + " 'cg05147654': -0.012265,\n", + " 'cg05324516': -0.243302,\n", + " 'cg05396044': 1.052750,\n", + " 'cg05442902': -0.380687,\n", + " 'cg05473871': -0.195023,\n", + " 'cg05502081': -0.317073,\n", + " 'cg05527430': 0.228954,\n", + " 'cg05638500': 0.404251,\n", + " 'cg05675373': 0.052474,\n", + " 'cg05714496': -0.140482,\n", + " 'cg05760722': -0.169668,\n", + " 'cg05782445': 0.243627,\n", + " 'cg05790551': 0.689393,\n", + " 'cg05823563': -0.221066,\n", + " 'cg05826458': 0.063505,\n", + " 'cg05896926': 0.111656,\n", + " 'cg05915866': -0.233932,\n", + " 'cg06208270': -0.161155,\n", + " 'cg06279276': 0.001596,\n", + " 'cg06339542': -0.105458,\n", + " 'cg06375761': -0.013404,\n", + " 'cg06385324': 1.149900,\n", + " 'cg06460983': 0.242894,\n", + " 'cg06462666': -0.092600,\n", + " 'cg06526721': -0.016389,\n", + " 'cg06568490': 0.250606,\n", + " 'cg06685111': -0.449694,\n", + " 'cg06739520': 0.608522,\n", + " 'cg06766273': -0.442990,\n", + " 'cg06785746': 0.062742,\n", + " 'cg06836406': 0.176486,\n", + " 'cg06889422': -0.063114,\n", + " 'cg06943141': 0.077398,\n", + " 'cg06965409': -0.448577,\n", + " 'cg06975196': -0.177409,\n", + " 'cg07077115': -0.095823,\n", + " 'cg07082267': -0.237335,\n", + " 'cg07158339': -0.076210,\n", + " 'cg07292235': 0.012162,\n", + " 'cg07337598': 0.059696,\n", + " 'cg07477282': 1.446001,\n", + " 'cg07589899': 0.563002,\n", + " 'cg07739179': 0.034575,\n", + " 'cg07759394': -0.350554,\n", + " 'cg07955995': 0.287813,\n", + " 'cg08033031': -0.210983,\n", + " 'cg08046044': 0.234338,\n", + " 'cg08074084': 0.270753,\n", + " 'cg08097417': 2.583904,\n", + " 'cg08169949': 0.730853,\n", + " 'cg08279008': -0.334302,\n", + " 'cg08296037': -0.057226,\n", + " 'cg08356445': 0.145932,\n", + " 'cg08357125': -0.177629,\n", + " 'cg08482080': -0.361281,\n", + " 'cg08521677': -0.385882,\n", + " 'cg08857478': -0.231122,\n", + " 'cg08993878': -0.083171,\n", + " 'cg09036297': 0.219183,\n", + " 'cg09105193': 0.594248,\n", + " 'cg09214983': -0.150207,\n", + " 'cg09278098': -0.081549,\n", + " 'cg09281539': -0.130365,\n", + " 'cg09338875': -0.653133,\n", + " 'cg09406384': -0.873546,\n", + " 'cg09553982': 0.029792,\n", + " 'cg09624130': -0.162465,\n", + " 'cg09661809': 0.320360,\n", + " 'cg09692396': -0.493154,\n", + " 'cg09693106': -0.027079,\n", + " 'cg09809672': -0.153914,\n", + " 'cg09841001': -0.081456,\n", + " 'cg09846458': -0.189517,\n", + " 'cg09910998': -0.109396,\n", + " 'cg09962925': 0.512151,\n", + " 'cg09971754': -0.199292,\n", + " 'cg10064339': -0.140178,\n", + " 'cg10070101': -0.089134,\n", + " 'cg10118784': 0.281102,\n", + " 'cg10163222': -0.034615,\n", + " 'cg10192736': -0.270267,\n", + " 'cg10307580': -0.143452,\n", + " 'cg10315903': 0.051591,\n", + " 'cg10373645': -0.153072,\n", + " 'cg10381888': 0.196570,\n", + " 'cg10523019': 0.159604,\n", + " 'cg10552698': -0.376593,\n", + " 'cg10561067': -0.158230,\n", + " 'cg10596537': -0.055959,\n", + " 'cg10788210': 2.527910,\n", + " 'cg10838157': -0.176249,\n", + " 'cg10858686': -0.077274,\n", + " 'cg10933003': 0.163029,\n", + " 'cg10951117': 0.025426,\n", + " 'cg11051055': 0.201423,\n", + " 'cg11059561': 0.318553,\n", + " 'cg11098525': 0.337549,\n", + " 'cg11109027': -0.805256,\n", + " 'cg11126313': 0.209309,\n", + " 'cg11176990': 0.359544,\n", + " 'cg11401866': 0.105506,\n", + " 'cg11431402': 0.017453,\n", + " 'cg11527930': 0.085740,\n", + " 'cg11539664': 0.242090,\n", + " 'cg11539674': 0.085122,\n", + " 'cg11582617': -0.079856,\n", + " 'cg11789185': 0.337167,\n", + " 'cg11791577': -0.114446,\n", + " 'cg11825926': -0.271932,\n", + " 'cg11989576': 0.111367,\n", + " 'cg12309479': -0.205215,\n", + " 'cg12340166': 0.071403,\n", + " 'cg12366582': -0.686739,\n", + " 'cg12397924': -0.112834,\n", + " 'cg12497786': -0.103957,\n", + " 'cg12497883': 0.062538,\n", + " 'cg12709970': -0.600442,\n", + " 'cg12791555': 0.126912,\n", + " 'cg12810523': -0.133198,\n", + " 'cg12848614': -0.199842,\n", + " 'cg12850242': 0.042669,\n", + " 'cg12924510': -0.014322,\n", + " 'cg12948621': 0.167489,\n", + " 'cg12981137': 0.088587,\n", + " 'cg13085980': -0.065387,\n", + " 'cg13136596': -0.148944,\n", + " 'cg13204699': -0.071927,\n", + " 'cg13246426': 0.252035,\n", + " 'cg13251484': -0.454989,\n", + " 'cg13365340': -0.184025,\n", + " 'cg13443627': 0.059344,\n", + " 'cg13494498': 0.244286,\n", + " 'cg13575298': 0.291580,\n", + " 'cg13683374': -0.104213,\n", + " 'cg13697378': 0.083463,\n", + " 'cg13706079': 0.108472,\n", + " 'cg13720865': 0.041302,\n", + " 'cg13730743': -0.641688,\n", + " 'cg13785883': 0.945837,\n", + " 'cg13844899': -0.193918,\n", + " 'cg13854874': 0.079520,\n", + " 'cg13935689': 0.246708,\n", + " 'cg13983063': -0.192778,\n", + " 'cg14003022': -0.005578,\n", + " 'cg14034476': -0.103989,\n", + " 'cg14074174': -0.427476,\n", + " 'cg14140060': 0.032882,\n", + " 'cg14291650': -0.102791,\n", + " 'cg14295611': -0.297499,\n", + " 'cg14305711': -0.015526,\n", + " 'cg14331163': -0.149986,\n", + " 'cg14362004': 0.415994,\n", + " 'cg14424579': 1.327706,\n", + " 'cg14627089': 0.180821,\n", + " 'cg14782206': 0.119040,\n", + " 'cg14815005': -0.513939,\n", + " 'cg14829814': -0.064138,\n", + " 'cg14884160': -0.384053,\n", + " 'cg15118606': 0.170109,\n", + " 'cg15128510': 0.073551,\n", + " 'cg15159987': 0.321430,\n", + " 'cg15178202': -0.194715,\n", + " 'cg15208197': 0.132881,\n", + " 'cg15363134': -0.074360,\n", + " 'cg15548613': 0.082398,\n", + " 'cg15586420': -0.088739,\n", + " 'cg15820033': -0.028047,\n", + " 'cg15829826': -0.696270,\n", + " 'cg15851799': -0.076108,\n", + " 'cg15951188': 0.114012,\n", + " 'cg16026522': 0.182903,\n", + " 'cg16136840': -0.072940,\n", + " 'cg16241714': 0.901287,\n", + " 'cg16331674': 0.483335,\n", + " 'cg16370875': 0.391429,\n", + " 'cg16478145': 0.075640,\n", + " 'cg16485682': 0.127591,\n", + " 'cg16639637': -0.052944,\n", + " 'cg16703882': 0.525210,\n", + " 'cg16742481': -0.272142,\n", + " 'cg16750275': -0.189936,\n", + " 'cg16837441': -0.029999,\n", + " 'cg16932827': -0.223159,\n", + " 'cg17121412': -0.100472,\n", + " 'cg17152981': 0.168534,\n", + " 'cg17317023': 0.116863,\n", + " 'cg17713912': 0.124633,\n", + " 'cg17758721': 0.738194,\n", + " 'cg17796960': 0.037607,\n", + " 'cg17802633': -0.145015,\n", + " 'cg17804348': 0.157616,\n", + " 'cg17892169': -0.090332,\n", + " 'cg18087943': -0.122351,\n", + " 'cg18113084': -0.643732,\n", + " 'cg18147606': -0.241852,\n", + " 'cg18153869': 0.132170,\n", + " 'cg18336453': 0.275552,\n", + " 'cg18382305': -0.146864,\n", + " 'cg18400181': -0.476893,\n", + " 'cg18417423': 0.132226,\n", + " 'cg18424208': 0.104091,\n", + " 'cg18575740': 0.060317,\n", + " 'cg18601714': 0.110121,\n", + " 'cg18635432': 0.028141,\n", + " 'cg18644787': -0.756996,\n", + " 'cg18781966': -0.193718,\n", + " 'cg18832152': -0.596254,\n", + " 'cg18847227': 0.028334,\n", + " 'cg18881501': -0.089093,\n", + " 'cg18933331': -0.388073,\n", + " 'cg19021197': 0.229413,\n", + " 'cg19065177': 0.101292,\n", + " 'cg19216851': 0.131758,\n", + " 'cg19283196': 0.175860,\n", + " 'cg19283806': -0.281685,\n", + " 'cg19590421': -0.065829,\n", + " 'cg19635712': 0.162209,\n", + " 'cg19668234': -0.040411,\n", + " 'cg19711783': -0.113758,\n", + " 'cg19761273': -0.108853,\n", + " 'cg19801705': 0.051839,\n", + " 'cg20011402': -0.257504,\n", + " 'cg20149168': 0.149040,\n", + " 'cg20222376': -0.106672,\n", + " 'cg20234855': 0.244570,\n", + " 'cg20386580': 0.094042,\n", + " 'cg20422417': 0.122609,\n", + " 'cg20434178': 0.341773,\n", + " 'cg20515136': -0.334444,\n", + " 'cg20559403': 0.249909,\n", + " 'cg20631204': -0.115376,\n", + " 'cg20665157': 0.099555,\n", + " 'cg20686554': 0.041199,\n", + " 'cg20744625': 0.520346,\n", + " 'cg20761322': 0.066527,\n", + " 'cg20912517': -0.185820,\n", + " 'cg21038957': -0.103066,\n", + " 'cg21159778': 0.090493,\n", + " 'cg21165519': -0.205365,\n", + " 'cg21186299': 2.029120,\n", + " 'cg21200656': 0.246572,\n", + " 'cg21213853': 0.222802,\n", + " 'cg21281007': 0.224273,\n", + " 'cg21495653': 0.222406,\n", + " 'cg21610904': -0.027103,\n", + " 'cg21824162': -0.665451,\n", + " 'cg21828345': 0.070299,\n", + " 'cg21988244': 0.031230,\n", + " 'cg22158769': 0.107564,\n", + " 'cg22266749': -0.100535,\n", + " 'cg22268271': -0.453787,\n", + " 'cg22517995': 0.354932,\n", + " 'cg22527345': 0.231810,\n", + " 'cg22639325': -0.072690,\n", + " 'cg22682373': 0.267342,\n", + " 'cg22697239': 0.176375,\n", + " 'cg22796704': -0.280375,\n", + " 'cg22849665': -0.217533,\n", + " 'cg22943590': -0.239895,\n", + " 'cg22947322': -0.052930,\n", + " 'cg23078123': -0.242301,\n", + " 'cg23104823': -0.613725,\n", + " 'cg23125200': -0.033819,\n", + " 'cg23347399': 0.013117,\n", + " 'cg23684204': 0.104843,\n", + " 'cg23715237': -0.231238,\n", + " 'cg23744638': -0.129161,\n", + " 'cg23956238': 0.533778,\n", + " 'cg24174665': 0.187453,\n", + " 'cg24350475': -0.101080,\n", + " 'cg24398933': -0.054795,\n", + " 'cg24408436': 0.156341,\n", + " 'cg24429836': -0.100258,\n", + " 'cg24489983': -0.115641,\n", + " 'cg24510518': -0.035831,\n", + " 'cg24515575': 0.267510,\n", + " 'cg24524451': -0.054761,\n", + " 'cg24667575': 0.242073,\n", + " 'cg24672014': 0.054554,\n", + " 'cg24686644': -0.508029,\n", + " 'cg24707573': 0.045543,\n", + " 'cg24871743': -0.002360,\n", + " 'cg24893035': 0.047359,\n", + " 'cg25052374': -0.015616,\n", + " 'cg25067162': 0.029022,\n", + " 'cg25108022': 0.341638,\n", + " 'cg25129541': -0.152400,\n", + " 'cg25267487': 0.047397,\n", + " 'cg25698236': -0.122299,\n", + " 'cg25711358': -0.106817,\n", + " 'cg25771195': 0.164269,\n", + " 'cg25909396': -0.275267,\n", + " 'cg26002713': -0.164871,\n", + " 'cg26116103': -0.265104,\n", + " 'cg26256771': -0.302967,\n", + " 'cg26311208': -0.022138,\n", + " 'cg26748578': -0.069656,\n", + " 'cg26765295': 0.048243,\n", + " 'cg26962778': 0.083161,\n", + " 'cg27239168': 0.270015,\n", + " 'cg27361964': -0.054397,\n", + " 'cg27651143': 0.235585,\n", + "}\n", + "intercept = -2.349778" + ] + }, + { + "cell_type": "markdown", + "id": "5035b180-3d1b-4432-8ebe-b9c92bd93a7f", + "metadata": {}, + "source": [ + "## Load features" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "77face1a-b58f-4f8f-9fe8-1f12037be99a", + "metadata": {}, + "outputs": [], + "source": [ + "model.features = list(coefficient_dict.keys())" + ] + }, + { + "cell_type": "markdown", + "id": "ee6d8fa0-4767-4c45-9717-eb1c95e2ddc0", + "metadata": {}, + "source": [ + "## Load weights into base model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a5ffad66-d948-4c78-aaf2-5809a4b42fa7", + "metadata": {}, + "outputs": [], + "source": [ + "weights = torch.tensor(list(coefficient_dict.values())).unsqueeze(0)\n", + "intercept = torch.tensor([intercept])" + ] + }, + { + "cell_type": "markdown", + "id": "ad261636-5b00-4979-bb1d-67a851f7aa19", + "metadata": {}, + "source": [ + "#### Linear model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d7f43b99-26f2-4622-9a76-316712058877", + "metadata": {}, + "outputs": [], + "source": [ + "base_model = pya.models.LinearModel(input_dim=len(model.features))\n", + "\n", + "base_model.linear.weight.data = weights.float()\n", + "base_model.linear.bias.data = intercept.float()\n", + "\n", + "model.base_model = base_model" + ] + }, + { + "cell_type": "markdown", + "id": "ad8b4c1d-9d57-48b7-9a30-bcfea7b747b1", + "metadata": {}, + "source": [ + "## Load reference values" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ade0f4c9-2298-4fc3-bb72-d200907dd731", + "metadata": {}, + "outputs": [], + "source": [ + "model.reference_values = None" + ] + }, + { + "cell_type": "markdown", + "id": "af3bcf7b-74a8-4d21-9ccb-4de0c2b0516b", + "metadata": {}, + "source": [ + "## Load preprocess and postprocess objects" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "79a1b3a2-00f1-42b1-9fcd-f919343391d7", + "metadata": {}, + "outputs": [], + "source": [ + "model.preprocess_name = None\n", + "model.preprocess_dependencies = None" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ff4a21cb-cf41-44dc-9ed1-95cf8aa15772", + "metadata": {}, + "outputs": [], + "source": [ + "model.postprocess_name = 'anti_log_linear'\n", + "model.postprocess_dependencies = None" + ] + }, + { + "cell_type": "markdown", + "id": "86e3d6b1-e67e-4f3d-bd39-0ebec5726c3c", + "metadata": {}, + "source": [ + "## Check all clock parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "2168355c-47d9-475d-b816-49f65e74887c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "%==================================== Model Details ====================================%\n", + "Model Attributes:\n", + "\n", + "training: True\n", + "metadata: {'approved_by_author': '⌛',\n", + " 'citation': 'Tomusiak, Alan, et al. \"Development of an epigenetic clock '\n", + " 'resistant to changes in immune cell composition.\" Communications '\n", + " 'Biology 7.1 (2024): 934.',\n", + " 'clock_name': 'intrinclock',\n", + " 'data_type': 'methylation',\n", + " 'doi': 'https://doi.org/10.1038/s42003-024-06609-4',\n", + " 'notes': None,\n", + " 'research_only': None,\n", + " 'species': 'Homo sapiens',\n", + " 'version': None,\n", + " 'year': 2024}\n", + "reference_values: None\n", + "preprocess_name: None\n", + "preprocess_dependencies: None\n", + "postprocess_name: 'anti_log_linear'\n", + "postprocess_dependencies: None\n", + "features: ['cg00054496', 'cg00055555', 'cg00088042', 'cg00107606', 'cg00359604', 'cg00448707', 'cg00454305', 'cg00480331', 'cg00491255', 'cg00579423', 'cg00583733', 'cg00588393', 'cg00590036', 'cg00593462', 'cg00716277', 'cg00734800', 'cg00751156', 'cg00753885', 'cg00790928', 'cg00792123', 'cg00796360', 'cg01028142', 'cg01080924', 'cg01131866', 'cg01153342', 'cg01176694', 'cg01260146', 'cg01410876', 'cg01426818', 'cg01447660']... [Total elements: 380]\n", + "base_model_features: None\n", + "\n", + "%==================================== Model Details ====================================%\n", + "Model Structure:\n", + "\n", + "base_model: LinearModel(\n", + " (linear): Linear(in_features=380, out_features=1, bias=True)\n", + ")\n", + "\n", + "%==================================== Model Details ====================================%\n", + "Model Parameters and Weights:\n", + "\n", + "base_model.linear.weight: [0.629764974117279, 0.0004780000017490238, 0.5425040125846863, -0.6023579835891724, -0.20740799605846405, 0.10132099688053131, 0.2141769975423813, 0.27955299615859985, 0.23592300713062286, -0.023260999470949173, -0.1878580003976822, 0.3060399889945984, 0.9469850063323975, 0.33906999230384827, -0.11784300208091736, -0.2824240028858185, 0.8952140212059021, -0.46393999457359314, -0.3338710069656372, -0.45566999912261963, -0.040681999176740646, 0.24853700399398804, -0.06306999921798706, -0.14341700077056885, -0.434237003326416, -0.16436100006103516, -0.68572998046875, 0.0009899999713525176, 0.09043300151824951, -0.37791499495506287]... [Tensor of shape torch.Size([1, 380])]\n", + "base_model.linear.bias: tensor([-2.3498])\n", + "\n", + "%==================================== Model Details ====================================%\n", + "\n" + ] + } + ], + "source": [ + "pya.utils.print_model_details(model)" + ] + }, + { + "cell_type": "markdown", + "id": "986d0262-e0c7-4036-b687-dee53ba392fb", + "metadata": {}, + "source": [ + "## Basic test" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "352cffb0-c5a8-4c82-8f61-fce35baf5a22", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ -0.9125],\n", + " [ 11.0212],\n", + " [ 29.7538],\n", + " [ 18.8043],\n", + " [ -1.0000],\n", + " [118.1667],\n", + " [ -0.9992],\n", + " [172.2491],\n", + " [ -0.9376],\n", + " [ 55.5021]], dtype=torch.float64, grad_fn=)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.manual_seed(42)\n", + "input = torch.randn(10, len(model.features), dtype=float)\n", + "model.eval()\n", + "model.to(float)\n", + "pred = model(input)\n", + "pred" + ] + }, + { + "cell_type": "markdown", + "id": "fe8299d7-9285-4e22-82fd-b664434b4369", + "metadata": {}, + "source": [ + "## Save torch model" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "0c3a2d80-1b5f-458a-926c-cbc0aa9416e1", + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(model, f\"../weights/{model.metadata['clock_name']}.pt\")" + ] + }, + { + "cell_type": "markdown", + "id": "bac6257b-8d08-4a90-8d0b-7f745dc11ac1", + "metadata": {}, + "source": [ + "## Clear directory\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "11aeaa70-44c0-42f9-86d7-740e3849a7a6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Deleted folder: .ipynb_checkpoints\n" + ] + } + ], + "source": [ + "# Function to remove a folder and all its contents\n", + "def remove_folder(path):\n", + " try:\n", + " shutil.rmtree(path)\n", + " print(f\"Deleted folder: {path}\")\n", + " except Exception as e:\n", + " print(f\"Error deleting folder {path}: {e}\")\n", + "\n", + "# Get a list of all files and folders in the current directory\n", + "all_items = os.listdir('.')\n", + "\n", + "# Loop through the items\n", + "for item in all_items:\n", + " # Check if it's a file and does not end with .ipynb\n", + " if os.path.isfile(item) and not item.endswith('.ipynb'):\n", + " os.remove(item)\n", + " print(f\"Deleted file: {item}\")\n", + " # Check if it's a folder\n", + " elif os.path.isdir(item):\n", + " remove_folder(item)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/clocks/notebooks/retroelementagev1.ipynb b/clocks/notebooks/retroelementagev1.ipynb new file mode 100644 index 0000000..e8b9b06 --- /dev/null +++ b/clocks/notebooks/retroelementagev1.ipynb @@ -0,0 +1,481 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2f04eee0-5928-4e74-a754-6dc2e528810c", + "metadata": {}, + "source": [ + "# RetroelementAgeV1" + ] + }, + { + "cell_type": "markdown", + "id": "a3f514a3-772c-4a14-afdf-5a8376851ff4", + "metadata": {}, + "source": [ + "## Index\n", + "1. [Instantiate model class](#Instantiate-model-class)\n", + "2. [Define clock metadata](#Define-clock-metadata)\n", + "3. [Download clock dependencies](#Download-clock-dependencies)\n", + "5. [Load features](#Load-features)\n", + "6. [Load weights into base model](#Load-weights-into-base-model)\n", + "7. [Load reference values](#Load-reference-values)\n", + "8. [Load preprocess and postprocess objects](#Load-preprocess-and-postprocess-objects)\n", + "10. [Check all clock parameters](#Check-all-clock-parameters)\n", + "10. [Basic test](#Basic-test)\n", + "11. [Save torch model](#Save-torch-model)\n", + "12. [Clear directory](#Clear-directory)" + ] + }, + { + "cell_type": "markdown", + "id": "d95fafdc-643a-40ea-a689-200bd132e90c", + "metadata": {}, + "source": [ + "Let's first import some packages:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4adfb4de-cd79-4913-a1af-9e23e9e236c9", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import inspect\n", + "import shutil\n", + "import json\n", + "import torch\n", + "import pandas as pd\n", + "import pyaging as pya" + ] + }, + { + "cell_type": "markdown", + "id": "145082e5-ced4-47ae-88c0-cb69773e3c5a", + "metadata": {}, + "source": [ + "## Instantiate model class" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8aa77372-7ed3-4da7-abc9-d30372106139", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "class RetroelementAgeV1(pyagingModel):\n", + " def __init__(self):\n", + " super().__init__()\n", + "\n", + " def preprocess(self, x):\n", + " return x\n", + "\n", + " def postprocess(self, x):\n", + " return x\n", + "\n" + ] + } + ], + "source": [ + "def print_entire_class(cls):\n", + " source = inspect.getsource(cls)\n", + " print(source)\n", + "\n", + "print_entire_class(pya.models.RetroelementAgeV1)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "78536494-f1d9-44de-8583-c89a310d2307", + "metadata": {}, + "outputs": [], + "source": [ + "model = pya.models.RetroelementAgeV1()" + ] + }, + { + "cell_type": "markdown", + "id": "51f8615e-01fa-4aa5-b196-3ee2b35d261c", + "metadata": {}, + "source": [ + "## Define clock metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6601da9e-8adc-44ee-9308-75e3cd31b816", + "metadata": {}, + "outputs": [], + "source": [ + "model.metadata[\"clock_name\"] = 'retroelementagev1'\n", + "model.metadata[\"data_type\"] = 'methylation'\n", + "model.metadata[\"species\"] = 'Homo sapiens'\n", + "model.metadata[\"year\"] = 2024\n", + "model.metadata[\"approved_by_author\"] = '⌛'\n", + "model.metadata[\"citation\"] = \"Ndhlovu, Lishomwa C., et al. \\\"Retro‐age: A unique epigenetic biomarker of aging captured by DNA methylation states of retroelements.\\\" Aging Cell (2024): e14288.\",\n", + "model.metadata[\"doi\"] = \"https://doi.org/10.1111/acel.14288\"\n", + "model.metadata[\"research_only\"] = None\n", + "model.metadata[\"notes\"] = None" + ] + }, + { + "cell_type": "markdown", + "id": "74492239-5aae-4026-9d90-6bc9c574c110", + "metadata": {}, + "source": [ + "## Download clock dependencies" + ] + }, + { + "cell_type": "markdown", + "id": "fbacbbc5-7cf1-41bd-81c4-45adde992de6", + "metadata": {}, + "source": [ + "#### Download directly with curl" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "348e113d-a00a-481d-84ac-e8459a4a5050", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "supplementary_url = \"https://zenodo.org/records/11099870/files/Retroelement_AgeV1coefficients.csv?download=1\"\n", + "supplementary_file_name = \"coefficients.csv\"\n", + "os.system(f\"curl -o {supplementary_file_name} {supplementary_url}\")" + ] + }, + { + "cell_type": "markdown", + "id": "5035b180-3d1b-4432-8ebe-b9c92bd93a7f", + "metadata": {}, + "source": [ + "## Load features" + ] + }, + { + "cell_type": "markdown", + "id": "793d7b24-b1ef-4dd2-ac26-079f7b67fba7", + "metadata": {}, + "source": [ + "#### From CSV file" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "110a5ded-d25f-4cef-8e84-4f51210dfc26", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('coefficients.csv', index_col=0)\n", + "df['feature'] = df['name']\n", + "model.features = df['feature'].tolist()[1:]" + ] + }, + { + "cell_type": "markdown", + "id": "ee6d8fa0-4767-4c45-9717-eb1c95e2ddc0", + "metadata": {}, + "source": [ + "## Load weights into base model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e09b3463-4fd4-41b1-ac21-e63ddd223fe0", + "metadata": {}, + "outputs": [], + "source": [ + "weights = torch.tensor(df['coefficient'].tolist()[1:]).unsqueeze(0)\n", + "intercept = torch.tensor([df['coefficient'].tolist()[0]])" + ] + }, + { + "cell_type": "markdown", + "id": "ad261636-5b00-4979-bb1d-67a851f7aa19", + "metadata": {}, + "source": [ + "#### Linear model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d7f43b99-26f2-4622-9a76-316712058877", + "metadata": {}, + "outputs": [], + "source": [ + "base_model = pya.models.LinearModel(input_dim=len(model.features))\n", + "\n", + "base_model.linear.weight.data = weights.float()\n", + "base_model.linear.bias.data = intercept.float()\n", + "\n", + "model.base_model = base_model" + ] + }, + { + "cell_type": "markdown", + "id": "ad8b4c1d-9d57-48b7-9a30-bcfea7b747b1", + "metadata": {}, + "source": [ + "## Load reference values" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ade0f4c9-2298-4fc3-bb72-d200907dd731", + "metadata": {}, + "outputs": [], + "source": [ + "model.reference_values = None" + ] + }, + { + "cell_type": "markdown", + "id": "af3bcf7b-74a8-4d21-9ccb-4de0c2b0516b", + "metadata": {}, + "source": [ + "## Load preprocess and postprocess objects" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7a22fb20-c605-424d-8efb-7620c2c0755c", + "metadata": {}, + "outputs": [], + "source": [ + "model.preprocess_name = None\n", + "model.preprocess_dependencies = None" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ff4a21cb-cf41-44dc-9ed1-95cf8aa15772", + "metadata": {}, + "outputs": [], + "source": [ + "model.postprocess_name = None\n", + "model.postprocess_dependencies = None" + ] + }, + { + "cell_type": "markdown", + "id": "86e3d6b1-e67e-4f3d-bd39-0ebec5726c3c", + "metadata": {}, + "source": [ + "## Check all clock parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "2168355c-47d9-475d-b816-49f65e74887c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "%==================================== Model Details ====================================%\n", + "Model Attributes:\n", + "\n", + "training: True\n", + "metadata: {'approved_by_author': '⌛',\n", + " 'citation': ('Ndhlovu, Lishomwa C., et al. \"Retro‐age: A unique epigenetic '\n", + " 'biomarker of aging captured by DNA methylation states of '\n", + " 'retroelements.\" Aging Cell (2024): e14288.',),\n", + " 'clock_name': 'retroelementagev1',\n", + " 'data_type': 'methylation',\n", + " 'doi': 'https://doi.org/10.1111/acel.14288',\n", + " 'notes': None,\n", + " 'research_only': None,\n", + " 'species': 'Homo sapiens',\n", + " 'version': None,\n", + " 'year': 2024}\n", + "reference_values: None\n", + "preprocess_name: None\n", + "preprocess_dependencies: None\n", + "postprocess_name: None\n", + "postprocess_dependencies: None\n", + "features: ['cg22006408', 'cg25231957', 'cg18158970', 'cg19246944', 'cg18515940', 'cg02688359', 'cg12451887', 'cg10621809', 'cg25811448', 'cg06655097', 'cg12786023', 'cg06922212', 'cg16905434', 'cg15851164', 'cg06059332', 'cg04134193', 'cg24634009', 'cg06159896', 'cg21814996', 'cg27433764', 'cg07416187', 'cg00995689', 'cg13292984', 'cg12121166', 'cg11544647', 'cg25292140', 'cg10960147', 'cg10136168', 'cg16912512', 'cg15366524']... [Total elements: 1317]\n", + "base_model_features: None\n", + "\n", + "%==================================== Model Details ====================================%\n", + "Model Structure:\n", + "\n", + "base_model: LinearModel(\n", + " (linear): Linear(in_features=1317, out_features=1, bias=True)\n", + ")\n", + "\n", + "%==================================== Model Details ====================================%\n", + "Model Parameters and Weights:\n", + "\n", + "base_model.linear.weight: [2.552152395248413, 1.2862745523452759, 3.9165291786193848, 7.276383876800537, -0.7971858978271484, -0.7952958345413208, 2.0488688945770264, 0.4890359044075012, 0.3316255509853363, 0.11408211290836334, -0.19781948626041412, 0.08929739892482758, 0.6588783264160156, 0.10985901951789856, -0.2538025975227356, -0.11299397051334381, -5.241518020629883, 0.6423152089118958, -10.09760856628418, -0.17259803414344788, -3.927119731903076, 0.06528078764677048, 0.03197569400072098, -2.1564865112304688, 0.19085198640823364, -6.1964287757873535, 1.167095422744751, 3.6051061153411865, -1.0168722867965698, -0.029270412400364876]... [Tensor of shape torch.Size([1, 1317])]\n", + "base_model.linear.bias: tensor([78.1362])\n", + "\n", + "%==================================== Model Details ====================================%\n", + "\n" + ] + } + ], + "source": [ + "pya.utils.print_model_details(model)" + ] + }, + { + "cell_type": "markdown", + "id": "986d0262-e0c7-4036-b687-dee53ba392fb", + "metadata": {}, + "source": [ + "## Basic test" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "936b9877-d076-4ced-99aa-e8d4c58c5caf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[159.9747],\n", + " [ -3.6259],\n", + " [142.4391],\n", + " [ 5.3667],\n", + " [114.5559],\n", + " [ 30.8754],\n", + " [-16.2213],\n", + " [230.4298],\n", + " [ 50.1820],\n", + " [113.8849]], dtype=torch.float64, grad_fn=)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.manual_seed(42)\n", + "input = torch.randn(10, len(model.features), dtype=float)\n", + "model.eval()\n", + "model.to(float)\n", + "pred = model(input)\n", + "pred" + ] + }, + { + "cell_type": "markdown", + "id": "fe8299d7-9285-4e22-82fd-b664434b4369", + "metadata": {}, + "source": [ + "## Save torch model" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5ef2fa8d-c80b-4fdd-8555-79c0d541788e", + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(model, f\"../weights/{model.metadata['clock_name']}.pt\")" + ] + }, + { + "cell_type": "markdown", + "id": "bac6257b-8d08-4a90-8d0b-7f745dc11ac1", + "metadata": {}, + "source": [ + "## Clear directory\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "11aeaa70-44c0-42f9-86d7-740e3849a7a6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Deleted file: coefficients.csv\n", + "Deleted folder: .ipynb_checkpoints\n" + ] + } + ], + "source": [ + "# Function to remove a folder and all its contents\n", + "def remove_folder(path):\n", + " try:\n", + " shutil.rmtree(path)\n", + " print(f\"Deleted folder: {path}\")\n", + " except Exception as e:\n", + " print(f\"Error deleting folder {path}: {e}\")\n", + "\n", + "# Get a list of all files and folders in the current directory\n", + "all_items = os.listdir('.')\n", + "\n", + "# Loop through the items\n", + "for item in all_items:\n", + " # Check if it's a file and does not end with .ipynb\n", + " if os.path.isfile(item) and not item.endswith('.ipynb'):\n", + " os.remove(item)\n", + " print(f\"Deleted file: {item}\")\n", + " # Check if it's a folder\n", + " elif os.path.isdir(item):\n", + " remove_folder(item)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/clocks/notebooks/retroelementagev2.ipynb b/clocks/notebooks/retroelementagev2.ipynb new file mode 100644 index 0000000..bb24baf --- /dev/null +++ b/clocks/notebooks/retroelementagev2.ipynb @@ -0,0 +1,480 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2f04eee0-5928-4e74-a754-6dc2e528810c", + "metadata": {}, + "source": [ + "# RetroelementAgeV2" + ] + }, + { + "cell_type": "markdown", + "id": "a3f514a3-772c-4a14-afdf-5a8376851ff4", + "metadata": {}, + "source": [ + "## Index\n", + "1. [Instantiate model class](#Instantiate-model-class)\n", + "2. [Define clock metadata](#Define-clock-metadata)\n", + "3. [Download clock dependencies](#Download-clock-dependencies)\n", + "5. [Load features](#Load-features)\n", + "6. [Load weights into base model](#Load-weights-into-base-model)\n", + "7. [Load reference values](#Load-reference-values)\n", + "8. [Load preprocess and postprocess objects](#Load-preprocess-and-postprocess-objects)\n", + "10. [Check all clock parameters](#Check-all-clock-parameters)\n", + "10. [Basic test](#Basic-test)\n", + "11. [Save torch model](#Save-torch-model)\n", + "12. [Clear directory](#Clear-directory)" + ] + }, + { + "cell_type": "markdown", + "id": "d95fafdc-643a-40ea-a689-200bd132e90c", + "metadata": {}, + "source": [ + "Let's first import some packages:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4adfb4de-cd79-4913-a1af-9e23e9e236c9", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import inspect\n", + "import shutil\n", + "import json\n", + "import torch\n", + "import pandas as pd\n", + "import pyaging as pya" + ] + }, + { + "cell_type": "markdown", + "id": "145082e5-ced4-47ae-88c0-cb69773e3c5a", + "metadata": {}, + "source": [ + "## Instantiate model class" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8aa77372-7ed3-4da7-abc9-d30372106139", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "class RetroelementAgeV2(pyagingModel):\n", + " def __init__(self):\n", + " super().__init__()\n", + "\n", + " def preprocess(self, x):\n", + " return x\n", + "\n", + " def postprocess(self, x):\n", + " return x\n", + "\n" + ] + } + ], + "source": [ + "def print_entire_class(cls):\n", + " source = inspect.getsource(cls)\n", + " print(source)\n", + "\n", + "print_entire_class(pya.models.RetroelementAgeV2)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "78536494-f1d9-44de-8583-c89a310d2307", + "metadata": {}, + "outputs": [], + "source": [ + "model = pya.models.RetroelementAgeV2()" + ] + }, + { + "cell_type": "markdown", + "id": "51f8615e-01fa-4aa5-b196-3ee2b35d261c", + "metadata": {}, + "source": [ + "## Define clock metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6601da9e-8adc-44ee-9308-75e3cd31b816", + "metadata": {}, + "outputs": [], + "source": [ + "model.metadata[\"clock_name\"] = 'retroelementagev2'\n", + "model.metadata[\"data_type\"] = 'methylation'\n", + "model.metadata[\"species\"] = 'Homo sapiens'\n", + "model.metadata[\"year\"] = 2024\n", + "model.metadata[\"approved_by_author\"] = '⌛'\n", + "model.metadata[\"citation\"] = \"Ndhlovu, Lishomwa C., et al. \\\"Retro‐age: A unique epigenetic biomarker of aging captured by DNA methylation states of retroelements.\\\" Aging Cell (2024): e14288.\",\n", + "model.metadata[\"doi\"] = \"https://doi.org/10.1111/acel.14288\"\n", + "model.metadata[\"research_only\"] = None\n", + "model.metadata[\"notes\"] = None" + ] + }, + { + "cell_type": "markdown", + "id": "74492239-5aae-4026-9d90-6bc9c574c110", + "metadata": {}, + "source": [ + "## Download clock dependencies" + ] + }, + { + "cell_type": "markdown", + "id": "fbacbbc5-7cf1-41bd-81c4-45adde992de6", + "metadata": {}, + "source": [ + "#### Download directly with curl" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "348e113d-a00a-481d-84ac-e8459a4a5050", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "supplementary_url = \"https://zenodo.org/records/11099870/files/Retroelement_AgeV2coefficients.csv?download=1\"\n", + "supplementary_file_name = \"coefficients.csv\"\n", + "os.system(f\"curl -o {supplementary_file_name} {supplementary_url}\")" + ] + }, + { + "cell_type": "markdown", + "id": "5035b180-3d1b-4432-8ebe-b9c92bd93a7f", + "metadata": {}, + "source": [ + "## Load features" + ] + }, + { + "cell_type": "markdown", + "id": "793d7b24-b1ef-4dd2-ac26-079f7b67fba7", + "metadata": {}, + "source": [ + "#### From CSV file" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "110a5ded-d25f-4cef-8e84-4f51210dfc26", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('coefficients.csv', index_col=0)\n", + "df['feature'] = df['name']\n", + "model.features = df['feature'].tolist()[1:]" + ] + }, + { + "cell_type": "markdown", + "id": "ee6d8fa0-4767-4c45-9717-eb1c95e2ddc0", + "metadata": {}, + "source": [ + "## Load weights into base model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e09b3463-4fd4-41b1-ac21-e63ddd223fe0", + "metadata": {}, + "outputs": [], + "source": [ + "weights = torch.tensor(df['coefficient'].tolist()[1:]).unsqueeze(0)\n", + "intercept = torch.tensor([df['coefficient'].tolist()[0]])" + ] + }, + { + "cell_type": "markdown", + "id": "ad261636-5b00-4979-bb1d-67a851f7aa19", + "metadata": {}, + "source": [ + "#### Linear model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d7f43b99-26f2-4622-9a76-316712058877", + "metadata": {}, + "outputs": [], + "source": [ + "base_model = pya.models.LinearModel(input_dim=len(model.features))\n", + "\n", + "base_model.linear.weight.data = weights.float()\n", + "base_model.linear.bias.data = intercept.float()\n", + "\n", + "model.base_model = base_model" + ] + }, + { + "cell_type": "markdown", + "id": "ad8b4c1d-9d57-48b7-9a30-bcfea7b747b1", + "metadata": {}, + "source": [ + "## Load reference values" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ade0f4c9-2298-4fc3-bb72-d200907dd731", + "metadata": {}, + "outputs": [], + "source": [ + "model.reference_values = None" + ] + }, + { + "cell_type": "markdown", + "id": "af3bcf7b-74a8-4d21-9ccb-4de0c2b0516b", + "metadata": {}, + "source": [ + "## Load preprocess and postprocess objects" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7a22fb20-c605-424d-8efb-7620c2c0755c", + "metadata": {}, + "outputs": [], + "source": [ + "model.preprocess_name = None\n", + "model.preprocess_dependencies = None" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ff4a21cb-cf41-44dc-9ed1-95cf8aa15772", + "metadata": {}, + "outputs": [], + "source": [ + "model.postprocess_name = None\n", + "model.postprocess_dependencies = None" + ] + }, + { + "cell_type": "markdown", + "id": "86e3d6b1-e67e-4f3d-bd39-0ebec5726c3c", + "metadata": {}, + "source": [ + "## Check all clock parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "2168355c-47d9-475d-b816-49f65e74887c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "%==================================== Model Details ====================================%\n", + "Model Attributes:\n", + "\n", + "training: True\n", + "metadata: {'approved_by_author': '⌛',\n", + " 'citation': ('Ndhlovu, Lishomwa C., et al. \"Retro‐age: A unique epigenetic '\n", + " 'biomarker of aging captured by DNA methylation states of '\n", + " 'retroelements.\" Aging Cell (2024): e14288.',),\n", + " 'clock_name': 'retroelementagev2',\n", + " 'data_type': 'methylation',\n", + " 'doi': 'https://doi.org/10.1111/acel.14288',\n", + " 'notes': None,\n", + " 'research_only': None,\n", + " 'species': 'Homo sapiens',\n", + " 'version': None,\n", + " 'year': 2024}\n", + "reference_values: None\n", + "preprocess_name: None\n", + "preprocess_dependencies: None\n", + "postprocess_name: None\n", + "postprocess_dependencies: None\n", + "features: ['cg23728960', 'cg10366786', 'cg08398371', 'cg10973967', 'cg01721313', 'cg25231957', 'cg21992844', 'cg24646803', 'cg16037947', 'cg03468827', 'cg18939097', 'cg01740695', 'cg25593520', 'cg11120551', 'cg01403320', 'cg07821808', 'cg19796584', 'cg27422507', 'cg16033766', 'cg03667422', 'cg18369325', 'cg09373192', 'cg00700455', 'cg20923885', 'cg07644039', 'cg08506585', 'cg12744300', 'cg20051117', 'cg04765439', 'cg03320087']... [Total elements: 1378]\n", + "base_model_features: None\n", + "\n", + "%==================================== Model Details ====================================%\n", + "Model Structure:\n", + "\n", + "base_model: LinearModel(\n", + " (linear): Linear(in_features=1378, out_features=1, bias=True)\n", + ")\n", + "\n", + "%==================================== Model Details ====================================%\n", + "Model Parameters and Weights:\n", + "\n", + "base_model.linear.weight: [0.14868102967739105, 0.4319063127040863, -3.2955737113952637, 2.740462303161621, 0.8276747465133667, 2.5161685943603516, -1.4428094625473022, 0.7483506798744202, 0.18299150466918945, -0.6927100419998169, 0.17969781160354614, -0.10440022498369217, -1.5772448778152466, -0.6229540705680847, 0.6904758214950562, 0.16339100897312164, 0.9191771745681763, 3.9579126834869385, 0.0863616093993187, 0.5412383079528809, 0.45451620221138, 0.6237431168556213, 1.284313678741455, 0.9083895683288574, -0.12913724780082703, -0.03379112854599953, 2.2048282623291016, 3.126389503479004, -4.167088508605957, 2.5030527114868164]... [Tensor of shape torch.Size([1, 1378])]\n", + "base_model.linear.bias: tensor([62.0742])\n", + "\n", + "%==================================== Model Details ====================================%\n", + "\n" + ] + } + ], + "source": [ + "pya.utils.print_model_details(model)" + ] + }, + { + "cell_type": "markdown", + "id": "986d0262-e0c7-4036-b687-dee53ba392fb", + "metadata": {}, + "source": [ + "## Basic test" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "936b9877-d076-4ced-99aa-e8d4c58c5caf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 65.4386],\n", + " [ 64.7318],\n", + " [ 26.8192],\n", + " [ 15.2899],\n", + " [ 45.2880],\n", + " [ 31.0392],\n", + " [ 54.1099],\n", + " [ 18.1802],\n", + " [128.9012],\n", + " [ 50.0440]], dtype=torch.float64, grad_fn=)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.manual_seed(42)\n", + "input = torch.randn(10, len(model.features), dtype=float)\n", + "model.eval()\n", + "model.to(float)\n", + "pred = model(input)\n", + "pred" + ] + }, + { + "cell_type": "markdown", + "id": "fe8299d7-9285-4e22-82fd-b664434b4369", + "metadata": {}, + "source": [ + "## Save torch model" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5ef2fa8d-c80b-4fdd-8555-79c0d541788e", + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(model, f\"../weights/{model.metadata['clock_name']}.pt\")" + ] + }, + { + "cell_type": "markdown", + "id": "bac6257b-8d08-4a90-8d0b-7f745dc11ac1", + "metadata": {}, + "source": [ + "## Clear directory\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "11aeaa70-44c0-42f9-86d7-740e3849a7a6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Deleted file: coefficients.csv\n" + ] + } + ], + "source": [ + "# Function to remove a folder and all its contents\n", + "def remove_folder(path):\n", + " try:\n", + " shutil.rmtree(path)\n", + " print(f\"Deleted folder: {path}\")\n", + " except Exception as e:\n", + " print(f\"Error deleting folder {path}: {e}\")\n", + "\n", + "# Get a list of all files and folders in the current directory\n", + "all_items = os.listdir('.')\n", + "\n", + "# Loop through the items\n", + "for item in all_items:\n", + " # Check if it's a file and does not end with .ipynb\n", + " if os.path.isfile(item) and not item.endswith('.ipynb'):\n", + " os.remove(item)\n", + " print(f\"Deleted file: {item}\")\n", + " # Check if it's a folder\n", + " elif os.path.isdir(item):\n", + " remove_folder(item)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyaging/__init__.py b/pyaging/__init__.py index b402cc5..b5f8354 100644 --- a/pyaging/__init__.py +++ b/pyaging/__init__.py @@ -4,4 +4,4 @@ from . import predict as pred from . import preprocess as pp -__version__ = "0.1.9" +__version__ = "0.1.10" diff --git a/pyaging/models/_models.py b/pyaging/models/_models.py index 2c468a5..db09999 100644 --- a/pyaging/models/_models.py +++ b/pyaging/models/_models.py @@ -1348,6 +1348,7 @@ def preprocess(self, x): def postprocess(self, x): return x + class StocH(pyagingModel): def __init__(self): super().__init__() @@ -1358,6 +1359,7 @@ def preprocess(self, x): def postprocess(self, x): return x + class StocZ(pyagingModel): def __init__(self): super().__init__() @@ -1368,6 +1370,7 @@ def preprocess(self, x): def postprocess(self, x): return x + class StocP(pyagingModel): def __init__(self): super().__init__() @@ -1378,6 +1381,7 @@ def preprocess(self, x): def postprocess(self, x): return x + class stemTOC(pyagingModel): def __init__(self): super().__init__() @@ -1390,13 +1394,14 @@ def preprocess(self, x): if len(filtered_row) > 0: quantile_95 = torch.quantile(filtered_row, 0.95) else: - quantile_95 = torch.tensor(float('nan')) + quantile_95 = torch.tensor(float("nan")) quantiles.append(quantile_95) return torch.vstack(quantiles) def postprocess(self, x): return x + class epiTOC1(pyagingModel): def __init__(self): super().__init__() @@ -1409,9 +1414,60 @@ def preprocess(self, x): if len(filtered_row) > 0: mean = torch.mean(filtered_row) else: - mean = torch.tensor(float('nan')) + mean = torch.tensor(float("nan")) means.append(mean) return torch.vstack(means) def postprocess(self, x): - return x \ No newline at end of file + return x + + +class RetroelementAgeV1(pyagingModel): + def __init__(self): + super().__init__() + + def preprocess(self, x): + return x + + def postprocess(self, x): + return x + + +class RetroelementAgeV2(pyagingModel): + def __init__(self): + super().__init__() + + def preprocess(self, x): + return x + + def postprocess(self, x): + return x + + +class IntrinClock(pyagingModel): + def __init__(self): + super().__init__() + + def preprocess(self, x): + return x + + def postprocess(self, x): + """ + Applies an anti-logarithmic linear transformation to a PyTorch tensor. + """ + adult_age = 20 + + # Create a mask for negative and non-negative values + mask_negative = x < 0 + mask_non_negative = ~mask_negative + + # Initialize the result tensor + age_tensor = torch.empty_like(x) + + # Exponential transformation for negative values + age_tensor[mask_negative] = (1 + adult_age) * torch.exp(x[mask_negative]) - 1 + + # Linear transformation for non-negative values + age_tensor[mask_non_negative] = (1 + adult_age) * x[mask_non_negative] + adult_age + + return age_tensor diff --git a/pyaging/predict/_pred_utils.py b/pyaging/predict/_pred_utils.py index 1cbcdd8..8a4a934 100644 --- a/pyaging/predict/_pred_utils.py +++ b/pyaging/predict/_pred_utils.py @@ -15,6 +15,7 @@ try: import cupy as cp + CUPY_AVAILABLE = cp.cuda.is_available() except: CUPY_AVAILABLE = False @@ -158,7 +159,11 @@ def check_features_in_adata( """ # Preallocate the data matrix - adata.obsm[f"X_{model.metadata['clock_name']}"] = cp.empty((adata.n_obs, len(model.features))) if CUPY_AVAILABLE else np.empty((adata.n_obs, len(model.features)), order="F") + adata.obsm[f"X_{model.metadata['clock_name']}"] = ( + cp.empty((adata.n_obs, len(model.features))) + if CUPY_AVAILABLE + else np.empty((adata.n_obs, len(model.features)), order="F") + ) # Find indices of matching features in adata.var_names feature_indices = {feature: i for i, feature in enumerate(adata.var_names)} @@ -174,7 +179,9 @@ def check_features_in_adata( adata.obsm[f"X_{model.metadata['clock_name']}"][:, existing_features_mask] = adata.X[:, existing_features_indices] # Handle missing features - adata.obsm[f"X_{model.metadata['clock_name']}"][:, missing_features_mask] = np.array(model.reference_values)[missing_features_mask] if model.reference_values is not None else 0 + adata.obsm[f"X_{model.metadata['clock_name']}"][:, missing_features_mask] = ( + np.array(model.reference_values)[missing_features_mask] if model.reference_values is not None else 0 + ) # Calculate missing features statistics num_missing_features = len(missing_features) @@ -218,6 +225,7 @@ def check_features_in_adata( indent_level=indent_level + 1, ) + @progress("Predict ages with model") def predict_ages_with_model( adata: anndata.AnnData, @@ -311,6 +319,7 @@ def predict_ages_with_model( return predictions + @progress("Add predicted ages and clock metadata to adata") def add_pred_ages_and_clock_metadata_adata( adata: anndata.AnnData, diff --git a/pyaging/preprocess/_preprocess.py b/pyaging/preprocess/_preprocess.py index bfaebf2..348117b 100644 --- a/pyaging/preprocess/_preprocess.py +++ b/pyaging/preprocess/_preprocess.py @@ -6,16 +6,18 @@ try: from pyBigWig import open as open_bw + PYBIGWIG_AVAILABLE = True except ImportError: PYBIGWIG_AVAILABLE = False try: import cupy as cp + CUPY_AVAILABLE = cp.cuda.is_available() except: CUPY_AVAILABLE = False - + from ..logger import LoggerManager, main_tqdm, silence_logger from ._preprocess_utils import * @@ -55,8 +57,8 @@ def bigwig_to_df(bw_files: Union[str, List[str]], dir: str = "pyaging_data", ver ----- The function utilizes Ensembl gene annotations and assumes the presence of genes on standard chromosomes (1-22, X). Non-standard chromosomes or regions outside annotated genes are not processed. The signal - transformation uses the arcsinh function for normalization. This function requires pyBigWig to be installed. - If pyBigWig is not available, an ImportError will be raised. To use this function, ensure you have installed + transformation uses the arcsinh function for normalization. This function requires pyBigWig to be installed. + If pyBigWig is not available, an ImportError will be raised. To use this function, ensure you have installed pyaging with the 'bigwig' extra: pip install pyaging[bigwig] Examples @@ -68,7 +70,7 @@ def bigwig_to_df(bw_files: Union[str, List[str]], dir: str = "pyaging_data", ver """ if not PYBIGWIG_AVAILABLE: raise ImportError("pyBigWig is not installed. To use this function, please install it.") - + logger = LoggerManager.gen_logger("bigwig_to_df") if not verbose: silence_logger("bigwig_to_df") diff --git a/pyproject.toml b/pyproject.toml index 803d721..00ba894 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pyaging" -version = "v0.1.9" +version = "v0.1.10" description = "A Python-based compendium of GPU-optimized aging clocks." authors = ["Lucas Paulo de Lima Camillo "] license = "BSD" diff --git a/tests/predict/test_gold_standard.py b/tests/predict/test_gold_standard.py index 8e2e730..b6b8b8c 100644 --- a/tests/predict/test_gold_standard.py +++ b/tests/predict/test_gold_standard.py @@ -67,6 +67,9 @@ "stocp": -48.95705647484499, "stemtoc": 2.060230880575066, "epitoc1": 0.8689615831989005, + "retroelementagev1": 144.2884653588903, + "retroelementagev2": 53.802897567351465, + "intrinclock": 132.97353886880327, }