Skip to content

Commit

Permalink
Add CCD support to Boltz Colab
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-steinegger committed Nov 17, 2024
1 parent 1a941a8 commit 999a9e0
Showing 1 changed file with 17 additions and 2 deletions.
19 changes: 17 additions & 2 deletions boltz1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"provenance": [],
"machine_shape": "hm",
"gpuType": "A100",
"authorship_tag": "ABX9TyP6Tde8GuCpqZ+80nF7ZR3o",
"authorship_tag": "ABX9TyNTASXa0TPusw2Bq/ltniMh",
"include_colab_link": true
},
"kernelspec": {
Expand Down Expand Up @@ -62,10 +62,14 @@
"#@markdown - Use `:` to specify inter-protein chainbreaks for **modeling complexes** (supports homo- and hetro-oligomers). For example **PI...SK:PI...SK** for a homodimer\n",
"ligand_input = 'N[C@@H](Cc1ccc(O)cc1)C(=O)O' #@param {type:\"string\"}\n",
"#@markdown - Use `:` to specify multiple ligands as smile strings\n",
"ligand_input_ccd = 'SAH' #@param {type:\"string\"}\n",
"#@markdown - Use `:` to specify multiple ligands as CCD codes (three-letter codes)\n",
"jobname = 'test' #@param {type:\"string\"}\n",
"\n",
"# Clean up the query sequence and jobname\n",
"query_sequence = \"\".join(query_sequence.split())\n",
"ligand_input = \"\".join(ligand_input.split())\n",
"ligand_input_ccd = \"\".join(ligand_input_ccd.split())\n",
"basejobname = \"\".join(jobname.split())\n",
"basejobname = re.sub(r'\\W+', '', basejobname)\n",
"jobname = add_hash(basejobname, query_sequence)\n",
Expand All @@ -88,7 +92,7 @@
"# Split sequences on chain breaks\n",
"protein_sequences = query_sequence.strip().split(':')\n",
"ligand_sequences = ligand_input.strip().split(':')\n",
"\n",
"ligand_sequences_ccd = ligand_input_ccd.strip().split(':')\n",
"# Initialize chain labels starting from 'A'\n",
"chain_labels = iter(ascii_uppercase)\n",
"\n",
Expand Down Expand Up @@ -123,6 +127,17 @@
" sequence = lig\n",
" fasta_entries.append((header, sequence))\n",
"\n",
"# Process ligand sequences (CCD codes)\n",
"for lig in ligand_sequences_ccd:\n",
" lig = lig.strip()\n",
" if not lig:\n",
" continue # Skip empty ligands\n",
" chain_label = next(chain_labels)\n",
" lig_type = 'ccd'\n",
" header = f\">{chain_label}|{lig_type}\"\n",
" sequence = lig.upper() # Ensure CCD codes are uppercase\n",
" fasta_entries.append((header, sequence))\n",
"\n",
"# Write the CSV file for ColabFold\n",
"queries_path = os.path.join(jobname, f\"{jobname}.csv\")\n",
"with open(queries_path, \"w\") as text_file:\n",
Expand Down

0 comments on commit 999a9e0

Please sign in to comment.